# Test: QC Workflow Functions

## About
- Interactive tests of the QC workflow functions in the local `util` module
- **Created:** 2023/01/11
- **Updated:** 2023/01/12

## Globals

In [13]:
# Relative path to the directory holding the local `util` code module
# (appended to sys.path in a later cell so that `import util` resolves).
g_util_module_path = '../util'
# Directory holding the QC test data files. The file paths below are built
# from it so the dataset location only has to be changed in one place.
g_qc_data_directory = '../data/trade_statistics'
# Test METS file (XML).
g_qc_mets_file = '{}/trade_statistics.xml'.format(g_qc_data_directory)
# Test IIIF manifest file (JSON).
g_qc_iiif_json_file = '{}/trade_statistics_iiif_manifest.json'.format(g_qc_data_directory)


Add the local `util` module directory to Python's module search path (`sys.path`) so that `import util` resolves

In [2]:
import sys
# Register the local util directory on the module search path, but only when
# it is not already listed, so re-running this cell does not add duplicates.
if sys.path.count(g_util_module_path) == 0:
    sys.path.append(g_util_module_path)

## Modules

In [3]:
import pandas as pd
import pprint
import util # local module

## Metadata Analysis

### Load and Process IIIF Manifest (`JSON` format)

In [None]:
# Load the IIIF manifest file at g_qc_iiif_json_file via the local util module.
# NOTE(review): the result is presumably a pandas DataFrame (going by the
# helper's name) — confirm in util.
iiif_df = util.iiif_to_dataframe(g_qc_iiif_json_file)
# Report the number of entries returned, then render the result itself.
print('Num files: {}'.format(len(iiif_df)))
display(iiif_df)

### Load and Process METS File (`XML` format)

In [None]:
# Load the METS file at g_qc_mets_file via the local util module.
# NOTE(review): the result is presumably a pandas DataFrame (going by the
# helper's name) — confirm in util.
mets_df = util.mets_to_dataframe(g_qc_mets_file)
# Report the number of entries returned, then render the result itself.
print('Num files: {}'.format(len(mets_df)))
display(mets_df)

### Create Digital Object and Vendor File Inventories

#### Create digital object inventory
- Based upon IIIF manifest

In [None]:
# Create the digital object inventory based upon iiif_df (the IIIF manifest
# data loaded in an earlier cell).
do_inventory_df = util.create_digital_object_inventory(iiif_df)
# Report the number of inventory entries, then render the inventory itself.
print('Num files: {}'.format(len(do_inventory_df)))
display(do_inventory_df)

#### Create vendor inventory
- Based upon the METS file output. Assumes that vendor filenames are derived from the DRS id.

In [None]:
# Create the vendor inventory from mets_df (the METS data loaded in an earlier
# cell) and the data file directory g_qc_data_directory.
# NOTE(review): how the directory is used is not visible here — confirm in util.
vendor_inventory_df = util.create_vendor_inventory(mets_df, g_qc_data_directory)
# Report the number of inventory entries, then render the inventory itself.
print('Num files: {}'.format(len(vendor_inventory_df)))
display(vendor_inventory_df)

### Compare Digital Object Files to Vendor Files

#### Check for missing DRS ids

In [None]:
# Re-import the local util module so that edits made to its source since the
# first import are picked up without restarting the kernel.
import importlib
importlib.reload(util)

# Compare the digital object inventory against the vendor inventory.
missing_drs_ids_df = util.find_missing_drs_ids(do_inventory_df, vendor_inventory_df)
print('Num missing DRS ids: {}'.format(len(missing_drs_ids_df)))
# Show the result itself, as the other inventory cells in this notebook do, so
# the missing ids can be inspected rather than just counted.
display(missing_drs_ids_df)

**End document.**