# Test: QC Workflow Functions

## About
- Interactive tests of QC workflow
- **Created:** 2023/01/11
- **Updated:** 2023/01/14

## Globals

In [None]:
# --- digital object under test ---
# title of the digital object
g_qc_digital_object_title = 'trade_statistics'
# directory of the data files (left as an empty string for this test)
g_qc_data_directory = ''

# --- test input files ---
# IIIF manifest (JSON) for the test object
g_qc_iiif_json_file = '../data/trade_statistics/trade_statistics_iiif_manifest.json'
# METS (XML) file for the test object
g_qc_mets_file = '../data/trade_statistics/trade_statistics.xml'

# --- local code ---
# directory that holds the local util code module
g_util_module_path = '../util'


Add the local `util` module path to Python's module search path (`sys.path`)

In [None]:
import sys
# Append the local util directory to the module search path.
# The membership test keeps the same entry from being appended twice
# (e.g. when this cell is executed more than once).
if g_util_module_path not in sys.path:
    sys.path.append(g_util_module_path)

## Modules

In [None]:
import pandas as pd
import pprint
import util # local module

## Metadata Analysis

### Download and Process IIIF Manifest (`JSON` format)

In [None]:
# show the docstring of the function under test
print(util.iiif_to_dataframe.__doc__)

# convert the test IIIF manifest, then report its length and render the result
iiif_df = util.iiif_to_dataframe(g_qc_iiif_json_file)
print('Num files: {}'.format(len(iiif_df)))
display(iiif_df)

### Download and Process METS File (`XML` format)

In [None]:
# show the docstring of the function under test
print(util.mets_to_dataframe.__doc__)

# convert the test METS file, then report its length and render the result
mets_df = util.mets_to_dataframe(g_qc_mets_file)
print('Num files: {}'.format(len(mets_df)))
display(mets_df)

### Create Digital Object and Vendor File Inventories

#### Create digital object inventory
- Based upon IIIF manifest

In [None]:
# show the docstring of the function under test
print(util.create_digital_object_inventory.__doc__)

# build the digital object inventory from iiif_df, then report its length
# and render the result
do_inventory_df = util.create_digital_object_inventory(iiif_df)
print('Num files: {}'.format(len(do_inventory_df)))
display(do_inventory_df)

#### Create vendor inventory
- Based upon the METS file output. Assumes that vendor filenames are derived from the DRS ids.

In [None]:
# show the docstring of the function under test
print(util.create_vendor_inventory.__doc__)

# build the vendor inventory from mets_df and the configured data directory,
# then report its length and render the result
vendor_inventory_df = util.create_vendor_inventory(
    mets_df,
    g_qc_data_directory,
)
print('Num files: {}'.format(len(vendor_inventory_df)))
display(vendor_inventory_df)

### Create Transcription Inventories

#### Create `csv` transcription inventory

In [None]:
# show the docstring of the function under test
print(util.extract_transcription_inventory.__doc__)

# extract the 'csv' transcription inventory from the vendor inventory
csv_inventory_df = util.extract_transcription_inventory(
    vendor_inventory_df,
    ttype='csv',
    path=False,
)
display(csv_inventory_df)

Generate `csv` transcription inventory report

In [None]:
# show the docstring of the function under test
print(util.generate_transcription_report.__doc__)

# render the report generated from the csv transcription inventory
display(util.generate_transcription_report(csv_inventory_df))

#### Create `txt` transcription inventory

In [None]:
# show the docstring of the function under test
print(util.extract_transcription_inventory.__doc__)

# extract the 'txt' transcription inventory from the vendor inventory
txt_inventory_df = util.extract_transcription_inventory(
    vendor_inventory_df,
    ttype='txt',
    path=False,
)
display(txt_inventory_df)

Generate `txt` transcription inventory report

In [None]:

# show the docstring of the function under test
print(util.generate_transcription_report.__doc__)

# render the report generated from the txt transcription inventory
display(util.generate_transcription_report(txt_inventory_df))

### Compare Digital Object Files to Vendor Files

#### Check for missing DRS ids

In [None]:
# show the docstring of the function under test
print(util.find_missing_drs_ids.__doc__)

# 'drs_id' column of each inventory
do_drs_ids = do_inventory_df['drs_id']
vendor_drs_ids = vendor_inventory_df['drs_id']

# drs ids in the digital object that are missing from the vendor inventory
missing_drs_ids_df = util.find_missing_drs_ids(do_drs_ids, vendor_drs_ids)
print('Num missing DRS ids: {}'.format(len(missing_drs_ids_df)))
display(missing_drs_ids_df)

# write the csv (named after the digital object title) only when the result
# is non-empty
if len(missing_drs_ids_df):
    missing_drs_ids_df.to_csv(g_qc_digital_object_title + '.csv')

**End document.**