# Test: Util Module Functions

## About
- Interactive tests of util module functions
- **Created**: 2023/01/10
- **Last update**: 2023/01/11

### Globals

In [10]:
# `os` is imported here (not in the Modules cell) because this cell runs first
import os

# path to local util code module
g_util_module_path = '../util'
# test mets file
g_test_mets_file = '../data/trade_statistics/trade_statistics.xml'
# test iiif json manifest file
g_test_iiif_json_file = '../data/trade_statistics/trade_statistics_iiif_manifest.json'

# OSF settings are read from environment variables so that secrets are never
# typed into (and saved with) the notebook. Each one falls back to '' when the
# variable is unset, which is the placeholder value these globals had before.
# osf project id
g_test_osf_project_id = os.environ.get('OSF_PROJECT', '')
# osf username
g_test_osf_username = os.environ.get('OSF_USERNAME', '')
# osf password
g_test_osf_password = os.environ.get('OSF_PASSWORD', '')
# osf api token
g_test_osf_api_token = os.environ.get('OSF_TOKEN', '')

Add the local `util` module directory to Python's module search path (`sys.path`) so that it can be imported below

In [2]:
import sys

# register the local util directory on the module search path, but only once,
# so that re-running this cell does not add duplicate entries
already_registered = g_util_module_path in sys.path
if not already_registered:
    sys.path.append(g_util_module_path)

### Modules


In [4]:
import pandas as pd
import pprint
import util # local module

### Test `util.mets_to_dataframe`

In [11]:
# show the docstring of the function under test
print(util.mets_to_dataframe.__doc__)

# run the function on the test METS file and keep the result for later cells
mets_df = util.mets_to_dataframe(g_test_mets_file)

display(mets_df)


    Read and extract information about files from an XML METS file.

    Parameter
    ---------
    filename : str
        Full path to METS file.

    Return
    ------
    DataFrame

    


Unnamed: 0,@id,file_type,@mimetype,mets_url,filename
0,img_44319541,image,image/jpeg,image/44319541.jpg,44319541.jpg
1,img_44319542,image,image/jpeg,image/44319542.jpg,44319542.jpg
2,img_44319543,image,image/jpeg,image/44319543.jpg,44319543.jpg
3,img_44319544,image,image/jpeg,image/44319544.jpg,44319544.jpg
4,img_44319545,image,image/jpeg,image/44319545.jpg,44319545.jpg
...,...,...,...,...,...
1581,csv_44319948_a,csv,text/csv,csv/44319948_a.csv,44319948_a.csv
1582,csv_44319948_b,csv,text/csv,csv/44319948_b.csv,44319948_b.csv
1583,csv_44319949,csv,text/csv,csv/44319949.csv,44319949.csv
1584,csv_44319950_a,csv,text/csv,csv/44319950_a.csv,44319950_a.csv


### Test `util.iiif_to_dataframe`

In [13]:
# show the docstring of the function under test
print(util.iiif_to_dataframe.__doc__)

# run the function on the test IIIF JSON manifest file
iiif_df = util.iiif_to_dataframe(g_test_iiif_json_file)

display(iiif_df)


    Given a IIIF JSON manifest, save some of its values to a DataFrame

    Parameter
    ---------
    filename : str
        Full path to IIIF JSON manifest file.

    Return
    ------
    list
        List of dict of metadata about contents of IIIF manifest
    


Unnamed: 0,@id,format,drs_id
0,https://ids.lib.harvard.edu/ids/iiif/44319541/...,image/jpeg,44319541
1,https://ids.lib.harvard.edu/ids/iiif/44319542/...,image/jpeg,44319542
2,https://ids.lib.harvard.edu/ids/iiif/44319543/...,image/jpeg,44319543
3,https://ids.lib.harvard.edu/ids/iiif/44319544/...,image/jpeg,44319544
4,https://ids.lib.harvard.edu/ids/iiif/44319545/...,image/jpeg,44319545
...,...,...,...
407,https://ids.lib.harvard.edu/ids/iiif/44319948/...,image/jpeg,44319948
408,https://ids.lib.harvard.edu/ids/iiif/44319949/...,image/jpeg,44319949
409,https://ids.lib.harvard.edu/ids/iiif/44319950/...,image/jpeg,44319950
410,https://ids.lib.harvard.edu/ids/iiif/44319951/...,image/jpeg,44319951


### Test `util.map_csv_to_image`

In [44]:
# print function documentation
#print('{}'.format(util.map_csv_to_image.__doc__))

# Split the METS rows by file extension.
# A literal, case-insensitive suffix test is used instead of
# `str.contains('.csv')`: `contains` interprets its pattern as a regular
# expression, so '.' would match any character and the match could occur
# anywhere in the file name. `na=False` drops rows with a missing filename.
csv_df = mets_df.loc[mets_df['filename'].str.lower().str.endswith('.csv', na=False)]
jpg_df = mets_df.loc[mets_df['filename'].str.lower().str.endswith('.jpg', na=False)]

# plain lists of file names are what is passed to the function under test
csv_list = list(csv_df['filename'])
image_list = list(jpg_df['filename'])

mappings = util.map_csv_to_image(image_list, csv_list)

pprint.pprint(mappings)

[{'44319547_iii-iv.csv': '44319547.jpg'},
 {'44319552_a.csv': '44319552.jpg'},
 {'44319552_b.csv': '44319552.jpg'},
 {'44319553_a.csv': '44319553.jpg'},
 {'44319553_b.csv': '44319553.jpg'},
 {'44319553_c.csv': '44319553.jpg'},
 {'44319554_a.csv': '44319554.jpg'},
 {'44319554_b.csv': '44319554.jpg'},
 {'44319555_a.csv': '44319555.jpg'},
 {'44319555_b.csv': '44319555.jpg'},
 {'44319555_c.csv': '44319555.jpg'},
 {'44319555_d.csv': '44319555.jpg'},
 {'44319555_e.csv': '44319555.jpg'},
 {'44319558.csv': '44319558.jpg'},
 {'44319561.csv': '44319561.jpg'},
 {'44319563.csv': '44319563.jpg'},
 {'44319564.csv': '44319564.jpg'},
 {'44319565.csv': '44319565.jpg'},
 {'44319566.csv': '44319566.jpg'},
 {'44319567.csv': '44319567.jpg'},
 {'44319570_a.csv': '44319570.jpg'},
 {'44319570_b.csv': '44319570.jpg'},
 {'44319571_a.csv': '44319571.jpg'},
 {'44319571_b.csv': '44319571.jpg'},
 {'44319572_18-19.csv': '44319572.jpg'},
 {'44319576_a.csv': '44319576.jpg'},
 {'44319576_b.csv': '44319576.jpg'},
 {'443

### Test `util.osf_get_project_files`
Note: This function should be refactored to handle multiple file folders within a single OSF project

In [None]:
# print function documentation
print('{}'.format(util.osf_get_project_files.__doc__))

# Call the function under test with the OSF globals.
# NOTE: the call is intentionally left disabled (presumably because the OSF
# globals are empty placeholders -- confirm before enabling). It is kept as
# comments rather than a string literal so the cell does not echo it as output.
# Uncomment the lines below to run the test.
#osf_df = util.osf_get_project_files(g_test_osf_project_id,
#                                    g_test_osf_username,
#                                    g_test_osf_password,
#                                    g_test_osf_api_token)

#display(osf_df)