# Test: SAEF Module Classes

## About
Interactive tests of classes in `saef` module
- **Created:** 2022/12/21
- **Last update:** 2023/01/03

## Globals
Define global variables for testing purposes.

In [None]:
# path to the local source directory that is appended to sys.path below
g_saef_module_path = '../src'

# this inventory contains the required saef mms_id and object_tags fields
g_test_saef_inventory = './inventory/test_saef_updated_inventory.csv'
# this inventory contains msft files, in addition to the required mms_id and object_tags fields
g_test_saef_msft_inventory = './inventory/test_saef_msft_inventory.csv'
# demo.dataverse.org API key; left blank here and later passed to pyDataverse's NativeApi,
# so set it locally before running the `SAEFDataset` cells
g_demo_dataverse_api_key = ''

Add local modules path to Jupyter system path

In [None]:
import sys

# register the local module directory only once, so re-running this cell
# does not add duplicate entries to sys.path
if sys.path.count(g_saef_module_path) == 0:
    sys.path.append(g_saef_module_path)

## Modules

In [None]:
import lcd
import saef
import pprint

Documentation for `saef` module

In [None]:
# print the module-level docstring of `saef`
print(saef.__doc__)

### Test `SAEFProjectConfig`

`SAEFProjectConfig` documentation

In [None]:
# print the docstring of the class and of each method exercised in the next cell
for _label, _documented in (
    ('SAEFProjectConfig:: {}', saef.SAEFProjectConfig),
    ('SAEFProjectConfig::read_ini {}', saef.SAEFProjectConfig.read_ini),
    ('SAEFProjectConfig::initd {}', saef.SAEFProjectConfig.initd),
    ('SAEFProjectConfig::get_options {}', saef.SAEFProjectConfig.get_options),
    ('SAEFProjectConfig::get_sections {}', saef.SAEFProjectConfig.get_sections),
):
    print(_label.format(_documented.__doc__))

In [None]:
# create a saef project config instance
config = saef.SAEFProjectConfig()

# test an invalid ini file; a ValueError is the expected outcome here
try:
    config.read_ini('./config/test_saef_config_false.ini')
except ValueError:
    print('Error reading test_saef_config_false.ini. Missing required element.')

# test a valid ini file
print('SAEFProjectConfig::read_ini: {}'.format(config.read_ini('./config/test_saef_config_true.ini')))

# print initialization status
print('SAEFProjectConfig::initd: {}'.format(config.initd()))

# get configuration options
# the label is printed separately so pprint receives the options object itself;
# passing a pre-formatted string would only print its repr and ignore sort_dicts
options = config.get_options()
print('SAEFProjectConfig::get_options:')
pprint.pprint(options, sort_dicts=True)

# get configuration sections
sections = config.get_sections()
print('SAEFProjectConfig::get_sections:')
pprint.pprint(sections)

### Test `MSFTInventory`

`MSFTInventory` documentation

In [None]:
# print the docstring of the MSFTInventory class
print(
    'MSFTInventory:: {}'.format(saef.MSFTInventory.__doc__)
)

Create the `MSFTInventory` instance

In [None]:
# print the docstrings of the two methods exercised in the next cell
for _label, _method in (
    ('MSFTInventory::from_file {}', saef.MSFTInventory.from_file),
    ('MSFTInventory::initd {}', saef.MSFTInventory.initd),
):
    print(_label.format(_method.__doc__))

In [None]:
# build an empty msft inventory
msft = saef.MSFTInventory()

# load the inventory from the test csv file and report what from_file returned
msft_from_file_result = msft.from_file(g_test_saef_msft_inventory)
print ('MSFTInventory::from_file: {}'.format(msft_from_file_result))

# report whether the inventory considers itself initialized
msft_initd_result = msft.initd()
print ('MSFTInventory::initd: {}'.format(msft_initd_result))

Get the `MSFTInventory` image files

In [None]:
# print the docstring of MSFTInventory.get_msft_img_files
print(
    'MSFTInventory::get_msft_img_files: {}'.format(
        saef.MSFTInventory.get_msft_img_files.__doc__
    )
)

In [None]:
# fetch the msft image files, report how many there are, then render them
image_files_df = msft.get_msft_img_files()
num_msft_images = len(image_files_df)
print('Num images: {}'.format(num_msft_images))
display(image_files_df)

Get `MSFTInventory` JSON files

In [None]:
# print the docstring of MSFTInventory.get_msft_json_files
print(
    'MSFTInventory::get_msft_json_files {}'.format(
        saef.MSFTInventory.get_msft_json_files.__doc__
    )
)

In [None]:
# fetch the msft json files, report how many there are, then render them
json_files_df = msft.get_msft_json_files()
num_msft_json_files = len(json_files_df)
print('Num json files: {}'.format(num_msft_json_files))
display(json_files_df)

Get `MSFTInventory` txt files

In [None]:
# print the docstring of MSFTInventory.get_msft_txt_files
print(
    'MSFTInventory::get_msft_txt_files {}'.format(
        saef.MSFTInventory.get_msft_txt_files.__doc__
    )
)

In [None]:
# fetch the msft txt files, report how many there are, then render them
txt_files_df = msft.get_msft_txt_files()
num_msft_txt_files = len(txt_files_df)
print('Num txt files: {}'.format(num_msft_txt_files))
display(txt_files_df)

Get all `MSFTInventory` files. Note: `METS` files are not returned.

In [None]:
# print the docstring of MSFTInventory.get_files
print(
    'MSFTInventory::get_files {}'.format(saef.MSFTInventory.get_files.__doc__)
)

In [None]:
# fetch every file the msft inventory returns, report the total, then render them
files_df = msft.get_files()
num_msft_files = len(files_df)
print('Num total files: {}'.format(num_msft_files))
display(files_df)

### Test `SAEFDigitalObject`

`SAEFDigitalObject` documentation

In [None]:
# print the docstring of the SAEFDigitalObject class
print(
    'SAEFDigitalObject:: {}'.format(saef.SAEFDigitalObject.__doc__)
)

In [None]:
# create file inventory instance
fi = lcd.FileInventory()

# read inventory from a csv file
print ('FileInventory::from_file: {}'.format(fi.from_file(g_test_saef_inventory)))

# get the second owner-supplied name in the inventory
# note: the first name does not have associated object_tags
osn = fi.get_owner_supplied_names()[1]

# get the files associated with the osn
files_df = fi.get_files('object_osn',osn)

# create saef digital object instance
saefdo = saef.SAEFDigitalObject()

# read saef digital object from dataframe
# the label names the method actually called (from_dataframe, not from_file)
print ('SAEFDigitalObject::from_dataframe: {}'.format(saefdo.from_dataframe(files_df)))

# is saef digital object initialized?
print ('SAEFDigitalObject::initd: {}'.format(saefdo.initd()))

Get the `SAEFDigitalObject` METS file

In [None]:
# print the docstring of SAEFDigitalObject.get_mets_file
print(
    'SAEFDigitalObject::get_mets_file {}'.format(
        saef.SAEFDigitalObject.get_mets_file.__doc__
    )
)

In [None]:
# fetch the METS file entry of the digital object and render it
mets_df = saefdo.get_mets_file()
display(mets_df)

Get the `SAEFDigitalObject` image files

In [None]:
# print the docstring of SAEFDigitalObject.get_image_files
print(
    'SAEFDigitalObject::get_image_files {}'.format(
        saef.SAEFDigitalObject.get_image_files.__doc__
    )
)

In [None]:
# fetch the digital object's image files, report how many there are, then render them
images_df = saefdo.get_image_files()
num_image_files = len(images_df)
print('Num image files: {}'.format(num_image_files))
display(images_df)

Get the `SAEFDigitalObject` `MSFT` image files

In [None]:
# print the docstring of SAEFDigitalObject.get_msft_img_files
print(
    'SAEFDigitalObject::get_msft_img_files {}'.format(
        saef.SAEFDigitalObject.get_msft_img_files.__doc__
    )
)

In [None]:
# fetch the digital object's MSFT image files
msft_images_df = saefdo.get_msft_img_files()

# render the result when there is something to show; otherwise report a zero count
if not msft_images_df.empty:
    display(msft_images_df)
else:
    print('Num MSFT image files: {}'.format(0))

Get the `SAEFDigitalObject` `MSFT` JSON files

In [None]:
# print the docstring of SAEFDigitalObject.get_msft_json_files
print(
    'SAEFDigitalObject::get_msft_json_files {}'.format(
        saef.SAEFDigitalObject.get_msft_json_files.__doc__
    )
)

In [None]:
# fetch the digital object's MSFT JSON files
msft_json_df = saefdo.get_msft_json_files()

# render the result when there is something to show; otherwise report a zero count
if not msft_json_df.empty:
    display(msft_json_df)
else:
    print('Num MSFT JSON files: {}'.format(0))

Get the `SAEFDigitalObject` `MSFT` TXT files

In [None]:
# print the docstring of SAEFDigitalObject.get_msft_txt_files
print(
    'SAEFDigitalObject::get_msft_txt_files {}'.format(
        saef.SAEFDigitalObject.get_msft_txt_files.__doc__
    )
)

In [None]:
# fetch the digital object's MSFT TXT files
msft_txt_df = saefdo.get_msft_txt_files()

# render the result when there is something to show; otherwise report a zero count
if not msft_txt_df.empty:
    display(msft_txt_df)
else:
    print('Num MSFT TXT files: {}'.format(0))

Get `SAEFDigitalObject` OCR files
- Note: this instance does not have any OCR files

In [None]:
# print the docstring of SAEFDigitalObject.get_ocr_files
print(
    'SAEFDigitalObject::get_ocr_files {}'.format(
        saef.SAEFDigitalObject.get_ocr_files.__doc__
    )
)

In [None]:
# fetch the digital object's OCR files, report how many there are, then render them
ocr_df = saefdo.get_ocr_files()
num_ocr_files = len(ocr_df)
print('Num OCR files: {}'.format(num_ocr_files))
display(ocr_df)

Get `SAEFDigitalObject` metadata

In [None]:
# print the docstring of SAEFDigitalObject.get_metadata
print(
    'SAEFDigitalObject::get_metadata {}'.format(
        saef.SAEFDigitalObject.get_metadata.__doc__
    )
)

In [None]:
# fetch the digital object's metadata, report its key count, then pretty-print it
metadata = saefdo.get_metadata()
metadata_keys = metadata.keys()
print('Num keys: {}'.format(len(metadata_keys)))
pprint.pprint(metadata)

Get `SAEFDigitalObject` files

In [None]:
# print the docstring of SAEFDigitalObject.get_files
print(
    'SAEFDigitalObject::get_files {}'.format(
        saef.SAEFDigitalObject.get_files.__doc__
    )
)

In [None]:
# fetch all files of the digital object, report the total, then render them
# note: this rebinds files_df, which earlier cells also assign
files_df = saefdo.get_files()
num_digital_object_files = len(files_df)
print('Total num files: {}'.format(num_digital_object_files))
display(files_df)

Get `SAEFDigitalObject` PDS relationships

In [None]:
# print the docstring of SAEFDigitalObject.get_pds_relationships
print(
    'SAEFDigitalObject::get_pds_relationships {}'.format(
        saef.SAEFDigitalObject.get_pds_relationships.__doc__
    )
)

In [None]:
# fetch the PDS relationships and report how many entries came back
pds_relationships_df = saefdo.get_pds_relationships()
num_pds_relationships = len(pds_relationships_df)
print('Num files: {}'.format(num_pds_relationships))

# only render and write the relationships when at least one exists
if num_pds_relationships > 0:
    display(pds_relationships_df)
    # the output file name comes from the 'digital_object' section of the config options
    digital_object_options = options.get('digital_object')
    name = digital_object_options.get('digital_object_pds_relationships')
    path = './relationships/' + name
    saefdo.write_relationships(path, 'pds')

Get `SAEFDigitalObject` MSFT relationships

In [None]:
# print the docstring of SAEFDigitalObject.get_msft_relationships
print(
    'SAEFDigitalObject::get_msft_relationships {}'.format(
        saef.SAEFDigitalObject.get_msft_relationships.__doc__
    )
)

In [None]:
# fetch the MSFT relationships and report how many entries came back
msft_relationships_df = saefdo.get_msft_relationships()
num_msft_relationships = len(msft_relationships_df)
print('Num files: {}'.format(num_msft_relationships))

# only render and write the relationships when at least one exists
if num_msft_relationships > 0:
    display(msft_relationships_df)
    # the output file name comes from the 'digital_object' section of the config options
    digital_object_options = options.get('digital_object')
    name = digital_object_options.get('digital_object_msft_relationships')
    path = './relationships/' + name
    saefdo.write_relationships(path, 'msft')

Get `SAEFDigitalObject` OCR relationships
- Note: There are no OCR files in this digital object, therefore, there are no OCR relationships

In [None]:
# print the docstring of SAEFDigitalObject.get_ocr_relationships
print(
    'SAEFDigitalObject::get_ocr_relationships {}'.format(
        saef.SAEFDigitalObject.get_ocr_relationships.__doc__
    )
)

In [None]:
# fetch the OCR relationships and report how many entries came back
ocr_relationships_df = saefdo.get_ocr_relationships()
num_ocr_relationships = len(ocr_relationships_df)
print('Num OCR relationship files: {}'.format(num_ocr_relationships))

# only render and write the relationships when at least one exists
if num_ocr_relationships > 0:
    display(ocr_relationships_df)
    # the output file name comes from the 'digital_object' section of the config options
    digital_object_options = options.get('digital_object')
    name = digital_object_options.get('digital_object_ocr_relationships')
    path = './relationships/' + name
    saefdo.write_relationships(path, 'ocr')

### Test `SAEFDatasetMetadata`
`SAEFDatasetMetadata` instances are generally not instantiated on their own; rather, they are created when `SAEFDataset` instances are initialized.

In [None]:
# print the docstring of the SAEFDatasetMetadata class
print(
    'SAEFDatasetMetadata:: {}'.format(saef.SAEFDatasetMetadata.__doc__)
)

Create a `SAEFDatasetMetadata` instance from a metadata dictionary

In [None]:
# print the docstrings of the two SAEFDatasetMetadata methods named in the labels
for _label, _method in (
    ('SAEFDatasetMetadata::from_dict {}', saef.SAEFDatasetMetadata.from_dict),
    ('SAEFDatasetMetadata::initd {}', saef.SAEFDatasetMetadata.initd),
):
    print(_label.format(_method.__doc__))

In [None]:
# inventory of files held by the current saef digital object
inventory_df = saefdo.get_files()

# the mets file is selected by its file format
is_mets_row = inventory_df['file_format'] == 'Extensible Markup Language'
mets = inventory_df.loc[is_mets_row]

# read the metadata values from the first matching row
index = list(mets.index)[0]
record_id = mets.at[index,'mms_id']
object_osn = mets.at[index,'object_osn']
object_title = mets.at[index,'object_title']
object_tags = mets.at[index,'object_tags']
object_urn = mets.at[index,'object_delivery_urn']

# refresh the config options
options = config.get_options()

# relationship directory and file names come from the 'digital_object' section
digital_object_options = options.get('digital_object')
directory = digital_object_options.get('digital_object_relationships_directory')
pds_file = digital_object_options.get('digital_object_pds_relationships')
msft_file = digital_object_options.get('digital_object_msft_relationships')
ocr_file = digital_object_options.get('digital_object_ocr_relationships')

# every relationship file name shares the '<directory>/<object_osn>_' prefix
relationships_prefix = directory + '/' + object_osn + '_'
pds_filename = relationships_prefix + pds_file
msft_filename = relationships_prefix + msft_file
ocr_filename = relationships_prefix + ocr_file

dataset_options = options.get('dataset')
dataverse_options = options.get('dataverse')

# assemble the saef metadata dictionary (key order matches the original assignments)
md = {
    'pds_filename': pds_filename,
    'msft_filename': msft_filename,
    'ocr_filename': ocr_filename,
    'title': object_osn,
    'author_name': dataset_options.get('dataset_author'),
    'author_affiliation': dataset_options.get('dataset_author_affiliation'),
    'contact_name': dataset_options.get('dataset_contact_name'),
    'contact_affiliation': dataset_options.get('dataset_contact_affiliation'),
    'contact_email': dataset_options.get('dataset_contact_email'),
    'subject': dataset_options.get('dataset_subject'),
    'description': object_title,
    'urn': object_urn,
    'url': 'https://nrs.harvard.edu/' + object_urn,
    'record_id': record_id,
    'object_tags': object_tags,
    'dataverse_collection_url': dataverse_options.get('dataverse_collection_url'),
    'dataverse_installation_url': dataverse_options.get('dataverse_installation_url'),
    'dataverse_api_logfile': dataverse_options.get('dataverse_api_logfile'),
}

# create the saef dataset metadata instance and initialize it from the dictionary
saefdmd = saef.SAEFDatasetMetadata()
from_dict_result = saefdmd.from_dict(md)
print ('SAEFDatasetMetadata::from_dict: {}'.format(from_dict_result))
pprint.pprint(saefdmd.metadata)

### Test `SAEFDataset`

`SAEFDataset` documentation

In [None]:
# print the docstring of the SAEFDataset class
print(
    'SAEFDataset:: {}'.format(saef.SAEFDataset.__doc__)
)

Initialize `pyDataverse` API
- The `pyDataverse` API adapter is used to push `SAEF` datasets to the dataverse installation defined in the project configuration (`config`)

In [None]:
# pyDataverse provides the native API adapter
from pyDataverse.api import NativeApi

# read the collection and installation urls from the 'dataverse' config section
dataverse_options = options.get('dataverse')
dataverse_collection_url = dataverse_options.get('dataverse_collection_url')
dataverse_installation_url = dataverse_options.get('dataverse_installation_url')

# build the adapter with the locally set dataverse api key
api = NativeApi(
    dataverse_installation_url,
    g_demo_dataverse_api_key,
)
print('{}'.format(api))

Create a `SAEFDataset`

In [None]:
# build an empty saef dataset and show its printed representation
dataset = saef.SAEFDataset()
print (
    'SAEFDataset:: {}'.format(dataset)
)

1. Initialize the `SAEFDataset` instance

In [None]:
# print the docstring of SAEFDataset.initialize
print(
    'SAEFDataset::initialize {}'.format(saef.SAEFDataset.initialize.__doc__)
)

In [None]:
# default to False so a failure inside initialize() is still reported as not initialized
status = False
try:
    # initialize dataset with a saef digital object instance
    status = dataset.initialize(saefdo, config)
except Exception as err:
    # catch Exception instead of using a bare except, so KeyboardInterrupt and
    # SystemExit still propagate and the cause of the failure is reported
    print ('SAEFDataset::initialize: {}'.format(status))
    print('Error: Failed to initialize the dataset')
    print('Reason: {!r}'.format(err))
else:
    print ('SAEFDataset::initialize: {}'.format(status))
    print('Success: Dataset was initialized: {}'.format(status))

2. Create the actual Dataverse dataset on the installation: demo.dataverse.org.
- **NB**: Remember to turn off the SAEF metadata block on demo.dataverse.org/saef before creating the dataset. Otherwise, the API call will fail.

In [None]:
# create the dataset through the API adapter and report the returned status
status = dataset.create(api)
print (
    'SAEFDataset::create: {}'.format(status)
)

3. Upload the dataset's files using the `direct_upload_datafiles` method

In [None]:
# upload the dataset's data files through the API adapter and report the returned status
status = dataset.direct_upload_datafiles(api)
print (
    'SAEFDataset::direct_upload_datafiles: {}'.format(status)
)

4. Upload the dataset's relationship files using the `direct_upload_relationships` method

In [None]:
# upload the dataset's relationship files through the API adapter and report the returned status
status = dataset.direct_upload_relationships(api)
print (
    'SAEFDataset::direct_upload_relationships: {}'.format(status)
)

5. Apply SAEF custom metadata to the dataset
- Note: First, make sure to turn on the SAEF custom metadata block on the demo dataverse installation
- Note: 2023/01/03: Currently, the demo installation appears to be in a strange state where two conflicting versions of the SAEF custom metadata block appear to be active. That situation needs to be corrected before datasets can be updated with the SAEF custom metadata.

In [None]:
import json

import requests

# get the saef custom metadata block
md = dataset.get_dataset_metadata()
saef_md = md.get('dataset').get('customSAEF')
pprint.pprint(saef_md)

# get the dataset pid
dataset_doi = dataset.get_dataset_pid()

# get the base url
base_url = api.base_url
# get the api token
api_token = api.api_token
# create the headers
headers = {'X-Dataverse-key': api_token, 'Content-Type' : 'application/json'}

# create the request url
request_url = '{}/api/datasets/:persistentId/editMetadata/?persistentId={}&replace=true'.format(base_url, dataset_doi)

# the headers declare a JSON body, so serialize the metadata explicitly;
# requests would form-encode a dict passed through `data`
# a str/bytes value is assumed to be serialized already and is sent unchanged
if isinstance(saef_md, (str, bytes)):
    payload = saef_md
else:
    payload = json.dumps(saef_md)

# call the requests library using the request url
response = requests.put(request_url, headers=headers, data=payload)
pprint.pprint(response)
status = response.json().get('status')
print('SAEFDataset::update dataset metadata: {}'.format(status))

### Clean up `demo.dataverse.org` collection
Delete the dataset from demo.dataverse.org/saef

In [None]:
# look up the persistent id of the dataset that is about to be removed
pid = dataset.get_dataset_pid()
print(
    'SAEFDataset: pid = {}'.format(pid)
)

# destroy the dataset via the API adapter and report the status field of the JSON reply
response = api.destroy_dataset(pid, is_pid=True, auth=True)
response_body = response.json()
status = response_body.get('status')
print(
    'SAEFDataset::destroy dataset: {}'.format(status)
)

**End document.**