# Create SAEF Inventories

## About
Script to create inventories of the SAEF datasets and datafiles hosted on the Harvard Dataverse Repository.
- **Created:** 2023/02/06
- **Last update:** 2023/02/06

## Globals
Define global variables.

In [None]:
import os

# path to the local SAEF modules (appended to sys.path in a later cell)
g_saef_module_path = '../src'
# output file for the json collection contents
g_saef_json_collection_inventory = './saef_collection.json'
# output file for the dataset inventory (csv)
g_saef_dataset_inventory = './saef_hdv_dataset_inventory.csv'
# output file for the datafiles inventory (csv)
g_saef_datafiles_inventory = './saef_hdv_datafiles_inventory.csv'
# dataverse installation url
g_dataverse_installation_url = 'https://dataverse.harvard.edu'
# collection identifier handed to SAEFCollection.initialize
# NOTE(review): the name says "url" but the value looks like a collection alias - confirm
g_dataverse_collection_url = 'SAEF'
# dataverse.harvard.edu API key
# taken from the DATAVERSE_API_KEY environment variable so the secret does not
# have to be stored in the notebook; falls back to an empty string when unset
g_dataverse_api_key = os.environ.get('DATAVERSE_API_KEY', '')

Add the local modules path to the Python module search path (`sys.path`) used by this Jupyter kernel.

In [None]:
import sys

# make the local SAEF modules importable; the path is only added when it is
# not already present, so re-running this cell does not create duplicates
if sys.path.count(g_saef_module_path) == 0:
    sys.path.append(g_saef_module_path)

## Modules

In [None]:
import saef
import collection
import pprint
import json

### Initialize pyDataverse API
- **Description:** Initialize the `pyDataverse` API adapter

In [None]:
# pyDataverse provides the adapter class used to talk to the installation
from pyDataverse.api import NativeApi

# build the adapter from the installation url and the API key defined in the
# globals cell
api = NativeApi(
    g_dataverse_installation_url,
    g_dataverse_api_key,
)

# display the adapter that was created
print(f'{api}')

### Create and initialize the `SAEFCollection`

In [None]:
# print documentation
# the class docstring is labelled with the class name only; it was previously
# labelled "SAEFCollection::initialize", identical to the method docstring below
print('SAEFCollection {}'.format(collection.SAEFCollection.__doc__))
print('SAEFCollection::initialize {}'.format(collection.SAEFCollection.initialize.__doc__))

# create collection
saef_collection = collection.SAEFCollection()

# initialize the collection with the API adapter and the collection identifier
status = saef_collection.initialize(api, g_dataverse_collection_url)
# show whatever initialize returned so the outcome is visible in the notebook
print('SAEFCollection::initialize: {}'.format(status))

### Save the SAEF collection contents

In [None]:
# print documentation
print('SAEFCollection::get_collection_contents {}'.format(collection.SAEFCollection.get_collection_contents.__doc__))

# get the json contents of the collection
# fetched before the output file is opened so a failure here does not leave a
# truncated, empty file behind
contents = saef_collection.get_collection_contents()

# create or open the file and write the json contents; the context manager
# closes the file even if serialization or writing raises
with open(g_saef_json_collection_inventory, 'w') as fp:
    json.dump(contents, fp)

### Get Dataset Inventory

In [None]:
# show the docstring of the method that builds the dataset inventory
print(
    'SAEFCollection::create_dataset_inventory '
    f'{collection.SAEFCollection.create_dataset_inventory.__doc__}'
)

# build the dataset inventory from the initialized collection
datasets = saef_collection.create_dataset_inventory()

# store the inventory as csv: header row included, index column omitted
datasets.to_csv(
    g_saef_dataset_inventory,
    header=True,
    index=False,
)

### Get SAEF Datafile Inventory

In [None]:
# show the docstring of the method that builds the datafile inventory
print(
    'SAEFCollection::create_datafile_inventory '
    f'{collection.SAEFCollection.create_datafile_inventory.__doc__}'
)

# build the datafiles inventory from the initialized collection
datafiles = saef_collection.create_datafile_inventory()

# store the inventory as csv: header row included, index column omitted
datafiles.to_csv(
    g_saef_datafiles_inventory,
    header=True,
    index=False,
)

**End document.**