# Initial setup

Import modules

In [None]:
import csv
import sys
import datetime

Set variables for this run

In [None]:
# Database name; it is also used as the directory and base name of the
# term metadata CSV file that is read later in this notebook.
database = 'audubon'
# NOTE(review): vocabulary and namespace are not used in the visible part
# of this notebook -- presumably consumed by later cells; confirm.
vocabulary = 'ac'
namespace = 'terms'
# NOTE(review): date_issued is not used in the visible part of this notebook.
date_issued = '2019-09-30'
# Suffix appended verbatim to the timestamps generated by isoTime().
local_offset_from_utc = '-05:00'
# Directory and base name of the term versions CSV, derived from the database.
versions = '{}-versions'.format(database)
# CSV file listing the terms to be added or modified in this run.
modifications = 'mods.csv'

Define utility functions

In [None]:
def readCsv(filename):
    """Read a UTF-8 encoded CSV file into a list of rows.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A list with one item per CSV record, each item being the list of
        field strings produced by csv.reader. An empty file gives [].

    Raises:
        Whatever open() or the csv module raises (for example
        FileNotFoundError or UnicodeDecodeError); nothing is caught here.
    """
    # The with-statement closes the file even when reading fails part-way,
    # which the previous explicit close() call did not guarantee.
    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'r', newline='', encoding='utf-8') as fileObject:
        return list(csv.reader(fileObject))

def findColumnWithHeader(header_row_list, header_label):
    """Look up the position of a header label within a header row.

    Parameters:
        header_row_list: list of column header values.
        header_label: the header value to search for.

    Returns:
        A two-item list. When the label is present: [True, index], where
        index is the position of the last column whose header equals the
        label. When it is absent: [False, 0].
    """
    matching_positions = [
        position
        for position, label in enumerate(header_row_list)
        if label == header_label
    ]
    if not matching_positions:
        return [False, 0]
    # When a header is repeated, the right-most occurrence is reported.
    return [True, matching_positions[-1]]
    
def isoTime(offset):
    """Return the current time as an ISO 8601-style timestamp string.

    The time comes from datetime.datetime.now() and is formatted as
    YYYY-MM-DDTHH:MM:SS. The offset string is appended verbatim; it is not
    used to adjust the time value.

    Parameters:
        offset: string appended to the timestamp, e.g. '-05:00'.

    Returns:
        The formatted timestamp followed by the offset.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S"
    local_now = datetime.datetime.now()
    formatted_now = local_now.strftime(timestamp_format)
    return formatted_now + offset

# Extract information from metadata files

Read in the files to be compared. Note: row 0 contains the headers.

In [None]:
# The term metadata CSV lives in a directory named after the database and
# carries the same base name: <database>/<database>.csv
term_metadata_filename = '{0}/{0}.csv'.format(database)
term_metadata = readCsv(term_metadata_filename)

# The modifications file lists the terms to be added or changed.
modifications_metadata = readCsv(modifications)
modifications_header = modifications_metadata[0]
print('Headers: ', modifications_header)

Find which column numbers in the modifications file and the metadata file hold the term local name

In [None]:
# Locate the term local name column in the modifications file. The later
# cells index into this column, so the run cannot continue without it.
result = findColumnWithHeader(modifications_metadata[0], 'term_localName')
if not result[0]:
    print('The modifications file does not have a term_localName column')
    # sys.exit has to be *called*; the bare name `sys.exit` is an expression
    # with no effect, which let the run continue into a NameError later.
    sys.exit(1)
mods_localname_column = result[1]

# don't error trap here because all existing files should have a local name column header
result = findColumnWithHeader(term_metadata[0], 'term_localName')
metadata_localname_column = result[1]

Create a list of the local names of terms to be added or modified

In [None]:
# Collect the local name from every data row of the modifications file.
# Row 0 holds the headers, so it is skipped.
mods_term_localName = [
    modification_row[mods_localname_column]
    for modification_row in modifications_metadata[1:]
]
print(mods_term_localName)

Find out which terms are new and which are modifications of existing terms

In [None]:
# Local names that already exist in the term metadata. Row 0 holds the
# headers, so only the data rows are considered. Building the set once
# replaces a full scan of the metadata for every term in the modifications.
existing_local_names = {
    metadata_row[metadata_localname_column] for metadata_row in term_metadata[1:]
}

# A term in the modifications file is "modified" when its local name already
# exists in the metadata and "new" otherwise. Each term is recorded once,
# even if the metadata contains the same local name on several rows.
new_terms = []
modified_terms = []
for test_term in mods_term_localName:
    if test_term in existing_local_names:
        modified_terms.append(test_term)
    else:
        new_terms.append(test_term)
print('New terms: ', new_terms)
print('Modified terms: ', modified_terms)

# Supersede old versions of the modified terms

Read in the term versions metadata file

In [None]:
# The term versions CSV follows the same layout as the term metadata file:
# <versions>/<versions>.csv
term_versions_metadata_filename = '{0}/{0}.csv'.format(versions)
term_versions_metadata = readCsv(term_versions_metadata_filename)
# Show the header row of the versions file.
versions_header_row = term_versions_metadata[0]
print(versions_header_row)

Find the status and term local name columns

In [None]:
# Both lookups search the header row (row 0) of the term versions file.
# No check is made that the headers exist: findColumnWithHeader reports
# index 0 for a missing header, and that index is used as-is.
versions_header = term_versions_metadata[0]

result = findColumnWithHeader(versions_header, 'version_status')
status_column = result[1]

result = findColumnWithHeader(versions_header, 'term_localName')
term_local_name_column = result[1]

Go through each version and supersede any that match the local names of the modified terms

In [None]:
# Data rows of the versions file (row 0 is the header). The slice is a
# shallow copy, so edits to a row below change the original table's row.
version_data_rows = term_versions_metadata[1:]

for modified_name in modified_terms:
    for version_row, version in enumerate(version_data_rows, start=1):
        # Skip versions that belong to other terms.
        if version[term_local_name_column] != modified_name:
            continue
        print(version_row, modified_name)
        version[status_column] = 'superseded'
        # the 0th column always contains the time modified value
        version[0] = isoTime(local_offset_from_utc)

# Create new versions of new and modified terms

Make sure that all columns in the modifications file are also present in the term versions file

In [None]:
# Every column of the modifications file must also exist in the term versions
# file; otherwise values from that column would have nowhere to go when the
# new version rows are built.
for column in modifications_metadata[0]:
    result = findColumnWithHeader(term_versions_metadata[0], column)
    if not result[0]:
        # The column is absent from the term versions file (not from the
        # modifications file, which is where the name was read from).
        print('The term versions file is missing the ', column, ' column.')
        # sys.exit has to be *called*; the bare name `sys.exit` does nothing.
        sys.exit(1)

Create a new row in the term versions file for the added or modified terms

In [None]:
# For each column of the term versions file, work out which column of the
# modifications file supplies its value (None when there is no match). The
# headers are the same for every row, so this is computed once.
source_columns = []
for column in term_versions_metadata[0]:
    result = findColumnWithHeader(modifications_metadata[0], column)
    source_columns.append(result[1] if result[0] else None)

for row_number in range(1, len(modifications_metadata)):
    modification_row = modifications_metadata[row_number]
    # one value per term versions column; unmatched columns are left empty
    newVersion = [
        '' if source_column is None else modification_row[source_column]
        for source_column in source_columns
    ]
    # the 0th column holds the time modified value
    newVersion[0] = isoTime(local_offset_from_utc)
    print(newVersion)