# Script to generate annotations from text and IIIF image URLs


In [None]:
import json
import os

import pandas as pd
import requests

# ----------------------------------------------------------------------
# Configuration. These names are also read by the later cell that links
# the manifest to the annotation list.
# ----------------------------------------------------------------------
accession_number = '1979.0321P'
annotations_base_url = 'https://baskaufs.github.io/iiif/baskauf/'
base_path = '/Users/baskausj/github/iiif/baskauf/'
data_path = '/Users/baskausj/github/vandycite/gallery_buchanan/image_analysis/'
source_image_dataframe = pd.read_csv(data_path + 'combined_images.csv', dtype=str)

# Replace any spaces in the accession number with underscores
accession_number = accession_number.replace(' ', '_')

# The part of the accession number before the first period is used as a
# subdirectory name in both the manifest URLs and the output path.
accession_prefix = accession_number.split('.')[0]

# Strings shared by every annotation; computed once instead of per row.
gallery_item_url = 'https://iiif-manifest.library.vanderbilt.edu/gallery/' + accession_prefix + '/' + accession_number
annotation_list_relative_path = 'annotations/' + accession_prefix + '/' + accession_number + '_annotations.json'

# Load the annotations CSV file.
# keep_default_na=False keeps empty cells as '' and keeps literal text such as
# "NA", "None" or "null" as text. With the pandas default those become float
# NaN, which json serializes as the bare token NaN (not valid JSON).
annotations_dataframe = pd.read_csv('test_annotations.csv', dtype=str, keep_default_na=False)

# Add a column to the dataframe with the xywh string by splitting the URL on
# slashes and taking the piece at index 6 (the seventh piece).
# NOTE(review): presumably this is the region segment of a IIIF Image API URL
# of the form scheme://host/prefix/version/identifier/region/... -- confirm
# against the URLs actually stored in the CSV.
annotations_dataframe['xywh'] = annotations_dataframe['url'].str.split('/').str[6]

# A URL with fewer than seven pieces yields a missing value here; fail with a
# clear message rather than a TypeError during string concatenation below.
rows_without_region = annotations_dataframe.index[annotations_dataframe['xywh'].isna()]
if len(rows_without_region) > 0:
    raise ValueError(
        'Could not extract an xywh region from the url in test_annotations.csv row(s): '
        + ', '.join(str(row_label) for row_label in rows_without_region)
    )

# Loop through each annotation (row) and build the resources element of the annotation
resources = []
for index, row in annotations_dataframe.iterrows():
    # Target of the annotation: a rectangular fragment of the item identified
    # by the '.json_1' URL, within the item's manifest.
    on_value = {
        '@type': 'oa:SpecificResource',
        'full': gallery_item_url + '.json_1',
        # NOTE(review): this object uses the key 'type' while its siblings
        # use '@type' -- confirm this is intended.
        'selector': {
            'type': 'oa:FragmentSelector',
            'value': 'xywh=' + row['xywh']
        },
        'within': {
            '@id': gallery_item_url + '.json',
            '@type': 'sc:Manifest'
        }
    }

    # Body of the annotation: the plain text from the CSV row.
    resource_value = {
        '@type': 'dctypes:Text',
        'format': 'text/plain',
        'chars': row['text']
    }

    # The dataframe row label makes each annotation @id unique within the list.
    annotation = {
        '@context': 'http://iiif.io/api/presentation/2/context.json',
        '@id': gallery_item_url + '/annotation/' + str(index),
        '@type': 'oa:Annotation',
        'motivation': [
            'oa:commenting'
        ],
        'on': on_value,
        'resource': [
            resource_value
        ]
    }
    resources.append(annotation)

# The finished annotation list. The name `annotations` now only ever refers to
# this dictionary; the CSV data lives in `annotations_dataframe`.
annotations = {
    "@context": "http://www.shared-canvas.org/ns/context.json",
    "@id": annotations_base_url + annotation_list_relative_path,
    "@type": "sc:AnnotationList",
    "resources": resources
}

# Write the annotations to a JSON file.
# The target directory depends on the accession number prefix, so create it
# if this is the first item written for that prefix.
annotations_output_path = base_path + annotation_list_relative_path
os.makedirs(os.path.dirname(annotations_output_path), exist_ok=True)
with open(annotations_output_path, 'w', encoding='utf-8') as outfile:
    json.dump(annotations, outfile, indent=2)

print('done')


# Add the link from the manifest to the annotation URL

In [None]:
# Look up the manifest URL for the image in the source image dataframe.
matching_manifest_urls = source_image_dataframe.loc[
    source_image_dataframe['accession_number'] == accession_number,
    'iiif_manifest'
]
# Fail with a message that names the accession number instead of the generic
# "positional indexer is out-of-bounds" error from .iloc on an empty result.
if matching_manifest_urls.empty:
    raise IndexError(
        'No row with accession_number ' + repr(accession_number)
        + ' was found in the source image dataframe'
    )
# If several rows match, the first one is used.
manifest_url = matching_manifest_urls.iloc[0]

# Get the manifest JSON.
# The timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() turns an HTTP error response into an explicit
# exception instead of a confusing JSON decoding failure.
manifest_response = requests.get(manifest_url, timeout=30)
manifest_response.raise_for_status()
manifest_json = manifest_response.json()

# Create the otherContent list: a single reference to the annotation list
# at its published URL.
other_content = [
    {
        '@id': annotations_base_url + 'annotations/' + accession_number.split('.')[0] + "/" + accession_number + "_annotations.json",
        '@type': 'sc:AnnotationList'
    }
]

# Attach the otherContent list to the first canvas of the first sequence,
# replacing any otherContent already present there.
manifest_json['sequences'][0]['canvases'][0]['otherContent'] = other_content

# Write the modified manifest to a JSON file.
with open(base_path + 'annotations/' + accession_number + '.json', 'w', encoding='utf-8') as outfile:
    json.dump(manifest_json, outfile, indent=4)

print('done')
