# Script to generate annotations from text and IIIF image URLs


In [None]:
import pandas as pd
import json
import requests

# --- Configuration ---
# Base URL under which the generated annotation list files will be published.
annotations_base_url = 'https://iiif-manifest.library.vanderbilt.edu/gallery/'
# Name stem shared by the annotations CSV and all of the output files.
manifest_filename = 'album_du_siege'
# Local output location. The 'annotations/' subdirectory must already exist,
# because open(..., 'w') does not create missing directories.
base_path = '/Users/baskausj/Downloads/'
data_path = '/Users/baskausj/github/vandycite/gallery_buchanan/image_analysis/'
# NOTE(review): this dataframe is not used anywhere in this cell; it is kept
# in case other cells rely on it -- confirm before removing.
source_image_dataframe = pd.read_csv(data_path + 'combined_images.csv', dtype=str)

# Load the annotations CSV file (read from the current working directory).
# The columns used below are: url, canvas_id, manifest_id, and string.
# NOTE: Make sure that the order of the canvas IDs in the CSV file matches the order of the canvases in the manifest!
annotations = pd.read_csv(manifest_filename + '_annotations.csv', dtype=str)

# Add a column with the xywh string: split the image URL on slashes and take
# the piece at index 6 (i.e. the seventh piece).
# NOTE(review): this presumably is the region segment of a IIIF Image API URL
# of the form https://server/iiif/3/identifier/region/... -- confirm that every
# URL in the CSV has that exact depth.
annotations['xywh'] = annotations['url'].str.split('/').str[6]

# Get a list of the unique canvas IDs, in order of first appearance.
canvas_ids = annotations['canvas_id'].unique()

# Group the rows by canvas once, rather than re-scanning the entire table for
# every canvas. sort=False keeps the groups and their rows in CSV order, and
# each row keeps its original index label (used in the annotation @id below).
rows_by_canvas = annotations.groupby('canvas_id', sort=False)

# Loop through each canvas and build an annotations list for it
for canvas_id in canvas_ids:
    # NOTE: Using the unique UUID to identify the particular annotation file only works when the canvas ID is constructed from a UUID.
    # This is the case for manifests that were built using the Bodleian IIIF Manifest Builder.
    # If some other method of identifying the canvases is used, the next line of code will need to be modified.

    # Extract the canvas UUID (the last path segment) from the canvas ID
    canvas_uuid = canvas_id.split('/')[-1]

    # The same file name is used for the published @id and for the local file.
    annotation_list_filename = manifest_filename + '_' + canvas_uuid + '.json'

    # Build one annotation per CSV row that belongs to this canvas.
    resources = []
    for index, row in rows_by_canvas.get_group(canvas_id).iterrows():
        # Target of the annotation: a rectangular region of the canvas.
        on_value = {
            '@type': 'oa:SpecificResource',
            'full': row['canvas_id'],
            'selector': {
                'type': 'oa:FragmentSelector',
                'value': 'xywh=' + row['xywh']
            },
            'within': {
                '@id': row['manifest_id'],
                '@type': 'sc:Manifest'
            }
        }
        # Body of the annotation: the plain text from the 'string' column.
        resource_value = {
            '@type': 'dctypes:Text',
            'format': 'text/plain',
            'chars': row['string']
        }

        annotation = {
            '@context': 'http://iiif.io/api/presentation/2/context.json',
            # The dataframe row label makes the annotation ID unique.
            '@id': row['canvas_id'] + '/annotation/' + str(index),
            '@type': 'oa:Annotation',
            'motivation': [
                'oa:commenting'
            ],
            'on': on_value,
            'resource': [
                resource_value
            ]
        }
        resources.append(annotation)

    # Build the annotation list object for that canvas.
    annotations_object = {
        "@context": "http://www.shared-canvas.org/ns/context.json",
        "@id": annotations_base_url + 'annotations/' + annotation_list_filename,
        "@type": "sc:AnnotationList",
        "resources": resources
    }

    # Write the annotation list to a JSON file.
    with open(base_path + 'annotations/' + annotation_list_filename, 'w', encoding='utf-8') as outfile:
        json.dump(annotations_object, outfile, indent=2)

print('done')


# Add the links from the manifest to the annotation URLs

In [None]:
# URL of the published manifest to which the annotation links will be added.
manifest_url = 'https://baskaufs.github.io/iiif/kim/album_du_siege.json'

# Get the manifest JSON. Fail fast on a stalled connection or an HTTP error
# rather than surfacing a confusing JSON decoding error on an error page.
manifest_response = requests.get(manifest_url, timeout=30)
manifest_response.raise_for_status()
manifest_json = manifest_response.json()

# Canvases of the first sequence, plus a lookup from canvas @id to its
# position in that list.
manifest_canvases = manifest_json['sequences'][0]['canvases']
canvas_positions = {
    canvas.get('@id'): position
    for position, canvas in enumerate(manifest_canvases)
}

# Create the manifest links for each canvas.
# Each canvas is located in the manifest by its @id, so canvases that have no
# annotations do not shift the links onto the wrong canvas.
# NOTE: if a canvas ID is not found in the manifest, the position of the canvas
# ID in the data CSV is used instead; that fallback works only if the canvases
# were listed in the data CSV in the same order as they are listed in the manifest.
for index, canvas_id in enumerate(canvas_ids):

    # Extract the canvas UUID (the last path segment) from the canvas ID.
    # This only works when the canvas ID is constructed from a UUID.
    canvas_uuid = canvas_id.split('/')[-1]

    # Create the otherContent list pointing at this canvas's annotation list.
    other_content = [
        {
            '@id': annotations_base_url + 'annotations/' + manifest_filename + '_' + canvas_uuid + '.json',
            '@type': 'sc:AnnotationList'
        }
    ]

    # Add the otherContent list to the matching canvas in the manifest,
    # replacing any otherContent that canvas already had.
    target_position = canvas_positions.get(canvas_id, index)
    manifest_canvases[target_position]['otherContent'] = other_content

# Write the modified manifest to a JSON file.
with open(base_path + 'annotations/' + manifest_filename + '.json', 'w', encoding='utf-8') as outfile:
    json.dump(manifest_json, outfile, indent=4)

print('done')
