# Uploading IIIF images to Transkribus based on IIIF manifests

## Get values from parsed issue description

* Set IIIF manifests to upload
* Set target collection (by ID)

In [None]:
import json
import re

# Load the parsed issue description from the JSON file in the working directory.
# JSON is UTF-8 by specification, so do not rely on the platform default encoding.
with open('./issue-parser-result.json', encoding='utf-8') as issue_json:
    issue_desc = json.load(issue_json)

# The first and last line of the 'iiif-manifests' field are dropped
# (presumably the fence lines of a code block in the issue form -- TODO confirm).
# Every remaining non-blank line is treated as one manifest URL; surrounding
# whitespace is stripped so stray spaces do not end up in the request URL.
to_process = [
    line.strip()
    for line in issue_desc['iiif-manifests'].splitlines()[1:-1]
    if line.strip()
]

# The collection ID is the word enclosed in parentheses in 'target-collection'.
collection_match = re.search(r"\((\w+)\)", issue_desc['target-collection'])
if collection_match is None:
    # Fail with a readable message instead of an AttributeError on None
    raise ValueError(
        "No collection ID in parentheses found in target-collection: "
        + repr(issue_desc['target-collection'])
    )
collectionId = collection_match.group(1)

#print(to_process)
#print(collectionId)

## Setup

In [None]:
!pip install lxml_html_clean
!pip install lxml[html_clean]
!pip install requests-toolbelt

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
import os
from IPython.core.display import HTML
import time
import random
from lxml import etree
import json

## Login

In [None]:
import os
import json
# Optionally load secrets from a JSON file into the process environment.
# `secretsPath` is not defined in this notebook; it is presumably injected as a
# notebook parameter -- TODO confirm. Looking it up through globals() avoids a
# NameError when it was not injected and the credentials are already present
# in the environment.
secrets_path = globals().get('secretsPath')
if secrets_path:
    with open(secrets_path, 'r', encoding='utf-8') as secretsFile:
        secrets = json.load(secretsFile)
    for k, v in secrets.items():
        # os.environ only accepts strings; structured values are re-serialised
        # as JSON so they can be parsed again below
        os.environ[k] = v if isinstance(v, str) else json.dumps(v)

# The credentials are stored as a JSON document in a single environment variable
creds = json.loads(os.environ["TRANSKRIBUS_CREDENTIALS"])

# One session for all later requests so the login cookie is reused
s = requests.Session()
login_response = s.post('https://transkribus.eu/TrpServer/rest/auth/login', data=creds)
# Stop here on a rejected login (HTTP 4xx/5xx) instead of failing later on every upload
login_response.raise_for_status()

## Initialisation

In [None]:
def iiif_manifest(url, timeout=60):
    """Fetch a IIIF manifest and return it as parsed JSON.

    Args:
        url: URL of the manifest.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: on connection problems, timeouts or an
            HTTP error status.
        ValueError: if the response body is not valid JSON.
    """
    r = requests.get(url, timeout=timeout)
    # Report HTTP errors as such instead of trying to parse an error page as JSON
    r.raise_for_status()
    return r.json()

def get_pages(manifest):
    """Map page labels to image body IDs for the pages of a manifest.

    Each entry of manifest['items'] is expected to provide
    entry['label']['en'][0] (used as key) and
    entry['items'][0]['items'][0]['body']['id'] (used as value).

    Entries that do not have this structure are skipped and reported; they no
    longer discard the pages collected before them.

    Args:
        manifest: Parsed manifest as a dict with an 'items' list.

    Returns:
        dict mapping page label to body ID. If two pages share a label, the
        later one overwrites the earlier one.

    Raises:
        KeyError: if the manifest has no 'items' key.
    """
    pagesDict = dict()
    for page in manifest['items']:
        try:
            label = page['label']['en'][0]
            image_id = page['items'][0]['items'][0]['body']['id']
        except (KeyError, IndexError, TypeError):
            # Missing key, empty list or unexpected type: skip this entry only
            print("-- skipping page entry with unexpected structure")
            continue
        pagesDict[label] = image_id
    return pagesDict

def download_pages(pages, wait=1):
    """Download the image of every page and return the raw bytes per file name.

    Args:
        pages: dict mapping page label to an image URL; 'info.json' in the URL
            is replaced by 'full/max/0/default.jpg' to request the image.
        wait: Seconds to sleep after each successful download.

    Returns:
        dict mapping '<label>.jpg' to the downloaded bytes. Pages whose
        download fails are reported and left out.
    """
    pageDict = dict()
    for filename in sorted(pages):
        try:
            image_url = pages[filename].replace('info.json', 'full/max/0/default.jpg')
            r = requests.get(image_url)
            # Do not store the body of an HTTP error response as if it were an image
            r.raise_for_status()
            # NB: the extension is required, otherwise no uploadId issued
            pageDict[filename + ".jpg"] = r.content
            time.sleep(wait)
        except Exception as exc:
            # Best effort: report the page and carry on with the next one.
            # `Exception` (not a bare except) keeps Ctrl-C / interrupts working.
            print("exception while downloading", filename, "-", exc)
            continue
    return pageDict

## Processing uploads

In [None]:
%%capture cap

# Everything printed in this cell forms the workflow report (Markdown with a
# fenced block containing the per-manifest log).
print("# Workflow finished")
print(":sparkles: Here is the result of the workflow:")
print("```")

# Prefix removed from the manifest URL to derive title and external ID
presentation_prefix = 'https://iiif.annemarie-schwarzenbach.ch/presentation/'
uploads_url = 'https://transkribus.eu/TrpServer/rest/uploads'

skipped = []
for processing in to_process:
    print("Processing", processing)

    # A manifest that cannot be fetched or read must not abort the whole run
    try:
        manifest = iiif_manifest(processing)
        pages = get_pages(manifest)
    except Exception as exc:
        print("-- failed to read manifest, skipping", processing, "-", exc)
        skipped.append(processing)
        continue

    files = download_pages(pages)
    if not files:
        # Nothing to upload: do not create an upload without pages
        print("-- no page images could be downloaded, skipping", processing)
        skipped.append(processing)
        continue
    if len(files) < len(pages):
        print("-- warning:", len(pages) - len(files), "page(s) could not be downloaded")

    # Page numbers follow the sorted file names, starting at 1
    file_names = sorted(files)
    pages_metadata = [{'fileName': val, 'pageNr': idx} for idx, val in enumerate(file_names, start=1)]

    title = processing.replace(presentation_prefix, '')
    uploadObj = {
        "md": {
            "title": title,
            "externalId": title.replace('.json', '')
        },
        "pageList": {"pages": pages_metadata}
    }
    print("- with upload object:", uploadObj)

    # Step 1: announce the upload (metadata + page list) and obtain an upload ID
    headers = {'Content-type': 'application/json'}
    try:
        cont = s.post(uploads_url + '?collId=' + collectionId, json=uploadObj, headers=headers)
        # parse and get upload ID
        response = etree.fromstring(cont.content)
        uploadId = response.xpath('//uploadId/text()')[0]
        print('- successfully uploaded metadata, got id', uploadId)
        print('- transmitting file(s)')
    except Exception:
        print("-- failed to get upload ID, skipping", processing)
        skipped.append(processing)
        continue

    # Step 2: transmit the files one by one; stop at the first failure
    fail = False
    for key in file_names:
        mp_encoder = MultipartEncoder(
            fields={
                'img': (key, files[key], 'application/octet-stream')
            }
        )

        try:
            cont = s.put(uploads_url + '/' + uploadId, data=mp_encoder, headers={'Content-Type': mp_encoder.content_type})
            response = etree.fromstring(cont.content)
            jobId = response.xpath('//jobId/text()')[0]
            print("Job ID: ", jobId)
        except Exception:
            print("-- ⚠️🛑 failed to upload", key, " 🛑⚠️")
            fail = True
            break
        # Short random pause between files
        time.sleep(random.randint(0,2))

    if not fail:
        print("- done!")
        print("══════════════════════")
    else:
        skipped.append(processing)
        print("-- failed to upload file in ", processing, "skipping this manifest")

# Summarise what was not uploaded so it is visible at the end of the report
if skipped:
    print("Skipped manifests:")
    for skipped_manifest in skipped:
        print("-", skipped_manifest)

print("```")
print("Please check if the new addition to the collection looks good and close this issue. :sparkles:")

In [None]:
# Write the captured report to a text file. `cap` is the object bound by the
# `%%capture cap` cell magic of the processing cell. The report contains
# emoji and box-drawing characters, so the encoding is set explicitly instead
# of relying on the platform default.
with open('./ipynb.txt', 'w', encoding='utf-8') as f:
    f.write(cap.stdout)
# Calling the capture object replays the captured output in this cell
cap()