# Deleting documents from a Transkribus collection

## Get values from parsed issue description

* Set IIIF manifests / file names / Transkribus IDs to delete
* Set target collection (by ID)

In [None]:
import json
import re

# Load the parsed issue form; every value read below is one field of it.
with open('./issue-parser-result.json') as issue_json:
    issue_desc = json.load(issue_json)


def _issue_lines(field):
    """Return the stripped, non-blank lines of the issue field *field*.

    The first and last line of the field are dropped, exactly as before
    (presumably the code-fence markers of the issue template -- TODO confirm).
    Blank lines are discarded because an empty entry is dangerous further
    down: the deletion step uses each entry as a substring filter on document
    titles, and '' is a substring of every title.
    """
    return [line.strip() for line in issue_desc[field].splitlines()[1:-1] if line.strip()]


to_delete_iiif = _issue_lines('iiif-manifests')
# The last path segment of each manifest URL is used as the name to look for.
# A URL ending in '/' yields an empty segment, which is dropped for the same
# reason blank lines are.
to_delete_iiif_filenames = [
    name for name in (item.split('/')[-1] for item in to_delete_iiif) if name
]
to_delete_plain_filenames = _issue_lines('file-names')
to_delete_transkribus = _issue_lines('transkribus-ids')

# The collection ID is the first parenthesised word of the field, e.g. "(12345)".
collection_match = re.search(r"\((\w+)\)", issue_desc['target-collection'])
if collection_match is None:
    raise ValueError(
        "No collection ID in parentheses found in target-collection: "
        f"{issue_desc['target-collection']!r}"
    )
collectionId = collection_match.group(1)

#print(collectionId)

## Setup

In [None]:
!pip install lxml_html_clean
!pip install lxml[html_clean]
!pip install requests-toolbelt

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
import os
from IPython.core.display import HTML
import time
import random
from lxml import etree
import json

## Login

In [None]:
import os
import json

# `secretsPath` is not defined in any visible cell; presumably the notebook
# runner injects it as a parameter -- TODO confirm. Looking it up through
# globals() keeps the cell working (secrets file skipped) when it is absent
# instead of failing with a NameError.
secrets_path = globals().get('secretsPath')
if secrets_path:
    # Copy every key/value of the secrets file into the process environment.
    with open(secrets_path, 'r') as secretsFile:
        secrets = json.load(secretsFile)
        for (k, v) in secrets.items():
            os.environ[k] = v

# The credentials are stored as a JSON document in one environment variable
# and sent as the form fields of the login request.
creds = json.loads(os.environ["TRANSKRIBUS_CREDENTIALS"])

# The session keeps whatever the login sets, so later cells reuse `s`.
s = requests.Session()
login_response = s.post('https://transkribus.eu/TrpServer/rest/auth/login', data=creds)
# Stop here on a rejected login rather than failing later on an
# unauthenticated list/delete request.
login_response.raise_for_status()
login_response

## Delete documents

In [None]:
%%capture cap --no-stderr

import json

docs = s.get('https://transkribus.eu/TrpServer/rest/collections/'+ str(collectionId) +'/list')
docs_json = json.loads(docs.content)

print(f"Number of documents in collection " + str(collectionId) + " before deletion: " + str(len(docs_json)))

to_delete_filenames = to_delete_plain_filenames + to_delete_iiif_filenames

docIds_iiif_to_delete = []
for doc in docs_json:
  if 'title' in doc and any(filename in doc['title'] for filename in to_delete_filenames):
    docIds_iiif_to_delete.append(doc['docId'])

docIds_all_to_delete = docIds_iiif_to_delete + to_delete_transkribus
# this might not work for IIIF info (attempt to reduce over-eagerness of the action)
#docIds_all_to_delete = docIds_iiif_to_delete + to_delete_iiif_filenames
print(docIds_all_to_delete)

print(f"debug: docIds_iiif_to_delete " + str(docIds_iiif_to_delete) )
print(f"debug: to_delete_transkribus " + str(to_delete_transkribus) )
print(f"debug: to_delete_filenames " + str(to_delete_filenames) )
print(f"debug: to_delete_plain_filenames " + str(to_delete_plain_filenames) )
print(f"debug: to_delete_iiif_filenames " + str(to_delete_iiif_filenames) )


for docId in docIds_all_to_delete:
  response = s.delete(f'https://transkribus.eu/TrpServer/rest/collections/{collectionId}/{docId}')
  print(f"Deleted docId {docId} from collection {collectionId} with status code: {response.status_code}")

docs_after = s.get('https://transkribus.eu/TrpServer/rest/collections/'+ str(collectionId) +'/list')
docs_after_json = json.loads(docs_after.content)

print(f"Number of documents in collection " + str(collectionId) + " after deletion: " + str(len(docs_after_json)))


In [None]:
# Write the workflow report to ./ipynb.txt: a short header, the output
# captured by the deletion cell inside a fenced block, and a closing note.
with open('./ipynb.txt', 'w') as report_file:
    report_parts = [
        "# Workflow finished\n\n",
        ":sparkles: Here is the result of the workflow:\n\n",
        "```\n",
        cap.stdout,
        "```\n\n",
        "Feel free to verify the actual deletion in Transkribus and close this issue. :sparkles:",
    ]
    report_file.writelines(report_parts)
# Replay the captured output so it also shows up in the notebook itself.
cap()