In [None]:
import requests
import os
import json
import re

# Function to extract IDs from JSON
def extract_ids_from_json(data):
    """Collect object IDs found under ``'id'`` keys anywhere in parsed JSON.

    The structure is walked depth-first. For every dict entry whose key is
    ``'id'`` and whose value is a string, the value is searched for the
    pattern ``objects/<id>/`` (``<id>`` being lowercase hex digits and
    hyphens); the captured ``<id>`` is recorded on a match.

    Args:
        data: A value as returned by ``json.load`` -- a dict, a list, or a
            scalar. Dicts and lists are traversed at any nesting depth,
            including lists nested directly inside lists; scalars contribute
            nothing.

    Returns:
        A list of the captured ID strings in encounter order. Duplicates are
        kept; an input with no matches yields an empty list.
    """
    # Compiled once per call rather than looked up for every 'id' value.
    id_pattern = re.compile(r'objects/([a-f0-9\-]+)/')
    extracted_ids = []

    def walk(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if key == 'id' and isinstance(value, str):
                    match = id_pattern.search(value)
                    if match:
                        extracted_ids.append(match.group(1))
                else:
                    # A non-string 'id' value (e.g. a nested object) is still
                    # descended into, like any other value.
                    walk(value)
        elif isinstance(node, list):
            for item in node:
                walk(item)
        # Scalars (str, int, float, bool, None) hold nothing to search.

    walk(data)
    return extracted_ids

# Load the JSON file (JSON text is UTF-8; do not rely on the platform default)
with open('results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Extract IDs, dropping repeats while keeping first-seen order so that no
# document is downloaded twice
extracted_ids = list(dict.fromkeys(extract_ids_from_json(data)))

# Ensure the root directory exists
os.makedirs("my_results", exist_ok=True)

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# One session reuses the underlying connection across the many requests.
with requests.Session() as session:
    # Iterate over each extracted ID
    for doc_id in extracted_ids:
        manifest_url = f"https://iiif.bodleian.ox.ac.uk/iiif/manifest/{doc_id}.json"
        try:
            response = session.get(manifest_url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            manifest = response.json()
            canvases = manifest["sequences"][0]["canvases"]
        except (requests.RequestException, ValueError,
                KeyError, IndexError, TypeError) as exc:
            # A missing or malformed manifest must not abort the other documents.
            print(f"Skipping document {doc_id}: {exc!r}")
            continue

        # Create a subdirectory inside "my_results"
        doc_dir = os.path.join("my_results", doc_id)
        os.makedirs(doc_dir, exist_ok=True)

        failed_pages = 0
        for i, canvas in enumerate(canvases, start=1):
            try:
                image_info = canvas["images"][0]["resource"]["service"]["@id"]
            except (KeyError, IndexError, TypeError):
                print(f"No image service found for page {i} of document {doc_id}")
                failed_pages += 1
                continue

            # Image request in the form {service}/{region}/{size}/{rotation}/{quality}.{format}
            image_url = f"{image_info}/full/full/0/default.jpg"

            try:
                response = session.get(image_url, timeout=REQUEST_TIMEOUT)
                # Without this check an HTTP error page would be saved as a .jpg.
                response.raise_for_status()
            except requests.RequestException as exc:
                print(f"Failed to download page {i} for document {doc_id}: {exc!r}")
                failed_pages += 1
                continue

            # Save the image inside the subdirectory
            image_path = os.path.join(doc_dir, f"page_{i}.jpg")
            with open(image_path, "wb") as file:
                file.write(response.content)

            print(f"Downloaded page {i} for document {doc_id}")

        if failed_pages:
            print(f"Download finished with {failed_pages} failed page(s) for document {doc_id}")
        else:
            print(f"Download completed for document {doc_id}")


Downloaded page 1 for document ea036f6f-27d7-44ec-8e01-5c3835f5304e
Downloaded page 2 for document ea036f6f-27d7-44ec-8e01-5c3835f5304e
Download completed for document ea036f6f-27d7-44ec-8e01-5c3835f5304e
Downloaded page 1 for document abe1931e-cdfc-4cc1-85c5-906b263e7699
Downloaded page 2 for document abe1931e-cdfc-4cc1-85c5-906b263e7699
Download completed for document abe1931e-cdfc-4cc1-85c5-906b263e7699
Downloaded page 1 for document 80c01ac4-14e0-4483-97f5-4455ef2bcc19
Downloaded page 2 for document 80c01ac4-14e0-4483-97f5-4455ef2bcc19
Download completed for document 80c01ac4-14e0-4483-97f5-4455ef2bcc19
Downloaded page 1 for document a7b93358-b0b6-4c40-bda2-61c05271cce7
Download completed for document a7b93358-b0b6-4c40-bda2-61c05271cce7
Downloaded page 1 for document bdc848c1-a88e-42de-904a-ef539e0e5077
Downloaded page 2 for document bdc848c1-a88e-42de-904a-ef539e0e5077
Download completed for document bdc848c1-a88e-42de-904a-ef539e0e5077
Downloaded page 1 for document c9a242ba-11b