In [None]:
import pandas as pd
import requests
from io import BytesIO

# URL to download the Excel file
url = 'https://docs.google.com/spreadsheets/d/1KQZOmXHrjXuQCttNGuAxFUr72SlXcBCYbs8c2Cy3fDE/export?format=xlsx&gid=175388937'

# Fetch the file; the timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(url, timeout=30)

# Flat list of every file name found in the rows that pass the Letter_ID filter
all_file_names = []


def is_within_range(letter_id):
    """Return True if letter_id can be read as an integer between 0 and 500 inclusive."""
    try:
        numeric_id = int(letter_id)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, non-numeric text and infinities cannot be converted to int
        return False
    return 0 <= numeric_id <= 500


# Check if the request was successful
if response.status_code == 200:
    # Load the content into a pandas DataFrame
    df = pd.read_excel(BytesIO(response.content))

    # Keep only rows whose Letter_ID is in range; astype(bool) guarantees a boolean mask
    in_range = df['Letter_ID'].apply(is_within_range).astype(bool)
    filtered_df = df[in_range]

    # Each cell may hold several comma-separated file names
    for cell in filtered_df['I Tatti file name(s)']:
        if pd.isna(cell):
            continue
        # str() guards against cells stored as numbers; splitting on a bare comma
        # and stripping tolerates inconsistent spacing, and empty pieces are dropped
        names = [name.strip() for name in str(cell).split(',')]
        all_file_names.extend(name for name in names if name)

    print(all_file_names)

else:
    print(f"Failed to fetch the file: {response.status_code}")

# Count the number of file names
n = len(all_file_names)
print(n)


In [None]:
import requests
import os

def download_images(base_url, image_numbers, output_dir="BGdataset/images", timeout=30):
    """
    Downloads images from the specified IIIF base URL and saves them in the given directory.

    For each identifier the image's ``info.json`` is requested first. Only when that
    request returns HTTP 200 is the full-size image fetched and written to
    ``<output_dir>/<image_number>.jpg``. Missing metadata, failed downloads and
    network errors are reported with ``print`` and the loop moves on to the next
    identifier. Nothing is returned.

    Parameters:
        base_url (str): The base URL for the IIIF image service.
        image_numbers (list of str): The list of image identifiers or names to download.
        output_dir (str): The directory where images will be saved.
        timeout (float or tuple): Seconds to wait on each HTTP request before it is
            abandoned; passed straight to ``requests.get``.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_dir, exist_ok=True)

    for image_number in image_numbers:
        metadata_url = f"{base_url}{image_number}.jpg/info.json"
        image_url = f"{base_url}{image_number}.jpg/full/full/0/default.jpg"

        try:
            # A 200 on the metadata endpoint is used as the "image exists" check
            response = requests.get(metadata_url, timeout=timeout)
            if response.status_code != 200:
                print(f"Metadata for image {image_number} not found. Skipping.")
                continue

            print(f"Fetching image from: {image_url}")
            img_response = requests.get(image_url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            # Handle potential network issues (including timeouts) and keep going
            print(f"Error fetching image {image_number}: {e}")
            continue

        if img_response.status_code != 200:
            print(f"Failed to download image: {image_number}")
            continue

        with open(os.path.join(output_dir, f"{image_number}.jpg"), "wb") as f:
            f.write(img_response.content)
        print(f"Downloaded image: {image_number}")

# Example usage: fetch every file name gathered in all_file_names from the I Tatti IIIF service
image_numbers = all_file_names
base_url = "https://iiif.itatti.harvard.edu/iiif/2/bellegreene-full!"
download_images(base_url=base_url, image_numbers=image_numbers)
