In [10]:
import os
import base64
import anthropic
from PIL import Image
from io import BytesIO

# Resize the image if it's too large in base64 format
from PIL import Image
from io import BytesIO
import base64

# Resize image if it exceeds the size limit
def resize_base64_image(base64_string, max_size_mb=5):
    """Shrink a base64-encoded image until its encoded length fits a limit.

    The limit is compared against the length of the base64 *string*
    (not the decoded byte count).  Strings already within the limit are
    returned untouched and are never decoded, so the common case costs
    only a length check.

    Oversized images are re-saved in their original format (quality=85,
    which only affects formats that honour it, e.g. JPEG) and then scaled
    down in 10% steps relative to the original dimensions until the
    encoded result fits.

    Args:
        base64_string: Base64 text of an image file Pillow can open.
        max_size_mb: Maximum allowed length of the base64 string, in MiB.

    Returns:
        A base64 string of the (possibly re-encoded / downscaled) image.
        If the image cannot be shrunk below the limit before one of its
        dimensions reaches zero, the smallest encoding produced so far is
        returned even though it still exceeds the limit.
    """
    max_size_bytes = max_size_mb * 1024 * 1024

    # Cheap check first: no need to decode or parse an image that already fits.
    if len(base64_string) <= max_size_bytes:
        print("Image is already under the size limit.")
        return base64_string

    img = Image.open(BytesIO(base64.b64decode(base64_string)))
    # Resized copies lose the `format` attribute, so remember the source format.
    image_format = img.format

    scale_factor = 1.0
    resized_img = img

    while True:
        # Encode the current candidate and measure its base64 length.
        buffered = BytesIO()
        resized_img.save(buffered, format=image_format, quality=85)
        resized_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
        new_size = len(resized_base64)

        if new_size <= max_size_bytes:
            print(f"Resized image to {new_size / 1024 / 1024:.2f}MB")
            return resized_base64

        # Next candidate: 10% smaller, always computed from the original
        # image to avoid compounding resampling losses.
        scale_factor *= 0.9
        new_width = int(img.width * scale_factor)
        new_height = int(img.height * scale_factor)

        # Guard BEFORE resizing: Pillow rejects zero-sized targets, so the
        # check must run first for this exit path to be reachable.
        if new_width < 1 or new_height < 1:
            print("Cannot resize image further without losing data.")
            break

        resized_img = img.resize((new_width, new_height), Image.LANCZOS)

    print(f"Final size after resizing attempt: {new_size / 1024 / 1024:.2f}MB")
    return resized_base64

# Save transcription in TXT format
def save_transcription_txt(transcription, txt_file_path):
    """Write a transcription to a UTF-8 text file and report where it went.

    Args:
        transcription: The text to store.
        txt_file_path: Destination path; an existing file is overwritten.
    """
    with open(txt_file_path, "w", encoding='utf-8') as txt_file:
        txt_file.write(transcription)

    # Only a text file is written here, so the message no longer mentions HTML.
    print(f"Transcription saved to {txt_file_path}")

# Process image and generate transcription
def process_image(client, image_path, txt_folder):
    """Transcribe the handwriting in one image and save it as a text file.

    The output file is named after the image (extension replaced by
    ``.txt``) inside ``txt_folder``.  If that file already exists the image
    is skipped, which lets an interrupted batch be resumed.

    Args:
        client: Object exposing ``messages.create(...)`` (the Anthropic
            client created in ``main``).
        image_path: Path of the image file to transcribe.
        txt_folder: Directory that receives the ``.txt`` transcription.

    Returns:
        None.  The transcription is written to disk as a side effect.
    """
    # Extract image name and set up TXT file path
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    txt_file_path = os.path.join(txt_folder, f"{image_name}.txt")

    if os.path.exists(txt_file_path):
        print(f"TXT file for {image_name} already exists. Skipping.")
        return

    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()

    # Declare the real format of the payload instead of always claiming PNG:
    # the batch also feeds .jpg/.jpeg/.bmp files through this function.
    media_type = _detect_media_type(image_bytes)
    if media_type is None:
        # Formats outside the recognised set (e.g. BMP) are re-encoded as
        # PNG so that the payload and the declared media type agree.
        with Image.open(BytesIO(image_bytes)) as source_img:
            png_buffer = BytesIO()
            source_img.save(png_buffer, format="PNG")
        image_bytes = png_buffer.getvalue()
        media_type = "image/png"

    base64_string = base64.b64encode(image_bytes).decode('utf-8')

    # Resize the image if necessary.  The resizer re-saves in the source
    # format, so the media type detected above stays valid.
    resized_base64_string = resize_base64_image(base64_string, max_size_mb=5)

    # System and initial prompts
    system_prompt = """You are an AI assistant specialized in transcribing handwritten text from images. Please follow these guidelines:
1. Examine the image carefully and identify all handwritten text.
2. Transcribe ONLY the handwritten text. Ignore any printed or machine-generated text in the image.
3. Maintain the original structure of the handwritten text, including line breaks and paragraphs.
4. Do not attempt to correct spelling or grammar in the handwritten text. Transcribe it exactly as written.
5. Do not describe the image or its contents.
6. Do not introduce or contextualize the transcription.
Please begin your response directly with the transcribed text. Remember, your goal is to provide an accurate transcription of ONLY the handwritten portions of the text, preserving its original form as much as possible."""

    initial_prompt = """Please transcribe only the handwritten portions of the provided image, respecting line breaks."""

    # Send initial transcription request
    initial_response = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=2048,
        system=system_prompt,
        temperature=0,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": resized_base64_string}},
                    {"type": "text", "text": initial_prompt}
                ]
            }
        ]
    )

    # The transcription is taken from the first content block of the reply.
    initial_transcription = initial_response.content[0].text

    # Save the transcription
    save_transcription_txt(initial_transcription, txt_file_path)


def _detect_media_type(image_bytes):
    """Return the media type matching the file signature, or None if unknown."""
    if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if image_bytes.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if image_bytes.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
        return "image/webp"
    return None

# Main function to process a batch of images
def main(image_folder="IAM/IAMa_cropped",
         txt_folder="transcriptions_IAM3_claude/txt",
         max_images=600):
    """Transcribe a batch of images from a folder into per-image text files.

    Files are visited in sorted name order so that the subset selected by
    ``max_images`` is the same on every run (``os.listdir`` alone gives no
    ordering guarantee).  Images whose transcription already exists are
    skipped by ``process_image`` but still count towards ``max_images``.

    Args:
        image_folder: Directory containing the input images.
        txt_folder: Directory for the ``.txt`` outputs; created if missing.
        max_images: Stop after this many image files have been visited.

    Raises:
        RuntimeError: If the ANTHROPIC_API_KEY environment variable is not
            set.
    """
    # Read the key from the environment rather than embedding it in source.
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError(
            "ANTHROPIC_API_KEY environment variable is not set."
        )
    client = anthropic.Anthropic(api_key=api_key)

    # Create the TXT folder if it doesn't exist
    os.makedirs(txt_folder, exist_ok=True)

    image_extensions = (".png", ".jpg", ".jpeg", ".bmp")
    processed_images = 0

    for image_file in sorted(os.listdir(image_folder)):
        # Case-insensitive match so files such as "scan.PNG" are not skipped.
        if not image_file.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(image_folder, image_file)
        process_image(client, image_path, txt_folder)
        processed_images += 1

        if processed_images >= max_images:
            print(f"Processed {max_images} images. Stopping.")
            break

    print(f"Total images processed: {processed_images}")


if __name__ == "__main__":
    main()

Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\c04-156.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\c04-160.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\c06-083.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d01-024.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d01-056.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d01-060.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d03-112.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d04-005.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IA