In [10]:
import base64
import anthropic
import os
import difflib
from PIL import Image
from io import BytesIO

# Configuration
# Input images, and the output tree written by process_image: the initial
# transcription and first refinement go to refinement_folder1, the second
# refinement to refinement_folder2, and the last refinement to txt_folder.
image_folder = "IAM/IAMa_cropped"
transcription_folder = "transcriptions_IAM2_claude"
refinement_folder1 = os.path.join(transcription_folder, "refinement_folder1")
refinement_folder2 = os.path.join(transcription_folder, "refinement_folder2")
txt_folder = os.path.join(transcription_folder, "final_transcriptions")

# The API key is read from the ANTHROPIC_API_KEY environment variable rather
# than being written into the source, so it cannot leak through the notebook
# or version control. Export the variable before running this script.
client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
MODEL_NAME = "claude-3-5-sonnet-20240620"

# System prompt
# Passed as the `system` argument of every client.messages.create call in
# process_image (initial transcription and each refinement pass). It restricts
# the output to handwritten text only and asks for a verbatim transcription
# with the original line structure and spelling preserved.
system_prompt = """Please follow these guidelines:
1. Examine the image carefully and identify all handwritten text.
2. Transcribe ONLY the handwritten text. Ignore any printed or machine-generated text in the image.
3. Maintain the original structure of the handwritten text, including line breaks and paragraphs.
4. Do not attempt to correct spelling or grammar in the handwritten text. Transcribe it exactly as written.
Please begin your response directly with the transcribed text. Remember, your goal is to provide an accurate transcription of ONLY the handwritten portions of the text, preserving its original form as much as possible."""

def resize_image(image_path, max_size_bytes=5*1024*1024 - 1):
    """Return the image at *image_path* as PNG bytes no larger than a budget.

    The image is first encoded at its original resolution. If the result is
    larger than ``max_size_bytes``, the original pixels are re-sampled at
    progressively smaller scales (0.9, 0.81, ...) and re-encoded until the
    encoded size fits.

    Args:
        image_path: Path of the image file to load with Pillow.
        max_size_bytes: Largest acceptable size of the encoded PNG, in bytes.

    Returns:
        The PNG-encoded image as ``bytes``, at most ``max_size_bytes`` long.

    Raises:
        ValueError: If the image cannot be shrunk below the budget before one
            of its dimensions reaches zero.

    Errors raised by Pillow while opening or encoding the file are not caught
    here and propagate to the caller.
    """
    with Image.open(image_path) as img:
        buffer = BytesIO()
        img.save(buffer, format='PNG')
        encoded = buffer.getvalue()

        scale = 0.9
        while len(encoded) > max_size_bytes:
            # Always scale from the original image so quality loss does not
            # compound across iterations.
            new_size = tuple(int(dim * scale) for dim in img.size)

            # Stop with the intended error once the image has been shrunk to
            # nothing; otherwise the failure would surface from inside
            # Pillow's resize with an unrelated message.
            if min(new_size) < 1:
                raise ValueError(f"Unable to resize image below {max_size_bytes} bytes")

            resized_img = img.resize(new_size, Image.LANCZOS)

            buffer = BytesIO()
            resized_img.save(buffer, format='PNG', optimize=True)
            encoded = buffer.getvalue()

            scale *= 0.9

        return encoded

def process_image(client, image_path, txt_folder, refinement_folder1, refinement_folder2):
    """Transcribe the handwriting in one image and refine it in three passes.

    The image is sent to the model once for an initial transcription, then
    three more times, each with the current transcription as the assistant
    turn followed by a refinement instruction. Progress and line diffs are
    printed to stdout.

    Files written (all UTF-8, named after the image's base name):
        * ``refinement_folder1/<name>_initial.txt`` - initial transcription
        * ``refinement_folder1/<name>.txt`` - after refinement pass 1
        * ``refinement_folder2/<name>.txt`` - after refinement pass 2
        * ``txt_folder/<name>.txt`` - after refinement pass 3 (final result)

    Args:
        client: Object exposing ``messages.create(...)``; the response's
            ``content[0].text`` is used as the transcription.
        image_path: Path of the image to transcribe.
        txt_folder: Folder for the final transcription.
        refinement_folder1: Folder for the initial and first-pass output.
        refinement_folder2: Folder for the second-pass output.

    Returns:
        None. The image is skipped (with a printed message) when the final
        transcription already exists or when the image cannot be loaded or
        resized.
    """
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    txt_file_path = os.path.join(txt_folder, f"{image_name}.txt")

    # The final transcription doubles as the "already done" marker, which
    # makes an interrupted batch resumable.
    if os.path.exists(txt_file_path):
        print(f"File for {image_name} already exists in final_transcriptions. Skipping.")
        return

    try:
        binary_data = resize_image(image_path)
    except (ValueError, OSError) as e:
        # OSError covers missing/unreadable files and images Pillow cannot
        # identify; one bad image must not abort the whole batch.
        print(f"Error processing {image_path}: {str(e)}")
        return

    base64_string = base64.b64encode(binary_data).decode('utf-8')

    # The same image payload is attached to every request below.
    image_block = {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": base64_string}}

    message_list = [
        {
            "role": 'user',
            "content": [
                image_block,
                {"type": "text", "text": "Please transcribe the handwritten text in this image accurately, respecting line breaks. Do not describe any fields or layout elements, focus solely on the handwritten content. DO NOT transcribe the field 'name:' and following signature, if there is one."}
            ]
        }
    ]

    initial_response = client.messages.create(
        model=MODEL_NAME,
        max_tokens=2048,
        system=system_prompt,
        messages=message_list,
        temperature=0
    )
    initial_transcription = initial_response.content[0].text
    print(f"Processing image: {image_name}")
    print("Initial transcription:")
    print(initial_transcription)
    print("\n" + "="*50 + "\n")

    initial_file_path = os.path.join(refinement_folder1, f"{image_name}_initial.txt")
    with open(initial_file_path, "w", encoding='utf-8') as f:
        f.write(initial_transcription)

    refined_transcription = initial_transcription
    refinement_prompts = [
        "Review the original image and your previous transcription. Focus on correcting any spelling errors, punctuation mistakes, or missed words. Ensure the transcription accurately reflects the handwritten text.",
        "Examine the structure of the transcription. Are paragraphs and line breaks correctly represented? Adjust the layout to match the original handwritten text more closely.",
        "Make a final pass over the transcription, comparing it closely with the original image. Make any last corrections or improvements to ensure the highest possible accuracy. Do not add any introduction or contextualization to the transcribed text.",
    ]
    # Output folder of each refinement pass, in the same order as the prompts.
    step_folders = (refinement_folder1, refinement_folder2, txt_folder)

    for i, (prompt, step_folder) in enumerate(zip(refinement_prompts, step_folders)):
        refinement_response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=2048,
            temperature=0,
            system=system_prompt,
            messages=[
                {
                    "role": "user",
                    "content": [
                        image_block,
                        {"type": "text", "text": "Please transcribe the handwritten text in this image accurately, respecting line breaks."}
                    ]
                },
                # Present the current transcription as the model's own prior
                # answer so the follow-up prompt asks it to revise that text.
                {"role": "assistant", "content": refined_transcription},
                {"role": "user", "content": prompt}
            ]
        )

        new_transcription = refinement_response.content[0].text

        diff = list(difflib.unified_diff(refined_transcription.splitlines(), new_transcription.splitlines(), lineterm=''))

        # Adopt the new text only when it differs line-wise from the current
        # one; otherwise keep the current transcription unchanged.
        if diff:
            print(f"Refinement step {i+1} - Changes made:")
            for line in diff:
                print(line)
            refined_transcription = new_transcription
        else:
            print(f"Refinement step {i+1} - No changes made.")

        # Each pass is written out even when nothing changed, so every
        # folder ends up with a file for every processed image.
        refinement_file_path = os.path.join(step_folder, f"{image_name}.txt")
        with open(refinement_file_path, "w", encoding='utf-8') as f:
            f.write(refined_transcription)

        print("\n" + "="*50 + "\n")

    print(f"Final refined text transcription saved to {refinement_file_path}")

def main():
    """Create the output folders and transcribe every PNG in ``image_folder``.

    Each image is handed to ``process_image`` together with the module-level
    client and output folders. Images whose final transcription already
    exists are skipped there, so the batch can be re-run after an
    interruption.
    """
    for folder in [transcription_folder, refinement_folder1, refinement_folder2, txt_folder]:
        os.makedirs(folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for a reproducible
    # run, and match the extension case-insensitively so files named
    # "*.PNG" are not silently left out.
    image_files = sorted(
        f for f in os.listdir(image_folder) if f.lower().endswith('.png')
    )

    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        process_image(client, image_path, txt_folder, refinement_folder1, refinement_folder2)

# Run the batch only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\c04-156.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\c04-160.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\c06-083.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d01-024.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d01-056.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d01-060.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d03-112.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IAM3_claude/txt\d04-005.txt
Image is already under the size limit.
Transcription and HTML saved to transcriptions_IA