# Environment setup

In [1]:
from openai import OpenAI
from anthropic import Anthropic
import os
import re
import pandas as pd
from dotenv import load_dotenv
import requests

In [2]:
import subprocess
import base64
# from IPython.display import display, Image
from PIL import Image

In [3]:
# Parent directory of the current directory
path = os.path.dirname(os.getcwd()) 

# Paths to the folders of example images and transcriptions
image_folder = path+'/data/Archives_LLN_Nivelles_I_1921_REG 5193'
text_folder = path+'/data/transcriptions'

In [4]:
load_dotenv()

True

In [5]:
openai_API_KEY = os.getenv("OPENAI_API_KEY")
openai_client = OpenAI(api_key=openai_API_KEY)

In [6]:
anthropic_API_KEY = os.getenv("ANTHROPIC_API_KEY")
anthropic_client = Anthropic(api_key=anthropic_API_KEY)
MODEL_NAME = "claude-3-5-sonnet-20240620"

# With Examples

In [7]:
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [8]:

#===========================================================================
# LLM CALLS
#===========================================================================

def call(prompt, max_tokens=5000, base64_image=None, message=None, system=None):
    if("claude" in model):
        res =  callAnthropic(prompt, max_tokens=max_tokens, base64_image=base64_image, message=message, system=system)
    else:
        res =  callOpenAI(prompt, max_tokens=max_tokens, base64_image=base64_image, message=message)
    return res


def callAnthropic(prompt, max_tokens=5000, base64_image=None, message=None, system=None):
    client = Anthropic(api_key= anthropic_API_KEY)  
    try:
        if(message==None):
            if(base64_image):
                response = client.messages.create(
                    model= model,
                    max_tokens=max_tokens,
                    system = system,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "image",
                                    "source": {
                                        "type": "base64",
                                        "media_type": "image/jpeg",
                                        "data": base64_image,
                                    },
                                },
                                {
                                    "type": "text",
                                    "text": prompt,
                                }
                            ],
                        }
                    ],
                    temperature=0,
                )
            else:
                response = client.messages.create(
                model=model,
                max_tokens=max_tokens,
                #system = system,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": prompt,
                            }
                        ],
                    }
                ],
                temperature=0,
            )
        else:
            response = client.messages.create(
                model= model,
                max_tokens=max_tokens,
                #system = system,
                messages=message,
                temperature=0,
            )
        
        return response.to_dict()["content"][0]["text"]
    except Exception as e:
        print(f"[ERROR] callAnthropic failed! {e}")
        return None


def callOpenAI(prompt, max_tokens=5000, base64_image=None, message=None):
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_API_KEY}"
    } 
    model_vision = "gpt-4o"
    if(base64_image):  
        if(message==None):
            payload = {
                "model": model_vision, # only gpt-4o can handle images
                "messages": [
                {
                    "role": "user",
                    "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                    ]
                }
                ],
                "max_tokens": max_tokens,
                "temperature": 0
            }
        else:
            payload = {
                "model": model_vision, # only gpt-4o can handle images
                "messages": message,
                "max_tokens": max_tokens,
                "temperature": 0
            }
    else:
        payload = {
            "model":  model,
            "messages": [
            {
                "role": "user",
                "content": [
                {
                    "type": "text",
                    "text": prompt
                }
                ]
            }
            ],
            "max_tokens": max_tokens,
            "temperature": 0
        }
    
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    try:
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"[ERROR] callOpenAI failed! {e}")
        print(response.json()["error"]["message"])
    #return response

In [9]:
from io import BytesIO

def xlsx_to_string(filepath):
    df = pd.read_excel(filepath)
    
    # Drop rows that don't have any information in any columns
    df = df.dropna(how='all')
    
    # Fill NaN values with a whitespace
    df = df.fillna(" ")
    
    string = df.to_string(index=False, header=False)
    string = re.sub(' +', ' ', string)  # Replace multiple spaces with a single space
    string = string.replace("\n", " \n")  # Ensure each new row starts on a new line
    return string


def resize_image(image_path):
    try:
        with Image.open(image_path) as img:
            # Calculate new dimensions
            new_width = img.width // 3
            new_height = img.height // 3
            img = img.resize((new_width, new_height), Image.LANCZOS)
            buffer = BytesIO()
            img.save(buffer, format="JPEG")
            return buffer.getvalue()
    except Exception as e:
        print(f"[ERROR] Resizing image: {e},  Path: {image_path}")
        return None

        
def exampleShot(image_path, NbExamples=1, model = "claude"):
        # example
        example_xlsx = path + "/data/transcriptions/transcription_ex" + str(2) + ".xlsx"
        example_text_1 = xlsx_to_string(example_xlsx)
        example_image_1 = path + "/data/Archives_LLN_Nivelles_I_1921_REG 5193/example2.jpeg"
        
        if(NbExamples==2):
            # example
            example_xlsx2 = path + "/data/transcriptions/transcription_ex" + str(3) + ".xlsx"
            example_text_2 = xlsx_to_string(example_xlsx2)
            example_image_2 = path + "/data/Archives_LLN_Nivelles_I_1921_REG 5193/example3.jpeg"
            example_text = [example_text_1, example_text_2]
        
        if model == "claude":
            resized_image = resize_image(image_path)
            base64_image = base64.b64encode(resized_image).decode('utf-8')
            image_1 = resize_image(example_image_1)
            image_1 = base64.b64encode(image_1).decode('utf-8')
            if(NbExamples==2):
                image_2 = resize_image(example_image_2)
                image_2 = base64.b64encode(image_2).decode('utf-8')
        else:
            base64_image = encode_image(image_path)
            image_1 = encode_image(example_image_1)
            if(NbExamples==2):
                image_2 = encode_image(example_image_2)
        
        if model == "claude":
            if(NbExamples==1):
                message = [
                    {
                        "role": "user",
                        "content": [ 
                        {"type": "image", 
                            "source": {
                                "type": "base64", 
                                "media_type": "image/jpeg", 
                                "data": image_1}
                        },
                        {
                            "type": "text",
                            "text": example_text_1,
                        },
                        {
                            "type": "text",
                            "text": f"""
                            The ```plaintext block is the example transcription of the example image you saw:

                            Transcription:
                            ```plaintext
                            {example_text_1}
                            ```
                            Compare what you read initially and the solution key in ```plaintext block. Recreate the content of the table in this image. Only that, no other information from you.

                            """
                        },
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/jpeg",
                                "data": base64_image,
                            },
                        }
                        ]
                    }
                ]
            
            else:
                message = [
                    {
                        "role": "user",
                        "content": [ 
    
                        {"type": "image", 
                            "source": {
                                "type": "base64", 
                                "media_type": "image/jpeg", 
                                "data": image_1}},
                        {
                            "type": "text",
                            "text": example_text_1,
                        },
                        {"type": "image", 
                            "source": {
                                "type": "base64", 
                                "media_type": "image/jpeg", 
                                "data": image_2}},
                        {
                            "type": "text",
                            "text": example_text_2,
                        },
                        {
                            "type": "text",
                            "text": f"""
                            The ```plaintext block is the example transcription of the example image you saw:

                            Transcription:
                            ```plaintext
                            {example_text}
                            ```
                            Compare what you read initially and the solution key in ```plaintext block. Recreate the content of the table in this image. Only that, no other information from you.

                            """
                        },
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/jpeg",
                                "data": base64_image,
                            },
                        }
                        ]
                    }
                ]
            
            system_prompt =  "You are a helpful assistant who can read old handwriting with a background in history, and you are going to recreate a scanned déclaration de succession from Belgium in a txt format."
            
        else:
            if(NbExamples==1):
                message = [
                    {
                        "role": "system", 
                        "content": "You are a helpful assistant who can read old handwriting with a background in history, and you are going to recreate a scanned déclaration de succession from Belgium in a txt format."
                    },
                    {
                        "role": "user",
                        "content": [ 
                        {
                            "type": "image_url",
                            "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                            "url": f"data:image/jpeg;base64,{image_1}"
                            }
                        },
                        {
                            "type": "text",
                            "text": f"""
                            The ```plaintext block is the example transcription of the example image you saw:

                            Transcription:
                            ```plaintext
                            {example_text_1}
                            ```
                            Compare what you read initially and the solution key in ```plaintext block. Recreate the content of the table in this image. Only that, no other information from you.
                            
                            Even if it is hard to read the texts from the image, return as much as you can. You must read something. Do not return an apologetic message.
                            """
                        },
                        ]
                    }
                ]
            
            else:
                message = [
                    {
                        "role": "system", 
                        "content": "You are a helpful assistant who can read old handwriting with a background in history, and you are going to recreate a scanned déclaration de succession from Belgium in a txt format."
                    },
                    {
                        "role": "user",
                        "content": [ 
                        {
                            "type": "image_url",
                            "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                            "url": f"data:image/jpeg;base64,{image_1}"
                            }
                        },
                        {
                            "type": "text",
                            "text": example_text_2
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                            "url": f"data:image/jpeg;base64,{image_2}"
                            }
                        },
                        {
                            "type": "text",
                            "text": f"""
                            The ```plaintext block is the example transcription of the example image you saw:

                            Transcription:
                            ```plaintext
                            {example_text}
                            ```
                            Compare what you read initially and the solution key in ```plaintext block. Recreate the content of the table in this image. Only that, no other information from you.
                            
                            Even if it is hard to read the texts from the image, return as much as you can. You must read something. Do not return an apologetic message.
                            """
                        },
                        ]
                    }
                ]
            system_prompt = None
        return call(prompt="", max_tokens=3000, base64_image=base64_image, message=message, system=system_prompt)
        

In [10]:
def callPostProcessing(max_tokens=800, prompt_parameter = None):
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_API_KEY}"
    } 
    payload = {
        "model": "gpt-4o",
        "messages": [
        {
            "role": "user",
            "content": [
            {
                "type": "text",
                "text": f"""This is an output from you. Clean it such that we have no separators and no comment from you: {prompt_parameter}
                """
            }
            ]
        }
        ],
        "max_tokens": max_tokens,
        "temperature": 0
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    try:
        return response.json()["choices"][0]["message"]["content"]
    except:
        print(response.json()["error"]["message"])

# use this when OpenAI credits are exhausted
def callPostProcessing_anthropic(max_tokens=5000, prompt_parameter = None):
    response = anthropic_client.messages.create(
        model=MODEL_NAME,
        max_tokens=max_tokens,
        system = "You are a helpful assistant who can read old handwriting with a background in history, and you are going to recreate a scanned déclaration de succession from Belgium in a txt format.",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""This is an output from you. Clean it such that we have no separators and no comment from you: {prompt_parameter}
                """
                    }
                ],
            }
        ],
        temperature=0,
    )
    return response.to_dict()["content"][0]["text"]

In [24]:
import time
import json
import os


progress_file = 'gpt_two_example_whole_output_progress.json'
final_output_file = 'gpt_two_example_whole_output_final.json'

model = "gpt-4o"

# Load progress if available
try:
    with open(progress_file, 'r') as file:
        gpt_two_example_whole_output = json.load(file)
except FileNotFoundError:
    gpt_two_example_whole_output = {}

for image_path in os.listdir(image_folder):
    image_path_new = os.path.join(image_folder, image_path)
    image_id = os.path.splitext(image_path)[0]

    # Skip already processed images
    if image_id in gpt_two_example_whole_output:
        print(f"Skipping {image_id}, already processed.")
        continue

    if image_path.endswith(".jpeg"):
        start_time = time.time()
        print(f'------- Start processing file {image_id} -------')

        try:
            output = exampleShot(image_path=image_path_new, NbExamples=2, model="gpt-4o")
            output_cleaned = callPostProcessing(prompt_parameter=output)

            # Save the result
            gpt_two_example_whole_output[image_id] = output_cleaned

            # Update progress file after each processed image
            with open(progress_file, 'w') as file:
                json.dump(gpt_two_example_whole_output, file)

            print(f"Finished processing {image_id} in {time.time() - start_time:.2f} seconds.")

        except Exception as e:
            print(f"Error processing {image_id}: {e}")

    else:
        print(f"Skipping {image_path_new}, does not end with '.jpeg'.")

# Save the final output
with open(final_output_file, 'w') as file:
    json.dump(gpt_two_example_whole_output, file)

print("Processing complete.")


------- Start processing file example1 -------
Finished processing example1 in 141.41 seconds.
------- Start processing file example10 -------
Finished processing example10 in 63.03 seconds.
------- Start processing file example11 -------
Finished processing example11 in 53.10 seconds.
Skipping /Users/serenekim/Desktop/PhD/img-analysis_seorin_project/data/Archives_LLN_Nivelles_I_1921_REG 5193/.DS_Store, does not end with '.jpeg'.
------- Start processing file example7 -------
Finished processing example7 in 63.47 seconds.
------- Start processing file example16 -------
Finished processing example16 in 71.95 seconds.
------- Start processing file example20 -------
Finished processing example20 in 51.69 seconds.
------- Start processing file example17 -------
Finished processing example17 in 56.43 seconds.
------- Start processing file example6 -------
Finished processing example6 in 35.79 seconds.
------- Start processing file example14 -------
Finished processing example14 in 47.69 sec

In [28]:
claude_two_example_whole_output_df = pd.DataFrame(claude_two_example_whole_output.items(), columns=['file', 'text'])
claude_two_example_whole_output_df.to_csv('claude_two_example_whole_output.csv', index=False)

In [29]:
gpt_two_example_whole_output_df = pd.DataFrame(gpt_two_example_whole_output.items(), columns=['file', 'text'])
gpt_two_example_whole_output_df.to_csv('gpt_two_example_whole_output.csv', index=False)

In [30]:
from evaluate import load

cer_metric =load("cer")
bleu_metric = load("bleu")  

In [31]:
from glob import glob

gt_files = glob(os.path.join(path+'/data/transcriptions', '*.txt'))

In [40]:
gt = {}

for file in gt_files:
    with open(file, 'r') as f:
        name = os.path.basename(file).split('.')[0]
        name = name.split('ex')[1]
        gt[name] = f.read()
        

In [41]:
gt = pd.DataFrame(gt.items(), columns=['file', 'text'])
gt['text'] = gt['text'].replace(['\n', '\t'], ' ', regex=True)
gt

Unnamed: 0,file,text
0,17,N' d'ordre Date du dépôt des déclarations Dés...
1,16,N' d'ordre Date du dépôt des déclarations Dés...
2,14,N' d'ordre Date du dépôt des déclarations Dés...
3,15,N' d'ordre Date du dépôt des déclarations Dés...
4,11,N' d'ordre Date du dépôt des déclarations Dés...
5,10,N' d'ordre Date du dépôt des déclarations Dés...
6,12,N' d'ordre Date du dépôt des déclarations Dés...
7,13,N' d'ordre Date du dépôt des déclarations Dés...
8,8,N' d'ordre Date du dépôt des déclarations Dé...
9,9,N' d'ordre Date du dépôt des déclarations Dés...
