In [None]:
# Load settings from a local .env file before any API client is created.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
from dotenv import load_dotenv
load_dotenv()


In [None]:
def extract_field_descriptions(path='gold_values.tsv', encoding='utf-8'):
    """Read the first two tab-separated columns of a TSV file into a dict.

    The first line of the file is treated as a header and skipped. Every
    remaining line is stripped of surrounding whitespace and split on tabs;
    lines that do not yield at least two columns (blank lines, single-column
    rows) are ignored.

    Args:
        path: Location of the TSV file. Defaults to 'gold_values.tsv' in the
            current working directory.
        encoding: Text encoding used to read the file. Given explicitly so the
            result does not depend on the platform's default encoding.

    Returns:
        dict mapping each row's first column to its second column. If a key
        appears on several rows, the last row wins.

    Raises:
        OSError: If the file cannot be opened.
    """
    field_descriptions = {}
    with open(path, 'r', encoding=encoding) as file:
        # Skip the header line. The None default keeps an empty file from
        # raising StopIteration; it simply produces an empty dict.
        next(file, None)
        for line in file:
            # Only the first two columns are needed; the rest are ignored.
            parts = line.strip().split('\t')[:2]
            if len(parts) == 2:
                field_descriptions[parts[0]] = parts[1]
    return field_descriptions

# Build the field -> description dictionary once at notebook level.
# build_prompt() reads this global, so this cell must run before it is called.
field_descriptions = extract_field_descriptions()
print(field_descriptions)

In [None]:
import json
import base64
from openai import OpenAI

# Shared API client used by analyze_image().
# NOTE(review): no credentials are passed here; presumably they are picked up
# from the environment populated by load_dotenv() — confirm.
client = OpenAI()

def encode_image(image_path):
    """Return the contents of a file as a base64-encoded text string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        str: The file's bytes, base64-encoded and decoded as UTF-8 text.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def build_prompt(descriptions=None):
    """Build the instruction prompt sent alongside each image.

    The prompt asks for values to be extracted exactly as they appear and
    embeds the field dictionary, serialized as JSON, as the output format.

    Args:
        descriptions: Mapping of field name to description to embed in the
            prompt. When omitted, the notebook-level ``field_descriptions``
            dictionary is used, so existing ``build_prompt()`` calls behave
            as before.

    Returns:
        str: The complete prompt text.
    """
    if descriptions is None:
        # Fall back to the notebook global created by the earlier cell.
        descriptions = field_descriptions
    return f'''Analyze the provided image. Extract the values exactly as they appear, and return them in the json format specified below. If the value is missing, set that element to the string "-".

    {json.dumps(descriptions)}
    '''

In [None]:
def analyze_image(image_path, prompt="", model="gpt-4-vision-preview"):
    """Send one image to the chat completions API and return the reply text.

    The image is base64-encoded and embedded as a data URL in a user message;
    ``prompt`` is sent as the system message. The request uses temperature 0
    and asks for a JSON-object response.

    Args:
        image_path: Path to the image file to analyze.
        prompt: System-message text with the extraction instructions.
        model: Name of the model to call.
            NOTE(review): the default model may not accept
            ``response_format={"type": "json_object"}`` — confirm, or always
            pass ``model`` explicitly as the notebook's loop does.

    Returns:
        The message content of the first choice, exactly as returned by the
        API. This is the raw JSON text, not a parsed object; callers are
        expected to run ``json.loads`` on it themselves.
    """
    # Local import keeps this cell self-contained.
    import mimetypes

    # Encode the image
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real type: the notebook sends both
    # .png and .jpg files, so a hard-coded image/jpeg would mislabel PNGs.
    # Fall back to image/jpeg when the type cannot be guessed from the name.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Get the response from the API
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": prompt
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        temperature=0,
        response_format={ "type": "json_object" }
    )

    # Return the raw JSON text of the first choice (parsing is left to the caller)
    return response.choices[0].message.content
    

In [None]:
# Experiment grid: every scenario is evaluated against every image variant.
scenarios = ["S1", "S2", "S3"]
image_variants = ["PDF.png", "neat_hd.jpg", "neat_sd.jpg", "sloppy_hd.jpg", "sloppy_sd.jpg"]
model = "gpt-4o-2024-08-06"
results = []

# The prompt does not depend on the image, so build it once instead of on
# every iteration.
extraction_prompt = build_prompt()

for scenario in scenarios:
    for variant in image_variants:
        # Single source of truth for the file location (images/<scenario>_<variant>).
        image_path = f"images/{scenario}_{variant}"

        # Derive the metadata columns from the variant name. The PDF export
        # is recorded as typed text at hd quality.
        if "PDF" in variant:
            writing = "typed"
            image_quality = "hd"
        else:
            writing = "neat" if "neat" in variant else "sloppy"
            image_quality = "hd" if "hd" in variant else "sd"

        result = analyze_image(image_path, extraction_prompt, model)

        # Metadata keys are prefixed with "_"; the extracted fields returned
        # by the model are merged into the same row.
        new_entry = {
            "_model": model,
            "_scenario": scenario,
            "_writing": writing,
            "_image_quality": image_quality,
        }
        new_entry.update(json.loads(result))
        print(new_entry)
        results.append(new_entry)
            


In [None]:
# Export the collected results to a tab-separated file: one row per entry in
# `results`, written to results.tsv without the DataFrame index column.
import pandas as pd
df = pd.DataFrame(results)
df.to_csv("results.tsv", sep="\t", index=False)


