In [41]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob
import os

In [42]:
# Load the image configuration table; the trailing expression shows its row count.
gt_data = pd.read_json("./images/configurations.json")
len(gt_data)

120

In [43]:
# Build the path of the model answer file that belongs to each image:
#   ./images/<image name without .png>-claude-3-5-sonnet-20240620-output.md
gt_data["model-output-file"] = gt_data["image_name"].apply(
    lambda x: "./images/"
    + x.replace(".png", "")
    + "-claude-3-5-sonnet-20240620-output.md"
)


def _read_text_or_none(path):
    """Return the full text of `path`, or None when the file does not exist."""
    if not os.path.exists(path):
        return None
    # The context manager closes the handle as soon as the text is read,
    # instead of leaving one open file object per row to the garbage collector.
    with open(path, "r") as handle:
        return handle.read()


# Read the content of each answer file into a new column; missing files become None
gt_data["model-output-raw"] = gt_data["model-output-file"].apply(_read_text_or_none)

In [44]:
# Show the table, which now carries the model-output-file and model-output-raw columns
gt_data

Unnamed: 0,image_name,depth,center,initial_size,reduction_factor,line_thickness,padding,squares,model-output-file,model-output-raw
0,nested_squares_depth_2_image_1_thickness_2.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,2,0.75,"[{'center': [-2.2395532434656262, -2.245365217...",./images/nested_squares_depth_2_image_1_thickn...,The image contains 2 squares in total. There i...
1,nested_squares_depth_2_image_1_thickness_3.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,3,0.75,"[{'center': [-2.2395532434656262, -2.245365217...",./images/nested_squares_depth_2_image_1_thickn...,The image contains a total of 2 squares.\n\nTh...
2,nested_squares_depth_2_image_1_thickness_4.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,4,0.75,"[{'center': [-2.2395532434656262, -2.245365217...",./images/nested_squares_depth_2_image_1_thickn...,The image contains 2 squares in total:\n\n1. A...
3,nested_squares_depth_2_image_2_thickness_2.png,2,"[4.522003363881829, 1.575293928741691]",12.158126,0.75,2,0.75,"[{'center': [4.522003363881829, 1.575293928741...",./images/nested_squares_depth_2_image_2_thickn...,The image contains 2 squares in total. There i...
4,nested_squares_depth_2_image_2_thickness_3.png,2,"[4.522003363881829, 1.575293928741691]",12.158126,0.75,3,0.75,"[{'center': [4.522003363881829, 1.575293928741...",./images/nested_squares_depth_2_image_2_thickn...,The image contains 2 squares in total. There i...
...,...,...,...,...,...,...,...,...,...,...
115,nested_squares_depth_5_image_9_thickness_3.png,5,"[4.67396043765117, -3.083874562085932]",10.089593,0.75,3,0.75,"[{'center': [4.67396043765117, -3.083874562085...",./images/nested_squares_depth_5_image_9_thickn...,The image contains a total of 5 squares nested...
116,nested_squares_depth_5_image_9_thickness_4.png,5,"[4.67396043765117, -3.083874562085932]",10.089593,0.75,4,0.75,"[{'center': [4.67396043765117, -3.083874562085...",./images/nested_squares_depth_5_image_9_thickn...,The image shows a series of nested squares. To...
117,nested_squares_depth_5_image_10_thickness_2.png,5,"[1.960921482573098, 0.529244414005689]",8.411051,0.75,2,0.75,"[{'center': [1.960921482573098, 0.529244414005...",./images/nested_squares_depth_5_image_10_thick...,The image contains a total of 4 squares. There...
118,nested_squares_depth_5_image_10_thickness_3.png,5,"[1.960921482573098, 0.529244414005689]",8.411051,0.75,3,0.75,"[{'center': [1.960921482573098, 0.529244414005...",./images/nested_squares_depth_5_image_10_thick...,The image contains a total of 5 squares:\n\n1....


In [45]:
# Drop rows whose model answer file was missing (model-output-raw is None for those)
gt_data = gt_data.dropna(subset=["model-output-raw"])

In [46]:
# Row count after dropping rows without a model answer
len(gt_data)

120

In [47]:
import re


def extract_marked_text(text):
    """Extract the first number mentioned in a piece of text.

    Digits take priority: if the text contains any standalone run of digits,
    the first such run is returned as an int. Only when there are no digits
    does the function look for a spelled-out number ("zero" through "ten"),
    returning the first whitespace-separated token that spells one. Case and
    any punctuation or markup glued to the token are ignored, so "five.",
    "Two," and "**three**" are all recognised.

    Args:
        text: The text to search.

    Returns:
        The extracted number as an int, or the string "number_not_found" when
        the text contains neither digits nor a known number word.
    """
    # Map textual numbers to integer values
    number_mapping = {
        "zero": 0,
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
    }

    # The first standalone digit sequence wins, wherever it appears in the text
    digit_match = re.search(r"\b\d+\b", text)
    if digit_match:
        return int(digit_match.group())

    # No digits found: fall back to spelled-out numbers. Leading and trailing
    # non-alphanumeric characters are stripped from each token first, because a
    # raw token such as "five." or "**five**" would never equal a mapping key.
    for word in text.split():
        token = re.sub(r"^[\W_]+|[\W_]+$", "", word).lower()
        if token in number_mapping:
            return number_mapping[token]

    # If no number is found, return a default value indicating no number was found
    return "number_not_found"


# Run the extractor over every raw model answer and store the result per row
gt_data["predicted"] = gt_data["model-output-raw"].apply(extract_marked_text)

In [48]:
# Peek at the raw model answers
gt_data["model-output-raw"]

0      The image contains 2 squares in total. There i...
1      The image contains a total of 2 squares.\n\nTh...
2      The image contains 2 squares in total:\n\n1. A...
3      The image contains 2 squares in total. There i...
4      The image contains 2 squares in total. There i...
                             ...                        
115    The image contains a total of 5 squares nested...
116    The image shows a series of nested squares. To...
117    The image contains a total of 4 squares. There...
118    The image contains a total of 5 squares:\n\n1....
119    The image contains a total of 5 squares:\n\n1....
Name: model-output-raw, Length: 120, dtype: object

In [49]:
# How often each extracted value occurs
gt_data["predicted"].value_counts()

predicted
4    40
2    30
3    30
5    15
1     5
Name: count, dtype: int64

In [50]:
# Continue on an independent copy of gt_data that no longer has the "squares" column
cleaned_data = gt_data.copy().drop(columns=["squares"])

In [51]:
# Make both columns numeric before comparing them. `predicted` goes through
# pd.to_numeric rather than astype(int): the extractor returns the string
# "number_not_found" when it cannot parse an answer, and astype(int) would raise
# on that value. With errors="coerce" such answers become NaN instead.
cleaned_data["depth"] = cleaned_data["depth"].astype(int)
cleaned_data["predicted"] = pd.to_numeric(cleaned_data["predicted"], errors="coerce")

# NaN never equals a depth, so unparsed answers are scored as incorrect
cleaned_data["is_correct"] = cleaned_data["depth"] == cleaned_data["predicted"]
# Calculate accuracy
accuracy = cleaned_data["is_correct"].mean()
print(f"Overall Accuracy: {accuracy * 100:.2f}%")

Overall Accuracy: 87.50%


In [52]:
# Number of rows that went into the accuracy figure
len(cleaned_data)

120

In [53]:
# Accuracy per line thickness; uses the 'line_thickness' and 'is_correct' columns of 'cleaned_data'

# Convert line_thickness to an integer type before grouping on it
cleaned_data["line_thickness"] = cleaned_data["line_thickness"].astype(int)

# Share of correct answers within each thickness. Selecting the column before
# aggregating replaces groupby(...).apply on the whole frame, which makes
# pandas emit a deprecation warning for this cell. rename(None) keeps the
# result unnamed, as the apply-based version was.
accuracy_by_thickness = (
    cleaned_data.groupby("line_thickness")["is_correct"].mean().rename(None)
)

# The fraction is rounded to two decimals first and only then scaled to percent
accuracy_by_thickness = 100 * accuracy_by_thickness.round(2)
accuracy_by_thickness

  accuracy_by_thickness = cleaned_data.groupby("line_thickness").apply(


line_thickness
2    88.0
3    88.0
4    88.0
dtype: float64

In [54]:
# Convert depth to an integer type before grouping on it
cleaned_data["depth"] = cleaned_data["depth"].astype(int)

# Share of correct answers within each depth. Selecting the column before
# aggregating replaces groupby(...).apply on the whole frame, which makes
# pandas emit a deprecation warning for this cell. rename(None) keeps the
# result unnamed, as the apply-based version was.
accuracy_by_depth = cleaned_data.groupby("depth")["is_correct"].mean().rename(None)

# The fraction is rounded to two decimals first and only then scaled to percent
accuracy_by_depth = 100 * accuracy_by_depth.round(2)

# This cell used to store its result under `accuracy_by_thickness`; keep that
# binding so anything that reads the old name afterwards sees the same values.
accuracy_by_thickness = accuracy_by_depth
accuracy_by_depth

  accuracy_by_thickness = cleaned_data.groupby("depth").apply(


depth
2    100.0
3    100.0
4    100.0
5     50.0
dtype: float64

# Export

In [55]:
# Show the evaluated table, including the predicted and is_correct columns
cleaned_data

Unnamed: 0,image_name,depth,center,initial_size,reduction_factor,line_thickness,padding,model-output-file,model-output-raw,predicted,is_correct
0,nested_squares_depth_2_image_1_thickness_2.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,2,0.75,./images/nested_squares_depth_2_image_1_thickn...,The image contains 2 squares in total. There i...,2,True
1,nested_squares_depth_2_image_1_thickness_3.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,3,0.75,./images/nested_squares_depth_2_image_1_thickn...,The image contains a total of 2 squares.\n\nTh...,2,True
2,nested_squares_depth_2_image_1_thickness_4.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,4,0.75,./images/nested_squares_depth_2_image_1_thickn...,The image contains 2 squares in total:\n\n1. A...,2,True
3,nested_squares_depth_2_image_2_thickness_2.png,2,"[4.522003363881829, 1.575293928741691]",12.158126,0.75,2,0.75,./images/nested_squares_depth_2_image_2_thickn...,The image contains 2 squares in total. There i...,2,True
4,nested_squares_depth_2_image_2_thickness_3.png,2,"[4.522003363881829, 1.575293928741691]",12.158126,0.75,3,0.75,./images/nested_squares_depth_2_image_2_thickn...,The image contains 2 squares in total. There i...,2,True
...,...,...,...,...,...,...,...,...,...,...,...
115,nested_squares_depth_5_image_9_thickness_3.png,5,"[4.67396043765117, -3.083874562085932]",10.089593,0.75,3,0.75,./images/nested_squares_depth_5_image_9_thickn...,The image contains a total of 5 squares nested...,5,True
116,nested_squares_depth_5_image_9_thickness_4.png,5,"[4.67396043765117, -3.083874562085932]",10.089593,0.75,4,0.75,./images/nested_squares_depth_5_image_9_thickn...,The image shows a series of nested squares. To...,1,False
117,nested_squares_depth_5_image_10_thickness_2.png,5,"[1.960921482573098, 0.529244414005689]",8.411051,0.75,2,0.75,./images/nested_squares_depth_5_image_10_thick...,The image contains a total of 4 squares. There...,4,False
118,nested_squares_depth_5_image_10_thickness_3.png,5,"[1.960921482573098, 0.529244414005689]",8.411051,0.75,3,0.75,./images/nested_squares_depth_5_image_10_thick...,The image contains a total of 5 squares:\n\n1....,5,True


In [56]:
# Remove the file-path column, then rename the two model columns in a single call
cleaned_data.drop(columns=["model-output-file"], inplace=True)
cleaned_data.rename(
    columns={
        "model-output-raw": "model_output_raw",
        "predicted": "extracted_number",
    },
    inplace=True,
)

In [57]:
# Tag every row with the model name (a scalar is broadcast to all rows), then pickle the table
cleaned_data["Model"] = "Sonnet-3.5"
cleaned_data.to_pickle("./data/sonnet-3.5.pkl")

In [58]:
# Show the table as it was written to the pickle file
cleaned_data

Unnamed: 0,image_name,depth,center,initial_size,reduction_factor,line_thickness,padding,model_output_raw,extracted_number,is_correct,Model
0,nested_squares_depth_2_image_1_thickness_2.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,2,0.75,The image contains 2 squares in total. There i...,2,True,Sonnet-3.5
1,nested_squares_depth_2_image_1_thickness_3.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,3,0.75,The image contains a total of 2 squares.\n\nTh...,2,True,Sonnet-3.5
2,nested_squares_depth_2_image_1_thickness_4.png,2,"[-2.2395532434656262, -2.245365217594155]",17.574157,0.75,4,0.75,The image contains 2 squares in total:\n\n1. A...,2,True,Sonnet-3.5
3,nested_squares_depth_2_image_2_thickness_2.png,2,"[4.522003363881829, 1.575293928741691]",12.158126,0.75,2,0.75,The image contains 2 squares in total. There i...,2,True,Sonnet-3.5
4,nested_squares_depth_2_image_2_thickness_3.png,2,"[4.522003363881829, 1.575293928741691]",12.158126,0.75,3,0.75,The image contains 2 squares in total. There i...,2,True,Sonnet-3.5
...,...,...,...,...,...,...,...,...,...,...,...
115,nested_squares_depth_5_image_9_thickness_3.png,5,"[4.67396043765117, -3.083874562085932]",10.089593,0.75,3,0.75,The image contains a total of 5 squares nested...,5,True,Sonnet-3.5
116,nested_squares_depth_5_image_9_thickness_4.png,5,"[4.67396043765117, -3.083874562085932]",10.089593,0.75,4,0.75,The image shows a series of nested squares. To...,1,False,Sonnet-3.5
117,nested_squares_depth_5_image_10_thickness_2.png,5,"[1.960921482573098, 0.529244414005689]",8.411051,0.75,2,0.75,The image contains a total of 4 squares. There...,4,False,Sonnet-3.5
118,nested_squares_depth_5_image_10_thickness_3.png,5,"[1.960921482573098, 0.529244414005689]",8.411051,0.75,3,0.75,The image contains a total of 5 squares:\n\n1....,5,True,Sonnet-3.5
