In [65]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob
import os

In [66]:
# Ground-truth configuration for the generated images (plain string: the path
# has no placeholders, so no f-string is needed).
gt_data = pd.read_json("./images_second_prompt/configurations.json")

In [67]:
# Flip the frame so each image becomes a row, then turn the former index into
# a regular column named "image_path".
gt_data = gt_data.T.reset_index().rename(columns={"index": "image_path"})
gt_data

Unnamed: 0,image_path,num_points,num_intersections,line_colors,line_thickness
0,image_0_thickness_2.png,3,1,"[red, blue]",2
1,image_0_thickness_3.png,3,1,"[red, blue]",3
2,image_0_thickness_4.png,3,1,"[red, blue]",4
3,image_1_thickness_2.png,3,2,"[red, blue]",2
4,image_1_thickness_3.png,3,2,"[red, blue]",3
...,...,...,...,...,...
145,image_48_thickness_3.png,3,2,"[red, blue]",3
146,image_48_thickness_4.png,3,2,"[red, blue]",4
147,image_49_thickness_2.png,3,0,"[red, blue]",2
148,image_49_thickness_3.png,3,0,"[red, blue]",3


In [68]:
def _read_text_if_exists(path):
    """Return the text content of ``path``, or None when the file does not exist."""
    if not os.path.exists(path):
        return None
    # The context manager closes the handle as soon as the text has been read,
    # instead of leaving one open file object per image to the garbage collector.
    with open(path, "r") as handle:
        return handle.read()


# The response file path is derived from the image name with ".png" removed.
gt_data["model-output-file"] = gt_data["image_path"].apply(
    lambda x: "./images_second_prompt/"
    + x.replace(".png", "")
    + "-claude-3-sonnet-20240229-output.md"
)

# Rows whose response file is missing get None instead of raising.
gt_data["model-output-raw"] = gt_data["model-output-file"].apply(_read_text_if_exists)

In [69]:
import re


# Phrases that state outright that the lines never meet.
# ("do not intersect" is already covered by "not intersect".)
_ZERO_PHRASES = ("never cross each other", "not intersect", "no intersection")

# Whole-word tokens and the count each one stands for. The bare negation "not"
# is treated as zero.
_NUMBER_WORDS = {
    "zero": "0",
    "one": "1",
    "two": "2",
    "three": "3",
    "four": "4",
    "five": "5",
    "six": "6",
    "seven": "7",
    "eight": "8",
    "nine": "9",
    "ten": "10",
    "once": "1",
    "twice": "2",
    "not": "0",
}

# First whole-word number token or run of digits.
_NUMBER_PATTERN = re.compile(r"\b(" + "|".join(_NUMBER_WORDS) + r"|\d+)\b")

# Substring fallbacks, tried in order, for text where no whole-word token was
# found (e.g. the keyword is glued to other characters).
# NOTE(review): plain substring matching also fires inside unrelated words
# ("another" contains "not") - confirm this leniency is intended.
_FALLBACK_PHRASES = (
    ("intersect once", "1"),
    ("intersect only once", "1"),
    ("intersect twice", "2"),
    ("intersect three times", "3"),
    ("once", "1"),
    ("twice", "2"),
    ("three times", "3"),
    ("not", "0"),
)


def extract_marked_text(text):
    """Extract the stated number of intersections from a model response.

    The response is lower-cased and checked in this order:

    1. the phrase "there is a single crossing point" -> "1";
    2. an explicit no-intersection phrase -> "0";
    3. the first whole-word number word, "not", or digit run -> its digit string;
    4. substring fallbacks ("once", "twice", "three times", "not").

    NOTE(review): in step 3 the first number-like token wins, so a response
    such as "the two lines cross at one point" yields "2" - verify that this
    matches the responses being scored.

    Args:
        text: Raw model response. Non-string values (e.g. None for a missing
            response file) are accepted.

    Returns:
        The count as a digit string. If nothing is recognised, returns
        "number_not_found" followed by the lower-cased text; for non-string
        input returns just "number_not_found".
    """
    # Missing responses arrive as None; report them with the same sentinel
    # prefix instead of crashing on .lower().
    if not isinstance(text, str):
        return "number_not_found"

    text = text.lower()

    if "there is a single crossing point" in text:
        return "1"

    if any(phrase in text for phrase in _ZERO_PHRASES):
        return "0"

    match = _NUMBER_PATTERN.search(text)
    if match:
        token = match.group(0)
        # Digit runs are not in the mapping and are returned unchanged.
        return _NUMBER_WORDS.get(token, token)

    for phrase, value in _FALLBACK_PHRASES:
        if phrase in text:
            return value

    return "number_not_found" + text


# Run the extractor over every raw response, element by element.
gt_data["predicted"] = gt_data["model-output-raw"].map(extract_marked_text)

In [70]:
# Spot-check the raw model responses that were loaded for each image.
gt_data["model-output-raw"]

0      The blue and red lines intersect at one point ...
1      The blue and red lines intersect at one point ...
2      The blue and red lines intersect once in the i...
3      The blue and red lines intersect once in the i...
4      In the given image, the blue and red lines int...
                             ...                        
145    The blue and red lines intersect at one point ...
146    The blue line and the red line intersect at on...
147    The red line and the blue line segments do not...
148    The red line and the blue line in the image do...
149    The blue and red lines in the image do not int...
Name: model-output-raw, Length: 150, dtype: object

In [71]:
# Tally the extracted answers; any entry starting with "number_not_found"
# would mean a response could not be parsed.
gt_data["predicted"].value_counts()

predicted
1    94
0    27
2    20
4     9
Name: count, dtype: int64

In [72]:
# Work on an independent copy so the casts and renames below leave gt_data as is.
cleaned_data = gt_data.copy(deep=True)

In [73]:
# Cast ground truth and prediction to integers so they compare as numbers.
for column in ("num_intersections", "predicted"):
    cleaned_data[column] = cleaned_data[column].astype(int)

# A row is correct when the extracted count equals the ground-truth count.
cleaned_data["is_correct"] = cleaned_data["num_intersections"].eq(
    cleaned_data["predicted"]
)

# The mean of the boolean column is the fraction of correct rows.
accuracy = cleaned_data["is_correct"].mean()
print(f"Overall Accuracy: {accuracy * 100:.2f}%")

Overall Accuracy: 64.00%


In [74]:
cleaned_data["line_thickness"] = cleaned_data["line_thickness"].astype(int)

# Accuracy per thickness is the mean of the boolean is_correct column within
# each group. Selecting the column before aggregating avoids
# DataFrameGroupBy.apply, which emits a deprecation warning because it also
# operates on the grouping column.
accuracy_by_thickness = cleaned_data.groupby("line_thickness")["is_correct"].mean()

# Keep the Series unnamed so the LaTeX export keeps its existing column header.
accuracy_by_thickness = accuracy_by_thickness.rename(None)

# Round the fraction to two decimals, then express it as a percentage.
accuracy_by_thickness = 100 * accuracy_by_thickness.round(2)
accuracy_by_thickness

  accuracy_by_thickness = cleaned_data.groupby("line_thickness").apply(


line_thickness
2    58.0
3    66.0
4    68.0
dtype: float64

In [75]:
# Render the per-thickness accuracy as a LaTeX tabular and print its source.
latex_table = accuracy_by_thickness.to_latex()
print(latex_table)

\begin{tabular}{lr}
\toprule
 & 0 \\
line_thickness &  \\
\midrule
2 & 58.000000 \\
3 & 66.000000 \\
4 & 68.000000 \\
\bottomrule
\end{tabular}



In [76]:
# # show full data in dataframe
# pd.set_option("display.max_rows", None)
# pd.set_option("display.max_columns", None)
# # show full content of each cell
# pd.set_option("display.max_colwidth", None)

# cleaned_data

# Export

In [77]:
# Inspect the scored frame, including the per-row is_correct flag.
cleaned_data

Unnamed: 0,image_path,num_points,num_intersections,line_colors,line_thickness,model-output-file,model-output-raw,predicted,is_correct
0,image_0_thickness_2.png,3,1,"[red, blue]",2,./images_second_prompt/image_0_thickness_2-cla...,The blue and red lines intersect at one point ...,1,True
1,image_0_thickness_3.png,3,1,"[red, blue]",3,./images_second_prompt/image_0_thickness_3-cla...,The blue and red lines intersect at one point ...,1,True
2,image_0_thickness_4.png,3,1,"[red, blue]",4,./images_second_prompt/image_0_thickness_4-cla...,The blue and red lines intersect once in the i...,1,True
3,image_1_thickness_2.png,3,2,"[red, blue]",2,./images_second_prompt/image_1_thickness_2-cla...,The blue and red lines intersect once in the i...,1,False
4,image_1_thickness_3.png,3,2,"[red, blue]",3,./images_second_prompt/image_1_thickness_3-cla...,"In the given image, the blue and red lines int...",1,False
...,...,...,...,...,...,...,...,...,...
145,image_48_thickness_3.png,3,2,"[red, blue]",3,./images_second_prompt/image_48_thickness_3-cl...,The blue and red lines intersect at one point ...,1,False
146,image_48_thickness_4.png,3,2,"[red, blue]",4,./images_second_prompt/image_48_thickness_4-cl...,The blue line and the red line intersect at on...,1,False
147,image_49_thickness_2.png,3,0,"[red, blue]",2,./images_second_prompt/image_49_thickness_2-cl...,The red line and the blue line segments do not...,0,True
148,image_49_thickness_3.png,3,0,"[red, blue]",3,./images_second_prompt/image_49_thickness_3-cl...,The red line and the blue line in the image do...,0,True


In [78]:
# Drop the helper path column and give the remaining columns their export names.
# Rebinding instead of inplace=True, together with errors="ignore", keeps this
# cell safe to re-run: a second run finds nothing left to drop or rename.
cleaned_data = cleaned_data.drop(
    columns=["model-output-file"], errors="ignore"
).rename(
    columns={
        "model-output-raw": "model_output_raw",
        "predicted": "extracted_number",
    }
)

In [79]:
# Tag every row with the model name; a scalar is broadcast to all rows.
cleaned_data["Model"] = "Sonnet"

# to_pickle does not create missing directories, so make sure ./data exists.
os.makedirs("./data", exist_ok=True)
cleaned_data.to_pickle("./data/sonnet2.pkl")

In [80]:
# Final look at the frame as exported: renamed columns plus the Model tag.
cleaned_data

Unnamed: 0,image_path,num_points,num_intersections,line_colors,line_thickness,model_output_raw,extracted_number,is_correct,Model
0,image_0_thickness_2.png,3,1,"[red, blue]",2,The blue and red lines intersect at one point ...,1,True,Sonnet
1,image_0_thickness_3.png,3,1,"[red, blue]",3,The blue and red lines intersect at one point ...,1,True,Sonnet
2,image_0_thickness_4.png,3,1,"[red, blue]",4,The blue and red lines intersect once in the i...,1,True,Sonnet
3,image_1_thickness_2.png,3,2,"[red, blue]",2,The blue and red lines intersect once in the i...,1,False,Sonnet
4,image_1_thickness_3.png,3,2,"[red, blue]",3,"In the given image, the blue and red lines int...",1,False,Sonnet
...,...,...,...,...,...,...,...,...,...
145,image_48_thickness_3.png,3,2,"[red, blue]",3,The blue and red lines intersect at one point ...,1,False,Sonnet
146,image_48_thickness_4.png,3,2,"[red, blue]",4,The blue line and the red line intersect at on...,1,False,Sonnet
147,image_49_thickness_2.png,3,0,"[red, blue]",2,The red line and the blue line segments do not...,0,True,Sonnet
148,image_49_thickness_3.png,3,0,"[red, blue]",3,The red line and the blue line in the image do...,0,True,Sonnet
