In [None]:
import glob
import json
import os
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [None]:
# Candidate words; `index` picks the one analysed by the cells below.
WORDs = [
    "Acknowledgement",
    "Subdermatoglyphic",
    "tHyUiKaRbNqWeOpXcZvM",
]

index = 1
WORD = WORDs[index]

# configurations.json for the selected word; later cells read its
# image_path, word, circle_index, font_path and thickness columns.
config_path = f"./images_second_prompt//{WORD}/configurations.json"
gt_data = pd.read_json(config_path)

In [None]:
# Replace ./images/ with ./images_second_prompt/ in every stored image path
gt_data["image_path"] = [
    path.replace("./images/", "./images_second_prompt/")
    for path in gt_data["image_path"]
]

In [None]:
# Example image file stem: text_image_0a5fd2d1-d0ad-490d-a4d4-a01955a8de8c

# The model's answer is stored next to each image as
# "<image stem>-claude-3-sonnet-20240229-output.md".
gt_data["model-output-file"] = gt_data["image_path"].apply(
    lambda x: x.replace(".png", "") + "-claude-3-sonnet-20240229-output.md"
)

# Path.read_text() opens, reads and closes the file in one call, so no file
# handle is left open per row. Images without an output file get None and are
# filtered out by the dropna cell that follows.
gt_data["model-output-raw"] = gt_data["model-output-file"].apply(
    lambda x: Path(x).read_text() if os.path.exists(x) else None
)

In [None]:
# Keep only the rows for which a sonnet output file was found
has_model_output = gt_data["model-output-raw"].notna()
gt_data = gt_data[has_model_output]
gt_data

In [None]:
# Lowercase phrases that mean the model reported nothing being marked.
_NO_MARK_PHRASES = (
    "no letter being circled",
    "no individual letter",
    "no circles or other annotations",
    "no circled letter",
    "no letters being circled",
    "no character being highlighted",
    "no red oval or any other highlighting",
    "no red oval highlighting any character",
    "no red oval or any highlighted character",
    "no red oval or highlighting",
    "no character highlighted with a red oval",
    "the image does not contain any characters highlighted with a red oval",
    "the image does not contain any red oval highlighting a specific character",
    "there is no red oval or any character being highlighted",
    "there is no character highlighted with a red oval in this image",
    "there is no image provided",
    "the image simply shows",
    "the text appears to be a sequence of mixed uppercase and lowercase letters without any particular emphasis or highlighting",
    "the image does not contain any red oval or highlighted character",
    "the image simply shows a string of seemingly random capitalized letters",
    "no red oval or any highlighting",
    "there is no red oval or any highlighting in the provided image",
    "the image shows a string of uppercase letters displayed against a plain white background",
)

# Answer patterns, tried in this order; the LAST capture group of each one
# holds the reported character. Compiled once, case-insensitively.
_MARK_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"the (letter|number) being circled in the [^\.]* is ['\"]?([a-zA-Z0-9])['\"]?",
        r"['\"]([a-zA-Z0-9])['\"]",
        r"the character highlighted with a red oval in the word ['\"]?[^'\"]*['\"]? is the (lowercase|uppercase)? ?letter ['\"]?([a-zA-Z])['\"]?",
        r"the character being highlighted with a red oval in the word ['\"]?[^'\"]*['\"]? is the (lowercase|uppercase)? ?letter ['\"]?([a-zA-Z])['\"]?",
        r"the character being highlighted with a red oval in the (image|word) is the (number|numeral) ['\"]?([0-9])['\"]?",
        r"the character being highlighted with a red oval in the provided image is the (number|numeral) ['\"]?([0-9])['\"]? in the word ['\"]?[^'\"]*['\"]?",
        r"the character being highlighted with a red oval in the (given text|string|image) is the (uppercase|lowercase)? ?(letter|number) ['\"]?([a-zA-Z0-9])['\"]?",
        r"the character highlighted with a red oval in the (given text|string|image) is the (letter|number) ['\"]?([a-zA-Z0-9])['\"]?",
        r"the character being highlighted with a red oval is the (uppercase|lowercase)? ?(letter|number) ['\"]?([a-zA-Z0-9])['\"]?",
        r"the character highlighted with a red oval in the string of letters ['\"]?[^'\"]*['\"]? is the letter ['\"]?([a-zA-Z])['\"]?",
        r"the character being highlighted with a red oval in the given text string is ['\"]?([^'\"]+)['\"]?",
        r"the character being highlighted with a red oval in the (given text|string|image) ['\"]?([^'\"]+)['\"]? is the (uppercase|lowercase)? ?letter ['\"]?([a-zA-Z])['\"]?",
        r"the character highlighted with a red oval in the (given text|string|image) ['\"]?([^'\"]+)['\"]? is the (uppercase|lowercase)? ?letter ['\"]?([a-zA-Z])['\"]?",
        r"the character being highlighted with a red oval in the text string ['\"]?([^'\"]+)['\"]? is the (uppercase|lowercase)? ?letter ['\"]?([a-zA-Z])['\"]?",
        r"the character highlighted with a red oval in the given text ['\"]?([^'\"]+)['\"]? is the letter ['\"]?([a-zA-Z])['\"]?",
        r"the character highlighted with a red oval in the given text is the (uppercase|lowercase)? ?letter ['\"]?([a-zA-Z])['\"]?",
    )
)

# The only multi-character, non-numeric captures accepted as answers.
_MULTI_CHAR_ANSWERS = ("we", "zvm")


def extract_marked_text(text):
    """Extract the character a model response says is marked.

    Args:
        text: Raw model response (a string).

    Returns:
        * ``"none"`` when the response contains any phrase from
          ``_NO_MARK_PHRASES`` (compared case-insensitively);
        * otherwise the lowercased last capture group of the first pattern in
          ``_MARK_PATTERNS`` whose capture is one character long, all digits,
          or one of ``_MULTI_CHAR_ANSWERS``;
        * otherwise ``"marker_not_found"`` followed by the original text, so
          unparsed responses stay visible in value counts.
    """
    # Lowercase once instead of once per phrase.
    lowered = text.lower()
    if any(phrase in lowered for phrase in _NO_MARK_PHRASES):
        return "none"

    for pattern in _MARK_PATTERNS:
        match = pattern.search(text)
        if not match:
            continue
        result = match.group(match.lastindex).lower()
        # A longer non-numeric capture means the pattern swallowed a phrase
        # rather than a character; skip to the next pattern unless it is one
        # of the whitelisted substrings.
        if len(result) > 1 and not result.isdigit():
            if result in _MULTI_CHAR_ANSWERS:
                return result
            continue
        return result

    return "marker_not_found" + text

In [None]:
# Parse each raw model response into the character it reports as marked
gt_data["predicted"] = gt_data["model-output-raw"].map(extract_marked_text)

In [None]:
# Lift pandas' display truncation so complete cell contents and all rows show
for option_name in ("display.max_colwidth", "display.max_rows"):
    pd.set_option(option_name, None)


gt_data["predicted"].value_counts()

In [None]:
# Work on an independent deep copy so the scoring columns added below do not touch gt_data
cleaned_data = gt_data.copy(deep=True)

In [None]:
# Ground truth: the character of `word` at position `circle_index`, lowercased
cleaned_data["gt"] = [
    word[position].lower()
    for word, position in zip(cleaned_data["word"], cleaned_data["circle_index"])
]

In [None]:
# A prediction is correct when it equals the ground-truth character exactly
cleaned_data["is_prediction_correct"] = cleaned_data["gt"].eq(
    cleaned_data["predicted"]
)
# Mean of the boolean column is the fraction of correct predictions
accuracy = cleaned_data["is_prediction_correct"].mean()
print(f"Overall Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Number of scored rows
cleaned_data.shape[0]

In [None]:
# Rows where the extracted prediction differs from the ground truth
errors = cleaned_data[~cleaned_data["is_prediction_correct"]]

# Count each (predicted, gt) pair, most frequent first
pair_counts = errors.groupby(["predicted", "gt"]).size()
common_errors = pair_counts.reset_index(name="count").sort_values(
    by="count", ascending=False
)
print(common_errors.head(10))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the seaborn theme BEFORE the axes are created: style settings only
# affect axes created afterwards, so setting it later made the first run of
# this cell look different from a re-run.
sns.set(style="whitegrid", palette="muted")

# 3x2 grid; five panels are used, the sixth is hidden at the end
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(18, 12))
fig.suptitle(f"Detailed Analysis of Model Predictions -- {WORD}", fontsize=16)

# Plot Accuracy by Font Path
sns.barplot(
    ax=axes[0, 0],
    x="is_prediction_correct",
    y="font_path",
    data=cleaned_data,
    estimator=lambda x: x.mean(),
    palette="Blues_d",
)
axes[0, 0].set_title("Accuracy by Font Path")
axes[0, 0].set_xlabel("Accuracy")
axes[0, 0].set_ylabel("Font Path")

# Plot Accuracy by Circle Index
sns.barplot(
    ax=axes[0, 1],
    x="circle_index",
    y="is_prediction_correct",
    data=cleaned_data,
    estimator=lambda x: x.mean(),
    palette="Greens_d",
)
axes[0, 1].set_title("Accuracy by Circle Index")
axes[0, 1].set_xlabel("Circle Index")
axes[0, 1].set_ylabel("Accuracy")
# Label each bar with the character of the analysed word at that index.
# Derived from WORD and from the indices actually present in the data, so the
# labels stay aligned for any selected word and when some indices are missing.
plotted_indices = sorted(cleaned_data["circle_index"].unique())
axes[0, 1].set_xticks(range(len(plotted_indices)))
axes[0, 1].set_xticklabels([WORD[i] for i in plotted_indices])


# Plot Distribution of Incorrect Predictions
sns.countplot(
    ax=axes[1, 0],
    x="predicted",
    data=errors,
    order=errors["predicted"].value_counts().index,
    palette="Reds_d",
)
axes[1, 0].set_title("Distribution of Incorrect Predictions")
axes[1, 0].set_xlabel("Predicted Characters")
axes[1, 0].set_ylabel("Count")

# Plot Distribution of Ground Truth for Incorrect Predictions
sns.countplot(
    ax=axes[1, 1],
    x="gt",
    data=errors,
    order=errors["gt"].value_counts().index,
    palette="Purples_d",
)
axes[1, 1].set_title("Distribution of Ground Truth for Incorrect Predictions")
axes[1, 1].set_xlabel("Ground Truth Characters")
axes[1, 1].set_ylabel("Count")

# Plot Accuracy by Thickness
sns.lineplot(
    ax=axes[2, 0],
    x="thickness",
    y="is_prediction_correct",
    data=cleaned_data,
    estimator=lambda x: x.mean(),
    marker="o",
    color="deepskyblue",
)
axes[2, 0].set_title("Accuracy by Thickness")
axes[2, 0].set_xlabel("Thickness")
axes[2, 0].set_ylabel("Accuracy")

# Nothing is drawn in the last grid slot; hide its empty frame
axes[2, 1].set_visible(False)

plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust subplots to fit into figure area.
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Extract ground truth and predictions
ground_truth = cleaned_data["gt"]
predictions = cleaned_data["predicted"]

# Label order: the distinct characters of the analysed word in reading order,
# followed by any other label that occurs in the data.
#  * derived from WORD instead of a hard-coded word, so it follows the selection;
#  * lowercased, because both `gt` and `predicted` hold lowercase values;
#  * de-duplicated (dict.fromkeys keeps first-seen order), because a repeated
#    letter would otherwise appear as several identical labels in the matrix.
desired_order = list(dict.fromkeys(WORD.lower()))
all_labels = np.unique(np.concatenate((ground_truth, predictions)))
extra_labels = [label for label in all_labels if label not in desired_order]
final_labels = desired_order + extra_labels

# Create the confusion matrix with the specified label order
conf_matrix = confusion_matrix(ground_truth, predictions, labels=final_labels)

# Plot the confusion matrix as an annotated heatmap
plt.figure(figsize=(14, 12))
sns.set(font_scale=1.4)  # Increase font size for readability
heatmap = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=final_labels,
    yticklabels=final_labels,
    cbar_kws={"label": "Frequency"},
)
plt.title(f"Confusion Matrix -- {WORD}", fontsize=18, fontweight="bold")
plt.xlabel("Predicted Label", fontsize=14)
plt.ylabel("True Label", fontsize=14)
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.show()

# Export

In [None]:
import pandas as pd
import json
import os
import re

# Define the words
WORDs = [
    "Acknowledgement",
    "Subdermatoglyphic",
    "tHyUiKaRbNqWeOpXcZvM",
]

# One scored DataFrame per word; concatenated once after the loop
all_data_frames = []

# Loop through each word. Loop-local names (`word`, `word_data`) are used so
# this cell no longer overwrites the notebook-level WORD / gt_data that the
# analysis cells above read when they are re-run.
for word in WORDs:
    # NOTE(review): the single-word cell near the top of the notebook loads
    # configurations.json from ./images_second_prompt/, while this loop reads
    # it from ./images/ — confirm the two files are meant to be equivalent.
    word_data = pd.read_json(f"./images/{word}/configurations.json")
    # Replace ./images/ with ./images_second_prompt/
    word_data["image_path"] = word_data["image_path"].apply(
        lambda x: x.replace("./images/", "./images_second_prompt/")
    )
    # Generate model output file paths and read the content if the file exists.
    # Path.read_text() closes the file itself, so no handle is leaked per row.
    word_data["model-output-file"] = word_data["image_path"].apply(
        lambda x: x.replace(".png", "") + "-claude-3-sonnet-20240229-output.md"
    )
    word_data["model-output-raw"] = word_data["model-output-file"].apply(
        lambda x: Path(x).read_text() if os.path.exists(x) else None
    )

    # Drop rows with missing sonnet output. The explicit copy makes the column
    # assignments below operate on an independent frame, avoiding pandas'
    # SettingWithCopyWarning.
    word_data = word_data.dropna(subset=["model-output-raw"]).copy()

    word_data["predicted"] = word_data["model-output-raw"].apply(extract_marked_text)
    print(word_data["predicted"].value_counts())
    # Calculate ground truth and correctness
    word_data["gt"] = word_data.apply(
        lambda row: row["word"][row["circle_index"]].lower(), axis=1
    )
    word_data["is_prediction_correct"] = word_data["gt"] == word_data["predicted"]
    word_data["word_label"] = word  # Add a column to identify the word

    # Append to the list
    all_data_frames.append(word_data)

# Concatenate all DataFrames into one
final_data_frame = pd.concat(all_data_frames, ignore_index=True)

In [None]:
# Tag every row with the model name; the scalar is broadcast to all rows
final_data_frame["Model"] = "Sonnet"

In [None]:
# to_pickle does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs("./data", exist_ok=True)
final_data_frame.to_pickle("./data/Sonnet-2.pkl")