In [1]:
import pandas as pd
import matplotlib.pyplot as plt

from ast import literal_eval

In [2]:
def plot_image(img_urls, labels, preds):
    """Show a single row of images, each titled with its label and prediction.

    Args:
        img_urls: Sized iterable of image paths; each image is read from
            ``"../" + url``.
        labels: Iterable of reference labels aligned with ``img_urls``.
        preds: Iterable of predicted labels aligned with ``img_urls``.

    Raises:
        ValueError: If ``img_urls`` is empty.
    """
    n_images = len(img_urls)
    if n_images == 0:
        raise ValueError("plot_image needs at least one image to draw")
    # squeeze=False keeps the axes container 2-D even when there is a single
    # image, so the same indexing works for every count. The width scales at
    # 5 inches per image, which gives (20, 5) for four images.
    _, axes = plt.subplots(1, n_images, figsize=(5 * n_images, 5), squeeze=False)
    for ax, url, label, pred in zip(axes[0], img_urls, labels, preds):
        image = plt.imread("../"+url)
        ax.imshow(image)
        ax.set_title(f"Label: {label}\nPred: {pred}")
        ax.axis("off")

def plot_image_title(img_url, explanation, title):
    """Display one image with an article title and an explanation caption.

    The image is read from ``"../" + img_url``. ``explanation`` is drawn as
    small, wrapped, horizontally centred figure text at figure coordinates
    (0.5, 0.01); ``title`` is shown as ``Article Title: <title>``.
    """
    image_path = "../" + img_url
    plt.imshow(plt.imread(image_path))
    caption_style = dict(wrap=True, horizontalalignment='center', fontsize=7)
    plt.figtext(0.5, 0.01, explanation, **caption_style)
    plt.title(f"Article Title: {title}")
    plt.axis("off")

### Frame improvements

In [3]:
# Validation set from data/human; the first CSV column becomes the index.
human_df = pd.read_csv("../data/human/val_set_full.csv", index_col=[0])

# Annotation records (one JSON object per line), indexed by their `uuid` field.
anno_df = pd.read_json(
    "../data/annotated/vision/multipleframes_val_set_pixtral_anno.jsonl",
    lines=True,
    orient="records",
).set_index("uuid")

In [4]:
# Row counts of the two frames: annotations first, then the human set.
anno_df.shape[0], human_df.shape[0]

(300, 300)

In [5]:
label_cols = ['mini_multiple_frames_frames-list']#, 'normal_multiple_frames_frame-list']

# Turn every list of labels into a set of lower-cased labels. Cells that are
# not lists (e.g. missing values) are left untouched. The whole column is
# transformed at once, so no loop variable is reused for both the row index
# and the individual label, and no per-cell `.at` write is needed.
for col in label_cols:
    anno_df[col] = anno_df[col].apply(
        lambda labels: {label.lower() for label in labels}
        if isinstance(labels, list)
        else labels
    )

In [6]:
# Parse each stringified literal in `merged_labels` into a Python object.
human_df['merged_labels'] = human_df['merged_labels'].apply(literal_eval)

In [7]:
# Keep only rows whose index appears in both frames; from the annotations we
# carry over the label column(s) and the image path.
merged_df = human_df.join(
    anno_df.loc[:, label_cols + ['image_url']],
    how="inner",
)

In [8]:
# Lower-case label cells that are plain strings. Non-string cells (such as
# sets of labels) are passed through unchanged: the `.str` accessor would
# replace every non-string value with NaN and wipe the column.
for col in label_cols:
    merged_df[col] = merged_df[col].apply(
        lambda value: value.lower() if isinstance(value, str) else value
    )

In [9]:
# results = {}
# for col in label_cols:
#         correct = 0
#         for i, row in merged_df.iterrows():
#                 if row[col] in row['merged_labels']:
#                         correct += 1
#         accuracy = correct / len(merged_df)
#         results[col] = accuracy
#         print(col, accuracy)

In [10]:
# pd.DataFrame(results, index=["accuracy"]).T.plot(kind="barh", figsize=(10, 5))

In [11]:
# Prediction view of the annotations: label set, reason text and image path,
# under shorter column names.
pred_df = anno_df[
    ["mini_multiple_frames_frames-list", "mini_multiple_frames_reason", 'image_url']
].rename(
    columns={
        "mini_multiple_frames_frames-list": 'pred_labels',
        "mini_multiple_frames_reason": 'pred_reason',
    }
)

In [12]:
# Rename by name rather than by position: assigning a one-element list to
# `.columns` raises as soon as the frame has more than one column, while a
# name-based rename only touches `merged_labels` and is safe to re-run.
human_df = human_df.rename(columns={'merged_labels': 'human_annotations'})

In [13]:
# A prediction counts as correct when it shares at least one label with the
# human annotations for the same index.
dropped_df = pred_df['pred_labels'].dropna()
merged_df = dropped_df.to_frame().merge(human_df, left_index=True, right_index=True)

merged_df['intersection'] = merged_df.apply(
    lambda row: row['pred_labels'].intersection(row['human_annotations']),
    axis=1,
)
merged_df['intersection_len'] = merged_df['intersection'].map(len)
merged_df['correct'] = merged_df['intersection_len'].gt(0)

# NOTE(review): the denominator is the number of non-null predictions, not the
# number of merged rows - confirm this is intended if the indexes can differ.
print(merged_df['correct'].sum() / len(dropped_df))
print("------------------")

0.8366666666666667
------------------


In [14]:
from IPython.display import display, HTML
from PIL import Image
from io import BytesIO
import base64

def image_base64(im):
    """Return the JPEG encoding of an image as a base64 text string.

    Args:
        im: Either a filesystem path (``str``), opened with ``Image.open``,
            or an already loaded image object exposing ``save`` (and, for
            mode conversion, ``mode`` / ``convert``).

    Returns:
        str: Base64 text of the JPEG bytes.
    """
    # Modes the JPEG writer accepts directly; anything else (RGBA, P, LA, ...)
    # makes `save(..., 'jpeg')` raise, so it is converted to RGB first. The
    # conversion drops any alpha channel.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    # Only close what this function opened; caller-owned images stay open.
    opened_here = Image.open(im) if isinstance(im, str) else None
    image = im if opened_here is None else opened_here
    try:
        mode = getattr(image, "mode", None)
        if mode is not None and mode not in jpeg_writable_modes:
            image = image.convert("RGB")
        with BytesIO() as buffer:
            image.save(buffer, 'jpeg')
            return base64.b64encode(buffer.getvalue()).decode()
    finally:
        if opened_here is not None:
            opened_here.close()

In [15]:
# Index-aligned inner merge of the predictions with the human annotations.
merged_df = pd.merge(
    pred_df,
    human_df,
    how="inner",
    left_index=True,
    right_index=True,
)

In [16]:
# Encode every image as base64 so it can be embedded inline in an HTML table.
img_paths = ["../" + url for url in merged_df['image_url']]
imgStrs = [image_base64(img_path) for img_path in img_paths]

# image_base64 produces JPEG bytes, so the data URI must declare image/jpeg
# (it previously claimed image/png, which did not match the payload).
merged_df['Image'] = [f'<img width=400 src="data:image/jpeg;base64,{imgStr}">' for imgStr in imgStrs]
merged_df.drop(columns=['image_url'], inplace=True)
# merged_df.to_html("../data/annotated/vision/pixtral_anno_multi_frames_val_set.html", escape=False)

In [17]:
# sample_incorrect = merged_df[merged_df["intersection_len"] == 0].sample(4)
# plot_image(sample_incorrect['image_url'], sample_incorrect['merged_labels'], sample_incorrect['normal_multiple_frames_frame-list'])

In [18]:
# plot_image(sample_incorrect['image_url'], sample_incorrect['merged_labels'], sample_incorrect['frame-name'])