In [None]:
import pandas as pd
import numpy as np
import ast
import math
import matplotlib.pyplot as plt
import random
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Base directory for every file this notebook reads or writes. File names are
# appended by plain string concatenation, so the value must end with "/".
data_path = "../data/"

In [None]:
def show_images(ranked, img_path, relevances=None):
    """Display the screenshots of the given ids in a grid with four columns.

    Parameters
    ----------
    ranked : sequence
        Ids of the images to show. Each image is read from
        ``img_path + str(id) + ".jpg"``.
    img_path : str
        Path prefix of the image files. It is concatenated as-is, so it must
        end with a path separator.
    relevances : sequence, optional
        One label per entry of ``ranked``. Every label that is not equal to
        ``0`` is appended to the image title. When omitted, only the id is
        used as title.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    if len(ranked) == 0:
        # Nothing to draw; avoids requesting a grid with zero rows.
        return

    n_cols = 4
    n_rows = int(math.ceil(len(ranked) / n_cols))
    fig = plt.figure(figsize=(20, 20))  # overall figure size in inches
    grid = ImageGrid(
        fig,
        111,  # similar to subplot(111)
        nrows_ncols=(n_rows, n_cols),
        axes_pad=0.3,
        share_all=True,
    )

    if relevances is None:
        # All-zero labels mean "no label" for the title logic below.
        relevances = np.zeros(len(ranked))

    for ax, rico_id, relevance in zip(grid, ranked, relevances):
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(img_path + str(rico_id) + ".jpg") as img:
            # Every image is brought to the same size; the axes are shared
            # (share_all=True), so presumably this keeps the cells uniform.
            pixels = np.array(img.resize((1080, 1920)))
        ax.imshow(pixels)
        ax.grid(False)

        title = str(rico_id)
        if relevance != 0:
            # str() is required: numeric labels cannot be concatenated to a str.
            title += ", " + str(relevance)
        ax.title.set_text(title)

    plt.show()

# 1. Load Datasets

In [None]:
# Read dataset_test_k_top_20.csv from data_path and preview the first rows.
dataset = pd.read_csv(data_path + "dataset_test_k_top_20.csv")
dataset.head()

In [None]:
# Read the first annotation spreadsheet (annotation_0.xlsx) and preview it.
# Later cells read its columns "annotation_0" ... "annotation_19".
anno_0 = pd.read_excel(data_path + "annotation_0.xlsx")
anno_0.head()

In [None]:
# Read the second annotation spreadsheet (annotation_1.xlsx) and preview it.
# Later cells read its columns "annotation_0" ... "annotation_19".
anno_1 = pd.read_excel(data_path + "annotation_1.xlsx")
anno_1.head()

In [None]:
# Read the third annotation spreadsheet (annotation_2.xlsx) and preview it.
# Later cells read its columns "annotation_0" ... "annotation_19".
anno_2 = pd.read_excel(data_path + "annotation_2.xlsx")
anno_2.head()

In [None]:
def _attach_annotations(target, anno_df, column, n_items=20):
    """Store each row's annotations as a list in ``target[column]``.

    For every row of ``anno_df`` the values of the columns ``annotation_0`` ...
    ``annotation_<n_items - 1>`` are gathered into one list and written to the
    row of ``target`` that carries the same index label.

    Parameters
    ----------
    target : pandas.DataFrame
        Frame that receives the new column; it is modified in place.
    anno_df : pandas.DataFrame
        One annotator's sheet with the ``annotation_<i>`` columns.
    column : str
        Name of the column created in ``target``.
    n_items : int, optional
        Number of ``annotation_<i>`` columns to read per row.

    Returns
    -------
    list
        All annotations of ``anno_df`` flattened row by row.
    """
    # Initialise with "" so the column exists (object dtype) before lists are
    # written cell by cell; this also makes re-running the cell idempotent.
    target[column] = ""
    flat = []
    for index, row in anno_df.iterrows():
        annos = [row["annotation_" + str(i)] for i in range(n_items)]
        target.at[index, column] = annos
        flat.extend(annos)
    return flat


# One list column per annotator, plus the flattened annotations that are used
# later to build the Fleiss matrix.
concat_0 = _attach_annotations(dataset, anno_0, "anno_0")
concat_1 = _attach_annotations(dataset, anno_1, "anno_1")
concat_2 = _attach_annotations(dataset, anno_2, "anno_2")

dataset.head()

In [None]:
# Majority vote over the three annotators for every annotated item, plus the
# share of items on which the annotators did not all agree.
not_unanimous = 0
n_rated_items = 0
dataset["anno_majority"] = ""
for index, row in dataset.iterrows():
    anno = []
    # Walk the three annotators' lists in parallel, one item at a time.
    for votes in zip(row["anno_0"], row["anno_1"], row["anno_2"]):
        annos = list(votes)
        n_rated_items += 1
        values, counts = np.unique(np.array(annos), return_counts=True)
        if np.max(counts) != len(annos):
            # At least one annotator disagrees: log the case for inspection.
            not_unanimous += 1
            print(counts)
            print(annos)
            print()
        # Most frequent label; on a full tie np.argmax picks the smallest label.
        anno.append(values[np.argmax(counts)])
    dataset.at[index, "anno_majority"] = anno

# Normalise by the number of items actually rated instead of a fixed constant,
# so the rate stays correct when the dataset size changes.
not_un_rate = not_unanimous / n_rated_items if n_rated_items else 0.0
print(not_un_rate)
dataset.head()

# 2. Fleiss' Kappa

In [None]:
# Build the subjects x categories count table for Fleiss' kappa: entry (i, j)
# is the number of annotators that gave item i the label j.
N = len(concat_0)  # number of rated items, taken from the data
k = 2  # number of categories; the labels are used directly as column index

if len(concat_1) != N or len(concat_2) != N:
    raise ValueError("all annotators must have rated the same number of items")

fleiss_matrix = np.zeros((N, k))
for i, votes in enumerate(zip(concat_0, concat_1, concat_2)):
    for vote in votes:
        fleiss_matrix[i][vote] += 1
fleiss_matrix

In [None]:
def fleiss_kappa(fleiss_matrix, verbose=True):
    """Compute Fleiss' kappa for a subjects x categories count table.

    Parameters
    ----------
    fleiss_matrix : array-like of shape (N, k)
        Entry ``[i, j]`` is the number of raters that assigned subject ``i``
        to category ``j``. Every row must sum to the same number of raters.
    verbose : bool, optional
        When true (the default), print ``N``, ``k`` and the number of ratings
        per subject ``n``.

    Returns
    -------
    float
        The kappa value. ``nan`` is returned when the expected agreement is 1
        (all ratings fall into a single category), because the statistic is
        undefined in that case.

    Raises
    ------
    ValueError
        If the table is empty or not two-dimensional, if the rows do not all
        contain the same number of ratings, or if there are fewer than two
        ratings per subject.
    """
    counts = np.asarray(fleiss_matrix, dtype=float)
    if counts.ndim != 2 or counts.shape[0] == 0 or counts.shape[1] == 0:
        raise ValueError("fleiss_matrix must be a non-empty 2-D array")

    N, k = counts.shape
    ratings_per_subject = counts.sum(axis=1)
    n = ratings_per_subject[0]
    # The formulas below assume a constant number of ratings per subject;
    # silently using only the first row's total would give a wrong kappa.
    if not np.all(ratings_per_subject == n):
        raise ValueError("every subject must have the same number of ratings")
    if n < 2:
        raise ValueError("at least two ratings per subject are required")

    if verbose:
        print("N", N, "k", k, "n", n)

    # Share of all ratings that went to each category.
    p_j = counts.sum(axis=0) / (N * n)

    # Per-subject agreement: fraction of rater pairs that agree.
    P_i = (np.sum(counts * counts, axis=1) - n) / (n * (n - 1))

    P_dash = P_i.mean()  # observed agreement
    P_dash_e = np.sum(p_j * p_j)  # agreement expected by chance

    if np.isclose(P_dash_e, 1.0):
        # 0 / 0: kappa is undefined when only one category is ever used.
        return np.nan

    return (P_dash - P_dash_e) / (1 - P_dash_e)

In [None]:
# Agreement between the three annotators on the count table built above.
fleiss_kappa(fleiss_matrix)

# 3. Comparison with LLM

In [None]:
# Load the LLM annotations, keep only the columns used below and parse the
# list-valued columns, which are stored in the CSV as Python literals.
gpt_annotations = pd.read_csv(data_path + "dataset_test_k_top_20_binary_one_to_ten_annotated.csv")
# .copy() makes the column subset an independent frame, so the assignments
# below do not trigger pandas' chained-assignment (SettingWithCopy) warning.
gpt_annotations = gpt_annotations[
    ["RICO GUI", "Descriptions", "Length", "rico_ranking", "binary_annotation", "binary_reasonings"]
].copy()
for column in ("binary_annotation", "binary_reasonings", "rico_ranking"):
    # literal_eval only evaluates literals, so it is safe on file content.
    gpt_annotations[column] = gpt_annotations[column].apply(ast.literal_eval)
gpt_annotations.head()

In [None]:
# Reorder every row's parallel lists by ascending id in "rico_ranking".
for index, row in gpt_annotations.iterrows():
    # Sort all three lists together with a single key. Sorting the pairs
    # (id, label) and (id, reasoning) independently would break ties on the
    # second element and could pair a label with the wrong reasoning.
    ordered = sorted(
        zip(row["rico_ranking"], row["binary_annotation"], row["binary_reasonings"]),
        key=lambda item: item[0],
    )
    gpt_annotations.at[index, "rico_ranking"] = [item[0] for item in ordered]
    gpt_annotations.at[index, "binary_annotation"] = [item[1] for item in ordered]
    gpt_annotations.at[index, "binary_reasonings"] = [item[2] for item in ordered]
gpt_annotations.head()

In [None]:
# Attach the human majority labels to the LLM annotations; rows are matched
# on the "RICO GUI" column.
joined = gpt_annotations.join(
    dataset.loc[:, ["RICO GUI", "anno_majority"]].set_index("RICO GUI"),
    on="RICO GUI",
)
joined.head()

In [None]:
# Persist the joined frame (LLM annotations plus human majority labels) as CSV
# without the index column.
joined.to_csv(data_path + "dataset_test_k_top_20_binary_majority_annotation.csv", index=False)

In [None]:
# Precision, recall and F1 of the LLM labels ("binary_annotation") against the
# human majority labels ("anno_majority"), with label 1 as positive class.
true_positives = 0  # items whose majority label is 1 (all actual positives)
pred_positives = 0  # items the LLM labelled 1
pred_true_positives = 0  # items labelled 1 by both
y_true = []
y_pred = []

for index, row in joined.iterrows():
    binary = row["binary_annotation"]
    truth = row["anno_majority"]
    for i, predicted in enumerate(binary):
        actual = truth[i]
        y_true.append(actual)
        y_pred.append(predicted)
        if actual == 1:
            true_positives += 1
        if predicted == 1:
            pred_positives += 1
            if actual == 1:
                pred_true_positives += 1

print(true_positives, pred_positives, pred_true_positives)
# Guard every ratio against an empty denominator; an undefined metric is
# reported as 0.0 instead of aborting the cell with ZeroDivisionError.
precision = pred_true_positives / pred_positives if pred_positives else 0.0
recall = pred_true_positives / true_positives if true_positives else 0.0
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
print("PRECISION", precision)
print("RECALL", recall)
print("F1", f1)

In [None]:
# Confusion matrix of the human majority labels (y_true) versus the LLM
# labels (y_pred), rendered with scikit-learn's display helper.
conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred)
disp = ConfusionMatrixDisplay(conf_matrix)
disp.plot()
plt.show()

In [None]:
# Count, and list, the rows whose majority labels sum to zero.
no_relevants = 0
for index, row in joined.iterrows():
    majority_total = np.array(row["anno_majority"]).sum()
    if majority_total != 0:
        continue
    no_relevants += 1
    print(index, row["Descriptions"])
print(no_relevants)

In [None]:
# Collect every item the LLM labelled 1 although the human majority label is 0,
# as (description, id, LLM reasoning) tuples.
false_positives = []
for index, row in joined.iterrows():
    # Iterate the parallel lists directly instead of assuming a fixed length.
    per_item = zip(
        row["anno_majority"],
        row["binary_annotation"],
        row["rico_ranking"],
        row["binary_reasonings"],
    )
    for truth, predicted, rico_id, reasoning in per_item:
        if truth == 0 and predicted == 1:
            false_positives.append((row["Descriptions"], rico_id, reasoning))
print(len(false_positives))

In [None]:
# Show a random sample of up to 20 false positives: the image, followed by the
# description and the LLM reasoning.
# NOTE(review): no random seed is set, so the sample differs between runs.
sample_size = min(20, len(false_positives))  # random.sample raises if k > population
sample = random.sample(false_positives, sample_size)
for description, rico_id, reasoning in sample:
    # The context manager releases the file handle after the pixels are drawn.
    with Image.open("../data/rico/unique_uis/combined/" + str(rico_id) + ".jpg") as img:
        plt.imshow(img)
    plt.grid(False)
    plt.title(rico_id)
    plt.show()
    print(description)
    print(reasoning)