In [102]:
import os
import sys
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
import logging
from torch.utils.data import DataLoader
import multiprocessing
import tqdm
from ast import literal_eval
from annoy import AnnoyIndex
import cv2
import torchvision.transforms as transforms

from torchvision import transforms
from umap import UMAP
import plotly.express as px
from collections import Counter


In [2]:
dir_path = os.path.dirname(os.path.abspath(os.getcwd()))

In [3]:
sys.path.append(os.path.join(dir_path, "src"))

In [4]:
from data_module import ImageDataModule
from resnet import Resnet50
from utils import collate_batch
from dataset import ImageDataset

In [5]:
ckpt = os.path.join(dir_path, "notebooks","lightning_logs","version_39","checkpoints","epoch=7-step=1952.ckpt")


In [6]:
model = Resnet50.load_from_checkpoint(ckpt, embedding_size=512, num_classes=19)



In [7]:
def load_image_arrary(file):
        path = os.path.join(dir_path, "images", "raw")
        image = cv2.imread(f"{path}/{file}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        normalize = transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
        )
        transformations = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]) 
        image = transformations(image)
        return image.unsqueeze(0) # add a batch channel

In [8]:
def extract_embedding(model, image):
    model.eval()
    with torch.no_grad():
        embedding = model(image)
    return embedding[0]


In [9]:
index_df = pd.read_csv("metadata.csv", index_col=0)

In [10]:
embeddings = []

In [11]:
# for i, row in index_df[index_df["genre"] == f"['{genre}']"].iterrows():
for i, row in tqdm.tqdm(index_df.iterrows()):
    image = load_image_arrary(row["image_name"])
    query_embedding = extract_embedding(model, image)
    embeddings.append(query_embedding)


9739it [14:02, 11.56it/s]


In [12]:
umap_3d = UMAP(n_components=5, init='random', random_state=0)


In [20]:
list_embeddings = [x.tolist() for x  in embeddings] 

In [28]:
list_embeddings[0]

[0.017839781939983368,
 -0.0020168619230389595,
 -0.09078569710254669,
 -0.10151342302560806,
 0.027614912018179893,
 -0.0261685810983181,
 0.02291029505431652,
 -0.1472741961479187,
 0.03172680735588074,
 0.0617537647485733,
 0.0513116791844368,
 0.013941647484898567,
 -0.004079429432749748,
 -0.0003509853850118816,
 -0.026556581258773804,
 -0.026939833536744118,
 -0.0055480096489191055,
 0.0160692036151886,
 -0.010538513772189617,
 -0.0367879755795002,
 0.01898203231394291,
 0.0419270396232605,
 0.01714361272752285,
 4.485963290790096e-05,
 -0.013616516254842281,
 0.06261464208364487,
 -0.036758653819561005,
 -0.026074083521962166,
 0.009408047422766685,
 -0.0034446308854967356,
 0.034473519772291183,
 0.07049272209405899,
 -0.012973318807780743,
 0.029872125014662743,
 0.08257605880498886,
 0.0032051524613052607,
 -0.03974813222885132,
 -0.004831834230571985,
 -0.01249630842357874,
 0.02724573016166687,
 -0.014259607531130314,
 -0.007297751028090715,
 -0.08242394030094147,
 0.002920

In [21]:
umap_embeddings = umap_3d.fit_transform(list_embeddings)

In [25]:
subsampled_fig_3d = px.scatter_3d(
    umap_embeddings, x=0, y=1, z=2,
    color=index_df.genre.to_list(), labels={'color': 'genre'}
)

subsampled_fig_3d.update_traces(marker_size=2)
subsampled_fig_3d.show()

# Calculating Recall@K

In [100]:
data = pd.read_csv(os.path.join(dir_path, "data", "raw_data.csv"), lineterminator='\n', index_col=0)

In [104]:
literal_eval(data.iloc[0]["genres"])

['Animation', 'Adventure', 'Family', 'Fantasy', 'Comedy']

In [61]:
embedding_size = 512
annoy_index_file = 'annoy_index.ann'

In [62]:
loaded_annoy_index = AnnoyIndex(embedding_size, 'euclidean')
loaded_annoy_index.load(annoy_index_file)

True

In [31]:
index_df

Unnamed: 0,genre,image_name
0,['Animation'],qNBAXBIQlnOThrVvA6mA2B5ggV6.jpg
1,['Drama'],vJU3rXSP9hwUuLeq8IpfsJShLOk.jpg
2,['Science Fiction'],t6HIqrRAclMCA60NsSmeqe9RmNV.jpg
3,['Animation'],qVdrYN8qu7xUtsdEFeGiIVIaYd.jpg
4,['Comedy'],swzMoIVn6xjB857ziYJ8KBV440g.jpg
...,...,...
9734,['Comedy'],vkF8VLrazGtk9OjdEhihG6kKAhP.jpg
9735,['Thriller'],yw8x2i3vaHZZzpvqvF75E8q2N6M.jpg
9736,['Drama'],bFOmE3zCFU01TuomOOwClAWdvOD.jpg
9737,['Action'],kziBJGQFo9f0Vkj9s37qI0G9I0I.jpg


In [130]:
k_neighbors = [1, 2, 3, 5, 8]

In [169]:
genres_dict = {}

In [170]:
genres_list = [
 'Action',
 'Adventure',
 'Animation',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'History',
 'Horror',
 'Music',
 'Mystery',
 'Romance',
 'Science Fiction',
 'TV Movie',
 'Thriller',
 'War',
 'Western'
]

In [171]:
for genre in reversed(genres_list):
    print(f"Genre: {genre}")
    genres_dict[genre] = []

    genre_df = index_df[index_df["genre"] == f"['{genre}']"]

    nearest_neighbors = []
    for i, row in tqdm.tqdm(genre_df.iterrows()):
        image = load_image_arrary(row["image_name"])
        query_embedding = extract_embedding(model, image)
        nearest_indices = loaded_annoy_index.get_nns_by_vector(query_embedding, max(k_neighbors), search_k=100)
        nearest_neighbors.append(nearest_indices)


    total_queries = len(genre_df)
    for k in k_neighbors:
        correct_retrievals = 0
        genre_hits = 0
        for i in nearest_neighbors:
            indices = i[:k]            
            all_genres = []
            for neighbor in indices:
                image_neighbor = index_df.iloc[neighbor]["image_name"]
                nearest_genres = literal_eval(data[data["poster_path"] == image_neighbor]["genres"].values[0])
                all_genres += nearest_genres
            counter = Counter(all_genres)
            most_common_val = counter.most_common()[0][1]
            if counter.get(genre) == most_common_val:
                correct_retrievals += 1

            if counter.get(genre, 0) > 0:
                genre_hits += 1

        majority = correct_retrievals / total_queries
        recall = genre_hits / total_queries
        print(f"{genre} Majority Vote@{k} - {majority}")
        print(f"{genre} Recall@{k} - {recall}")
        genres_dict[genre].append({f"majority@{k}": majority})
        genres_dict[genre].append({f"recall@{k}": recall})
            
                    

Genre: Western


78it [00:08,  9.56it/s]


Western Majority Vote@1 - 0.9743589743589743
Western Recall@1 - 0.9743589743589743
Western Majority Vote@2 - 0.9615384615384616
Western Recall@2 - 0.9743589743589743
Western Majority Vote@3 - 0.8717948717948718
Western Recall@3 - 0.9743589743589743
Western Majority Vote@5 - 0.8589743589743589
Western Recall@5 - 0.9743589743589743
Western Majority Vote@8 - 0.8333333333333334
Western Recall@8 - 0.9743589743589743
Genre: War


80it [00:08,  8.92it/s]


War Majority Vote@1 - 0.9375
War Recall@1 - 0.9375
War Majority Vote@2 - 0.8125
War Recall@2 - 0.9375
War Majority Vote@3 - 0.7375
War Recall@3 - 0.9375
War Majority Vote@5 - 0.75
War Recall@5 - 0.95
War Majority Vote@8 - 0.7375
War Recall@8 - 0.95
Genre: Thriller


575it [01:02,  9.23it/s]


Thriller Majority Vote@1 - 0.9878260869565217
Thriller Recall@1 - 0.9878260869565217
Thriller Majority Vote@2 - 0.92
Thriller Recall@2 - 0.9947826086956522
Thriller Majority Vote@3 - 0.8678260869565217
Thriller Recall@3 - 0.9982608695652174
Thriller Majority Vote@5 - 0.8173913043478261
Thriller Recall@5 - 0.9982608695652174
Thriller Majority Vote@8 - 0.7895652173913044
Thriller Recall@8 - 0.9982608695652174
Genre: TV Movie


30it [00:03,  8.46it/s]


TV Movie Majority Vote@1 - 0.9666666666666667
TV Movie Recall@1 - 0.9666666666666667
TV Movie Majority Vote@2 - 0.7666666666666667
TV Movie Recall@2 - 0.9666666666666667
TV Movie Majority Vote@3 - 0.6666666666666666
TV Movie Recall@3 - 0.9666666666666667
TV Movie Majority Vote@5 - 0.43333333333333335
TV Movie Recall@5 - 0.9666666666666667
TV Movie Majority Vote@8 - 0.43333333333333335
TV Movie Recall@8 - 0.9666666666666667
Genre: Science Fiction


303it [00:34,  8.73it/s]


Science Fiction Majority Vote@1 - 0.9867986798679867
Science Fiction Recall@1 - 0.9867986798679867
Science Fiction Majority Vote@2 - 0.8976897689768977
Science Fiction Recall@2 - 0.9900990099009901
Science Fiction Majority Vote@3 - 0.8415841584158416
Science Fiction Recall@3 - 0.9900990099009901
Science Fiction Majority Vote@5 - 0.8118811881188119
Science Fiction Recall@5 - 0.9900990099009901
Science Fiction Majority Vote@8 - 0.7953795379537953
Science Fiction Recall@8 - 0.9966996699669967
Genre: Romance


378it [00:35, 10.52it/s]


Romance Majority Vote@1 - 0.9920634920634921
Romance Recall@1 - 0.9920634920634921
Romance Majority Vote@2 - 0.91005291005291
Romance Recall@2 - 0.9947089947089947
Romance Majority Vote@3 - 0.8756613756613757
Romance Recall@3 - 0.9973544973544973
Romance Majority Vote@5 - 0.8597883597883598
Romance Recall@5 - 0.9973544973544973
Romance Majority Vote@8 - 0.8386243386243386
Romance Recall@8 - 0.9973544973544973
Genre: Mystery


109it [00:10, 10.48it/s]


Mystery Majority Vote@1 - 0.9908256880733946
Mystery Recall@1 - 0.9908256880733946
Mystery Majority Vote@2 - 0.9174311926605505
Mystery Recall@2 - 0.9908256880733946
Mystery Majority Vote@3 - 0.8715596330275229
Mystery Recall@3 - 0.9908256880733946
Mystery Majority Vote@5 - 0.8165137614678899
Mystery Recall@5 - 0.9908256880733946
Mystery Majority Vote@8 - 0.8348623853211009
Mystery Recall@8 - 0.9908256880733946
Genre: Music


72it [00:07,  9.65it/s]


Music Majority Vote@1 - 0.9861111111111112
Music Recall@1 - 0.9861111111111112
Music Majority Vote@2 - 0.9305555555555556
Music Recall@2 - 0.9861111111111112
Music Majority Vote@3 - 0.8472222222222222
Music Recall@3 - 0.9861111111111112
Music Majority Vote@5 - 0.8333333333333334
Music Recall@5 - 1.0
Music Majority Vote@8 - 0.8194444444444444
Music Recall@8 - 1.0
Genre: Horror


906it [01:44,  8.69it/s]


Horror Majority Vote@1 - 0.9922737306843267
Horror Recall@1 - 0.9922737306843267
Horror Majority Vote@2 - 0.9260485651214128
Horror Recall@2 - 0.9977924944812362
Horror Majority Vote@3 - 0.8664459161147903
Horror Recall@3 - 0.9988962472406181
Horror Majority Vote@5 - 0.8465783664459161
Horror Recall@5 - 0.9988962472406181
Horror Majority Vote@8 - 0.8333333333333334
Horror Recall@8 - 0.9988962472406181
Genre: History


38it [00:04,  9.38it/s]


History Majority Vote@1 - 0.9210526315789473
History Recall@1 - 0.9210526315789473
History Majority Vote@2 - 0.8947368421052632
History Recall@2 - 0.9473684210526315
History Majority Vote@3 - 0.8157894736842105
History Recall@3 - 0.9473684210526315
History Majority Vote@5 - 0.6578947368421053
History Recall@5 - 0.9736842105263158
History Majority Vote@8 - 0.5
History Recall@8 - 0.9736842105263158
Genre: Fantasy


251it [00:28,  8.82it/s]


Fantasy Majority Vote@1 - 1.0
Fantasy Recall@1 - 1.0
Fantasy Majority Vote@2 - 0.9043824701195219
Fantasy Recall@2 - 1.0
Fantasy Majority Vote@3 - 0.8247011952191236
Fantasy Recall@3 - 1.0
Fantasy Majority Vote@5 - 0.8087649402390438
Fantasy Recall@5 - 1.0
Fantasy Majority Vote@8 - 0.8007968127490039
Fantasy Recall@8 - 1.0
Genre: Family


320it [00:32,  9.77it/s]


Family Majority Vote@1 - 0.9875
Family Recall@1 - 0.9875
Family Majority Vote@2 - 0.915625
Family Recall@2 - 0.990625
Family Majority Vote@3 - 0.865625
Family Recall@3 - 0.99375
Family Majority Vote@5 - 0.8375
Family Recall@5 - 0.99375
Family Majority Vote@8 - 0.8
Family Recall@8 - 0.996875
Genre: Drama


1805it [02:52, 10.48it/s]


Drama Majority Vote@1 - 0.971191135734072
Drama Recall@1 - 0.971191135734072
Drama Majority Vote@2 - 0.8614958448753463
Drama Recall@2 - 0.9850415512465374
Drama Majority Vote@3 - 0.7817174515235457
Drama Recall@3 - 0.9939058171745152
Drama Majority Vote@5 - 0.7257617728531855
Drama Recall@5 - 0.997229916897507
Drama Majority Vote@8 - 0.6958448753462604
Drama Recall@8 - 0.9988919667590028
Genre: Documentary


142it [00:13, 10.33it/s]


Documentary Majority Vote@1 - 0.9788732394366197
Documentary Recall@1 - 0.9788732394366197
Documentary Majority Vote@2 - 0.9647887323943662
Documentary Recall@2 - 0.9788732394366197
Documentary Majority Vote@3 - 0.8028169014084507
Documentary Recall@3 - 0.9788732394366197
Documentary Majority Vote@5 - 0.7605633802816901
Documentary Recall@5 - 0.9788732394366197
Documentary Majority Vote@8 - 0.7676056338028169
Documentary Recall@8 - 0.9788732394366197
Genre: Crime


360it [00:36,  9.88it/s]


Crime Majority Vote@1 - 0.9638888888888889
Crime Recall@1 - 0.9638888888888889
Crime Majority Vote@2 - 0.8527777777777777
Crime Recall@2 - 0.9694444444444444
Crime Majority Vote@3 - 0.7777777777777778
Crime Recall@3 - 0.975
Crime Majority Vote@5 - 0.7138888888888889
Crime Recall@5 - 0.9916666666666667
Crime Majority Vote@8 - 0.6916666666666667
Crime Recall@8 - 0.9916666666666667
Genre: Comedy


1333it [02:08, 10.39it/s]


Comedy Majority Vote@1 - 0.981245311327832
Comedy Recall@1 - 0.981245311327832
Comedy Majority Vote@2 - 0.9099774943735934
Comedy Recall@2 - 0.9902475618904726
Comedy Majority Vote@3 - 0.8769692423105776
Comedy Recall@3 - 0.9954988747186797
Comedy Majority Vote@5 - 0.8424606151537885
Comedy Recall@5 - 0.9969992498124531
Comedy Majority Vote@8 - 0.8304576144036009
Comedy Recall@8 - 0.9992498124531133
Genre: Animation


864it [01:26, 10.05it/s]


Animation Majority Vote@1 - 0.9942129629629629
Animation Recall@1 - 0.9942129629629629
Animation Majority Vote@2 - 0.9560185185185185
Animation Recall@2 - 0.9965277777777778
Animation Majority Vote@3 - 0.9398148148148148
Animation Recall@3 - 0.9965277777777778
Animation Majority Vote@5 - 0.9282407407407407
Animation Recall@5 - 0.9976851851851852
Animation Majority Vote@8 - 0.9247685185185185
Animation Recall@8 - 0.9988425925925926
Genre: Adventure


542it [00:56,  9.59it/s]


Adventure Majority Vote@1 - 0.985239852398524
Adventure Recall@1 - 0.985239852398524
Adventure Majority Vote@2 - 0.8505535055350554
Adventure Recall@2 - 0.9907749077490775
Adventure Majority Vote@3 - 0.8099630996309963
Adventure Recall@3 - 0.996309963099631
Adventure Majority Vote@5 - 0.7619926199261993
Adventure Recall@5 - 0.996309963099631
Adventure Majority Vote@8 - 0.7509225092250923
Adventure Recall@8 - 0.996309963099631
Genre: Action


1553it [02:37,  9.88it/s]


Action Majority Vote@1 - 0.9665164198325821
Action Recall@1 - 0.9665164198325821
Action Majority Vote@2 - 0.8074694140373471
Action Recall@2 - 0.9800386349001932
Action Majority Vote@3 - 0.7231165486155827
Action Recall@3 - 0.9877656149388281
Action Majority Vote@5 - 0.6754668383773342
Action Recall@5 - 0.9935608499678042
Action Majority Vote@8 - 0.6452028332260141
Action Recall@8 - 0.9974243399871217


In [174]:
pd.DataFrame.from_dict(genres_dict, orient="index")

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Western,{'majority@1': 0.9743589743589743},{'recall@1': 0.9743589743589743},{'majority@2': 0.9615384615384616},{'recall@2': 0.9743589743589743},{'majority@3': 0.8717948717948718},{'recall@3': 0.9743589743589743},{'majority@5': 0.8589743589743589},{'recall@5': 0.9743589743589743},{'majority@8': 0.8333333333333334},{'recall@8': 0.9743589743589743}
War,{'majority@1': 0.9375},{'recall@1': 0.9375},{'majority@2': 0.8125},{'recall@2': 0.9375},{'majority@3': 0.7375},{'recall@3': 0.9375},{'majority@5': 0.75},{'recall@5': 0.95},{'majority@8': 0.7375},{'recall@8': 0.95}
Thriller,{'majority@1': 0.9878260869565217},{'recall@1': 0.9878260869565217},{'majority@2': 0.92},{'recall@2': 0.9947826086956522},{'majority@3': 0.8678260869565217},{'recall@3': 0.9982608695652174},{'majority@5': 0.8173913043478261},{'recall@5': 0.9982608695652174},{'majority@8': 0.7895652173913044},{'recall@8': 0.9982608695652174}
TV Movie,{'majority@1': 0.9666666666666667},{'recall@1': 0.9666666666666667},{'majority@2': 0.7666666666666667},{'recall@2': 0.9666666666666667},{'majority@3': 0.6666666666666666},{'recall@3': 0.9666666666666667},{'majority@5': 0.43333333333333335},{'recall@5': 0.9666666666666667},{'majority@8': 0.43333333333333335},{'recall@8': 0.9666666666666667}
Science Fiction,{'majority@1': 0.9867986798679867},{'recall@1': 0.9867986798679867},{'majority@2': 0.8976897689768977},{'recall@2': 0.9900990099009901},{'majority@3': 0.8415841584158416},{'recall@3': 0.9900990099009901},{'majority@5': 0.8118811881188119},{'recall@5': 0.9900990099009901},{'majority@8': 0.7953795379537953},{'recall@8': 0.9966996699669967}
Romance,{'majority@1': 0.9920634920634921},{'recall@1': 0.9920634920634921},{'majority@2': 0.91005291005291},{'recall@2': 0.9947089947089947},{'majority@3': 0.8756613756613757},{'recall@3': 0.9973544973544973},{'majority@5': 0.8597883597883598},{'recall@5': 0.9973544973544973},{'majority@8': 0.8386243386243386},{'recall@8': 0.9973544973544973}
Mystery,{'majority@1': 0.9908256880733946},{'recall@1': 0.9908256880733946},{'majority@2': 0.9174311926605505},{'recall@2': 0.9908256880733946},{'majority@3': 0.8715596330275229},{'recall@3': 0.9908256880733946},{'majority@5': 0.8165137614678899},{'recall@5': 0.9908256880733946},{'majority@8': 0.8348623853211009},{'recall@8': 0.9908256880733946}
Music,{'majority@1': 0.9861111111111112},{'recall@1': 0.9861111111111112},{'majority@2': 0.9305555555555556},{'recall@2': 0.9861111111111112},{'majority@3': 0.8472222222222222},{'recall@3': 0.9861111111111112},{'majority@5': 0.8333333333333334},{'recall@5': 1.0},{'majority@8': 0.8194444444444444},{'recall@8': 1.0}
Horror,{'majority@1': 0.9922737306843267},{'recall@1': 0.9922737306843267},{'majority@2': 0.9260485651214128},{'recall@2': 0.9977924944812362},{'majority@3': 0.8664459161147903},{'recall@3': 0.9988962472406181},{'majority@5': 0.8465783664459161},{'recall@5': 0.9988962472406181},{'majority@8': 0.8333333333333334},{'recall@8': 0.9988962472406181}
History,{'majority@1': 0.9210526315789473},{'recall@1': 0.9210526315789473},{'majority@2': 0.8947368421052632},{'recall@2': 0.9473684210526315},{'majority@3': 0.8157894736842105},{'recall@3': 0.9473684210526315},{'majority@5': 0.6578947368421053},{'recall@5': 0.9736842105263158},{'majority@8': 0.5},{'recall@8': 0.9736842105263158}


In [176]:
flattened_data = [
    {**entry, "index": index}
    for index, entries in genres_dict.items()
    for entry in entries
]

In [178]:
metrics = pd.DataFrame(flattened_data).set_index("index")

In [186]:
metrics = metrics.groupby(metrics.index).first()

In [200]:
metrics

Unnamed: 0_level_0,majority@1,recall@1,majority@2,recall@2,majority@3,recall@3,majority@5,recall@5,majority@8,recall@8
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Action,0.966516,0.966516,0.807469,0.980039,0.723117,0.987766,0.675467,0.993561,0.645203,0.997424
Adventure,0.98524,0.98524,0.850554,0.990775,0.809963,0.99631,0.761993,0.99631,0.750923,0.99631
Animation,0.994213,0.994213,0.956019,0.996528,0.939815,0.996528,0.928241,0.997685,0.924769,0.998843
Comedy,0.981245,0.981245,0.909977,0.990248,0.876969,0.995499,0.842461,0.996999,0.830458,0.99925
Crime,0.963889,0.963889,0.852778,0.969444,0.777778,0.975,0.713889,0.991667,0.691667,0.991667
Documentary,0.978873,0.978873,0.964789,0.978873,0.802817,0.978873,0.760563,0.978873,0.767606,0.978873
Drama,0.971191,0.971191,0.861496,0.985042,0.781717,0.993906,0.725762,0.99723,0.695845,0.998892
Family,0.9875,0.9875,0.915625,0.990625,0.865625,0.99375,0.8375,0.99375,0.8,0.996875
Fantasy,1.0,1.0,0.904382,1.0,0.824701,1.0,0.808765,1.0,0.800797,1.0
History,0.921053,0.921053,0.894737,0.947368,0.815789,0.947368,0.657895,0.973684,0.5,0.973684


In [187]:
recalls = ["recall@1", "recall@2", "recall@3", "recall@5", "recall@8"]

In [206]:
majorities = ["majority@1", "majority@3", "majority@8"]

In [207]:
metrics[majorities]

Unnamed: 0_level_0,majority@1,majority@3,majority@8
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Action,0.966516,0.723117,0.645203
Adventure,0.98524,0.809963,0.750923
Animation,0.994213,0.939815,0.924769
Comedy,0.981245,0.876969,0.830458
Crime,0.963889,0.777778,0.691667
Documentary,0.978873,0.802817,0.767606
Drama,0.971191,0.781717,0.695845
Family,0.9875,0.865625,0.8
Fantasy,1.0,0.824701,0.800797
History,0.921053,0.815789,0.5


In [208]:
print(metrics[majorities].to_latex(index=True, escape=False,
                                     sparsify=True,
        multirow=True,
        multicolumn=True,
        multicolumn_format='c',
        position='p',
        bold_rows=True))

\begin{table}[p]
\begin{tabular}{lrrr}
\toprule
 & majority@1 & majority@3 & majority@8 \\
index &  &  &  \\
\midrule
\textbf{Action} & 0.966516 & 0.723117 & 0.645203 \\
\textbf{Adventure} & 0.985240 & 0.809963 & 0.750923 \\
\textbf{Animation} & 0.994213 & 0.939815 & 0.924769 \\
\textbf{Comedy} & 0.981245 & 0.876969 & 0.830458 \\
\textbf{Crime} & 0.963889 & 0.777778 & 0.691667 \\
\textbf{Documentary} & 0.978873 & 0.802817 & 0.767606 \\
\textbf{Drama} & 0.971191 & 0.781717 & 0.695845 \\
\textbf{Family} & 0.987500 & 0.865625 & 0.800000 \\
\textbf{Fantasy} & 1.000000 & 0.824701 & 0.800797 \\
\textbf{History} & 0.921053 & 0.815789 & 0.500000 \\
\textbf{Horror} & 0.992274 & 0.866446 & 0.833333 \\
\textbf{Music} & 0.986111 & 0.847222 & 0.819444 \\
\textbf{Mystery} & 0.990826 & 0.871560 & 0.834862 \\
\textbf{Romance} & 0.992063 & 0.875661 & 0.838624 \\
\textbf{Science Fiction} & 0.986799 & 0.841584 & 0.795380 \\
\textbf{TV Movie} & 0.966667 & 0.666667 & 0.433333 \\
\textbf{Thriller} & 0.98782

In [198]:
fig = px.bar(metrics[majorities], x=metrics.index, y="majority@1", color=metrics.index, title="Genre Majority Vote @ 1 Neighbor")
fig.show()

In [199]:
fig = px.bar(metrics[majorities], x=metrics.index, y="majority@5", color=metrics.index, title="Genre Majority Vote @ 5 Neighbors")
fig.show()

In [209]:
recalls = ["recall@1", "recall@2", "recall@3", "recall@5", "recall@8"]

In [210]:
print(metrics[recalls].to_latex(index=True, escape=False,
                                     sparsify=True,
        multirow=True,
        multicolumn=True,
        multicolumn_format='c',
        position='p',
        bold_rows=True))

\begin{table}[p]
\begin{tabular}{lrrr}
\toprule
 & recall@1 & recall@3 & recall@8 \\
index &  &  &  \\
\midrule
\textbf{Action} & 0.966516 & 0.987766 & 0.997424 \\
\textbf{Adventure} & 0.985240 & 0.996310 & 0.996310 \\
\textbf{Animation} & 0.994213 & 0.996528 & 0.998843 \\
\textbf{Comedy} & 0.981245 & 0.995499 & 0.999250 \\
\textbf{Crime} & 0.963889 & 0.975000 & 0.991667 \\
\textbf{Documentary} & 0.978873 & 0.978873 & 0.978873 \\
\textbf{Drama} & 0.971191 & 0.993906 & 0.998892 \\
\textbf{Family} & 0.987500 & 0.993750 & 0.996875 \\
\textbf{Fantasy} & 1.000000 & 1.000000 & 1.000000 \\
\textbf{History} & 0.921053 & 0.947368 & 0.973684 \\
\textbf{Horror} & 0.992274 & 0.998896 & 0.998896 \\
\textbf{Music} & 0.986111 & 0.986111 & 1.000000 \\
\textbf{Mystery} & 0.990826 & 0.990826 & 0.990826 \\
\textbf{Romance} & 0.992063 & 0.997354 & 0.997354 \\
\textbf{Science Fiction} & 0.986799 & 0.990099 & 0.996700 \\
\textbf{TV Movie} & 0.966667 & 0.966667 & 0.966667 \\
\textbf{Thriller} & 0.987826 & 0.

In [211]:
fig = px.bar(metrics[["recall@1"]], x=metrics.index, y="recall@1", color=metrics.index, title="Recall @ 1 Neighbor")
fig.show()

In [221]:
import statistics

In [222]:
statistics.mean([ x[0] for x in metrics[["recall@1"]].values.tolist()])

0.9770602564180475

In [223]:
statistics.mean([ x[0] for x in metrics[["majority@5"]].values.tolist()])

0.7758067652164634