In [1]:
import pandas as pd
# Split definition: a mapping with 'train' / 'val' / 'test' entries, each holding sample indices.
# NOTE: unpickling can execute arbitrary code - only load this file from a trusted source.
indices = pd.read_pickle("indices_museum_dataset.pkl")
# Preview the first ten indices of every split (displayed as a 3-tuple).
tuple(indices[split_name][:10] for split_name in ('train', 'val', 'test'))

(tensor([ 244, 2066, 2390,  491, 1661, 1910,  432, 1716,  561, 1514]),
 tensor([1512, 1029,  136, 1034, 2774,  853,  844, 1302, 1699, 2537]),
 tensor([  87,  376, 2034,  228, 2892,  125, 2048, 1906, 2734, 2307]))

In [2]:
from torch.utils.data import Dataset
import torch
import os

class DescriptionSceneMuseum(Dataset):
    """In-memory dataset pairing every museum description with its scene and art features.

    The sample names are derived from the file names found in ``data_scene_path``
    (sorted alphabetically); ``indices[split]`` then selects, by position in that
    sorted list, the samples belonging to the requested split. All tensors and raw
    descriptions of the split are loaded eagerly in ``__init__``.

    Args:
        data_description_path: directory with one ``<name>.pt`` description tensor per sample.
        data_raw_description_path: directory with one ``<name>.pkl`` per sample; each is
            unpickled and its items are joined with single spaces.
        data_scene_path: directory with one ``<name>.pt`` scene-image tensor per sample.
        data_art_path: directory with one ``<name>.pt`` art-vector tensor per sample.
        indices: mapping from split name to a tensor of positions (``.tolist()`` is called on it).
        split: key of ``indices`` to load (also used, upper-cased, in the summary print).
        customized_margin: accepted for backward compatibility; not used by this class.
    """

    def __init__(self, data_description_path, data_raw_description_path, data_scene_path, data_art_path, indices, split, customized_margin=False):
        self.description_path = data_description_path
        self.raw_description_path = data_raw_description_path
        self.data_pov_path = data_scene_path
        self.indices = indices[split]
        self.split = split

        # Remove the ".pt" *suffix* only. str.strip(".pt") treats its argument as a set of
        # characters and would also eat leading/trailing 'p', 't' and '.' from the name itself.
        available_data = sorted(self._strip_pt_suffix(fname) for fname in os.listdir(data_scene_path))
        available_data = [available_data[ix] for ix in self.indices.tolist()]

        self.descs = [torch.load(os.path.join(data_description_path, f"{sm}.pt")) for sm in available_data]
        # NOTE: read_pickle unpickles arbitrary objects - only point this at trusted files.
        self.raw_descs = [" ".join(pd.read_pickle(os.path.join(data_raw_description_path, f"{sm}.pkl"))) for sm in available_data]
        self.pov_images = [torch.load(os.path.join(data_scene_path, f"{sm}.pt")) for sm in available_data]
        self.art_vectors = [torch.load(os.path.join(data_art_path, f"{sm}.pt")) for sm in available_data]
        self.names = available_data
        print(f"'{split.upper()}': {len(self.names)} names, "
              f"{len(self.descs)} sentences ({self._average_length(self.descs)} avg), "
              f"{len(self.pov_images)} images ({self._average_length(self.pov_images)} avg).")

    @staticmethod
    def _strip_pt_suffix(filename):
        """Return ``filename`` without a trailing ".pt" (unchanged if it has none)."""
        return filename[:-len(".pt")] if filename.endswith(".pt") else filename

    @staticmethod
    def _average_length(items):
        """Mean ``len()`` of the items; 0.0 for an empty split instead of dividing by zero."""
        return sum(len(x) for x in items) / len(items) if items else 0.0

    def __len__(self):
        return len(self.names)

    def __getitem__(self, index):
        """Return ``(desc_tensor, scene_img_tensor, scene_art_tensor, raw_desc, name, index)``."""
        desc_tensor = self.descs[index]
        raw_desc = self.raw_descs[index]
        scene_img_tensor = self.pov_images[index]
        scene_art_tensor = self.art_vectors[index]
        name = self.names[index]

        return desc_tensor, scene_img_tensor, scene_art_tensor, raw_desc, name, index

In [3]:
# Backbone whose pre-extracted art vectors are used, and the device the models run on.
visual_backbone, device = "rn50", "cuda:0"

# Feature width of the art vectors for every supported backbone key.
visual_bb_ftsize_k = {
    'rn18': 512,
    'rn34': 512,
    'rn50': 2048,
    'rn101': 2048,
    'vitb16': 768,
    'vitb32': 768,
    'openclip': 512,
}
visual_bb_ftsize = visual_bb_ftsize_k[visual_backbone]

In [4]:
# Test split only; every tensor of the split is loaded into memory here.
test_dataset = DescriptionSceneMuseum(
    data_description_path="./tmp_museums/open_clip_features_museums3k/descriptions/sentences",
    data_raw_description_path="./tmp_museums/open_clip_features_museums3k/descriptions/tokens_strings",
    data_scene_path="./tmp_museums/open_clip_features_museums3k/images",
    data_art_path=f"./preextracted_vectors_wikiart_{visual_backbone}",
    indices=indices,
    split="test",
)


'TEST': 450 names, 450 sentences (170.16444444444446 avg), 450 images (77.38666666666667 avg).


In [5]:
import torch.nn as nn
class MyHierBaseline_v2(nn.Module):
    """Hierarchical visual encoder: image level -> room level -> museum level.

    Each image feature is concatenated with its art feature and projected
    (``trf_photo``); groups of ``imgs_per_room`` consecutive images are averaged and
    projected (``trf_room``); the room vectors are then averaged and projected into
    the final embedding (``trf_museum``).
    """

    def __init__(self, in_channels, out_channels, feature_size, art_features_size=2048):
        super().__init__()
        self.trf_photo = nn.Linear(in_channels+art_features_size, out_channels)
        self.trf_room = nn.Linear(out_channels, out_channels)
        self.relu = nn.ReLU()
        self.trf_museum = nn.Linear(out_channels, feature_size)

    def forward(self, x, x_art, list_length=None, clip_mask=None, imgs_per_room=None):
        """Encode a padded batch of museums.

        Args:
            x: image features; transposed on dims (1, 2) before the linear layer, so the
                feature dimension is dim 1 and the image dimension is dim 2.
            x_art: art features laid out like ``x``.
            list_length: per-item number of real (non-padding) images; list or tensor. Required.
            clip_mask: optional multiplicative mask applied to the image-level projection.
            imgs_per_room: int, number of consecutive images forming one room. Required;
                the padded image count must be divisible by it (``view`` fails otherwise).

        Returns:
            ``(x1_img, x1_room, x1_museum)``: image-level, room-level and museum-level features.

        Raises:
            AssertionError: if ``imgs_per_room`` is not an int or ``list_length`` is None.
        """
        # Explicit raises (instead of `assert`) so the checks survive `python -O`; the
        # non-hierarchical fallback that used to follow `assert False` was unreachable.
        if not isinstance(imgs_per_room, int):
            raise AssertionError(imgs_per_room)
        if list_length is None:
            raise AssertionError("list_length is required")

        x = x.to(torch.float32)

        x1 = self.trf_photo(torch.cat((
            x.transpose(1, 2),
            x_art.transpose(1, 2)
        ), -1))

        if clip_mask is not None:
            x1 = x1 * clip_mask
        # remove the effect of the padding
        for item_idx in range(x.shape[0]):
            x1[item_idx, list_length[item_idx]:, :] = 0
        x1_img = self.relu(x1)

        bsz, max_n_imgs, ft_size = x1_img.shape
        n_rooms = max_n_imgs // imgs_per_room
        x1_room = x1_img.view(bsz, n_rooms, imgs_per_room, ft_size)

        # we aggregate the "image-level" information, and learn room-level information
        x1_room = x1_room.mean(2)
        x1_room = self.trf_room(x1_room)
        x1_room = self.relu(x1_room)

        # then, we aggregate the room-level info, and learn a museum-level representation
        x1_museum = x1_room.clone()
        # NOTE(review): dim 1 indexes rooms here while list_length counts images, so this
        # slice appears to mask only when list_length < n_rooms; padded rooms may still
        # contribute relu(bias) to the sum. Left as-is because existing checkpoints were
        # presumably trained with this behaviour - confirm before changing.
        for item_idx in range(x.shape[0]):
            x1_museum[item_idx, list_length[item_idx]:, :] = 0
        list_length_t = torch.tensor(list_length, device=x1_room.device) if isinstance(list_length, list) else list_length.to(x1_room.device)
        # Divide the room sum by the (possibly fractional) number of real rooms.
        x1_museum = x1_museum.sum(1) / (list_length_t / imgs_per_room).view(-1, 1)
        x1_museum = self.trf_museum(x1_museum)
        return x1_img, x1_room, x1_museum

In [6]:
class GRUNet(nn.Module):
    """Sequence encoder that returns the final hidden state of a single-layer GRU.

    With ``is_bidirectional=True`` the final states of the two directions are
    averaged; otherwise the leading dimension of the final state is squeezed away.
    """

    def __init__(self, hidden_size, num_features, is_bidirectional=False):
        super().__init__()
        self.is_bidirectional = is_bidirectional
        self.gru = nn.GRU(
            input_size=num_features,
            hidden_size=hidden_size,
            batch_first=True,
            bidirectional=is_bidirectional,
        )

    def forward(self, x):
        # Only the final hidden state is used; the per-step outputs are discarded.
        final_hidden = self.gru(x.to(torch.float32))[1]
        if not self.is_bidirectional:
            return final_hidden.squeeze(0)
        return final_hidden.mean(0)
    
class MyBaseline(nn.Module):
    """Mean-pooling visual baseline: project every image, average, project again."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.trf_photo = nn.Linear(in_channels, out_channels)
        self.trf_mean = nn.Linear(out_channels, out_channels)
        self.relu = nn.ReLU()

    def forward(self, x, list_length=None, clip_mask=None, imgs_per_room=None):
        """Encode a padded batch of museums into one vector each.

        Args:
            x: image features; transposed on dims (1, 2) before the linear layer, so the
                feature dimension is dim 1 and the image dimension is dim 2.
            list_length: per-item number of real (non-padding) images, as a list or tensor.
                When None, every image position is treated as real.
            clip_mask: optional multiplicative mask applied to the image-level projection.
            imgs_per_room: unused; kept so the signature matches the hierarchical model.

        Returns:
            Museum-level features, one row per batch item.
        """
        x = x.to(torch.float32)

        x1 = self.trf_photo(x.transpose(1, 2))

        if clip_mask is not None:
            x1 = x1 * clip_mask
        # remove the effect of the padding
        if list_length is not None:
            for item_idx in range(x.shape[0]):
                x1[item_idx, list_length[item_idx]:, :] = 0
        x1_img = self.relu(x1)

        bsz, max_n_imgs, _ = x1_img.shape
        if list_length is None:
            # No lengths given: average over all positions (previously crashed on None.to()).
            list_length_t = torch.tensor([max_n_imgs] * bsz, device=x1_img.device)
        elif isinstance(list_length, list):
            list_length_t = torch.tensor(list_length, device=x1_img.device)
        else:
            list_length_t = list_length.to(x1_img.device)
        # Masked positions are zero after the ReLU, so sum / length is the mean over real images.
        x1_mean = x1_img.sum(1) / list_length_t.unsqueeze(1)
        x1_museum = self.trf_mean(x1_mean)
        return x1_museum

In [7]:
import numpy as np 
def cosine_sim(im, s):
    """Pairwise cosine similarity between the rows of ``im`` and the rows of ``s``.

    Both arguments are 2-D; the result has one row per row of ``im`` and one column
    per row of ``s``. A small constant under the square root keeps all-zero rows
    from producing a division by zero.
    """
    im_len = torch.sqrt((im ** 2).sum(1) + 1e-18).view(-1, 1)
    s_len = torch.sqrt((s ** 2).sum(1) + 1e-18).view(1, -1)
    return im.mm(s.t()) / (im_len * s_len)


def create_rank(result, entire_descriptor, desired_output_index):
    """Rank every row of ``entire_descriptor`` by cosine similarity to ``result``.

    Returns:
        ``(position, ordering)`` where ``ordering`` holds the row indices sorted from
        most to least similar and ``position`` is the 0-based place of
        ``desired_output_index`` in that ordering.
    """
    scores = torch.nn.functional.cosine_similarity(entire_descriptor, result, dim=1).squeeze()
    ordering = scores.argsort(descending=True)
    hits = torch.where(ordering == desired_output_index)[0]
    return hits.item(), ordering


def evaluate(output_description, output_scene, section, out_values=False, excel_format=False):
    """Cross-modal retrieval metrics between paired description and scene embeddings.

    Row ``j`` of ``output_description`` is the ground-truth match of row ``j`` of
    ``output_scene``. Metrics are computed in both directions: ``ds_*`` uses each
    description as the query and ranks the scenes, ``sd_*`` does the opposite.

    Args:
        output_description: description embeddings, one row per sample.
        output_scene: scene embeddings, one row per sample.
        section: label printed in front of the metrics when ``out_values`` is set.
        out_values: print a human-readable summary (includes the mean ranks).
        excel_format: additionally print a ';'-separated header and value line, and
            append that value line to the returned tuple.

    Returns:
        ``(ds_r1, ds_r5, ds_r10, sd_r1, sd_r5, sd_r10, avg_ndcg_10_entire,
        avg_ndcg_entire, ds_medr, sd_medr)``, plus the formatted string as an 11th
        element when ``excel_format`` is True. Recalls are percentages, median ranks
        are 1-based, and both NDCG slots are always -1 (NDCG is not computed here).
    """
    def ranks_of(queries, gallery):
        # 0-based position of the ground-truth item for every query.
        return np.array([create_rank(query, gallery, gt_idx)[0] for gt_idx, query in enumerate(queries)])

    def recall_at(ranks, k, n_queries):
        return 100 * len(np.where(ranks < k)[0]) / n_queries

    def render(r1, r5, r10, medr, meanr):
        labelled = (("R@1", r1), ("R@5", r5), ("R@10", r10), ("median rank", medr), ("mean rank", meanr))
        return "".join(f"{label}: {value:.4f}   " for label, value in labelled)

    ranks_scene = ranks_of(output_scene, output_description)
    ranks_description = ranks_of(output_description, output_scene)

    n_q = len(output_scene)
    sd_r1, sd_r5, sd_r10 = (recall_at(ranks_scene, k, n_q) for k in (1, 5, 10))
    sd_medr = np.median(ranks_scene) + 1
    sd_meanr = ranks_scene.mean() + 1

    n_q = len(output_description)
    ds_r1, ds_r5, ds_r10 = (recall_at(ranks_description, k, n_q) for k in (1, 5, 10))
    ds_medr = np.median(ranks_description) + 1
    ds_meanr = ranks_description.mean() + 1

    ds_out = render(ds_r1, ds_r5, ds_r10, ds_medr, ds_meanr)
    sc_out = render(sd_r1, sd_r5, sd_r10, sd_medr, sd_meanr)

    if out_values:
        print(section + " data: ")
        print("Scenes ranking: " + ds_out)
        print("Descriptions ranking: " + sc_out)

    # No NDCG values are ever collected in this function, so both slots stay at -1.
    avg_ndcg_10_entire = -1
    avg_ndcg_entire = -1

    metrics = (ds_r1, ds_r5, ds_r10, sd_r1, sd_r5, sd_r10, avg_ndcg_10_entire, avg_ndcg_entire, ds_medr, sd_medr)
    if not excel_format:
        return metrics

    formatted_string = f"{ds_r1};{ds_r5};{ds_r10};{sd_r1};{sd_r5};{sd_r10};{ds_medr};{sd_medr}"
    print("-"*5)
    # Literal header row (deliberately not an f-string), followed by the values.
    print("{ds_r1};{ds_r5};{ds_r10};{sd_r1};{sd_r5};{sd_r10};{ds_medr};{sd_medr}")
    print(formatted_string)
    print("-"*5)
    return metrics + (formatted_string,)


In [8]:
# Checkpoint of the hierarchical model; 'best_model_0' holds the visual encoder weights.
weights_vis = torch.load("models/TQFQD/hierarchical_v2_art_vectors_rn50_1.pt")
model_pov = MyHierBaseline_v2(
    in_channels=512,
    out_channels=256,
    feature_size=256,
    art_features_size=visual_bb_ftsize,
)

model_pov.load_state_dict(weights_vis['best_model_0'])
# Last expression: moves the model to the device and displays its architecture.
model_pov.to(device)

MyHierBaseline_v2(
  (trf_photo): Linear(in_features=2560, out_features=256, bias=True)
  (trf_room): Linear(in_features=256, out_features=256, bias=True)
  (relu): ReLU()
  (trf_museum): Linear(in_features=256, out_features=256, bias=True)
)

In [9]:
# Text encoder of the hierarchical model; its weights sit under 'best_model_1' of the
# checkpoint currently bound to `weights_vis`.
model_desc_pov = GRUNet(
    hidden_size=256,
    num_features=512,
    is_bidirectional=True,
)
model_desc_pov.load_state_dict(weights_vis['best_model_1'])
model_desc_pov.to(device)

GRUNet(
  (gru): GRU(512, 256, batch_first=True, bidirectional=True)
)

In [10]:
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
def collate_fn(data):  # data -> desc_tensor, scene_img_tensor, name, index
    """Batch dataset samples: pack the descriptions, pad the image and art features.

    Each sample is ``(desc, scene_imgs, art_vectors, [raw_desc,] name, index)``; the
    raw description is present when the sample has six fields.

    Returns:
        ``(descs_pov, padded_pov, padded_art, [raw_descs,] names, indexes, len_pov)``
        where ``descs_pov`` is a PackedSequence, the padded tensors have dims 1 and 2
        swapped after padding, and ``len_pov`` holds the unpadded image counts.
    """
    with_raw = len(data[0]) == 6  # train -> raw descriptions
    shift = 1 if with_raw else 0

    desc_seqs = [sample[0] for sample in data]
    desc_lengths = torch.tensor([len(seq) for seq in desc_seqs])
    descs_pov = pack_padded_sequence(pad_sequence(desc_seqs, batch_first=True),
                                     desc_lengths,
                                     batch_first=True,
                                     enforce_sorted=False)

    pov_seqs = [sample[1] for sample in data]
    len_pov = torch.tensor([len(seq) for seq in pov_seqs])
    padded_pov = torch.transpose(pad_sequence(pov_seqs, batch_first=True), 1, 2)

    art_seqs = [sample[2] for sample in data]
    padded_art = torch.transpose(pad_sequence(art_seqs, batch_first=True), 1, 2)

    names = [sample[3 + shift] for sample in data]
    indexes = [sample[4 + shift] for sample in data]

    if not with_raw:
        return descs_pov, padded_pov, padded_art, names, indexes, len_pov

    raw_descs = [sample[3] for sample in data]
    return descs_pov, padded_pov, padded_art, raw_descs, names, indexes, len_pov

In [11]:
from torch.utils.data import DataLoader
batch_size = 32
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    collate_fn=collate_fn,
    shuffle=False,  # batch j must fill output rows j*batch_size onwards
    num_workers=4,
)
test_names = []
model_pov.eval()
model_desc_pov.eval()

# One 256-d embedding per test sample and modality, filled batch by batch below.
output_description_test = torch.empty(len(indices['test']), 256)
output_pov_test = torch.empty(len(indices['test']), 256)

with torch.no_grad():
    for j, (data_desc_pov, data_pov, data_art, raw_descs, names, indexes, len_pov) in enumerate(test_loader):

        data_desc_pov, data_pov, data_art = (
            batch_part.to(device) for batch_part in (data_desc_pov, data_pov, data_art)
        )

        test_names += names

        bsz, fts, no_room_times_no_imgs = data_pov.shape

        output_desc_pov = model_desc_pov(data_desc_pov)
        output_pov_img_level, output_pov_room_level, output_pov = model_pov(data_pov, data_art, len_pov, imgs_per_room=12)

        # Rows covered by this batch; the last batch may be shorter than batch_size.
        initial_index = j * batch_size
        final_index = min((j + 1) * batch_size, len(indices['test']))
        output_description_test[initial_index:final_index, :] = output_desc_pov
        output_pov_test[initial_index:final_index, :] = output_pov


In [12]:
# Sanity check: one embedding row per test sample.
output_pov_test.size()

torch.Size([450, 256])

In [13]:
# Retrieval metrics of the hierarchical model on the test split.
evaluate(
    output_description=output_description_test,
    output_scene=output_pov_test,
    section="test",
)

(48.666666666666664,
 82.88888888888889,
 91.33333333333333,
 45.55555555555556,
 81.55555555555556,
 90.22222222222223,
 -1,
 -1,
 2.0,
 2.0)

In [14]:
import open_clip
# Load the OpenCLIP 'ViT-B-32-quickgelu' model with the 'laion400m_e32' pretrained weights,
# together with the tokenizer registered under the same model name. The second return
# value of create_model_and_transforms is discarded.
# NOTE(review): binding `_` shadows IPython's "last output" variable, and none of
# clip / preprocess / tokenizer is referenced in the cells visible here - confirm they are needed.
clip, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32-quickgelu', pretrained='laion400m_e32')
tokenizer = open_clip.get_tokenizer('ViT-B-32-quickgelu')


In [15]:
def cosine_sim(im, s):
    """Pairwise cosine similarity between the rows of ``im`` and the rows of ``s``.

    NOTE: this repeats the ``cosine_sim`` defined in an earlier cell of this notebook
    and silently re-binds the name; the two definitions compute the same thing.
    """
    dot_products = im.mm(s.t())
    # Row norms, shaped so that their product broadcasts to the shape of dot_products.
    im_len = torch.sqrt((im ** 2).sum(1) + 1e-18).view(-1, 1)
    s_len = torch.sqrt((s ** 2).sum(1) + 1e-18).view(1, -1)
    return dot_products / (im_len * s_len)

In [16]:
from collections import Counter

import string
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

import nltk
# Fetch the NLTK resources used by the concept extraction further down: the stop-word
# list (stopwords.words) and the tokenizer models (word_tokenize).
# NOTE(review): newer NLTK releases may additionally require 'punkt_tab' for
# word_tokenize - confirm against the installed version.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/afalcon/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/afalcon/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [17]:
# Mean-pooling baseline: same pipeline as the hierarchical model, different checkpoint.
# NOTE: this re-binds `weights_vis`, `batch_size`, `test_loader` and `test_names`; after this
# cell has run, `weights_vis` refers to the baseline checkpoint, so re-running an earlier
# cell that reads it would load baseline weights.
weights_vis = torch.load("models/L6U2M/mean_pool_baseline_2.pt")
model_pov_base = MyBaseline(in_channels=512, out_channels=256)
model_pov_base.load_state_dict(weights_vis['best_model_0'])
model_pov_base.to(device)

model_desc_pov_base = GRUNet(
    hidden_size=256,
    num_features=512,
    is_bidirectional=True,
)
model_desc_pov_base.load_state_dict(weights_vis['best_model_1'])
model_desc_pov_base.to(device)

from torch.utils.data import DataLoader
batch_size = 32
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    collate_fn=collate_fn,
    shuffle=False,  # batch j must fill output rows j*batch_size onwards
    num_workers=4,
)
test_names = []
model_pov_base.eval()
model_desc_pov_base.eval()

# One 256-d embedding per test sample and modality, filled batch by batch below.
output_description_test_base = torch.empty(len(indices['test']), 256)
output_pov_test_base = torch.empty(len(indices['test']), 256)

with torch.no_grad():
    for j, (data_desc_pov, data_pov, data_art, raw_descs, names, indexes, len_pov) in enumerate(test_loader):

        data_desc_pov, data_pov, data_art = (
            batch_part.to(device) for batch_part in (data_desc_pov, data_pov, data_art)
        )

        test_names += names

        bsz, fts, no_room_times_no_imgs = data_pov.shape

        output_desc_pov = model_desc_pov_base(data_desc_pov)
        # The baseline ignores the art vectors; only image features and lengths are passed.
        output_pov = model_pov_base(data_pov, len_pov)

        # Rows covered by this batch; the last batch may be shorter than batch_size.
        initial_index = j * batch_size
        final_index = min((j + 1) * batch_size, len(indices['test']))
        output_description_test_base[initial_index:final_index, :] = output_desc_pov
        output_pov_test_base[initial_index:final_index, :] = output_pov


In [18]:
# Hierarchical model again, shown next to the baseline numbers for comparison.
evaluate(
    output_description=output_description_test,
    output_scene=output_pov_test,
    section="test",
)

(48.666666666666664,
 82.88888888888889,
 91.33333333333333,
 45.55555555555556,
 81.55555555555556,
 90.22222222222223,
 -1,
 -1,
 2.0,
 2.0)

In [19]:
# Retrieval metrics of the mean-pooling baseline on the test split.
evaluate(
    output_description=output_description_test_base,
    output_scene=output_pov_test_base,
    section="test",
)

(21.77777777777778,
 50.888888888888886,
 69.55555555555556,
 21.11111111111111,
 49.111111111111114,
 64.88888888888889,
 -1,
 -1,
 5.0,
 6.0)

In [20]:
# n_most_common: how many of the most frequent words are kept per description.
# n_top_k: how many retrieved museums are inspected per query.
n_most_common, n_top_k = 5, 1

In [21]:
# Filled further down in this cell with one comma-separated concept string per test museum
# (repeated n_top_k times each).
gt_concepts = []
# Hand-curated tokens excluded from the concept extraction in filter_proc, on top of the
# NLTK English stop words and punctuation. A few entries appear more than once; that is
# harmless because the list is only used for membership tests.
bad_words =  [
    '1533', 'painting', 'paintings', 'one', 'two', 'titled', 'described', 'follows', 'work', 'room', 'picture', 'csontv', 'last', 'late', 'later', 'ois', 'period',
    'right', 'left', 'panels', 'painted', 'scene', 'first', '\'1\'', '1', 'figures', 'scenes', 'shows', 'style', 'school', '``', 'elements', 'form', 'found', 'seven', 'may',
    "''", 'wall', '2', 'st', 'van', 'made', 'head', 'di', 'genre', 'four', 'three', 'panel', 'also', 'artist', 'new', 'de', 'commissioned', 'frame', 'half', 'image',
    'pictures', 'self', 'considered', 'view', 'grand', 'main', 'painter', 'rer', 'end', 'art', 'signed', 'subject', 'figure', 'figures', 'first', 'second', 'following',
    'side', 'whose', 'seen', 'known', '000', '3', 'acts', 'age', 'almost', 'another', 'although', 'appears', 'around', 'artists', 'background', 'behind', 'brothers', 'c', 
    'centre', 'ceiling', 'central', 'could', 'corner', 'da', 'dated', 'del', 'dell', 'della', 'der', 'du', 'evident', 'f', 'example', 'famous', 'five', 'foreground', 'many',
    'fully', 'g', 'good', 'great', 'high', 'however', 'ii', 'important', 'influence', 'le', 'like', 'little', 'lot', 'lotto', 'often', 'painters', 'placed', 'probably', 'use',
    'produced', 'represented', 'represents', 'representing', 'shown', 'six', 'time', 'toward', 'towards', 'twenty', 'way', 'works', 'depicted', 'depicts', 'different', 'see',
    'us', 'career', 'collection', 'composition', 'construction', 'early', 'even', 'exchange', 'master', 'middle', 'museum', 'rard', 'rati', 'several', 'sts', 'ry', 'version',
    'viewer', 'year', 'years', '25', '35', '6', 'among', 'along', 'appear', 'attributed', 'back', 'based', 'became', 'began', 'belonged', 'beside', 'bottom', 'called', 'calling', 'came',
    'characters', 'cm', 'compartments', 'created', 'dei', 'depicting', 'detail', 'displayed', 'either', 'el', 'earlier', 'es', 'especially', 'existence', 'features', 'exterior', 'interior',
    'holding', 'identified', 'intended', 'length', 'lent', 'lived', 'long', 'ly', 'madame', 'much', 'near', 'perhaps', 'place', 'point', 'popular', 'portrayed', 'r', 'rather', 'rd',
    'raire', 'register', 'return', 'reverse', 'rooms', 'seem', 'show', 'sides', 'similar','something', 'spectator', 'state', 'study', 'subjects', 'surrounded', 'sz',
    'taken', 'though', 'together', 'upper', 'v', 'von', 'without', 'would', 'x'
]

def filter_proc(_words, most_common=15):
    """Return the ``most_common`` most frequent informative tokens of ``_words``.

    A token is dropped when it is an NLTK English stop word, when it occurs inside
    ``string.punctuation`` (a substring test), or when it is listed in ``bad_words``.
    The result is ordered from most to least frequent.
    """
    stop_words = set(stopwords.words('english'))

    def is_informative(word):
        return not (word in stop_words or word in string.punctuation or word in bad_words)

    word_counts = Counter(filter(is_informative, _words))
    return [word for word, _count in word_counts.most_common(most_common)]

# Ground-truth concepts: the most frequent informative words of every test description.
# The dataset length replaces the previously hard-coded 450, so the loop follows the split size.
for ix in range(len(test_dataset)):
    raw_desc = test_dataset[ix][-3]  # third-from-last field of a sample is the raw description
    words = word_tokenize(raw_desc.lower())

    most_common_words = filter_proc(words, n_most_common)
    # Repeated n_top_k times so that entry ix*n_top_k+it lines up with the it-th retrieved museum.
    gt_concepts.extend([", ".join(most_common_words)] * n_top_k)

print(gt_concepts[:5])

['landscape, religious, life, still, italian', 'life, portrait, french, still, altarpiece', 'portrait, italian, landscape, religious, century', 'christ, portrait, mary, italian, flemish', 'portrait, life, landscape, italian, studio']


In [22]:
# Concepts of the museums retrieved by the mean-pooling baseline for every test description.
baseline_concepts = []
top1r = 0  # number of queries whose top-1 retrieved museum is the ground truth
# Loop-invariant: move the gallery of scene embeddings to the device once, not per query.
pov_gallery_base = output_pov_test_base.to(device)
with torch.no_grad():  # inference only; avoids building autograd graphs for every query
    for ix in range(len(test_dataset)):
        query = test_dataset[ix][0]
        query_fts = model_desc_pov_base(query.to(device))

        # Similarities have one row per gallery museum; topk over dim 0 picks the best rows.
        ext_res = torch.topk(cosine_sim(pov_gallery_base, query_fts.float().unsqueeze(0)), k=n_top_k, dim=0)
        if ext_res[1][0].item() == ix:
            top1r += 1
        for other_ix in ext_res[1]:
            rd = test_dataset[other_ix.item()][-3]
            words = word_tokenize(rd.lower())
            most_common_words = filter_proc(words, n_most_common)
            baseline_concepts.append(", ".join(most_common_words))
print(top1r)

98


In [23]:
# Concepts of the museums retrieved by the hierarchical model for every test description.
hierartex_concepts = []
_top_1_indices = []       # index of the top-1 retrieved museum per query
_top_1_museum_names = []  # name of the top-1 retrieved museum per query
top1r = 0  # number of queries whose top-1 retrieved museum is the ground truth
# Loop-invariant: move the gallery of scene embeddings to the device once, not per query.
pov_gallery = output_pov_test.to(device)
with torch.no_grad():  # inference only; avoids building autograd graphs for every query
    for ix in range(len(test_dataset)):
        query = test_dataset[ix][0]
        query_fts = model_desc_pov(query.to(device))

        # Similarities have one row per gallery museum; topk over dim 0 picks the best rows.
        ext_res = torch.topk(cosine_sim(pov_gallery, query_fts.float().unsqueeze(0)), k=n_top_k, dim=0)
        top_1_ix = ext_res[1][0].item()
        _top_1_indices.append(top_1_ix)
        _top_1_museum_names.append(test_dataset[top_1_ix][-2])
        if top_1_ix == ix:
            top1r += 1

        for other_ix in ext_res[1]:
            rd = test_dataset[other_ix.item()][-3]
            words = word_tokenize(rd.lower())
            most_common_words = filter_proc(words, n_most_common)
            hierartex_concepts.append(", ".join(most_common_words))

print(top1r)

219


In [24]:
def count_matches(gt, pred):
    """Count (ground-truth term, predicted term) pairs where the former occurs in the latter.

    Both arguments are comma-separated concept strings. Matching is a substring test,
    so 'fresco' matches 'frescoes' but not the other way round. Empty ground-truth
    terms (e.g. from an empty ``gt`` string) are ignored: the empty string is a
    substring of everything and would otherwise match every predicted term.
    """
    gt_terms = [term.strip() for term in gt.split(",")]
    pred_terms = [term.strip() for term in pred.split(",")]
    return sum(1 for concept in gt_terms if concept for p in pred_terms if concept in p)

# Per query, compare how many ground-truth concepts each model's retrieved museums share:
# win = hierarchical model matches more, loss = baseline matches more.
wins, draws, losses = 0, 0, 0
# Each query owns n_top_k consecutive entries in the concept lists; this single loop also
# covers n_top_k == 1, and the query count is derived instead of hard-coding 450.
for ix in range(len(gt_concepts) // n_top_k):
    rows = range(ix * n_top_k, (ix + 1) * n_top_k)
    cum_h = sum(count_matches(gt_concepts[row], hierartex_concepts[row]) for row in rows)
    cum_b = sum(count_matches(gt_concepts[row], baseline_concepts[row]) for row in rows)

    if cum_h > cum_b:
        wins += 1
    elif cum_h == cum_b:
        draws += 1
    else:
        losses += 1

print(f"wins {wins}, draws {draws}, losses {losses}")
print(f"{wins}/{draws}/{losses}")


wins 228, draws 116, losses 106
228/116/106


In [25]:
# Qualitative example for one query: top-1 retrieved index and museum name, the query's
# ground-truth concepts, and the concepts of the museum the hierarchical model retrieved.
chosen_idx = 255  # 69, 42, 255
(
    _top_1_indices[chosen_idx],
    _top_1_museum_names[chosen_idx],
    gt_concepts[chosen_idx],
    hierartex_concepts[chosen_idx],
)

(190,
 'Museum1798-7.unity',
 'portrait, landscape, italian, christ, life',
 'french, italian, portrait, religious, life')

In [26]:
# Vocabulary of distinct ground-truth concepts across all test descriptions.
concepts = {term.strip() for joined in gt_concepts for term in joined.split(",")}

print(len(concepts))
concepts

156


{'adam',
 'aeneas',
 'agnes',
 'altar',
 'altarpiece',
 'andrew',
 'angels',
 'angeluccio',
 'anguissola',
 'anthony',
 'apollo',
 'arms',
 'baptist',
 'barak',
 'bardi',
 'begat',
 'bianca',
 'blind',
 'blue',
 'bridge',
 'campin',
 'cano',
 'caravaggio',
 'cardinal',
 'carlevaris',
 'carlo',
 'catherine',
 'cecilia',
 'cell',
 'century',
 'chapel',
 'child',
 'children',
 'choir',
 'christ',
 'church',
 'city',
 'claude',
 'colonna',
 'constable',
 'courbet',
 'cranach',
 'cross',
 'cupid',
 'cuyp',
 'cycle',
 'david',
 'death',
 'decoration',
 'diderot',
 'dutch',
 'esther',
 'fabritius',
 'fall',
 'family',
 'father',
 'ferdinand',
 'fischer',
 'flemish',
 'florence',
 'flower',
 'fountain',
 'francesco',
 'francia',
 'francis',
 'french',
 'fresco',
 'frescoes',
 'fruit',
 'garden',
 'gauguin',
 'gentile',
 'german',
 'giorgione',
 'giovanni',
 'giuseppe',
 'goethe',
 'gogh',
 'gonzaga',
 'hall',
 'haman',
 'hand',
 'hecuba',
 'holofernes',
 'holy',
 'horses',
 'italian',
 'jacob'