In [None]:
## TODOS POST DEADLINE
#1. compute the per-class soft-similarities via numpy (to validate current results)
#2. long tail analysis with the soft-metrics
#3. triplet-based results

In [16]:
import pandas as pd
import numpy as np
from collections import defaultdict

In [17]:
in_csv_file = '/home/optas/DATA/OUT/ltvrd/similarities.csv'
similarities_used = ['jcn_similarity', 'word2vec_GNews', 'word2vec_visualVG']

In [109]:
df = pd.read_csv(in_csv_file)

In [110]:
## Clip similarities to [-1, 1] range.
for sim in similarities_used:
    too_large = df[sim] > 1    
    df.loc[too_large, sim] = 1
    too_small = df[sim] < -1
    df.loc[too_small, sim] = -1

In [115]:
import os.path  as osp
# How can we do the long tail with the soft? 
# we pick the winner from our table (most likely a hubness) and the baseline.
# I want a csv that tells me for every prediction (rel. or sbj.)
# a) its class, (optionally the class-frequency), and the top-1 or top-5 [choose]
# score per a soft matrix.

sim = 'word2vec_GNews' # 
workon = 'subject' # or relations
top_aux_data_dir = '/home/optas/DATA/OUT/ltvrd'

freq_info = pd.read_csv(osp.join(top_aux_data_dir, 'gvqa_{}_to_train_freq.csv'.format(workon)))
freq_info_dict = dict()

if workon == 'subject':
    x, y = freq_info.gt_sbj, freq_info.sbj_freq_gt
elif workon == 'relations':
    x, y = freq_info.gt_rel, freq_info.rel_freq_gt

for k, v in zip(x, y):
    freq_info_dict[k] = v

ndf = df[df['i'] == 0][[sim, 'gold']]
g = ndf.groupby('gold')
average_per_class = g[sim].mean().reset_index()
average_per_class['frequency'] = average_per_class['gold'].apply(lambda x: freq_info_dict[x])
average_per_class = average_per_class.sort_values('frequency', ascending=False)
average_per_class.to_csv('vgqa_sorted_mean_acc_on_{}_with_{}.csv'.format(workon, sim))

In [5]:
print('Average Rank (ignoring misses >250, with pandas)', df['i'][df['exact_match'] == 1].mean())

('Average Rank (ignoring misses >250, with pandas)', 1.7125)


In [6]:
hits = []
hits_per_class = defaultdict(list)
totally_missed = 0
miss_penalty = 250 # penalty if you didn't find it in the 250-cases

for k in range(0, len(df)-250, 250):
    
    i_th_slice = df.loc[k: k+250-1]
    assert i_th_slice['i'].min() == 0
    assert i_th_slice['i'].max() == 249    
    
    hit = np.where(i_th_slice['exact_match'] == 1.0)[0]    
    assert len(hit) in [0, 1]
    
    i_th_class = i_th_slice['gold'].unique()
    assert len(i_th_class) == 1
    i_th_class = i_th_class[0]
    
    if len(hit) == 1:
        hit = hit[0]
        hits.append(hit)
        hits_per_class[i_th_class].append(hit)
    else:
        totally_missed += 1
        if miss_penalty > 0:
            hits_per_class[i_th_class].append(miss_penalty)
        
print('Average Rank (ignoring misses > 250):  {:.3f}'.format(np.mean(hits)))
print('Total observations:', len(hits))
print('Missed in top 250:', totally_missed)
penalty_mu = np.mean(hits + [miss_penalty] * totally_missed)
print('Average Rank (with miss penalty {}): {:.3f}'.format(miss_penalty, penalty_mu))

p_class_mu = []
for h in hits_per_class:
    p_class_mu.append(np.mean(hits_per_class[h]))    
print('Per Class Average Rank (with miss penalty {})  {:.3f}'.format(miss_penalty, np.mean(p_class_mu)))

Average Rank (ignoring misses > 250):  1.734
('Total observations:', 79)
('Missed in top 250:', 0)
Average Rank (with miss penalty 250): 1.734
Per Class Average Rank (with miss penalty 250)  2.681


In [7]:
g = df.groupby(['gold'])
means_of_groups = []
for k, v in g.groups.items():
    group_content = df.loc[v]
    group_i = group_content[group_content['exact_match'] == 1.0]['i']
    if len(group_i) > 0:
        group_mean = group_i.mean()
        means_of_groups.append(group_mean)
    else:
        pass # Missed element, ignore.
    
print ('Per-class Average Rank (ignore missed): {:.3f}'.format(np.mean(means_of_groups)))

Per-class Average Rank (ignore missed): 2.681


In [8]:
for sim in similarities_used:
    mu = df[df['i'] == 0][sim].mean()
    print ('Average similarity per metric at Top-1', sim, '{:.3f}'.format(mu))

('Average similarity per metric at Top-0', 'jcn_similarity', '0.806')
('Average similarity per metric at Top-0', 'word2vec_GNews', '0.845')
('Average similarity per metric at Top-0', 'word2vec_visualVG', '0.815')


In [15]:
for sim in similarities_used:    
    for k in [1]:
        ndf = df[df['i'] < k]
        max_per_image_gold = ndf.groupby(['image_id', 'gold'])[sim].max() # Missed Corner-case (bag/bag in same image.)
        mu = max_per_image_gold.groupby('gold').mean().mean()
        print ('Per-class similarity per metric at Top-{}'.format(k), sim, '{:.3f}'.format(mu))

('Per-class similarity per metric at Top-1', 'jcn_similarity', '0.773')
('Per-class similarity per metric at Top-1', 'word2vec_GNews', '0.770')
('Per-class similarity per metric at Top-1', 'word2vec_visualVG', '0.794')
