In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local rankutils package) are picked up without restarting
# the kernel. NOTE(review): mode semantics come from the IPython docs, not from this file.
%load_ext autoreload
%autoreload 2

In [2]:
# Render an HTML snippet whose script hides every `div.input` element once the
# document is ready and whose link calls code_toggle() to show/hide them again.
# The script relies on `$` being defined in the page (presumably jQuery shipped
# with the notebook front end -- TODO confirm for the target front end).
from IPython.display import HTML
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

In [3]:
def save_notebook():
    """Ask the notebook front end to save the current notebook.

    Displays a JavaScript object whose code is
    ``IPython.notebook.save_notebook()``, restricted to the
    ``application/javascript`` MIME type.

    NOTE(review): the ``IPython.notebook`` JavaScript object is presumably only
    available in the classic Notebook front end -- confirm before relying on
    this in other front ends.

    Returns:
        Whatever ``IPython.display.display`` returns for this call.
    """
    # Imported locally so the helper works on its own: `Javascript` is not a
    # builtin, and `display` is only injected as a builtin by some IPython
    # versions, so relying on ambient names raises NameError.
    from IPython.display import Javascript, display

    return display(Javascript("IPython.notebook.save_notebook()"),
                   include=['application/javascript'])

def output_HTML(read_file, output_file):
    """Convert a notebook file to HTML and write it to disk.

    Args:
        read_file: path to the source notebook ('.ipynb'), read with
            ``nbformat.read(..., as_version=4)``.
        output_file: path of the '.html' file to write (UTF-8 encoded).

    Returns:
        None.
    """
    from nbconvert import HTMLExporter
    import codecs
    import nbformat

    exporter = HTMLExporter()
    output_notebook = nbformat.read(read_file, as_version=4)
    # `resources` is returned by the exporter but not needed here.
    output, _resources = exporter.from_notebook_node(output_notebook)

    # Use a context manager so the handle is flushed and closed deterministically
    # instead of being left to garbage collection.
    with codecs.open(output_file, 'w', encoding='utf-8') as html_file:
        html_file.write(output)

In [4]:
import sys, os
import glob
import warnings
sys.path.append('../')

from rankutils.rIO import read_rank
from rankutils.cfgloader import cfgloader
from rankutils.drawing import heatmap, annotate_heatmap
from rankutils.mappings import exp_aliases

import numpy as np
import pandas

import matplotlib.pyplot as plt

from scipy.stats import kendalltau

from tqdm import tqdm_notebook as tqdm
from tqdm import tnrange

In [5]:
# Path configuration file. The original machine-specific location stays the
# default, but it can be overridden through the RANKUTILS_PATHCFG environment
# variable so the notebook can run elsewhere without editing this cell.
pathcfg = cfgloader(os.environ.get(
    "RANKUTILS_PATHCFG",
    "/home/alberto/phD/projects/performance_prediction/ret-mr-learning/source/path_2.cfg"))
dataset = "oxford"
# Section names looked up in `pathcfg`; alternative sets are kept for quick switching.
#keys = ['imagenet_001', 'imagenet_002', 'imagenet_003', 'imagenet_004']
#keys = ['places365_001', 'places365_002', 'places365_003', 'places365_004', 'places365_005']
#keys = ['vggfaces_001', 'vggfaces_002', 'vggfaces_003', 'vggfaces_004', 'vggfaces_005']
keys = ['unicamp_004', 'unicamp_005']
# Number of top-ranked entries (and label columns) kept per rank.
topk=10

In [6]:
# pathmap:  rank-type id ('rktpdir' option) -> sorted list of '*.rk' rank files
# labelmap: rank-type id -> label array restricted to its first `topk` columns
pathmap = dict()
labelmap = dict()

for s in keys:
    rktp = pathcfg.get(s, 'rktpdir')
    pathmap[rktp] = sorted(glob.glob(pathcfg.get(s, 'rank') + "*.rk"))

    # 'name_NNN' -> 'name.NNN' (only the first underscore is replaced).
    labelpref = ".".join(s.split('_', 1))
    label_pattern = pathcfg.get(s, 'label') + "{0:s}*".format(labelpref)
    # glob returns matches in arbitrary order; sort so the chosen label file is
    # deterministic when several files share the prefix.
    aux = sorted(glob.glob(label_pattern))
    if not aux:
        raise FileNotFoundError(
            "no label file matches '{0:s}' (config section '{1:s}')".format(label_pattern, s))
    labelmap[rktp] = np.load(aux[0])[:, 0:topk]

nk = len(keys)

iou_table = np.zeros((nk, nk))

In [7]:
# From here on `keys` holds the rank-type ids used as pathmap keys rather than
# the config section names.
keys = list(pathmap)
# The number of ranks is taken from the first rank type and reused for all of them.
nranks = len(pathmap[keys[0]])

valmap = {}

for k in keys:
    top_names = []
    valmap[k] = top_names
    for rank_idx in tqdm(range(nranks), desc=k, total=nranks):
        # Read the 'name' column of the rank file and keep its first `topk` entries.
        ranked = read_rank(pathmap[k][rank_idx], colname='name')
        top_names.append(np.array(ranked[0:topk]))

HBox(children=(IntProgress(value=0, description='004.resnetv2-L2', max=360), HTML(value='')))




HBox(children=(IntProgress(value=0, description='005.vgg16-L2', max=360), HTML(value='')))




In [8]:
# Alternative key sets kept from earlier runs:
#keys = ['001.vgg16-L2sq', '003.vgg16-Cos', '004.vgg16-Cheby', '005.vgg16-Canb']
#keys = ['imagenet_001', 'imagenet_002', 'imagenet_003', 'imagenet_004']
keys = list(pathmap.keys())
nk = len(keys)

# One list per output column; every loop iteration appends one value to each.
data = {col: [] for col in ('# rel A', '# rel B', 'subset', 'jaccard idx', 'kendall tau')}

# Only the first two rank types are compared, rank by rank.
kA, kB = keys[0], keys[1]

print("A:", kA)
print("B:", kB)

labels_A = labelmap[kA]
labels_B = labelmap[kB]

for x in range(nranks):

    vals_A = valmap[kA][x]
    vals_B = valmap[kB][x]

    # Positions whose label equals 1, and the entries found at those positions.
    relidx_A = np.where(labels_A[x] == 1)[0]
    relidx_B = np.where(labels_B[x] == 1)[0]
    rel_A = vals_A[relidx_A]
    rel_B = vals_B[relidx_B]

    data['# rel A'].append(relidx_A.size)
    data['# rel B'].append(relidx_B.size)

    # 'subset' is 1 when every entry of the smaller relevant set also occurs in
    # the larger one (on equal sizes, B is checked against A).
    if relidx_A.size >= relidx_B.size:
        smaller, larger = rel_B, rel_A
    else:
        smaller, larger = rel_A, rel_B
    data['subset'].append(int(np.isin(smaller, larger).all()))

    inters = np.intersect1d(rel_A, rel_B)
    uni = np.union1d(rel_A, rel_B)

    kt, _ = kendalltau(vals_A, vals_B)

    # Jaccard index of the two relevant sets; an empty union is recorded as 1.
    data['jaccard idx'].append(float(inters.size) / float(uni.size) if uni.size else 1)

    data['kendall tau'].append(kt)

df = pandas.DataFrame(data)
# Summarise, then show, the rows where neither relevant set contains the other.
not_subset = df['subset'] == 0
print(df[not_subset].describe())
df[not_subset]

A: 004.resnetv2-L2
B: 005.vgg16-L2




          # rel A     # rel B  subset  jaccard idx  kendall tau
count  258.000000  258.000000   258.0   258.000000   258.000000
mean     7.131783    6.891473     0.0     0.384477     0.132817
std      2.689885    2.598234     0.0     0.169512     0.265083
min      2.000000    2.000000     0.0     0.076923    -0.555556
25%      5.000000    5.000000     0.0     0.250000    -0.066667
50%      7.000000    7.000000     0.0     0.357143     0.111111
75%     10.000000   10.000000     0.0     0.500000     0.333333
max     10.000000   10.000000     0.0     0.818182     0.777778


Unnamed: 0,# rel A,# rel B,subset,jaccard idx,kendall tau
3,4,4,0,0.600000,0.466667
10,9,7,0,0.600000,0.022222
12,7,7,0,0.750000,0.244444
13,7,7,0,0.750000,-0.200000
15,9,8,0,0.416667,0.333333
17,5,6,0,0.571429,0.555556
18,5,6,0,0.571429,0.422222
19,5,5,0,0.666667,0.644444
20,8,5,0,0.444444,0.333333
21,6,5,0,0.571429,0.288889
