In [1]:
from __future__ import division
import pandas as pd
import numpy as np
import gzip
import cPickle as pickle

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.precision', 2)

Measures I'd like to have:

* acc | mrr | acv | rnd , for rel and no rel
* for rel: how much has been dropped? (Probably easiest if both are on one line.)
* **acc as function of utterance length (binned by word count, e.g. 1-2, 3-4, 5+?)**
* as function of vocab size? (Ok, that would have to be done as separate eval, only summarised here)

In [180]:
def is_iou_over_threshold_top_n(row, threshold=0.5, n=1, random=False):
    """Tell whether any of `n` candidates has an IoU above `threshold`.

    Parameters
    ----------
    row : mapping (e.g. one DataFrame row)
        Must provide 'nob', 'ious' (sequence of IoU values, one per
        candidate) and 'rnk' (used as an index array into 'ious', i.e. the
        candidate indices in ranked order).
    threshold : float
        A candidate counts as a hit when its IoU is strictly greater.
    n : int
        Number of candidates to inspect.
    random : bool
        If True, inspect `n` randomly drawn candidates instead of the `n`
        top-ranked ones (random baseline).

    Returns
    -------
    np.nan if row['nob'] is NaN, otherwise a boolean.

    Notes
    -----
    The random baseline draws *distinct* candidates (capped at the number of
    available candidates). Drawing with replacement could pick the same
    candidate several times and would underestimate the baseline for n > 1.
    For n == 1 the distribution is unchanged, although the exact draws for a
    given seed differ from the with-replacement variant.
    """
    if np.isnan(row['nob']):
        return np.nan
    ious = np.array(row['ious'])
    if random:
        n_draw = min(n, len(ious))
        if n_draw == 0:
            # Nothing to draw from (or nothing requested): no hit possible.
            return False
        picked = np.random.choice(len(ious), n_draw, replace=False)
        return np.any(ious[picked] > threshold)
    # Reorder the IoUs by rank, then look only at the first n of them.
    return np.any(ious[row['rnk']][:n] > threshold)

In [3]:
# Base set of evaluation results. The later cells unpack `results` as a list
# of (model name, result frame) pairs.
# NOTE: pickle.load can execute arbitrary code contained in the file, so only
# load result files that come from a trusted source.
with gzip.open('EvalOut/results-20160313.pklz', 'r') as f:
    results = pickle.load(f)

In [12]:
# Append the entries stored in results-top20.pklz to `results`.
# NOTE: extend() mutates `results` in place, so running this cell twice adds
# the same entries twice; re-run the cell that first assigns `results` before
# re-running this one. The pickle caveat (trusted files only) applies here too.
with gzip.open('EvalOut/results-top20.pklz', 'r') as f:
    results.extend(pickle.load(f))

In [189]:
# Append the entries stored in results-rprops2.pklz to `results`.
# NOTE: extend() mutates `results` in place, so running this cell twice adds
# the same entries twice; re-run the cell that first assigns `results` before
# re-running this one. The pickle caveat (trusted files only) applies here too.
with gzip.open('EvalOut/results-rprops2.pklz', 'r') as f:
    results.extend(pickle.load(f))

In [193]:
# Append the entries stored in results-cutoff.pklz to `results`.
# NOTE: extend() mutates `results` in place, so running this cell twice adds
# the same entries twice; re-run the cell that first assigns `results` before
# re-running this one. The pickle caveat (trusted files only) applies here too.
with gzip.open('EvalOut/results-cutoff.pklz', 'r') as f:
    results.extend(pickle.load(f))

In [194]:
len(results)

35

In [195]:
[m for m,_ in results]

['01_s5r; add',
 '01_s5r; mult',
 '01_s5r; hmean',
 '02_s5s; add',
 '02_s5s; mult',
 '02_s5s; hmean',
 '03_r5r; mult',
 '03_r5r->gr; mult',
 '04_g5r; mult',
 '04_g5r->rc; mult',
 '05_sr5r->s; mult',
 '05_sr5r->r; mult',
 '05_sr5r->g; mult',
 '10_srg5r->s; mult',
 '10_srg5r->r; mult',
 '10_srg5r->g; mult',
 '11_rg5r->s; mult',
 '11_rg5r->r; mult',
 '11_rg5r->g; mult',
 'max area; s',
 'max area; c',
 '06_pos_s5r; mult',
 '07_nopos_s5r; mult',
 '08_pos_r5r; mult',
 '09_nopos_r5r; mult',
 '01_s5r; brprop; mult',
 '05_sr5r; brprop; mult',
 '03_r5r; grprop; mult',
 '09_nopos_r5r; grprop; mult',
 '08_pos_r5r; grprop; mult',
 '01_s5r; mult; top20',
 '03_r5r; mult; top20',
 '04_g5r; mult; top20',
 '04_g5r; grprop; mult',
 '01_co75_s5r; mult']

In [196]:
# Look-up table from model name to its result frame; if a name occurs more
# than once in `results`, the last entry wins.
res_dict = dict(results)

In [197]:
def mrr_f(series):
    """Return the mean of the reciprocals (1/x) of the values in `series`.

    Applied to a column of ranks this is the mean reciprocal rank. The
    reciprocal is taken element by element via Series.apply.
    """
    reciprocals = series.apply(lambda value: 1 / value)
    return np.mean(reciprocals)
def acc_f(series):
    """Return the fraction of entries in `series` that are non-zero.

    NaN entries are turned into zero first (np.count_nonzero would otherwise
    count them as non-zero), so they count as failures while still being part
    of the denominator.

    Returns np.nan for an empty series instead of raising ZeroDivisionError,
    so that summaries over empty subsets (e.g. an empty length bin) do not
    abort the whole evaluation.
    """
    n_items = len(series)
    if n_items == 0:
        return np.nan
    return np.count_nonzero(np.nan_to_num(series.tolist())) / n_items

In [198]:
def summarise_rdf(rdf):
    """Collect the summary measures for one result frame.

    Reads the columns 'suc', 'rnk', 'cov', 'nob' and 'refexp' of `rdf` and
    returns a dict mapping measure name to value:

    * '*-full'  : measures over all rows of `rdf`
                  (acc = acc_f of 'suc', mrr = mrr_f of 'rnk',
                  arc = mean of 'cov', rnd = mrr_f of 'nob', i.e. mean 1/nob).
    * '>0 wrcov': share of rows with cov > 0.
    * '*->0wc'  : the same four measures restricted to rows with cov > 0.
    * 'acc-bA-B' / 'AB%': accuracy within, and share of, the cov > 0 rows
                  whose 'refexp' has between A and B whitespace-separated
                  tokens (inclusive).

    The ratios rely on true division (`from __future__ import division` in
    the first cell of this notebook).
    """
    # Measures over the complete frame.
    summary = {
        'acc-full': acc_f(rdf['suc']),
        'mrr-full': mrr_f(rdf['rnk']),
        'arc-full': rdf['cov'].mean(),
        'rnd-full': mrr_f(rdf['nob']),
    }

    # Same measures, restricted to rows where 'cov' is positive.
    covered = rdf.query('cov > 0')
    summary.update({
        '>0 wrcov': len(covered) / len(rdf),
        'acc->0wc': acc_f(covered['suc']),
        'mrr->0wc': mrr_f(covered['rnk']),
        'arc->0wc': covered['cov'].mean(),
        'rnd->0wc': mrr_f(covered['nob']),
    })

    # Accuracy per expression-length bin; each bin is (above, up_to] tokens.
    n_tokens = covered['refexp'].apply(lambda refexp: len(refexp.split()))
    length_bins = (
        (0, 2, 'acc-b1-2', '12%'),
        (2, 4, 'acc-b3-4', '34%'),
        (4, 6, 'acc-b5-6', '56%'),
    )
    for above, up_to, acc_key, share_key in length_bins:
        in_bin = covered[(n_tokens > above) & (n_tokens <= up_to)]
        summary[acc_key] = acc_f(in_bin['suc'])
        summary[share_key] = len(in_bin) / len(covered)
    return summary

def summarise_rdf_rprop(rdf):
    """Collect the summary measures for one region-proposal result frame.

    Every 'RP@k' / 'rnd' value is acc_f over the per-row outcome of
    is_iou_over_threshold_top_n (top-1, top-10, or one random candidate),
    computed once over all rows ('-full') and once over the rows with
    cov > 0 ('->0wc'). 'arc-*' is the mean of 'cov' and '>0 wrcov' the share
    of rows with cov > 0.

    The 'rnd-*' values use np.random and therefore vary between runs unless
    a seed is set beforehand.
    """
    def hit_rate(frame, **options):
        # acc_f over the row-wise hit flags, forwarding n= / random= options.
        flags = frame.apply(
            lambda row: is_iou_over_threshold_top_n(row, **options), axis=1)
        return acc_f(flags)

    summary = {}
    # Measures over the complete frame.
    summary['RP@1-full'] = hit_rate(rdf)
    summary['RP@10-full'] = hit_rate(rdf, n=10)
    summary['arc-full'] = rdf['cov'].mean()
    summary['rnd-full'] = hit_rate(rdf, random=True)

    # Same measures, restricted to rows where 'cov' is positive.
    covered = rdf.query('cov > 0')
    summary['>0 wrcov'] = len(covered) / len(rdf)
    summary['RP@1->0wc'] = hit_rate(covered)
    summary['RP@10->0wc'] = hit_rate(covered, n=10)
    summary['arc->0wc'] = covered['cov'].mean()
    summary['rnd->0wc'] = hit_rate(covered, random=True)
    return summary

In [199]:
# Build the summary table for all models that are not region-proposal runs
# (those have 'rprop' in their name and get their own table).
# Each model contributes two rows: all test items, and the subset with
# is_rel == False (label suffix '; NR'); '%tst' is the share of test items
# the row is based on.
index = []
rows = []
for model, rdf in results:
    if 'rprop' in model:
        continue
    rdf_norel = rdf.query('is_rel == False')
    variants = (
        (model, rdf, 1.0),
        (model + '; NR', rdf_norel, len(rdf_norel) / len(rdf)),
    )
    for label, frame, share in variants:
        index.append(label)
        this_resdict = summarise_rdf(frame)
        this_resdict['%tst'] = share
        rows.append(this_resdict)

# Turn the list of per-row dicts into one list of values per column.
collected_columns = {}
for this_row in rows:
    for this_key, this_val in this_row.items():
        collected_columns.setdefault(this_key, []).append(this_val)

full_df = pd.DataFrame(collected_columns, index=index)
# Fix the column order for display.
full_df = full_df[[
    '%tst',
    'acc-full', 'mrr-full', 'arc-full', 'rnd-full',
    '>0 wrcov',
    'acc->0wc', 'mrr->0wc', 'arc->0wc', 'rnd->0wc',
    'acc-b1-2', '12%', 'acc-b3-4', '34%', 'acc-b5-6', '56%',
]]

In [200]:
pd.set_option('display.float_format', '{:.2f}'.format)
full_df

Unnamed: 0,%tst,acc-full,mrr-full,arc-full,rnd-full,>0 wrcov,acc->0wc,mrr->0wc,arc->0wc,rnd->0wc,acc-b1-2,12%,acc-b3-4,34%,acc-b5-6,56%
01_s5r; add,1.0,0.62,0.77,0.89,0.2,0.97,0.64,0.77,0.92,0.2,0.77,0.41,0.63,0.32,0.5,0.15
01_s5r; add; NR,0.86,0.66,0.8,0.91,0.2,0.97,0.68,0.8,0.94,0.2,0.77,0.47,0.65,0.33,0.54,0.13
01_s5r; mult,1.0,0.65,0.79,0.89,0.2,0.97,0.67,0.79,0.92,0.2,0.78,0.41,0.67,0.32,0.54,0.15
01_s5r; mult; NR,0.86,0.68,0.82,0.91,0.2,0.97,0.71,0.82,0.94,0.2,0.78,0.47,0.69,0.33,0.57,0.13
01_s5r; hmean,1.0,0.62,0.77,0.89,0.2,0.97,0.64,0.77,0.92,0.2,0.77,0.41,0.64,0.32,0.48,0.15
01_s5r; hmean; NR,0.86,0.66,0.8,0.91,0.2,0.97,0.68,0.8,0.94,0.2,0.77,0.47,0.66,0.33,0.53,0.13
02_s5s; add,1.0,0.6,0.76,0.89,0.2,0.97,0.62,0.76,0.92,0.2,0.77,0.41,0.61,0.32,0.45,0.15
02_s5s; add; NR,0.86,0.64,0.79,0.91,0.2,0.97,0.66,0.79,0.94,0.2,0.77,0.47,0.63,0.33,0.48,0.13
02_s5s; mult,1.0,0.63,0.78,0.89,0.2,0.97,0.65,0.78,0.92,0.2,0.78,0.41,0.65,0.32,0.51,0.15
02_s5s; mult; NR,0.86,0.67,0.81,0.91,0.2,0.97,0.69,0.81,0.94,0.2,0.78,0.47,0.67,0.33,0.55,0.13


In [190]:
# Build the summary table for the region-proposal runs only (model names
# containing 'rprop'). Each model contributes two rows: all test items, and
# the subset with is_rel == False (label suffix '; no rel').
index = []
rows = []
for model, rdf in results:
    if 'rprop' not in model:
        continue
    variants = (
        (model, rdf),
        (model + '; no rel', rdf.query('is_rel == False')),
    )
    for label, frame in variants:
        index.append(label)
        rows.append(summarise_rdf_rprop(frame))

# Turn the list of per-row dicts into one list of values per column.
collected_columns = {}
for this_row in rows:
    for this_key, this_val in this_row.items():
        collected_columns.setdefault(this_key, []).append(this_val)

full_df_rp = pd.DataFrame(collected_columns, index=index)

In [191]:
full_df_rp[['RP@1-full', 'RP@10-full', 'arc-full', 'rnd-full', '>0 wrcov',
            'RP@1->0wc', 'RP@10->0wc', 'arc->0wc', 'rnd->0wc']]

Unnamed: 0,RP@1-full,RP@10-full,arc-full,rnd-full,>0 wrcov,RP@1->0wc,RP@10->0wc,arc->0wc,rnd->0wc
01_s5r; brprop; mult,0.09,0.24,0.88,0.03,0.97,0.09,0.25,0.92,0.03
01_s5r; brprop; mult; no rel,0.1,0.26,0.9,0.03,0.96,0.1,0.27,0.94,0.03
05_sr5r; brprop; mult,0.09,0.24,0.9,0.03,0.97,0.1,0.25,0.93,0.04
05_sr5r; brprop; mult; no rel,0.1,0.26,0.92,0.03,0.97,0.1,0.26,0.95,0.04
03_r5r; grprop; mult,0.52,0.77,0.91,0.17,0.98,0.54,0.79,0.93,0.17
03_r5r; grprop; mult; no rel,0.54,0.78,0.92,0.17,0.98,0.55,0.8,0.94,0.18
09_nopos_r5r; grprop; mult,0.38,0.77,0.91,0.17,0.98,0.39,0.79,0.93,0.17
09_nopos_r5r; grprop; mult; no rel,0.39,0.78,0.92,0.17,0.98,0.4,0.8,0.94,0.18
08_pos_r5r; grprop; mult,0.48,0.77,0.91,0.17,0.98,0.49,0.79,0.93,0.17
08_pos_r5r; grprop; mult; no rel,0.49,0.78,0.92,0.17,0.98,0.51,0.8,0.94,0.18


In [192]:
# Persist both summary tables as a single gzip-compressed pickle holding the
# tuple (full_df, full_df_rp).
# NOTE(review): this cell's execution count (192) is lower than that of the
# cell that builds full_df (199), so the file on disk may predate the table
# displayed in this notebook - re-run this cell after rebuilding the tables.
with gzip.open('EvalOut/full_df_precomp.pklz', 'w') as f:
    pickle.dump((full_df, full_df_rp), f)

### Some post-hoc analysis

#### Max Area for grexp

In [124]:
r01 = res_dict['01_s5r; mult']

In [25]:
len(res_dict['03_r5r; mult'])

10834

In [92]:
r03 = res_dict['03_r5r; mult']

In [26]:
len(res_dict['04_g5r; mult'])

4849

In [31]:
# Result frame of the '04_g5r; mult' model and the distinct image ids it
# contains. The ids are deduplicated through set(), so the order of the list
# is arbitrary; the cells shown here only use it for isin() filtering.
r04 = res_dict['04_g5r; mult']
r04_filelist = list(set(r04['image_id'].tolist()))

In [32]:
len(res_dict['max area; c'])

10834

In [74]:
len(res_dict['max area; c'][res_dict['max area; c']['image_id'].isin(r04_filelist)])

7469

In [76]:
# Rows of the 'max area; c' baseline whose image also occurs in r04.
grex_maxarea = res_dict['max area; c'][res_dict['max area; c']['image_id'].isin(r04_filelist)]

In [80]:
np.sum(grex_maxarea['suc']) / len(grex_maxarea)

0.19627794885526845

In [82]:
len(grex_maxarea['image_id'].unique())

1103

In [94]:
# Mean of 1/nob with one 'nob' value per image instead of one per row.
# NOTE: GroupBy.first() returns the first non-null value of each column
# within a group, which is not necessarily taken from the group's first row.
mrr_f(r04.groupby('image_id').first().reset_index()['nob'])

0.19912366024671468

In [95]:
mrr_f(r03.groupby('image_id').first().reset_index()['nob'])

0.17035588398429238

In [100]:
# Share of non-zero 'suc' values of the 'max area; c' baseline, using one
# entry per image (GroupBy.first(): first non-null value per column).
# NOTE: unlike acc_f, no np.nan_to_num is applied here, so a NaN in 'suc'
# would be counted as non-zero.
ma_red = res_dict['max area; c'].groupby('image_id').first().reset_index()
np.count_nonzero(ma_red['suc']) / len(ma_red)

0.23266666666666666

Double checking stuff:

In [120]:
len(r03.query('cov > 0')) / len(r03)

0.9800627653682851

In [122]:
len(r03.query('cov > 0 & is_rel == False')) / len(r03.query('is_rel == False'))

0.979234009207562

In [123]:
len(r04.query('cov > 0')) / len(r04)

0.9973190348525469

In [125]:
len(r01.query('cov > 0')) / len(r01)

0.9697751873438801

In [126]:
len(r01.query('cov > 0 & is_rel == False')) / len(r01.query('is_rel == False'))

0.9663646877409406

#### region props

Why are scores lower for >0 wc?

FIXED: NaNs count as non-zero in `np.count_nonzero`, so they must be replaced with zero first (`acc_f` does this via `np.nan_to_num`).

In [165]:
r01rp = res_dict['01_s5r; brprop; mult']

In [187]:
# Check whether the row-wise top-1 outcomes contain any NaN; the function
# returns NaN for rows whose 'nob' is NaN.
np.isnan(r01rp.apply(is_iou_over_threshold_top_n, axis=1).tolist()).any()

True