In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import pickle as pkl
import os, sys
import matplotlib.pyplot as plt

In [None]:
# NOTE(review): data_dir and dat_suffix are used by build_fnames but only appear here
# commented out — presumably they are defined by shared.ipynb; confirm.
#data_dir = 'data/data-new'
#dat_suffix = ['Osak', 'Glas', 'Edin', 'Toro', 'Melb']

# Display names of the datasets; gen_latex_v prints them as table column headers.
dat_name = ['Osaka', 'Glasgow', 'Edinburgh', 'Toronto', 'Melbourne']

# Candidate values of the regularisation parameter C; the first and last entries
# are drawn as reference lines in the hyper-parameter plot.
C_SET = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]  # regularisation parameter

In [None]:
# Short algorithm identifiers. build_fnames uses them as file-name prefixes, and
# the position of an identifier is the row index used in the metrics arrays.
algo = [
    'rand', 'pop', 'linreg', 'logreg', 'rank', 'logpwr',
    'tranDP', 'tranILP', 'combDP', 'combILP',
    'ssvm-greedy', 'ssvm-viterbi', 'ssvm-listViterbi', 'ssvm-ILP', 'memm',
]

# Display names printed in the LaTeX tables; same order as `algo`.
algo_name = [
    'Random', 'Popularity', 'POILocationPrediction', 'POIOccurrencePrediction',
    'RankSVM', 'RankLogistic',
    'Markov', 'MarkovPath', 'Rank+Markov', 'Rank+MarkovPath',
    'SSVM-Greedy', 'SSVM-Viterbi', 'SSVM-ListViterbi', 'SSVM-ILP', 'MEMM',
]

# Display names of the three evaluation metrics, in the order they are stored.
metric_name = ['F$_1$', 'pairs-F$_1$', '$\\tau$']

```dat_ix``` must be defined before running notebook ```shared.ipynb```, which requires it.

In [None]:
# Index of the dataset to process; build_fnames uses it to index dat_suffix.
dat_ix = 0

Run notebook ```shared.ipynb```.

In [None]:
# NOTE(review): later cells use data_dir, dat_suffix, evaluate and TRAJ_GROUP_DICT
# without defining them — presumably this notebook provides them; confirm.
%run 'shared.ipynb'

Construct file names.

In [None]:
def build_fnames(dat_ix):
    """Return the result-file path of every algorithm for one dataset.

    Each path is ``<data_dir>/<algorithm>-<dat_suffix[dat_ix]>.pkl``; the list
    follows the order of ``algo``.
    """
    return [
        os.path.join(data_dir, name + '-' + dat_suffix[dat_ix] + '.pkl')
        for name in algo
    ]

In [None]:
# Build the per-algorithm file names for the selected dataset and display them.
fnames = build_fnames(dat_ix)
fnames

Compute evaluation metrics.

In [None]:
def calc_metrics(fnames):
    """Load per-algorithm results and evaluate them on the queries shared by all algorithms.

    Parameters
    ----------
    fnames : list of str
        One pickle file per entry of ``algo``, in the same order. Each file must
        hold a dict mapping a query key to a dict with a 'PRED' entry and,
        optionally, a 'C' entry.

    Returns
    -------
    metrics : ndarray, shape (len(algo), 3, #queries)
        The three values returned by ``evaluate`` (F1, pairs-F1, tau, in that
        order) for every algorithm and query.
    keys : list
        Sorted query keys that are present in every results file.
    Cs : ndarray, shape (len(algo), #queries)
        The 'C' value stored with each prediction, or -1 where none was stored.
    """
    assert(len(fnames) == len(algo))

    # NOTE: unpickling can execute arbitrary code; only load trusted result files.
    recdicts = []
    for f in fnames:
        # Context manager so the file handle is closed as soon as it is read.
        with open(f, 'rb') as fin:
            recdicts.append(pkl.load(fin))

    # Only queries answered by every algorithm can be compared.
    keys = set(recdicts[0].keys())
    for d in recdicts[1:]:
        keys &= set(d.keys())
    print('#Records:', len(keys))
    keys = sorted(keys)

    # The np.float alias was removed in NumPy 1.24; the builtin float selects the same float64 dtype.
    metrics = np.zeros((len(algo), 3, len(keys)), dtype=float)
    Cs = -1 * np.ones((len(algo), len(keys)), dtype=float)

    for j, d in enumerate(recdicts):
        for k, q in enumerate(keys):
            rec = d[q]
            F1, pF1, tau = evaluate(rec['PRED'], TRAJ_GROUP_DICT[q])
            if 'C' in rec:
                Cs[j, k] = rec['C']
            metrics[j, 0, k] = F1
            metrics[j, 1, k] = pF1
            metrics[j, 2, k] = tau
    return metrics, keys, Cs

In [None]:
# Evaluate every algorithm on the selected dataset; calc_metrics also prints the number of shared records.
metrics, keys, Cs = calc_metrics(fnames)

Plot the values of metrics.

In [None]:
metric_ix = 0  # [F1, pairs-F1, Tau]

# Look the rows up by identifier instead of by hard-coded position: row 10 of
# `algo` is 'ssvm-greedy', not the SSVM-ListViterbi curve this plot is meant to show.
rank_ix = algo.index('rank')              # RankSVM
ssvm_ix = algo.index('ssvm-listViterbi')  # SSVM-ListViterbi

plt.figure(figsize=[15, 5])
X = np.arange(metrics.shape[2])
plt.plot(X, metrics[rank_ix, metric_ix, :], c='r', ls='--', marker='^', markeredgewidth=0,
         label=algo_name[rank_ix])
plt.plot(X, metrics[ssvm_ix, metric_ix, :], c='g', ls='--', marker='v', markeredgewidth=0,
         label=algo_name[ssvm_ix])
plt.xticks(X, [str(q) for q in keys], fontsize=10, rotation=50, horizontalalignment='right')
plt.xlim(-1, metrics.shape[2])
plt.ylim(-0.1, 1.1)
plt.xlabel('Query')
plt.ylabel(metric_name[metric_ix])
plt.legend()  # the legend names each curve so the figure can be read on its own
plt.title('Values of evaluation metric ' + metric_name[metric_ix])

Plot values of hyper-parameters.

In [None]:
# One point per query: the C value stored for RankSVM (row 4 of Cs), with the
# smallest and largest candidate in C_SET drawn as horizontal reference lines.
n_query = Cs.shape[1]
x_span = [-1, n_query]
tick_labels = [str(q) for q in keys]

plt.figure(figsize=[15, 5])
X = np.arange(n_query)
plt.plot(X, Cs[4, :], c='r', ls='--', marker='^', markeredgewidth=0)  # RankSVM
plt.xticks(np.arange(n_query), tick_labels, fontsize=10, rotation=50, horizontalalignment='right')
plt.xlim(x_span[0], x_span[1])
plt.ylim(0.001, 10000)
for c_bound in (C_SET[0], C_SET[-1]):
    plt.plot(x_span, [c_bound, c_bound], c='b', ls='-')
plt.yscale('log')
plt.xlabel('Query')
plt.ylabel('C')
plt.title('Values of hyper-parameter')

Generate LaTeX table for each dataset.

In [None]:
def gen_latex_h(mean, stderr, title, label):
    r"""Render a LaTeX ``table*`` with one row per algorithm and one column per metric.

    In every column the largest mean is wrapped in ``\mathbf`` and the second
    largest in ``\mathit``.

    Parameters
    ----------
    mean, stderr : ndarray, shape (len(algo), len(metric_name))
        Value and error term per algorithm and metric; every cell is printed as
        ``mean\pmstderr`` with three decimals.
    title : str
        Text placed inside ``\caption``.
    label : str
        Text placed inside ``\label``.

    Returns
    -------
    str
        The complete LaTeX source of the table.
    """
    assert(mean.shape == stderr.shape)
    assert(mean.shape == (len(algo), len(metric_name)))

    n_rows, n_cols = mean.shape

    # The np.int alias was removed in NumPy 1.24; the builtin int selects the same integer dtype.
    max_1st = np.zeros(n_cols, dtype=int)
    max_2nd = np.zeros(n_cols, dtype=int)
    for j in range(n_cols):
        # argsort is ascending, so the last two entries are the runner-up and the best row.
        max_2nd[j], max_1st[j] = np.argsort(mean[:, j])[-2:]

    strs = [
        '\\begin{table*}[t]\n',
        '\\caption{' + title + '}\n',
        '\\label{' + label + '}\n',
        '\\centering\n',
        '\\begin{tabular}{l|' + n_cols * 'c' + '} \\hline\n',
    ]

    # Header row: one column title per metric.
    for j in range(n_cols):
        strs.append(' & ' + metric_name[j])
    strs.append(' \\\\ \\hline\n')

    # Body: one row per algorithm.
    for i in range(n_rows):
        strs.append(algo_name[i] + ' ')
        for j in range(n_cols):
            cell = '%.3f' % mean[i, j] + '\\pm' + '%.3f' % stderr[i, j]
            if i == max_1st[j]:
                cell = '\\mathbf{' + cell + '}'
            elif i == max_2nd[j]:
                cell = '\\mathit{' + cell + '}'
            strs.append('& $' + cell + '$ ')
        strs.append('\\\\\n')

    strs.append('\\hline\n')
    strs.append('\\end{tabular}\n')
    strs.append('\\end{table*}\n')
    return ''.join(strs)

In [None]:
# Mean and standard error (population std / sqrt(#queries)) of every metric for
# every algorithm, reduced over the query axis. The reductions return float64
# arrays of shape (len(algo), 3), so the removed np.float alias is not needed.
n_query = metrics.shape[2]
mean   = np.mean(metrics, axis=2)
stderr = np.std(metrics, axis=2) / np.sqrt(n_query)
strs = gen_latex_h(mean, stderr, 'Performance', 'tab:performance')
print(strs)

Generate LaTeX table for each evaluation metric.

In [None]:
def calc_metric_mean(metrics_list, metric_ix):
    """Summarise one metric for every algorithm on every dataset.

    Parameters
    ----------
    metrics_list : list of ndarray
        One array per entry of ``dat_name``; each is indexed as
        ``[algorithm, metric, query]``.
    metric_ix : int
        Index of the metric to summarise; must be a plain ``int`` in
        ``[0, len(metric_name))``.

    Returns
    -------
    mean, stderr : ndarray, shape (len(algo), len(dat_name))
        Mean over the queries, and the population standard deviation divided by
        the square root of the number of queries.
    """
    assert(len(metrics_list) == len(dat_name))
    assert(0 <= metric_ix < len(metric_name))
    assert(type(metric_ix) == int)

    # The np.float alias was removed in NumPy 1.24; the builtin float selects the same float64 dtype.
    shape = (len(algo), len(dat_name))
    mean   = np.zeros(shape, dtype=float)
    stderr = np.zeros(shape, dtype=float)

    for j, dat_metrics in enumerate(metrics_list):
        root_n = np.sqrt(dat_metrics.shape[2])
        for i in range(len(algo)):
            values = dat_metrics[i, metric_ix, :]
            mean[i, j]   = np.mean(values)
            stderr[i, j] = np.std(values) / root_n
    return mean, stderr

In [None]:
def gen_latex_v(mean, stderr, title, label):
    r"""Render a LaTeX ``table*`` with one row per algorithm and one column per dataset.

    In every column the largest mean is wrapped in ``\mathbf`` and the second
    largest in ``\mathit``.

    Parameters
    ----------
    mean, stderr : ndarray, shape (len(algo), len(dat_name))
        Value and error term per algorithm and dataset; every cell is printed as
        ``mean\pmstderr`` with three decimals.
    title : str
        Text placed inside ``\caption``.
    label : str
        Text placed inside ``\label``.

    Returns
    -------
    str
        The complete LaTeX source of the table.
    """
    assert(mean.shape == stderr.shape)
    assert(mean.shape == (len(algo), len(dat_name)))

    n_rows, n_cols = mean.shape

    # The np.int alias was removed in NumPy 1.24; the builtin int selects the same integer dtype.
    max_1st = np.zeros(n_cols, dtype=int)
    max_2nd = np.zeros(n_cols, dtype=int)
    for j in range(n_cols):
        # argsort is ascending, so the last two entries are the runner-up and the best row.
        max_2nd[j], max_1st[j] = np.argsort(mean[:, j])[-2:]

    strs = [
        '\\begin{table*}[t]\n',
        '\\caption{' + title + '}\n',
        '\\label{' + label + '}\n',
        '\\centering\n',
        '\\begin{tabular}{l|' + n_cols * 'c' + '} \\hline\n',
    ]

    # Header row: one column title per dataset.
    for j in range(n_cols):
        strs.append(' & ' + dat_name[j])
    strs.append(' \\\\ \\hline\n')

    # Body: one row per algorithm.
    for i in range(n_rows):
        strs.append(algo_name[i] + ' ')
        for j in range(n_cols):
            cell = '%.3f' % mean[i, j] + '\\pm' + '%.3f' % stderr[i, j]
            if i == max_1st[j]:
                cell = '\\mathbf{' + cell + '}'
            elif i == max_2nd[j]:
                cell = '\\mathit{' + cell + '}'
            strs.append('& $' + cell + '$ ')
        strs.append('\\\\\n')

    strs.append('\\hline\n')
    strs.append('\\end{tabular}\n')
    strs.append('\\end{table*}\n')
    return ''.join(strs)

In [None]:
# Keep a reference to the current stdout stream so it can be reinstated after
# the redirection done for the per-dataset loop; the bare name displays it.
nb_stdout = sys.stdout  # save the device for notebook output
nb_stdout

In [None]:
metrics_list = []
sys.stdout = open('/dev/stdout', 'w')  # redirect the output of %run to terminal
for dat_ix in range(len(dat_name)):
    fnames = build_fnames(dat_ix)
    %run 'shared.ipynb'
    metrics, keys, Cs = calc_metrics(fnames)
    metrics_list.append(metrics)

In [None]:
# Point sys.stdout back at the stream saved in nb_stdout, then display it to confirm.
sys.stdout = nb_stdout  # restore the output to notebook
sys.stdout

In [None]:
# Metric to tabulate across datasets: an index into metric_name (2 is '$\\tau$').
metric_ix = 2

In [None]:
# Caption text and label suffix for each metric, in the same order as metric_name.
captions = ['F$_1$ score on points', 'F$_1$ score on pairs', 'Kendall\'s $\\tau$']
label_suffixes = ['f1', 'pf1', 'tau']

# Summarise the chosen metric over all datasets and print the LaTeX table.
mean, stderr = calc_metric_mean(metrics_list, metric_ix)
title = captions[metric_ix]
label = 'tab:' + label_suffixes[metric_ix]
print(gen_latex_v(mean, stderr, title, label))