# Parse Experimental Results & Generate LaTeX Table

In [1]:
import os, pickle
import numpy as np
import pandas as pd

In [2]:
# Directory containing the pickled result files that load_results() reads.
data_dir = 'data/data-recsys16'

In [3]:
# Per-dataset file-name suffixes and the matching display names used as table columns.
# The two lists are index-aligned (load_results() asserts they have equal length).
suffix = ['Osak', 'Glas', 'Edin', 'Toro', 'Melb']
datnames = ['Osaka', 'Glasgow', 'Edinburgh', 'Toronto', 'Melbourne']

In [4]:
# True selects the '*-noshort-*' result files in load_results(); False selects the '*-all-*' ones.
noshort = True

In [5]:
# Experiment parameters that are encoded in the result file names.
KX = 100     # 100 folds in user specific setting
ALPHA = 0.5  # value embedded in the names of the 'comb-*' result files

# File-name fragments derived from the parameters above.
kxstr = '{}X-'.format(KX)
alphastr = '{}-'.format(str(ALPHA).replace('.', '_'))

Compute the F1 score of a recommended trajectory against the actual trajectory.

In [6]:
def calc_F1(seq_act, seq_rec):
    '''Compute the F1 score of a recommended trajectory, allowing sub-tours.

    Each element of `seq_rec` is matched against at most one not-yet-matched,
    equal element of `seq_act`, so repeated POIs are counted as often as they
    occur in both sequences.

    seq_act: the actual trajectory (non-empty sequence of POIs).
    seq_rec: the recommended trajectory (non-empty sequence of POIs).

    Returns the F1 score as a float in [0, 1]; 0.0 when the two sequences
    share no POI (precision and recall are both zero in that case).
    '''
    assert(len(seq_act) > 0)
    assert(len(seq_rec) > 0)
    # match_tags[j] is True once seq_act[j] has been paired with a recommended POI
    match_tags = np.zeros(len(seq_act), dtype=bool)
    for poi in seq_rec:
        for j in range(len(seq_act)):
            if not match_tags[j] and poi == seq_act[j]:
                match_tags[j] = True
                break  # each recommended POI may consume only one actual POI
    intersize = int(np.count_nonzero(match_tags))
    if intersize == 0:
        # no overlap: precision + recall == 0, so avoid dividing by zero
        return 0.0
    recall = intersize / len(seq_act)
    precision = intersize / len(seq_rec)
    F1 = 2 * precision * recall / (precision + recall)
    return F1

Load the pickled results and compute the mean and standard deviation of the F1 score for each method.

In [7]:
def _load_pickle(fname):
    '''Unpickle and return the object stored in `fname`, closing the file afterwards.'''
    # NOTE: unpickling can execute arbitrary code; only use this on trusted result files.
    with open(fname, 'rb') as f:
        return pickle.load(f)


def _f1_scores(recdict, rec_key):
    '''Return the F1 of recdict[key][rec_key] against recdict[key]['REAL'], in sorted key order.'''
    return [calc_F1(recdict[key]['REAL'], recdict[key][rec_key]) for key in sorted(recdict.keys())]


def load_results(datnames, suffix, dat_ix, noshort, kxstr, alphastr):
    '''Load the result files of one dataset and summarise the F1 scores.

    datnames: dataset display names (only checked for equal length with `suffix`).
    suffix:   per-dataset file-name suffixes.
    dat_ix:   index into `suffix` of the dataset to load.
    noshort:  True to read the '*-noshort-*' files, otherwise the '*-all-*' files.
    kxstr:    fragment inserted in the names of the user specific files.
    alphastr: fragment inserted in the names of the 'comb-*' files.

    Files are read from the module-level `data_dir`. Each file holds a dict
    whose values contain the key 'REAL' plus the recommendation keys
    ('REC_POP'/'REC_FEATURE' for rank files, 'REC_DP'/'REC_ILP' otherwise).

    Returns (F1mean_agno, F1std_agno, F1mean_spec, F1std_spec); each is a list
    of six values in the order rank/REC_POP, rank/REC_FEATURE, tran/REC_DP,
    tran/REC_ILP, comb/REC_DP, comb/REC_ILP.
    '''
    # dat_ix must be a valid index, so the upper bound is exclusive
    assert(0 <= dat_ix < len(suffix))
    assert(len(datnames) == len(suffix))

    subset = 'noshort' if noshort == True else 'all'
    suf = suffix[dat_ix]
    # only the combined method carries the alpha fragment in its file names
    extra = {'rank': '', 'tran': '', 'comb': alphastr}

    # load results data
    recdicts = {}
    for method in ('rank', 'tran', 'comb'):
        stem = method + '-' + subset + '-'
        # user specific results
        fspec = os.path.join(data_dir, stem + 'specific-' + extra[method] + kxstr + suf + '.pkl')
        # user agnostic results
        fagno = os.path.join(data_dir, stem + 'agnostic-' + extra[method] + suf + '.pkl')
        recdicts[method, 'spec'] = _load_pickle(fspec)
        recdicts[method, 'agno'] = _load_pickle(fagno)

    # compute F1 for every (method, recommendation key) pair, in output order
    variants = [('rank', 'REC_POP'), ('rank', 'REC_FEATURE'),
                ('tran', 'REC_DP'), ('tran', 'REC_ILP'),
                ('comb', 'REC_DP'), ('comb', 'REC_ILP')]
    F1dat_agno = [_f1_scores(recdicts[method, 'agno'], rec_key) for method, rec_key in variants]
    F1dat_spec = [_f1_scores(recdicts[method, 'spec'], rec_key) for method, rec_key in variants]

    # compute mean and std of F1
    F1mean_agno = [np.mean(x) for x in F1dat_agno]
    F1mean_spec = [np.mean(x) for x in F1dat_spec]
    F1std_agno  = [np.std(x) for x in F1dat_agno]
    F1std_spec  = [np.std(x) for x in F1dat_spec]

    return F1mean_agno, F1std_agno, F1mean_spec, F1std_spec

In [8]:
# Row labels of the summary tables; the order must match the six statistics
# that load_results() returns for each dataset.
methods = ['RankP', 'RankF', 'MC-DP', 'MC-ILP', 'Prop-DP', 'Prop-ILP']

# One zero-initialised (method x dataset) table per statistic and setting.
# The builtin `float` is used as dtype because the `np.float` alias no longer
# exists in current NumPy releases.
F1mean_agno_df = pd.DataFrame(data=np.zeros((len(methods), len(datnames)), dtype=float),
                              columns=datnames, index=methods)
F1std_agno_df  = pd.DataFrame(data=np.zeros((len(methods), len(datnames)), dtype=float),
                              columns=datnames, index=methods)
F1mean_spec_df = pd.DataFrame(data=np.zeros((len(methods), len(datnames)), dtype=float),
                              columns=datnames, index=methods)
F1std_spec_df  = pd.DataFrame(data=np.zeros((len(methods), len(datnames)), dtype=float),
                              columns=datnames, index=methods)

In [9]:
# Fill one column (dataset) of every summary table per iteration.
for dat_ix in range(len(suffix)):
    F1mean_agno, F1std_agno, F1mean_spec, F1std_spec = load_results(
        datnames, suffix, dat_ix, noshort, kxstr, alphastr)
    # each statistic list needs exactly one entry per row label in `methods`
    assert len(methods) == len(F1mean_agno) == len(F1std_agno)
    assert len(methods) == len(F1mean_spec) == len(F1std_spec)
    column = datnames[dat_ix]
    F1mean_agno_df[column] = F1mean_agno
    F1std_agno_df[column] = F1std_agno
    F1mean_spec_df[column] = F1mean_spec
    F1std_spec_df[column] = F1std_spec

In [10]:
# Mean F1 per method (rows) and dataset (columns), user agnostic setting.
F1mean_agno_df

Unnamed: 0,Osaka,Glasgow,Edinburgh,Toronto,Melbourne
RankP,0.629865,0.704722,0.641277,0.652807,0.569735
RankF,0.639312,0.748766,0.644364,0.712965,0.572559
MC-DP,0.689855,0.71781,0.563657,0.68722,0.524709
MC-ILP,0.651475,0.715778,0.594995,0.686599,0.547168
Prop-DP,0.716848,0.734321,0.615323,0.681341,0.547798
Prop-ILP,0.691175,0.736266,0.625476,0.702569,0.575671


In [11]:
#F1std_agno_df

In [12]:
# Mean F1 per method (rows) and dataset (columns), user specific setting.
F1mean_spec_df

Unnamed: 0,Osaka,Glasgow,Edinburgh,Toronto,Melbourne
RankP,0.629865,0.704722,0.641277,0.652807,0.569735
RankF,0.647981,0.742099,0.643294,0.715186,0.570049
MC-DP,0.673551,0.70731,0.574936,0.685142,0.535321
MC-ILP,0.629736,0.712278,0.602385,0.6896,0.554588
Prop-DP,0.713949,0.728488,0.616486,0.693217,0.560325
Prop-ILP,0.681211,0.735433,0.625471,0.705316,0.575341


In [13]:
#F1std_spec_df

In [14]:
# Boolean masks with the same shape and labels as the mean-F1 tables, all False initially.
# The builtin `bool` is used as dtype because the `np.bool` alias no longer
# exists in current NumPy releases.
ismax_agno_df = pd.DataFrame(data=np.zeros(F1mean_agno_df.shape, dtype=bool),
                             columns=F1mean_agno_df.columns, index=F1mean_agno_df.index)
ismax_spec_df = pd.DataFrame(data=np.zeros(F1mean_spec_df.shape, dtype=bool),
                             columns=F1mean_spec_df.columns, index=F1mean_spec_df.index)

In [15]:
# Flag, for every dataset column, the method with the highest mean F1.
# idxmax() returns the row *label*, which is what .loc needs; argmax() returns
# an integer position in current pandas, and .loc would then append a new
# integer-labelled row instead of flagging the best method.
for col in ismax_agno_df.columns:
    maxix = F1mean_agno_df[col].idxmax()
    ismax_agno_df.loc[maxix, col] = True
for col in ismax_spec_df.columns:
    maxix = F1mean_spec_df[col].idxmax()
    ismax_spec_df.loc[maxix, col] = True

In [16]:
#ismax_agno_df

In [17]:
#ismax_spec_df

In [18]:
def gen_latex_table(F1mean_df, F1std_df, ismax_df, uspecific, noshort):
    '''Render the F1 statistics as the source of a LaTeX `table*` environment.

    F1mean_df, F1std_df: tables of mean / standard deviation, looked up with
                         the index and column labels of F1mean_df.
    ismax_df:  table with the same labels; cells equal to True are set in bold.
    uspecific: bool, chooses 'user specific' or 'user agnostic' in the caption.
    noshort:   bool, chooses the trajectory wording in the caption.

    Returns the LaTeX source as one string: a header row with the column
    names, then one row per index label with cells formatted 'mean\\pmstd'
    to three decimals.
    '''
    assert(isinstance(uspecific, bool))
    assert(isinstance(noshort, bool))
    setting = 'user specific setting' if uspecific else 'user agnostic setting'
    coverage = 'without short trajectories' if noshort else 'with all trajectories'
    columns = F1mean_df.columns

    # preamble (a '\\small\n' line could be inserted before the tabular to shrink the font)
    pieces = [
        '\\begin{table*}\n',
        '\\centering\n',
        '\\caption{Experimental Results: ' + setting + ' ' + coverage + '}\n',
        '\\begin{tabular}{l|' + 'c' * len(columns) + '} \\hline\n',
    ]

    # header row: empty first cell followed by the column names
    pieces.extend(' & ' + col for col in columns)
    pieces.append(' \\\\ \\hline\n')

    # body: one row per index label
    for method in F1mean_df.index:
        for pos, col in enumerate(columns):
            if pos == 0:
                pieces.append(method + ' ')
            bold = ismax_df.loc[method, col] == True
            cell = '%.3f' % F1mean_df.loc[method, col] + '\\pm' + '%.3f' % F1std_df.loc[method, col]
            if bold:
                cell = '\\mathbf{' + cell + '}'
            pieces.append('& $' + cell + '$ ')
        pieces.append('\\\\\n')

    pieces.extend(['\\hline\n', '\\end{tabular}\n', '\\end{table*}\n'])
    return ''.join(pieces)

In [19]:
# LaTeX table for the user agnostic results.
str1 = gen_latex_table(F1mean_agno_df, F1std_agno_df, ismax_agno_df, uspecific=False, noshort=noshort)

In [20]:
# Show the generated LaTeX source for the user agnostic table.
print(str1)

\begin{table*}
\centering
\caption{Experimental Results: user agnostic setting without short trajectories}
\begin{tabular}{l|ccccc} \hline
 & Osaka & Glasgow & Edinburgh & Toronto & Melbourne \\ \hline
RankP & $0.630\pm0.175$ & $0.705\pm0.171$ & $0.641\pm0.163$ & $0.653\pm0.145$ & $0.570\pm0.141$ \\
RankF & $0.639\pm0.182$ & $\mathbf{0.749\pm0.174}$ & $\mathbf{0.644\pm0.162}$ & $\mathbf{0.713\pm0.178}$ & $0.573\pm0.138$ \\
MC-DP & $0.690\pm0.205$ & $0.718\pm0.188$ & $0.564\pm0.191$ & $0.687\pm0.185$ & $0.525\pm0.173$ \\
MC-ILP & $0.651\pm0.194$ & $0.716\pm0.186$ & $0.595\pm0.157$ & $0.687\pm0.167$ & $0.547\pm0.150$ \\
Prop-DP & $\mathbf{0.717\pm0.215}$ & $0.734\pm0.183$ & $0.615\pm0.196$ & $0.681\pm0.197$ & $0.548\pm0.175$ \\
Prop-ILP & $0.691\pm0.196$ & $0.736\pm0.175$ & $0.625\pm0.161$ & $0.703\pm0.166$ & $\mathbf{0.576\pm0.157}$ \\
\hline
\end{tabular}
\end{table*}



In [21]:
# LaTeX table for the user specific results.
str2 = gen_latex_table(F1mean_spec_df, F1std_spec_df, ismax_spec_df, uspecific=True, noshort=noshort)

In [22]:
# Show the generated LaTeX source for the user specific table.
print(str2)

\begin{table*}
\centering
\caption{Experimental Results: user specific setting without short trajectories}
\begin{tabular}{l|ccccc} \hline
 & Osaka & Glasgow & Edinburgh & Toronto & Melbourne \\ \hline
RankP & $0.630\pm0.175$ & $0.705\pm0.171$ & $0.641\pm0.163$ & $0.653\pm0.145$ & $0.570\pm0.141$ \\
RankF & $0.648\pm0.202$ & $\mathbf{0.742\pm0.171}$ & $\mathbf{0.643\pm0.162}$ & $\mathbf{0.715\pm0.178}$ & $0.570\pm0.137$ \\
MC-DP & $0.674\pm0.179$ & $0.707\pm0.179$ & $0.575\pm0.196$ & $0.685\pm0.181$ & $0.535\pm0.170$ \\
MC-ILP & $0.630\pm0.171$ & $0.712\pm0.173$ & $0.602\pm0.164$ & $0.690\pm0.164$ & $0.555\pm0.152$ \\
Prop-DP & $\mathbf{0.714\pm0.207}$ & $0.728\pm0.179$ & $0.616\pm0.196$ & $0.693\pm0.186$ & $0.560\pm0.174$ \\
Prop-ILP & $0.681\pm0.194$ & $0.735\pm0.172$ & $0.625\pm0.162$ & $0.705\pm0.169$ & $\mathbf{0.575\pm0.160}$ \\
\hline
\end{tabular}
\end{table*}

