In [1]:
# import everything as needed
%matplotlib inline

import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import math
import csv
import re
import string
from matplotlib.artist import setp

from operator import itemgetter

from irkit.trec import qrels
from irkit.trec import results
from irkit.trec import run
from irkit.query import elasticsearch as eq

from IPython.display import set_matplotlib_formats
# To get all the plots in pdf, change this to 'pdf'!
set_matplotlib_formats('pdf')

# Set general plot properties.
sns.set()
sns.set_context("paper")
sns.set_color_codes("pastel")

In [2]:
def fill_empty_row(df):
    topic_not_found = set()
    for i in range(1, 101):
        topic_not_found.add(i)

    for row in df.iterrows():
        topic_not_found.remove(int(row[0]))
        
    num_colls = len(df.columns)
    
    for row in topic_not_found: 
        df.loc[row] = (0) * num_colls



In [3]:
def pop_runs(runs):
    run_df = {}
    for r in runs:
        run_df[r] = {}
        with open('res-files/{}_standard.csv'.format(r), 'r') as f:
            df = pd.DataFrame.from_csv(f)
            fill_empty_row(df)
            for y in df.iterrows():
                run_df[r][int(y[0])] = {'P1': y[1][' P_1'],
                                                       'P5': y[1][' P_5'],
                                                       'AP5': y[1][' map']}

            with open('res-files/{}_recip.csv'.format(r), 'r') as f2:
                df2 = pd.DataFrame.from_csv(f2)
                fill_empty_row(df2)
                for y in df2.iterrows():
                    row = run_df[r][int(y[0])]
                    row['MRR'] = y[1][' recip_rank']
                    run_df[r][int(y[0])] = row

    return run_df



In [4]:
bools = ["adhocbool1", "adhocbool2", "adhocbool3"]               
bool_dfs = pop_runs(bools)
non_bools = ["adhocsim1", "adhocsim2", "adhocsim3"]
non_bool_dfs = pop_runs(non_bools)

In [5]:
# Create average results for bool queries
av_concat = pd.concat([pd.DataFrame([bool_dfs['adhocbool1'][x] for x in range(1, 101)]), 
                         pd.DataFrame([bool_dfs['adhocbool2'][x] for x in range(1, 101)]),
                        pd.DataFrame([bool_dfs['adhocbool3'][x] for x in range(1, 101)])])
av_bools = av_concat.groupby(av_concat.index).mean()
av_boolrow = pd.DataFrame(av_bools.mean())
std_boolrow = pd.DataFrame(av_bools.std())

# Create average results for non bool queries
av_nb_concat = pd.concat([pd.DataFrame([non_bool_dfs['adhocsim1'][x] for x in range(1, 101)]), 
                         pd.DataFrame([non_bool_dfs['adhocsim2'][x] for x in range(1, 101)]),
                        pd.DataFrame([non_bool_dfs['adhocsim3'][x] for x in range(1, 101)])])
av_non_bools = av_nb_concat.groupby(av_nb_concat.index).mean()
av_non_boolrow = pd.DataFrame(av_non_bools.mean())
std_non_boolrow = pd.DataFrame(av_non_bools.std())


In [6]:
topsen_df = pd.concat([pd.DataFrame([pop_runs(['topsen'])['topsen'][x] for x in range(1, 101)])])
topsen_row = topsen_df.mean()

toppar_df = pd.concat([pd.DataFrame([pop_runs(['toppar'])['toppar'][x] for x in range(1, 101)])])
toppar_row = toppar_df.mean()

keyword_df = pd.concat([pd.DataFrame([pop_runs(['adhockey'])['adhockey'][x] for x in range(1, 101)])])
key_row = keyword_df.mean()


In [7]:
best_runs = {}
runs = ['adhocbool', 'adhocsim']
for r in runs:
    best_runs[r] = {}
    for x in range(1,4):
        best_runs[r][x] = {}
        with open('res-files/{}{}_standard.csv'.format(r, x), 'r') as f:
            df = pd.DataFrame.from_csv(f)
            fill_empty_row(df)
            for y in df.iterrows():
                best_runs[r][x][int(y[0])] = {'P1': y[1][' P_1'],
                                                       'P5': y[1][' P_5'],
                                                       'AP5': y[1][' map']}
        with open('res-files/{}{}_recip.csv'.format(r, x), 'r') as f:
            df = pd.DataFrame.from_csv(f)
            fill_empty_row(df)
            for y in df.iterrows():
                row =  best_runs[r][x][int(y[0])]
                row['MRR'] = y[1][' recip_rank']
                best_runs[r][x][int(y[0])] = row
                
best = {}
for r in runs:
    best[r] = {}
    for x in range(1,4):
        for topic in range(1,101):
            if topic not in best[r]:
                best[r][topic] = {}
            if topic in best_runs[r][x]:
                for m in best_runs[r][x][topic]:
                    if m not in best[r][topic]:
                        best[r][topic][m] = 0.0
                    
                    # Grab the max value.
                    if best_runs[r][x][topic][m] > best[r][topic][m]:
                        best[r][topic][m] = best_runs[r][x][topic][m]


In [8]:
best_df = pd.DataFrame([[np.mean([best['adhocsim'][x]['MRR'] for x in best['adhocsim']]),
                    np.mean([best['adhocsim'][x]['P1'] for x in best['adhocsim']]),
                    np.mean([best['adhocsim'][x]['P5'] for x in best['adhocsim']]),
                    np.mean([best['adhocsim'][x]['AP5'] for x in best['adhocsim']])],
                   [np.mean([best['adhocbool'][x]['MRR'] for x in best['adhocbool']]),
                    np.mean([best['adhocbool'][x]['P1'] for x in best['adhocbool']]),
                    np.mean([best['adhocbool'][x]['AP5'] for x in best['adhocbool']]),
                    np.mean([best['adhocbool'][x]['P5'] for x in best['adhocbool']])]], 
                  columns=['P1','P5', 'AP5', 'MRR'],
                  index=['NBbest','Bbest'])

In [9]:
# Final results table for manual queries and baselines. Order needs to be rearranged however.
all_pd = pd.concat([av_boolrow, av_non_boolrow, key_row, topsen_row, toppar_row], axis=1)
all_df = all_pd.T
all_df = all_df[["P1", "P5", "AP5", "MRR"]]

all_df = pd.concat([all_df, best_df])
all_df.columns = ["P@1", "P@5", "AP@5","MRR"]
# all_df.rows = ["Bavg", "Bbest", "NBavg", "NBbest"]
print(all_df.round(4).to_latex())
#rows bavg, nbavg, k, s, p, nbbest, bbest

\begin{tabular}{lrrrr}
\toprule
{} &     P@1 &     P@5 &    AP@5 &     MRR \\
\midrule
0      &  0.4400 &  0.3140 &  0.1528 &  0.4844 \\
0      &  0.5733 &  0.3913 &  0.2393 &  0.6362 \\
0      &  0.7300 &  0.5020 &  0.3121 &  0.8033 \\
1      &  0.6800 &  0.4540 &  0.2958 &  0.7520 \\
2      &  0.6800 &  0.4440 &  0.3015 &  0.7656 \\
NBbest &  0.8377 &  0.7800 &  0.5520 &  0.3441 \\
Bbest  &  0.7012 &  0.6500 &  0.2530 &  0.4840 \\
\bottomrule
\end{tabular}



In [10]:
# Statistical significance computed using a paired t-test
print("*"*30)
print("T-tests for manual queries")
print()
print('av_bools', 'topsen_df')
print('p1', stats.ttest_rel(av_bools['P1'], topsen_df['P1']).pvalue)
print('p5', stats.ttest_rel(av_bools['P5'], topsen_df['P5']).pvalue)
print('mrr', stats.ttest_rel(av_bools['MRR'], topsen_df['MRR']).pvalue)
print('map', stats.ttest_rel(av_bools['AP5'], topsen_df['AP5']).pvalue)
print()
print('best_bools', 'topsen_df')
print('p1', stats.ttest_rel([best['adhocbool'][x]['P1'] for x in best['adhocbool']], topsen_df['P1']).pvalue)
print('p5', stats.ttest_rel([best['adhocbool'][x]['P5'] for x in best['adhocbool']], topsen_df['P5']).pvalue)
print('mrr', stats.ttest_rel([best['adhocbool'][x]['MRR'] for x in best['adhocbool']], topsen_df['MRR']).pvalue)
print('map', stats.ttest_rel([best['adhocbool'][x]['AP5'] for x in best['adhocbool']], topsen_df['AP5']).pvalue)
print()
print('av_non_bools', 'topsen_df')
print('p1', stats.ttest_rel(av_non_bools['P1'], topsen_df['P1']).pvalue)
print('p5', stats.ttest_rel(av_non_bools['P5'], topsen_df['P5']).pvalue)
print('mrr', stats.ttest_rel(av_non_bools['MRR'], topsen_df['MRR']).pvalue)
print('map', stats.ttest_rel(av_non_bools['AP5'], topsen_df['AP5']).pvalue)
print()
print('best_non_bools', 'topsen_df')
print('p1', stats.ttest_rel([best['adhocsim'][x]['P1'] for x in best['adhocsim']], topsen_df['P1']).pvalue)
print('p5', stats.ttest_rel([best['adhocsim'][x]['P5'] for x in best['adhocsim']], topsen_df['P5']).pvalue)
print('mrr', stats.ttest_rel([best['adhocsim'][x]['MRR'] for x in best['adhocsim']], topsen_df['MRR']).pvalue)
print('map', stats.ttest_rel([best['adhocsim'][x]['AP5'] for x in best['adhocsim']], topsen_df['AP5']).pvalue)
print()
print('keyword', 'topsen_df')
print('p1', stats.ttest_rel(keyword_df['P1'], topsen_df['P1']).pvalue)
print('p5', stats.ttest_rel(keyword_df['P5'], topsen_df['P5']).pvalue)
print('mrr', stats.ttest_rel(keyword_df['MRR'], topsen_df['MRR']).pvalue)
print('map', stats.ttest_rel(keyword_df['AP5'], topsen_df['AP5']).pvalue)
print()
print('toppar_df', 'toppsen_df')
print('p1', stats.ttest_rel(toppar_df['P1'], topsen_df['P1']).pvalue)
print('p5', stats.ttest_rel(toppar_df['P5'], topsen_df['P5']).pvalue)
print('mrr', stats.ttest_rel(toppar_df['MRR'], topsen_df['MRR']).pvalue)
print('map', stats.ttest_rel(toppar_df['AP5'], topsen_df['AP5']).pvalue)
print()
print("*"*30)
print()
print('av_bools', 'toppar_df')
print('p1', stats.ttest_rel(av_bools['P1'], toppar_df['P1']).pvalue)
print('p5', stats.ttest_rel(av_bools['P5'], toppar_df['P5']).pvalue)
print('mrr', stats.ttest_rel(av_bools['MRR'], toppar_df['MRR']).pvalue)
print('map', stats.ttest_rel(av_bools['AP5'], toppar_df['AP5']).pvalue)
print()
print('best_bools', 'toppar_df')
print('p1', stats.ttest_rel([best['adhocbool'][x]['P1'] for x in best['adhocbool']], toppar_df['P1']).pvalue)
print('p5', stats.ttest_rel([best['adhocbool'][x]['P5'] for x in best['adhocbool']], toppar_df['P5']).pvalue)
print('mrr', stats.ttest_rel([best['adhocbool'][x]['MRR'] for x in best['adhocbool']], toppar_df['MRR']).pvalue)
print('map', stats.ttest_rel([best['adhocbool'][x]['AP5'] for x in best['adhocbool']], toppar_df['AP5']).pvalue)
print()
print('av_non_bools', 'toppar_df')
print('p1', stats.ttest_rel(av_non_bools['P1'], toppar_df['P1']).pvalue)
print('p5', stats.ttest_rel(av_non_bools['P5'], toppar_df['P5']).pvalue)
print('mrr', stats.ttest_rel(av_non_bools['MRR'], toppar_df['MRR']).pvalue)
print('map', stats.ttest_rel(av_non_bools['AP5'], toppar_df['AP5']).pvalue)
print()
print('best_non_bools', 'toppar_df')
print('p1', stats.ttest_rel([best['adhocsim'][x]['P1'] for x in best['adhocsim']], toppar_df['P1']).pvalue)
print('p5', stats.ttest_rel([best['adhocsim'][x]['P5'] for x in best['adhocsim']], toppar_df['P5']).pvalue)
print('mrr', stats.ttest_rel([best['adhocsim'][x]['MRR'] for x in best['adhocsim']], toppar_df['MRR']).pvalue)
print('map', stats.ttest_rel([best['adhocsim'][x]['AP5'] for x in best['adhocsim']], toppar_df['AP5']).pvalue)
print()
print('keyword', 'toppar_df')
print('p1', stats.ttest_rel(keyword_df['P1'], toppar_df['P1']).pvalue)
print('p5', stats.ttest_rel(keyword_df['P5'], toppar_df['P5']).pvalue)
print('mrr', stats.ttest_rel(keyword_df['MRR'], toppar_df['MRR']).pvalue)
print('map', stats.ttest_rel(keyword_df['AP5'], toppar_df['AP5']).pvalue)
print()
print('toppsen_df', 'toppar_df')
print('p1', stats.ttest_rel(topsen_df['P1'], toppar_df['P1']).pvalue)
print('p5', stats.ttest_rel(topsen_df['P5'], toppar_df['P5']).pvalue)
print('mrr', stats.ttest_rel(topsen_df['MRR'], toppar_df['MRR']).pvalue)
print('map', stats.ttest_rel(topsen_df['AP5'], toppar_df['AP5']).pvalue)

******************************
T-tests for manual queries

av_bools topsen_df
p1 2.4866458985e-05
p5 0.00017643581094
mrr 2.47030403441e-07
map 7.32942460315e-07

best_bools topsen_df
p1 0.603998576944
p5 0.456588106389
mrr 0.31767638165
map 0.158963904297

av_non_bools topsen_df
p1 0.0221905909132
p5 0.0344180953941
mrr 0.00360400528636
map 0.00335437377397

best_non_bools topsen_df
p1 0.0406091308311
p5 0.00124052101298
mrr 0.0265232835294
map 0.0129481892817

keyword topsen_df
p1 0.338448064014
p5 0.0978938211469
mrr 0.18999576755
map 0.33050317554

toppar_df toppsen_df
p1 1.0
p5 0.705861453943
mrr 0.703782988353
map 0.757167338314

******************************

av_bools toppar_df
p1 3.62125546444e-05
p5 0.000227552472381
mrr 2.21761928731e-08
map 3.31580436949e-06

best_bools toppar_df
p1 0.641750434481
p5 0.295481827679
mrr 0.220581396596
map 0.155401660773

av_non_bools toppar_df
p1 0.031022818698
p5 0.0703808807411
mrr 0.00131593539745
map 0.00845052791397

best_non_bools topp