In [106]:
import os

import numpy as np
import pandas as pd
from parse import *
from scipy import stats

# This is the table summarizing results from the different query forms AND, OR, PHRASE

In [84]:
# Summary table for the query forms (AND / OR / PHRASE): one row per
# (year, method) and one column per measure found in the per-run CSV files.
MEAN_FILE_PATTERN = "mean_{method}_{year}.csv"

# Select the input files by matching the complete file-name pattern instead of
# testing for the substring 'mean_'. With the substring test a stray file such
# as "mean_notes.txt" is read as data, parse() returns None for it, and the
# later `.named` access raises AttributeError.
# Sorting removes any dependence on the arbitrary order of os.listdir().
parsed_names = {f: parse(MEAN_FILE_PATTERN, f) for f in sorted(os.listdir("."))}
meanfiles = [f for f, result in parsed_names.items() if result is not None]
if not meanfiles:
    raise FileNotFoundError(
        f"no files matching '{MEAN_FILE_PATTERN}' found in {os.getcwd()}"
    )

# Every file is read as a headerless two-column CSV (measure name, value).
# The file name doubles as the value column's label so that it can be turned
# back into (method, year) below.
meandfs = [
    pd.read_csv(f, names=["measure", f]).set_index("measure") for f in meanfiles
]
# After the transpose: files are rows, measures are columns.
meandf = pd.concat(meandfs, axis=1).T

# Recover method and year from each file name (the row labels of meandf),
# reusing the parse results computed while filtering.
parses = [parsed_names[f].named for f in meandf.index]
indexdf = pd.DataFrame(parses)

# reset_index() moves the file names into an "index" column and leaves a
# 0..n-1 row index, so the column-wise concat lines up row by row with indexdf.
meandf = meandf.reset_index()
df = pd.concat([indexdf, meandf], axis=1)
df = df.drop("index", axis=1).set_index(["year", "method"]).sort_index().round(2)

# Within each year, order the methods by ascending "ratio". The year is
# compared as a string because it was extracted from the file name.
df2017 = df.query("year == '2017'").sort_values("ratio")
df2018 = df.query("year == '2018'").sort_values("ratio")
dfsorted = pd.concat([df2017, df2018])
print(dfsorted.to_latex())

\begin{tabular}{llrrrrr}
\toprule
     &    &  ratio &  infNDCG &  Rprec &   mean\_r &    std\_r \\
year & method &        &          &        &          &          \\
\midrule
2017 & phrase &  62.69 &    43.53 &  28.76 &   484.45 &   890.36 \\
     & and &  67.92 &    42.04 &  28.62 &   522.74 &   911.55 \\
     & or &  88.89 &    35.12 &  23.48 &  1020.66 &  1466.12 \\
2018 & phrase &  85.87 &    50.17 &  34.82 &   553.15 &  1046.66 \\
     & and &  86.97 &    49.86 &  34.49 &   583.90 &  1107.74 \\
     & or &  91.12 &    44.94 &  30.43 &   804.57 &  1369.79 \\
\bottomrule
\end{tabular}



# This is the table for the baseline classifier evaluation metrics
But note that the table in the paper has been post-processed! Do not simply copy and paste this output, as that would remove some manual enhancements made in the paper.

In [152]:
logreg2017crossvals = [74.17289220917823, 75.56029882604055, 76.72183662573411, 74.74639615589963, 73.67859049652962, 74.63961558996263, 74.95995728777362, 76.86965811965813, 73.5576923076923, 74.67948717948718]
logreg2018crossvals = [74.818401937046, 72.57869249394673, 73.10720775287704, 74.62144155057541, 75.89339794064203, 74.076317383404, 77.46820109024833, 72.98606904906117, 73.51515151515152, 74.96969696969697]
gru2017crossvals = np.array([0.777, 0.786, 0.781, 0.772, 0.777, 0.770, 0.786, 0.795, 0.799, 0.771])*100
gru2018crossvals = np.array([0.754, 0.761, 0.752, 0.772, 0.770, 0.759, 0.755, 0.750, 0.762, 0.763])*100
values = {"ACC":[np.mean(logreg2017crossvals), 64.48, np.mean(logreg2018crossvals), 65.58,  np.mean(gru2017crossvals), 68.0, np.mean(gru2018crossvals), 67.8], "dataset": ([2017]*2)+([2018]*2)+([2017]*2)+([2018]*2), "classifier": (["LogReg"]*4)+(["GRU"]*4), "evaluation method": ["cross validation", "trained on 2018", "cross validation", "trained on 2017"]*2}
table = pd.DataFrame(values).set_index(["classifier", "dataset", "evaluation method"])
print(table.unstack("classifier").round(2).to_latex())

\begin{tabular}{llrr}
\toprule
     & {} & \multicolumn{2}{l}{ACC} \\
     & classifier &    GRU & LogReg \\
dataset & evaluation method &        &        \\
\midrule
2017 & cross validation &  78.14 &  74.96 \\
     & trained on 2018 &  68.00 &  64.48 \\
2018 & cross validation &  75.98 &  74.40 \\
     & trained on 2017 &  67.80 &  65.58 \\
\bottomrule
\end{tabular}



In [155]:
# Independent two-sample t-test (SciPy's default settings) on the GRU
# cross-validation accuracies of the 2017 vs. the 2018 dataset.
# `stats` is scipy.stats, imported in the notebook's import cell at the top.
stats.ttest_ind(gru2017crossvals, gru2018crossvals)

Ttest_indResult(statistic=5.50418966752642, pvalue=3.1596951555674927e-05)

In [162]:
abbrevs = ["DSYN", "DPT", "DHYP", "GSYN", "GDES"]
meanings = ["disease synonyms", "disease preferred name", "disease hypernyms ", "gene synonyms", "gene description"]
table = pd.DataFrame([abbrevs, meanings])
print(table.to_latex())

\begin{tabular}{llllll}
\toprule
{} &                 0 &                       1 &                   2 &              3 &                 4 \\
\midrule
0 &              DSYN &                     DPT &                DHYP &           GSYN &              GDES \\
1 &  disease synonyms &  disease preferred name &  disease hypernyms  &  gene synonyms &  gene description \\
\bottomrule
\end{tabular}

