In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns

def percentage_to_float(x):
    """Convert a percentage string such as ``'57.77%'`` into a fraction.

    Parameters
    ----------
    x : str
        Percentage text. Surrounding whitespace and percent signs at either
        end are ignored.

    Returns
    -------
    float
        The numeric value divided by 100, or ``nan`` when nothing is left
        after stripping (an empty cell).

    Raises
    ------
    ValueError
        If the remaining text cannot be parsed as a number.
    """
    # Strip whitespace on both sides of the percent sign so that inputs such
    # as '50% ' or '50 %' parse instead of raising.
    text = x.strip().strip('%').strip()
    if not text:
        return float('nan')
    return float(text) / 100

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 22 days


Read in the results from the shared Google document and reflow them into a "one-number-per-row" format, in which additional columns describe the attributes of each number. The relevant attributes here are *submission*, *domain*, and *task*.

In [2]:
# Names of the result columns; each one is parsed with percentage_to_float
# while the file is read.
result_columns = [
    'μ-M', 'μ-S', 'μ-C',
    'Tw-M', 'Tw-S', 'Tw-C',
    'R-M', 'R-S', 'R-C',
    'TED-M', 'TED-S', 'TED-C',
    'Macro-C',
]
D = pd.read_csv(
    "dimsum16_results.tsv",
    sep="\t",
    index_col=0,
    converters=dict.fromkeys(result_columns, percentage_to_float),
)

In [3]:
# Reshape to long format: one row per (index entry, result column) pair.
per_measurement = (
    D[result_columns]
    .stack()
    .reset_index()
    .rename(columns={'level_1': 'measurement', 0: 'value', 'SYS': 'submission'})
)
# Prefix every submission identifier with "S".
per_measurement['submission'] = "S" + per_measurement['submission'].astype(str)

In [4]:
# Split each measurement label on '-' once: the first piece is used as the
# domain, the second as the task.
label_parts = per_measurement.measurement.str.split('-')

# Map abbreviated domain labels to display names by exact value. A whole-value
# mapping (rather than sequential substring replacement) cannot rewrite text
# inside an already-expanded name and does not depend on the pandas-version
# default of `str.replace(..., regex=...)`. Labels not listed are kept as-is.
domain_names = {'Tw': 'Twitter', 'R': 'Reviews', 'μ': 'Micro-Avg.'}
per_measurement['domain'] = label_parts.str[0].replace(domain_names)

per_measurement['task'] = label_parts.str[1]
del per_measurement['measurement']

per_measurement.head()

Unnamed: 0,submission,value,domain,task
0,S214,0.5666,Micro-Avg.,M
1,S214,0.5755,Micro-Avg.,S
2,S214,0.5741,Micro-Avg.,C
3,S214,0.5949,Twitter,M
4,S214,0.5599,Twitter,S


## Main results

In [5]:
# Load the main results, order them by 'Macro-C' (highest first), number the
# rows 1..n as 'Rank', prefix the system IDs with "S", and shorten headers.
main = (
    pd.read_csv("main_results.csv", index_col=0)
    .sort_values('Macro-C', ascending=False)
    .assign(
        Rank=lambda frame: range(1, len(frame) + 1),
        SYS=lambda frame: "S" + frame['SYS'].astype(str),
    )
    .rename(columns={'SYS': 'System', 'Resources': 'Res.', 'Macro-C': 'Score'})
)

col_order = ['Rank', 'System', 'Team', 'Score', 'Res.']

# Missing values are rendered as empty cells in the LaTeX output.
print(main[col_order].to_latex(index=False, na_rep=''))

\begin{tabular}{rllrl}
\toprule
 Rank & System &           Team &   Score & Res. \\
\midrule
    1 &   S214 &         ICL-HD &  0.5777 &   ++ \\
    2 &   S249 &         UW-CSE &  0.5771 &   ++ \\
    3 &   S248 &         UW-CSE &  0.5710 &      \\
    4 &   S106 &          UFRGS &  0.5027 &      \\
    5 &   S227 &  VectorWeavers &  0.4994 &   ++ \\
    6 &   S255 &           BCED &  0.4713 &   ++ \\
    7 &   S211 &           BCED &  0.4617 &    + \\
    8 &   S254 &           BCED &  0.4579 &      \\
    9 &   S108 &         WHUNlp &  0.2571 &      \\
\bottomrule
\end{tabular}



## Numbers separated by task and domain

In [6]:
def latex_table_per_task(task_name):
    """Print a LaTeX table of the values for one task, submissions by domains.

    Reads the notebook-level ``per_measurement`` frame (columns
    ``submission``, ``value``, ``domain``, ``task``), keeps the rows whose
    ``task`` equals ``task_name``, pivots them so that each submission is a
    row and each domain a column, and prints the result of ``to_latex()``.

    Parameters
    ----------
    task_name : str
        Task label to select from ``per_measurement.task``.

    Returns
    -------
    None
        The table is printed, not returned.
    """
    task_rows = per_measurement[per_measurement.task == task_name]
    # Select the 'value' column explicitly before unstacking so the result has
    # a flat column index named 'domain'; this avoids overwriting the column
    # labels from `columns.levels[1]`, which is not guaranteed to line up with
    # the actual column order.
    table = task_rows.set_index(['submission', 'domain'])['value'].unstack('domain')
    # Fixed presentation order; any other domain column is left out.
    table = table[['Reviews', 'TED', 'Twitter', 'Micro-Avg.']]
    print(table.to_latex())
    
# Print one table per task label, in the order M, S, C.
for task_label in ('M', 'S', 'C'):
    latex_table_per_task(task_label)



\begin{tabular}{lrrrr}
\toprule
domain &  Reviews &     TED &  Twitter &  Micro-Avg. \\
submission &          &         &          &             \\
\midrule
S106       &   0.4957 &  0.5676 &   0.5116 &      0.5148 \\
S108       &   0.2639 &  0.3344 &   0.3418 &      0.3098 \\
S211       &   0.0907 &  0.1828 &   0.1576 &      0.1346 \\
S214       &   0.5337 &  0.5714 &   0.5949 &      0.5666 \\
S227       &   0.3618 &  0.4176 &   0.3932 &      0.3849 \\
S248       &   0.5396 &  0.5235 &   0.5448 &      0.5393 \\
S249       &   0.5480 &  0.5348 &   0.6109 &      0.5724 \\
S254       &   0.0705 &  0.1630 &   0.0634 &      0.0820 \\
S255       &   0.0868 &  0.2011 &   0.1550 &      0.1348 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrrrr}
\toprule
domain &  Reviews &     TED &  Twitter &  Micro-Avg. \\
submission &          &         &          &             \\
\midrule
S106       &   0.5093 &  0.4961 &   0.4920 &      0.4998 \\
S108       &   0.2582 &  0.2468 &   0.2463 &      0.2514 \\