In [1]:
import os
import pandas as pd
pd.set_option('display.max_colwidth', 400)
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
def pretty_print(df):
    return display( HTML( df.to_html().replace("\\n","<br>") ) )
import glob

os.sys.path.insert(0, '../evaluation')

set_ = 'test'

In [2]:
models = [os.path.basename(p) for p in glob.glob(f'../data/models/{set_}/*')]

dfs = [pd.read_csv(f'../data/models/{set_}/{model}/system_evaluation.csv', 
                   index_col=['subset', 'references', 'metric']) for model in models]

df = pd.concat(dfs, keys=models)

In [8]:
latex = df.loc[(slice(None), 'old-cat', '[0, 1, 2]'), :].unstack()\
.reset_index(level=[1, 2], drop=True)\
[[('value', 'bleu'), ('value', 'meteor'), ('value', 'ter')]]\
.loc[['adaptcentre',
      'deepnlg-e2ernn',
      '43434_5512_00',
      'gcn',
      'BIU_Chimera_v1',
      'upf-forge',
      'seq2seq_wc_word']]\
.sort_values(('value', 'bleu'), ascending=False)\
.to_latex(float_format='{:0.2f}'.format)

print(latex)

\begin{tabular}{lrrr}
\toprule
{} & \multicolumn{3}{l}{value} \\
metric &  bleu & meteor &  ter \\
\midrule
adaptcentre     & 60.59 &   0.45 & 0.38 \\
deepnlg-e2ernn  & 58.36 &   0.42 & 0.40 \\
43434\_5512\_00   & 58.24 &   0.43 & 0.37 \\
seq2seq\_wc\_word & 55.82 &   0.41 & 0.40 \\
gcn             & 55.35 &   0.39 & 0.40 \\
BIU\_Chimera\_v1  & 53.20 &   0.44 & 0.47 \\
upf-forge       & 40.88 &   0.41 & 0.56 \\
\bottomrule
\end{tabular}



In [5]:
latex = df.loc[(slice(None), 'all-cat', '[0, 1, 2]'), :].unstack()\
.reset_index(level=[1, 2], drop=True)\
[[('value', 'bleu'), ('value', 'meteor'), ('value', 'ter')]]\
.loc[['adaptcentre',
      'deepnlg-e2ernn',
      '43434_5512_00',
      'gcn',
      'BIU_Chimera_v1',
      'upf-forge',
      'seq2seq_wc_word']]\
.sort_values(('value', 'bleu'), ascending=False)\
.to_latex()

print(latex)

\begin{tabular}{lrrr}
\toprule
{} & \multicolumn{3}{l}{value} \\
metric &   bleu &    meteor &       ter \\
\midrule
BIU\_Chimera\_v1  &  47.18 &  0.391898 &  0.510261 \\
43434\_5512\_00   &  46.11 &  0.397602 &  0.480683 \\
seq2seq\_wc\_word &  45.51 &  0.362405 &  0.485740 \\
upf-forge       &  38.65 &  0.391077 &  0.559327 \\
deepnlg-e2ernn  &  34.01 &  0.255134 &  0.668773 \\
adaptcentre     &  31.06 &  0.315980 &  0.849266 \\
gcn             &    NaN &       NaN &       NaN \\
\bottomrule
\end{tabular}



Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  # Remove the CWD from sys.path while we load stuff.


In [3]:
old_cat = !cat ../evaluation/subsets/test/old-cat.txt
old_cat = [int(i) for i in old_cat]

sizes = {}
for i in range(1, 8):
    sizes[i] = !cat ../evaluation/subsets/test/{i}size.txt
    print(f'{i} -> {len(sizes[i])}')
    sizes[i] = [int(j) for j in sizes[i] if int(j) in old_cat]

1 -> 454
2 -> 349
3 -> 386
4 -> 363
5 -> 265
6 -> 24
7 -> 21


In [4]:
from random import Random

show_in_paper = Random(160727).sample(old_cat, 5) # seed é diahoraminuto do momento em que digitei isso :)
r = Random(160729)

qualitative_analysis = [] 
for js in sizes.values():
    chosen = r.sample(js, 10)
    qualitative_analysis.extend(chosen)

In [5]:
# my texts
with open('../data/models/test/43434_5512_00/43434_5512_00_all-cat.lex', 'r', encoding='utf-8') as f:
    my_texts = [l[:-1] for l in f.readlines()]
    
# my texts + LP
#with open('../data/models/test/43434_5512_505/43434_5512_505_all-cat.lex', 'r', encoding='utf-8') as f:
#    my_texts_lp = [l[:-1] for l in f.readlines()]
    
# adaptcentre
with open('../data/models/test/adaptcentre/adaptcentre_all-cat.lex', 'r', encoding='utf-8') as f:
    adaptcentre = [l[:-1] for l in f.readlines()]
    
# upf-forge
with open('../data/models/test/upf-forge/upf-forge_all-cat.lex', 'r', encoding='utf-8') as f:
    upf_forge = [l[:-1] for l in f.readlines()]
    
# deepnlg-e2ernn
with open('../data/models/test/deepnlg-e2ernn/deepnlg-e2ernn_all-cat.lex', 'r', encoding='utf-8') as f:
    deepnlg_e2ernn = [l[:-1] for l in f.readlines()]

In [6]:
def to_string(e):
    
    triples = '\n'.join(f'<{t.subject} , {t.predicate} , {t.object}>' for t in e.triples)
    lexes = '\n'.join(l['text'] for l in e.lexes)
    
    return f'category={e.category}\n\ntriples:\n{triples}\n\nreferences:\n{lexes}'

In [7]:
%run ../template_model/reading_thiagos_templates.py

test = load_shared_task_test()

entries_show_in_paper = [test[i] for i in show_in_paper]
entries_to_qualitative_analysis = [test[i] for i in qualitative_analysis]

In [8]:
texts_show_in_paper = [{'entry': to_string(test[i]),
                        'me': my_texts[i],
                        #'me_lp': my_texts_lp[i],
                        'adaptcentre': adaptcentre[i],
                        'upf_forge': upf_forge[i],
                        'deepnlg_e2ernn': deepnlg_e2ernn[i]} for i in show_in_paper]
sip_df = pd.DataFrame(texts_show_in_paper)

texts_to_qualitative_analysis = [{'entry': to_string(test[i]),
                                  'me': my_texts[i],
                                  'adaptcentre': adaptcentre[i],
                                  'upf_forge': upf_forge[i]} for i in qualitative_analysis]
qa_df = pd.DataFrame(texts_to_qualitative_analysis)

In [9]:
pretty_print(sip_df[['entry', 'me', 'adaptcentre', 'upf_forge', 'deepnlg_e2ernn']])

Unnamed: 0,entry,me,adaptcentre,upf_forge,deepnlg_e2ernn
0,"category=Food triples: <BLT , dishVariation , Club_sandwich> <Bacon_sandwich , dishVariation , BLT> references: A bacon sandwich can be called a BLT which is a variation of a Club Sandwich. BLT is a variation of the club sandwich and the bacon sandwich. Bacon sandwich is a variation of BLT which is a variation of a club sandwich.",the blt is a variation of the club sandwich and bacon .,blt is a variation of both the club sandwich and the bacon sandwich .,club sandwich can a variation of which be blt can be a variation of bacon sandwich .,the blt is a variation of both the club sandwich and the bacon sandwich .
1,"category=Astronaut triples: <Buzz_Aldrin , birthPlace , Glen_Ridge,_New_Jersey> <Buzz_Aldrin , nationality , United_States> <Buzz_Aldrin , was selected by NASA , 1963> <Buzz_Aldrin , was a crew member of , Apollo_11> <Buzz_Aldrin , occupation , Fighter_pilot> <Buzz_Aldrin , almaMater , ""Massachusetts Institute of Technology, Sc.D. 1963""> references: Buzz Aldrin was an American fighter pilot who was born in Glen Ridge, New Jersey. In 1963, he graduated from Massachusetts Institute of Technology, Sc.D. 1963 and was selected by NASA to be part of the Apollo 11 crew. US national Buzz Aldrin, a fighter pilot born in Glen Ridge, NJ, graduated from Massachusetts Institute of Technology, Sc.D. 1963 and went on to become a crew member on Apollo 11 after being recruited by NASA in 1963. The crew on Apollo 11 included American Buzz Aldrin. He was born in Glen Ridge, New Jersey, and went on to serve as a fighter pilot before graduating from MIT with a Sc. D in 1963. In that same year he was selected to work for NASA.","buzz aldrin was an american born in glen ridge , new jersey . he graduated from massachusetts institute of technology in 1963 with a doctorate in science and was selected by nasa in 1963 . he was a fighter pilot and a crew member of apollo 11 .","buzz aldrin was an american who was born in glen ridge , new jersey . he graduated from mit with a sc . d in 1963 . he was selected by nasa in 1963 as a fighter pilot and became a crew member of apollo 11 .","buzz aldrin , who is from the united states , was born in glen ridge ( new jersey ) . nasa selected him in 1963 . he is a fighter pilot . he graduated from massachusetts institute of technology in 1963 with a sc . d . .","american buzz aldrin was born in glen ridge , new jersey . he graduated from massachusetts institute of technology with a sc . d in 1963 , after that he was hired by nasa in 1963 as a fighter pilot . he was a crew member on apollo 11 ."
2,"category=Airport triples: <Andrews_County_Airport , runwayLength , 896.0> references: The runway length of Andrews County Airport is 896.",andrews county airport ' s runway length is 896 .,the runway length of andrews county airport is 896 .,the length of the runway at andrews county airport is 895 . 807 meters .,the runway length of andrews county airport is 896 .
3,"category=SportsTeam triples: <Peter_Stöger , club , SK_Vorwärts_Steyr> <Peter_Stöger , club , FC_Admira_Wacker_Mödling> <1._FC_Köln , manager , Peter_Stöger> <Peter_Stöger , club , FK_Austria_Wien> references: Peter Stöger is the manager at 1. FC Köln. He is a former player of SK Vorwärts Steyr, FC Admira Wacker Mödling and FK Austria Wien. Peter Stöger is at the FK Austria Wien club and is in the SK Vorwärts Steyr club, a player in FC Admira Wacker Mödling, and is the manager of FC Kolin.",the manager of 1 . fc koln is peter stoger who plays for sk vorwarts steyr and fc admira wacker modling . he plays for fk austria wien .,peter stoger is a player in fc admira wacker modling and fc admira wacker modling . he is also the manager of fc kolin and plays for fc admira wacker modling and fc admira wacker modling .,"peter stoger , who plays for sk vorwarts steyr , fc admira wacker modling and fk austria wien , is the manager of 1 . fc koln .",peter stoger is in the sk vorwarts steyr club and manages 1 fc koln . he also plays for fc admira wacker modling and is the manager of 1 fc koln .
4,"category=Food triples: <Bhajji , country , India> <Bhajji , region , Karnataka> <Bhajji , alternativeName , ""Bhaji, bajji""> <Bhajji , ingredient , Vegetable> references: Originating from the Karnataka region in India, Bhajji (also known as Bhali or bajji), has vegetables as a main ingredient. Bhajji comes from the Karnataka region of India.The dish contains vegetables and also known as Bhaji or bajji.",bhajji come from the karnataka region of india and include vegetables . it is also known as bhaji or bajji .,"bhajji , also known as bhaji or bajji , originates from the karnataka region of india . it includes vegetables .","bhajji , found in karnataka , comes from india . a bhajji is also called a bhaji and bajji . it contains vegetable .","bhajji , also known as bhaji or bajji , comes from the karnataka region of india . it includes vegetables ."


In [12]:
pretty_print(qa_df[['entry', 'me', 'adaptcentre', 'upf_forge']])

Unnamed: 0,entry,me,adaptcentre,upf_forge
0,"category=Building triples: <Pacific_Grove,_California , isPartOf , California> references: Pacific Grove, California is part of California. Pacific Grove is part of California.","pacific grove , california is part of california .",pacific grove is part of california .,"pacific grove , california is part of california ."
1,"category=Astronaut triples: <California , gemstone , Benitoite> references: The gemstone of California is Benitoite. California has a gemstone called Benitoite. Benitoite is a gemstone from California. Benitoite is a gemstone found in California.",california gemstone benitoite .,california ' s gemstone is benitoite .,california is well known for the benitoite gemstone .
2,"category=Astronaut triples: <Buzz_Aldrin , almaMater , ""Massachusetts Institute of Technology, Sc.D. 1963""> references: Buzz Aldrin graduated from Massachusetts Institute of Technology with a Sc.D in 1963. Buzz Aldrin graduated in 1963 from MIT with a Sc.D. Buzz Aldrin graduated from MIT with a Sc. D in 1963. Buzz Aldrin graduated from Massachusetts Institute of Technology, Sc.D. 1963. Buzz Aldrin graduated from Massachusetts Institute of Technology in 1963 with a doctorate in Science.",buzz aldrin graduated from mit with a sc . d . in 1963 .,"buzz aldrin graduated from massachusetts institute of technology , sc . d . 1963 .",buzz aldrin graduated from massachusetts institute of technology in 1963 with a sc . d . .
3,"category=Food triples: <Batagor , mainIngredients , ""fried fish dumpling with tofu and vegetables in peanut sauce""> references: A batagor consists of a fried fish dumpling with tofu and vegetables in peanut sauce. Batagor has these main ingredients - fried fish dumplings, tofu and vegetables in peanut sauce. Batagor's main ingredients are fried fish dumpling with tofu and vegetables in peanut sauce.",batagor is a fried fish dumpling with tofu and vegetables in peanut sauce .,"the main ingredients of batagor are fried fish dumplings , tofu and vegetables in peanut sauce .",the main ingredient in batagor are fried fish dumpling with tofu and vegetables inpeanut sauce .
4,"category=Food triples: <India , currency , Indian_rupee> references: The currency of India is the Indian rupee. Indian rupee is the currency in India. the Indian rupee is the currency in India.",the indian rupee is the currency in india .,the currency of india is the indian rupee .,the currency of india is the indian rupee .
5,"category=University triples: <School of Business and Social Sciences at the Aarhus University , numberOfStudents , 16000> references: The School of Business and Social Sciences at Aarhus University has 16000 students. Aarhus University School of Business and Social Sciences' number of students are 16000. There are 16000 students at the School of Business and Social Sciences at the University of Aarhus. There are 16000 students in the Aarhus University School of Business and Social Sciences. Aarhus University School of Business and Social Sciences has 16000 students. The number of students in School of Business and Social Sciences at the Aarhus University is 16000. The number of students at the School of Business and Social Sciences at the Aarhus University is 16000. The School of Business and Social Sciences at the Aarhus University has 16000 students.",the school of business and social sciences at the aarhus university has 16000 students .,the school of business and social sciences at the aarhus university has 16000 students .,"the aarhus university , school of business and social sciences has 16000 students ."
6,"category=Food triples: <New_York , language , Spanish_language> references: Spanish language is one of the languages spoken in New York. one of the languages of New York is Spanish.",spanish is a language in new york .,spanish is one language spoken in new york .,the spanish language is spoken in new york .
7,"category=SportsTeam triples: <Michele_Marcolini , club , Vicenza_Calcio> references: Michele Marcolini played for Vicenza Calcio. Michele Marcolini is in the Vicenza Calcio club. Michele Marcolini plays for Vicenza Calcio.",michele marcolini played for vicenza calcio .,michele marcolini plays for vicenza calcio .,michele marcolini plays for vicenza calcio .
8,"category=Airport triples: <Curitiba , isPartOf , South_Region,_Brazil> references: Curitiba is part of the South Region, Brazil.","curitiba is part of the south region , brazil .","curitiba is part of the south region , brazil .",curitiba is part of south region ( brazil ) .
9,"category=Astronaut triples: <Alan_Bean , nationality , United_States> references: The nationality of Alan Bean is United States. Alan Bean's nationality is the United States. Alan Bean is American. Alan Bean was American. Alan Bean is a US national.",alan bean is a united states national .,alan bean is from the united states .,alan bean is from the united states .


In [13]:
qa_df.to_csv('qualitative_analysis.csv', index=False)

# BLEU

https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/multi-bleu.perl

Download multi-bleu.perl and put it in **../evaluation/tools/**

# METEOR

http://www.cs.cmu.edu/~alavie/METEOR/

Download meteor-1.5 and unpack into **../evaluation/tools/**

The jar path must be **../evaluation/tools/meteor-1.5/meteor-1.5.jar**

# TER

http://www.cs.umd.edu/~snover/tercom/

Download tercom-0.7.25 and unpack into **../evaluation/tools/**

The jar path must be **../evaluation/tools/tercom-0.7.25/tercom-0.7.25.jar**

<img src="../img/competition_results.jpg" />

## Melbourne

<a href="../data/models/melbourne/WebNLG Challenge - Bayu Distiawan T.pdf">report</a>

In [5]:
df.loc[('melbourne', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
melbourne,1size,"[0, 1, 2]",0.252278,9.927313,63.81,87.5,72.5,60.5,49.9,0.253384,0.471922,0.27412
melbourne,1size,"[0, 1]",0.253384,9.927313,57.81,85.4,67.9,54.5,43.0,0.252278,0.454606,0.307993
melbourne,2size,"[0, 1, 2]",0.30631,16.598854,51.25,82.2,59.7,44.8,34.0,0.301743,0.421393,0.389833
melbourne,2size,"[0, 1]",0.301743,16.598854,46.92,79.4,55.5,40.5,30.0,0.30631,0.407715,0.420801
melbourne,3size,"[0, 1, 2]",0.319356,22.341969,43.42,80.3,54.7,37.4,25.6,0.317834,0.376657,0.470233
melbourne,3size,"[0, 1]",0.317834,22.341969,39.02,77.2,50.2,33.2,22.1,0.319356,0.365738,0.496335
melbourne,4size,"[0, 1, 2]",0.329409,28.953168,42.14,78.6,52.7,35.6,24.6,0.326356,0.358887,0.52013
melbourne,4size,"[0, 1]",0.326356,28.953168,38.09,75.9,48.9,32.3,21.8,0.329409,0.350519,0.541834
melbourne,5size,"[0, 1, 2]",0.353569,34.796226,36.72,75.3,47.9,29.7,18.4,0.352673,0.327765,0.579703
melbourne,5size,"[0, 1]",0.352673,34.796226,33.19,73.2,44.8,27.1,16.2,0.353569,0.3212,0.590341


In [6]:
!head -100 ../data/models/melbourne/melbourne.txt | tail -10

castle is english language .
eric flint was born in burbank, california .
macmillan publishers is the parent company of farrar, straus and giroux .
john cowper powys was a glastonbury romance .
soho press is in united states .
the secret scripture was published by faber and faber .
asian americans are an ethnic group in united states .
english language is spoken in united states .
weymouth sands was preceded by a glastonbury romance .
the manager of a.c. chievo verona is rolando maran .


## Tilb-SMT

<a href="../data/models/tilburg-smt/report.pdf">report</a>

In [7]:
df.loc[('tilburg-smt', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
tilburg-smt,1size,"[0, 1, 2]",0.188913,8.03304,52.16,88.4,70.9,56.0,45.7,0.193858,0.412354,0.36707
tilburg-smt,1size,"[0, 1]",0.193858,8.03304,45.28,86.6,66.9,50.8,39.9,0.188913,0.399776,0.395726
tilburg-smt,2size,"[0, 1, 2]",0.209839,15.985673,48.16,76.7,57.7,43.6,33.6,0.207026,0.399773,0.464433
tilburg-smt,2size,"[0, 1]",0.207026,15.985673,44.0,74.5,54.1,39.5,29.8,0.209839,0.388138,0.492882
tilburg-smt,3size,"[0, 1, 2]",0.217507,23.606218,43.42,73.2,51.9,36.8,26.2,0.217845,0.380756,0.535855
tilburg-smt,3size,"[0, 1]",0.217845,23.606218,39.46,70.1,47.8,32.8,22.7,0.217507,0.36894,0.563268
tilburg-smt,4size,"[0, 1, 2]",0.218908,31.418733,42.44,72.2,50.5,35.3,25.2,0.217098,0.372728,0.573584
tilburg-smt,4size,"[0, 1]",0.217098,31.418733,38.76,69.8,46.9,31.6,21.8,0.218908,0.36271,0.592167
tilburg-smt,5size,"[0, 1, 2]",0.234479,38.181132,38.68,71.2,47.8,31.4,20.9,0.234631,0.361045,0.615279
tilburg-smt,5size,"[0, 1]",0.234631,38.181132,36.03,69.3,44.9,28.8,18.8,0.234479,0.355483,0.623996


In [8]:
!head -100 ../data/models/tilburg-smt/tilburg-smt.txt | tail -10

novel castle is english language 
. eric flint was born in burbank , california . 
macmillan publishers is the parent company of farrar , straus and giroux 
john cowper powys notablework a glastonbury romance 
soho press , united states . 
, published by faber and faber 
asian americans are an ethnic group . 
english is the language of the united states . 
weymouth sands was preceded by a glastonbury romance 
the manager of a.c. chievo verona is rolando maran 


## PKUWriter

<a href="../data/models/pkuwriter/PKUWriter_system.pdf">report</a>

In [9]:
df.loc[('pkuwriter', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
pkuwriter,1size,"[0, 1, 2]",0.288924,10.788546,53.15,77.3,59.1,46.8,37.4,0.283585,0.415371,0.371388
pkuwriter,1size,"[0, 1]",0.283585,10.788546,49.09,75.5,55.5,42.5,32.6,0.288924,0.40382,0.397781
pkuwriter,2size,"[0, 1, 2]",0.304937,15.813754,45.34,75.4,53.6,40.9,31.7,0.297699,0.35551,0.481268
pkuwriter,2size,"[0, 1]",0.297699,15.813754,41.06,72.9,50.3,37.5,28.8,0.304937,0.345972,0.502592
pkuwriter,3size,"[0, 1, 2]",0.325455,21.528497,34.89,70.7,44.5,29.8,20.3,0.327196,0.301129,0.602554
pkuwriter,3size,"[0, 1]",0.327196,21.528497,30.73,68.1,40.9,26.6,17.5,0.325455,0.294932,0.621255
pkuwriter,4size,"[0, 1, 2]",0.339113,25.914601,36.8,73.1,48.6,34.3,24.9,0.331136,0.292479,0.589683
pkuwriter,4size,"[0, 1]",0.331136,25.914601,32.5,70.5,44.9,31.0,22.0,0.339113,0.285816,0.610181
pkuwriter,5size,"[0, 1, 2]",0.370858,30.664151,34.17,72.4,45.9,30.7,21.3,0.35811,0.274453,0.623402
pkuwriter,5size,"[0, 1]",0.35811,30.664151,30.43,70.1,42.9,28.5,19.5,0.370858,0.27049,0.637112


In [10]:
!head -100 ../data/models/pkuwriter/pkuwriter.txt | tail -10

the novel into battle is written by the english language .
john cowper powys was born in burbank , california .
the parent company of the macmillan publishers press is the macmillan publishers company of the macmillan publishers .
the novel owen glendower is a notable work by john cowper powys .
soho press is located in united states .
the polish academy for writtenwork is referencenumber _ in _ the _ national _ register _ of _ historic _ places .
asian americans are an ethnic group in the united states .
english is the language spoken in the united states .
the novel into battle is preceded by a glastonbury romance .
rolando maran manages the a . c . chievo verona .


## UPF-FORGe

<a href="../data/models/upf-forge/WebNLG_V0.2.pdf">report</a>

In [11]:
df.loc[('upf-forge', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
upf-forge,1size,"[0, 1, 2]",0.236928,10.055066,60.29,86.5,68.7,55.3,44.8,0.23943,0.469279,0.270624
upf-forge,1size,"[0, 1]",0.23943,10.055066,55.43,84.7,64.8,50.3,39.1,0.236928,0.454752,0.302856
upf-forge,2size,"[0, 1, 2]",0.267511,17.954155,43.24,77.5,52.6,35.9,23.9,0.268273,0.4097,0.467734
upf-forge,2size,"[0, 1]",0.268273,17.954155,39.66,74.5,48.8,32.3,21.0,0.267511,0.397624,0.494693
upf-forge,3size,"[0, 1, 2]",0.288092,26.101036,36.84,74.8,46.9,29.5,17.8,0.287742,0.383582,0.567642
upf-forge,3size,"[0, 1]",0.287742,26.101036,32.93,71.2,42.7,25.9,15.0,0.288092,0.374396,0.593986
upf-forge,4size,"[0, 1, 2]",0.297504,34.876033,35.54,73.6,45.8,28.3,16.8,0.292259,0.376883,0.622324
upf-forge,4size,"[0, 1]",0.292259,34.876033,32.35,70.3,41.9,25.2,14.8,0.297504,0.369145,0.636231
upf-forge,5size,"[0, 1, 2]",0.312257,42.184906,32.66,72.4,44.0,25.1,14.2,0.308078,0.367748,0.665456
upf-forge,5size,"[0, 1]",0.308078,42.184906,30.14,69.8,40.8,22.8,12.7,0.312257,0.362987,0.669954


In [12]:
!head -100 ../data/models/upf-forge/upf-forge.txt | tail -10

the english language is spoken in castle ( novel ) .
eric flint was born in burbank ( california ) .
the parent company of farrar , straus and giroux is macmillan publishers .
a glastonbury romance wrote john cowper powys .
soho press is published in the united states .
the publisher of the secret scripture is faber and faber .
a ethnic group of the united states are asian americans .
the english language is spoken in the united states .
weymouth sands follows a glastonbury romance .
rolando maran is the manager of a.c. chievo verona .


## Tilb-Pipeline

<a href="../data/models/tilburg-template/report.pdf">report</a>

In [13]:
df.loc[('tilburg-template', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
tilburg-template,1size,"[0, 1, 2]",0.149068,6.420705,44.81,90.8,72.5,56.6,43.6,0.198285,0.323192,0.475854
tilburg-template,1size,"[0, 1]",0.198285,6.420705,33.23,89.1,68.6,51.6,37.8,0.149068,0.312392,0.500719
tilburg-template,2size,"[0, 1, 2]",0.197241,11.30086,34.84,85.2,61.8,44.5,32.4,0.24214,0.318193,0.515267
tilburg-template,2size,"[0, 1]",0.24214,11.30086,29.7,82.7,57.2,39.4,27.8,0.197241,0.306919,0.548507
tilburg-template,3size,"[0, 1, 2]",0.231148,16.373057,33.91,82.0,56.3,38.3,26.3,0.267722,0.307431,0.559991
tilburg-template,3size,"[0, 1]",0.267722,16.373057,28.29,79.2,52.0,34.2,22.7,0.231148,0.298195,0.587734
tilburg-template,4size,"[0, 1, 2]",0.240871,21.415978,34.05,81.3,55.8,38.6,26.7,0.271546,0.29762,0.57812
tilburg-template,4size,"[0, 1]",0.271546,21.415978,28.94,78.7,51.4,34.3,23.3,0.240871,0.29008,0.607974
tilburg-template,5size,"[0, 1, 2]",0.267942,28.279245,33.03,78.2,51.0,33.0,21.1,0.302108,0.296692,0.62988
tilburg-template,5size,"[0, 1]",0.302108,28.279245,28.78,76.1,47.7,30.0,18.8,0.267942,0.29079,0.64423


In [14]:
!head -100 ../data/models/tilburg-template/tilburg-template.txt | tail -10

castle is in english . 
eric flint was born in burbank , california . 
macmillan publishers owns farrar , straus , and giroux . 
a glastonbury romance is a notable work by john cowper powys . 
soho press is in united states . 
the secret scripture publisher of faber and faber . 
asian americans live in united states . 
united states is in english . 
weymouth sands was preceded by a glastonbury romance . 
rolando maran manages a.c. chievo verona . 


## Tilb-NMT

<a href="../data/models/tilburg-nmt/report.pdf">report</a>

In [15]:
df.loc[('tilburg-nmt', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
tilburg-nmt,1size,"[0, 1, 2]",0.276343,11.76652,50.33,74.2,56.6,44.2,34.5,0.276675,0.447765,0.429996
tilburg-nmt,1size,"[0, 1]",0.276675,11.76652,46.04,72.2,52.8,39.7,29.7,0.276343,0.432777,0.462297
tilburg-nmt,2size,"[0, 1, 2]",0.313078,19.69341,37.13,67.5,44.4,30.5,20.8,0.305834,0.38422,0.58442
tilburg-nmt,2size,"[0, 1]",0.305834,19.69341,34.33,65.4,41.5,27.7,18.5,0.313078,0.374547,0.605776
tilburg-nmt,3size,"[0, 1, 2]",0.31366,26.632124,31.85,66.4,39.9,25.0,15.5,0.308463,0.345341,0.646517
tilburg-nmt,3size,"[0, 1]",0.308463,26.632124,28.96,63.7,36.6,22.3,13.5,0.31366,0.337008,0.666954
tilburg-nmt,4size,"[0, 1, 2]",0.304591,31.162534,32.92,68.9,41.5,25.8,16.0,0.298091,0.330702,0.622947
tilburg-nmt,4size,"[0, 1]",0.298091,31.162534,30.42,66.7,38.6,23.3,14.3,0.304591,0.323295,0.640823
tilburg-nmt,5size,"[0, 1, 2]",0.315162,36.649057,28.64,67.9,37.9,21.7,12.1,0.313324,0.306137,0.66597
tilburg-nmt,5size,"[0, 1]",0.313324,36.649057,26.35,65.9,35.4,19.8,10.8,0.315162,0.299957,0.675953


In [16]:
!head -100 ../data/models/tilburg-nmt/tilburg-nmt.txt | tail -10

english is the language of castle . 
eric flint was born in burbank , california . 
macmillan publishers is the parent company of farrar , straus , and giroux . 
a glastonbury romance is a notable work by john cowper powys . 
soho press is located in united states . 
th secret scriptures was published by faber and faber . 
asian americans are an ethnic group in united states . 
english is the language of united states . 
weymouth sands was preceded by a glastonbury romance . 
the manager of a.c. chievo verona is rolando maran . 


## Baseline

In [17]:
df.loc[('baseline_sorted', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
baseline_sorted,1size,"[0, 1, 2]",0.354087,9.885463,42.36,65.1,45.2,38.8,33.6,0.345365,0.277388,0.491072
baseline_sorted,1size,"[0, 1]",0.345365,9.885463,38.22,62.9,42.2,34.8,28.8,0.354087,0.268753,0.519211
baseline_sorted,2size,"[0, 1, 2]",0.378686,13.925501,33.83,65.2,42.1,33.2,26.9,0.367284,0.241359,0.575508
baseline_sorted,2size,"[0, 1]",0.367284,13.925501,29.93,62.9,38.9,29.8,23.6,0.378686,0.23573,0.599852
baseline_sorted,3size,"[0, 1, 2]",0.391894,19.044041,30.46,64.3,40.1,29.5,22.2,0.38539,0.222312,0.639728
baseline_sorted,3size,"[0, 1]",0.38539,19.044041,26.6,61.7,36.7,26.3,19.5,0.391894,0.216149,0.661134
baseline_sorted,4size,"[0, 1, 2]",0.389699,24.796143,31.11,64.1,40.8,30.1,23.2,0.373292,0.221627,0.653365
baseline_sorted,4size,"[0, 1]",0.373292,24.796143,27.54,62.0,37.9,27.4,20.6,0.389699,0.217976,0.67376
baseline_sorted,5size,"[0, 1, 2]",0.42093,29.543396,28.88,63.2,38.9,26.7,19.8,0.419083,0.21037,0.669054
baseline_sorted,5size,"[0, 1]",0.419083,29.543396,25.11,60.9,35.9,24.1,17.6,0.42093,0.205097,0.686324


In [18]:
!head -100 ../data/models/baseline_sorted/baseline_sorted.txt | tail -10

the english language is spoken in the republic of ireland .
author was born in burbank , california .
the parent company of wiley - blackwell is macmillan publishers .
a glastonbury romance was written by author .
lahore is in the united states .
the parent company of wiley - blackwell is parentcompany .
asian americans are an ethnic group in the united states .
english is the language of the united states .
a glastonbury romance was preceded by precededby .
the manager of a . c . chievo verona is rolando maran .


## Adapt

<a href="../data/models/adaptcentre/WebNLG_system.pdf">report</a>

In [19]:
df.loc[('adaptcentre', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
adaptcentre,1size,"[0, 1, 2]",0.32142,11.618943,48.13,69.6,51.3,42.2,35.5,0.323791,0.378119,0.442952
adaptcentre,1size,"[0, 1]",0.323791,11.618943,44.21,67.4,47.7,38.1,31.2,0.32142,0.364362,0.481405
adaptcentre,2size,"[0, 1, 2]",0.366955,22.183381,33.77,58.3,37.8,27.9,21.2,0.375872,0.326372,0.727348
adaptcentre,2size,"[0, 1]",0.375872,22.183381,30.55,55.5,34.6,24.7,18.4,0.366955,0.315547,0.754052
adaptcentre,3size,"[0, 1, 2]",0.378001,35.129534,27.78,52.1,32.5,22.2,15.8,0.389602,0.306462,0.935833
adaptcentre,3size,"[0, 1]",0.389602,35.129534,25.25,49.5,29.7,19.9,13.9,0.378001,0.299911,0.957211
adaptcentre,4size,"[0, 1, 2]",0.373533,45.988981,27.56,50.3,32.0,22.2,16.1,0.38409,0.296947,0.982271
adaptcentre,4size,"[0, 1]",0.38409,45.988981,24.88,48.0,29.2,19.7,13.9,0.373533,0.289634,0.997925
adaptcentre,5size,"[0, 1, 2]",0.388933,54.207547,26.35,50.8,31.4,20.8,14.6,0.39812,0.290825,0.99695
adaptcentre,5size,"[0, 1]",0.39812,54.207547,24.35,48.8,29.1,19.0,13.0,0.388933,0.286113,0.999695


In [20]:
!head -100 ../data/models/adaptcentre/adaptcentre.txt | tail -10

the novel castle is the language spoken in the english language .
eric flint was born in burbank , california .
macmillan publishers is the parent company of farrar , straus and giroux .
a glastonbury romance is the notable work of john cowper powys .
soho press is located in the united states .
the novel the secret scriptures , is published by faber and faber .
asian americans are an ethnic group in the u.s .
english is the language of the united states .
a glastonbury romance is the sequel to weymouth sands .
the manager of a.c . chievo verona is rolando maran .


## UIT-VNU

<a href="../data/models/uit-vietnam/Phong-Nguyen-Tran_Dang-Tuan-Nguyen.docx">report</a>

In [21]:
df.loc[('uit-vietnam', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
uit-vietnam,1size,"[0, 1, 2]",0.234653,9.557269,44.65,70.7,52.4,38.1,28.2,0.322885,0.328469,0.62515
uit-vietnam,1size,"[0, 1]",0.322885,9.557269,37.72,68.7,49.2,34.7,24.8,0.234653,0.321464,0.651942
uit-vietnam,2size,"[0, 1, 2]",0.115245,7.252149,16.52,68.6,53.4,38.8,29.1,0.322007,0.156637,0.764153
uit-vietnam,2size,"[0, 1]",0.322007,7.252149,11.43,66.5,50.1,35.6,26.3,0.115245,0.151842,0.791739
uit-vietnam,3size,"[0, 1, 2]",0.065954,5.891192,5.69,69.2,56.9,40.4,29.3,0.30387,0.091948,0.831528
uit-vietnam,3size,"[0, 1]",0.30387,5.891192,2.55,65.7,51.7,35.5,25.0,0.065954,0.090177,0.854926
uit-vietnam,4size,"[0, 1, 2]",0.027495,3.746556,0.31,61.3,57.9,43.1,32.9,0.247059,0.042366,0.866558
uit-vietnam,4size,"[0, 1]",0.247059,3.746556,0.07,60.0,54.9,40.0,30.2,0.027495,0.04158,0.897258
uit-vietnam,5size,"[0, 1, 2]",0.012361,2.532075,0.01,54.5,59.9,42.2,29.3,0.195231,0.021128,0.896699
uit-vietnam,5size,"[0, 1]",0.195231,2.532075,0.0,53.1,56.9,39.9,26.9,0.012361,0.020809,0.921912


In [22]:
!head -100 ../data/models/uit-vietnam/uit-vietnam.txt | tail -10

english language is one of the languages that is spoken in castle ( novel ) .
the birth place of eric flint is burbank , california .
macmillan publishers is the parent company of the farrar , straus and giroux .
null
soho press is located within the country of the united states .
the book the secret scripture was published by faber and faber .
asian americans are part of one of the ethnic groups in the united states .
english language is one of the languages that is spoken in united states .
a glastonbury romance was written prior to weymouth sands .
rolando maran is the manager of the a . c . chievo verona .


# BIU_Chimera_v1

<a href='https://github.com/AmitMY/chimera'>github</a>

In [23]:
df.loc[('BIU_Chimera_v1', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
BIU_Chimera_v1,1size,"[0, 1, 2]",0.247538,9.84141,66.48,90.3,76.1,64.0,54.3,0.248934,0.490127,0.23875
BIU_Chimera_v1,1size,"[0, 1]",0.247538,9.84141,60.57,88.3,71.2,57.7,47.3,0.248934,0.470099,0.27594
BIU_Chimera_v1,2size,"[0, 1, 2]",0.281899,15.876791,52.06,83.2,61.9,47.7,36.9,0.277142,0.416264,0.3966
BIU_Chimera_v1,2size,"[0, 1]",0.281899,15.876791,47.91,80.8,58.1,43.6,33.3,0.277142,0.403977,0.425739
BIU_Chimera_v1,3size,"[0, 1, 2]",0.290848,23.069948,45.2,79.4,54.1,37.9,27.2,0.288378,0.380528,0.502775
BIU_Chimera_v1,3size,"[0, 1]",0.290848,23.069948,40.91,75.8,49.7,33.8,23.6,0.288378,0.370297,0.529209
BIU_Chimera_v1,4size,"[0, 1, 2]",0.295584,31.939394,43.96,76.5,51.7,36.3,26.0,0.29458,0.37623,0.581411
BIU_Chimera_v1,4size,"[0, 1]",0.295584,31.939394,40.38,74.0,47.8,32.6,23.0,0.29458,0.369565,0.595965
BIU_Chimera_v1,5size,"[0, 1, 2]",0.305611,37.932075,39.11,75.8,48.4,31.2,20.4,0.306257,0.358322,0.622888
BIU_Chimera_v1,5size,"[0, 1]",0.305611,37.932075,36.23,73.3,45.1,28.4,18.4,0.306257,0.351618,0.630402


# BIU_Random_0

<a href='https://github.com/AmitMY/chimera'>github</a>

In [10]:
df.loc[('BIU_Random_0', slice(None), slice(None))].unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
BIU_Random_0,1size,"[0, 1, 2]",0.266013,10.110132,57.19,87.8,66.0,51.6,40.8,0.26793,0.449977,0.327587
BIU_Random_0,2size,"[0, 1, 2]",0.276679,17.667622,45.69,75.8,53.0,38.8,27.9,0.280088,0.398281,0.501733
BIU_Random_0,3size,"[0, 1, 2]",0.296883,24.354922,43.17,76.6,51.1,35.7,24.9,0.29811,0.376331,0.551048
BIU_Random_0,4size,"[0, 1, 2]",0.293481,33.341598,41.8,74.2,49.7,34.4,24.1,0.295422,0.376938,0.605781
BIU_Random_0,5size,"[0, 1, 2]",0.305429,38.992453,38.62,74.4,47.4,30.8,20.5,0.305861,0.359049,0.634815
BIU_Random_0,6size,"[0, 1, 2]",0.311606,52.416667,47.35,76.9,56.2,39.6,29.3,0.307931,0.423901,0.56391
BIU_Random_0,7size,"[0, 1, 2]",0.316034,61.47619,40.12,70.7,49.8,33.5,22.0,0.3109,0.403325,0.705197
BIU_Random_0,all-cat,"[0, 1, 2]",0.292987,24.243824,43.56,76.3,51.7,36.1,25.3,0.288223,0.3853,0.556072
BIU_Random_0,new-cat,"[0, 1, 2]",0.267094,20.648709,33.02,73.8,43.4,27.0,16.4,0.259967,0.333524,0.598998
BIU_Random_0,old-cat,"[0, 1, 2]",0.310799,27.542739,49.22,78.0,57.2,42.1,31.2,0.314152,0.429046,0.519728


# All results

## 3 first references

In [7]:
df.loc[(slice(None), 'all-cat', '[0, 1, 2]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
BIU_Chimera_v1,all-cat,"[0, 1, 2]",0.291403,23.131579,47.18,79.2,55.3,39.5,28.6,0.280941,0.391898,0.510261
BIU_nmt,all-cat,"[0, 1, 2]",0.342847,24.14232,46.37,78.4,55.3,38.8,27.5,0.331007,0.391809,0.488673
melbourne,all-cat,"[0, 1, 2]",0.308209,21.827068,45.13,79.9,55.4,38.8,27.4,0.320407,0.376396,0.474403
tilburg-smt,all-cat,"[0, 1, 2]",0.214235,22.660043,44.28,74.4,53.0,37.5,26.8,0.224137,0.380463,0.537692
BIU_Random_0,all-cat,"[0, 1, 2]",0.292987,24.243824,43.56,76.3,51.7,36.1,25.3,0.288223,0.3853,0.556072
BIU_Random_1,all-cat,"[0, 1, 2]",0.290369,24.066595,43.54,76.3,51.7,36.0,25.3,0.285658,0.384991,0.552885
BIU_Random_2,all-cat,"[0, 1, 2]",0.29218,24.255639,43.39,76.1,51.6,35.9,25.1,0.285406,0.384722,0.558334
deepnlg-rnn,all-cat,"[0, 1, 2]",0.291982,19.719656,42.8,76.1,54.8,40.9,30.9,0.283107,0.33041,0.50539
deepnlg-transformer,all-cat,"[0, 1, 2]",0.307802,18.020408,42.52,80.3,58.2,43.8,33.1,0.28966,0.324066,0.499132
deepnlg-major,all-cat,"[0, 1, 2]",0.263954,19.176692,40.16,77.9,54.9,38.9,27.6,0.220395,0.332648,0.583826


In [8]:
df.loc[(slice(None), 'old-cat', '[0, 1, 2]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
delete_4_from_100,old-cat,"[0, 1, 2]",0.340968,23.92173,97.24,100.0,98.7,97.6,96.7,0.333805,0.718396,0.017056
delete_4_from_200,old-cat,"[0, 1, 2]",0.341291,23.509784,94.64,100.0,97.5,95.4,93.6,0.334171,0.668297,0.033941
delete100,old-cat,"[0, 1, 2]",0.3404,22.055613,94.16,100.0,100.0,100.0,100.0,0.300546,0.915135,0.089798
shuffle100,old-cat,"[0, 1, 2]",0.340359,24.333677,93.36,100.0,91.8,91.0,91.0,0.333051,0.633581,0.064044
repeat100,old-cat,"[0, 1, 2]",0.341056,26.850669,90.23,91.2,90.4,89.9,89.5,0.334998,0.783842,0.10421
delete_4_from_400,old-cat,"[0, 1, 2]",0.341248,22.685891,89.26,100.0,94.8,90.6,87.1,0.332637,0.600745,0.067881
delete200,old-cat,"[0, 1, 2]",0.33798,19.727085,87.63,100.0,100.0,100.0,100.0,0.266715,0.82625,0.181728
shuffle200,old-cat,"[0, 1, 2]",0.338878,24.333677,85.82,100.0,82.6,81.0,81.0,0.331023,0.574582,0.137127
delete_4_from_600,old-cat,"[0, 1, 2]",0.342425,21.861998,83.3,100.0,91.8,84.9,79.4,0.333319,0.550446,0.101907
repeat200,old-cat,"[0, 1, 2]",0.341342,29.190525,82.77,84.4,83.0,82.2,81.5,0.334998,0.742711,0.201043


In [11]:
import math
def truncate(number):
    stepper = 10.0 ** 3
    return math.trunc(stepper * number) / stepper

print(df.loc[(slice(None), 'old-cat', '[0, 1, 2]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)\
[[('value', 'bleu'), ('value', 'meteor'), ('value', 'ter')]].reset_index(level=[1, 2], drop=True).to_latex(formatters={('value', 'meteor'): lambda x: '{:.3}'.format(truncate(x)),
                                                                                                                       ('value', 'ter'): lambda x: '{:.3}'.format(truncate(x))}))

\begin{tabular}{lrrr}
\toprule
{} & \multicolumn{3}{l}{value} \\
metric &   bleu & meteor &   ter \\
\midrule
adaptcentre            &  60.59 &  0.445 & 0.378 \\
deepnlg-e2ernn         &  58.36 &  0.415 & 0.397 \\
deepnlg-transformer    &  56.48 &  0.415 &  0.39 \\
deepnlg-rnn            &  56.22 &  0.421 & 0.391 \\
gcn                    &  55.35 &   0.39 & 0.397 \\
melbourne              &  54.52 &  0.414 & 0.402 \\
tilburg-smt            &  54.29 &  0.421 &  0.47 \\
BIU\_nmt                &  53.63 &  0.417 & 0.434 \\
abe                    &  53.22 &  0.419 & 0.424 \\
BIU\_Chimera\_v1         &  53.20 &  0.437 & 0.466 \\
baseline\_sorted        &  52.39 &  0.377 & 0.448 \\
deepnlg-e2etransformer &  51.95 &  0.391 &  0.45 \\
pkuwriter              &  51.18 &  0.377 &  0.45 \\
BIU\_Random\_1           &  49.32 &  0.427 & 0.514 \\
BIU\_Random\_0           &  49.22 &  0.429 & 0.519 \\
BIU\_Random\_2           &  48.83 &  0.427 & 0.527 \\
deepnlg-major          &  47.19 &  0.419 & 0.503

In [8]:
df.loc[(slice(None), 'new-cat', '[0, 1, 2]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
BIU_nmt,new-cat,"[0, 1, 2]",0.348138,23.324355,37.55,72.8,47.1,30.2,19.2,0.332109,0.362192,0.552363
upf-forge,new-cat,"[0, 1, 2]",0.252808,23.294052,35.7,74.2,45.7,28.3,16.9,0.261672,0.373621,0.559565
BIU_Chimera_v1,new-cat,"[0, 1, 2]",0.262603,19.570146,34.41,76.9,46.5,29.5,18.6,0.249781,0.338155,0.561982
melbourne,new-cat,"[0, 1, 2]",0.306252,20.819304,33.27,72.2,44.0,27.1,16.2,0.324205,0.332213,0.559212
BIU_Random_0,new-cat,"[0, 1, 2]",0.267094,20.648709,33.02,73.8,43.4,27.0,16.4,0.259967,0.333524,0.598998
BIU_Random_2,new-cat,"[0, 1, 2]",0.266092,20.574635,32.87,74.0,43.6,27.1,16.5,0.259905,0.333544,0.595221
BIU_Random_1,new-cat,"[0, 1, 2]",0.265066,20.523008,32.48,73.9,43.4,26.7,16.2,0.258994,0.334254,0.598041
tilburg-smt,new-cat,"[0, 1, 2]",0.125485,20.683502,29.88,65.0,40.6,24.5,14.9,0.122253,0.333329,0.617078
pkuwriter,new-cat,"[0, 1, 2]",0.31945,18.693603,25.36,63.3,35.7,22.0,13.5,0.318204,0.248942,0.67459
deepnlg-rnn,new-cat,"[0, 1, 2]",0.209844,16.189675,25.12,60.2,37.7,26.3,18.4,0.220914,0.221879,0.640042


## 2 first references

In [7]:
df.loc[(slice(None), 'all-cat', '[0, 1]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
BIU_Chimera_v1,all-cat,"[0, 1]",0.291403,23.131579,43.43,76.5,51.3,35.7,25.4,0.280941,0.382668,0.530226
BIU_nmt,all-cat,"[0, 1]",0.342847,24.14232,42.42,75.7,51.1,34.8,24.1,0.331007,0.382362,0.509999
melbourne,all-cat,"[0, 1]",0.320407,21.827068,40.86,77.3,51.5,35.0,24.0,0.308209,0.366541,0.49715
tilburg-smt,all-cat,"[0, 1]",0.224137,22.660043,40.31,72.1,49.5,33.8,23.5,0.214235,0.37097,0.55813
deepnlg-rnn,all-cat,"[0, 1]",0.291982,19.719656,37.98,73.8,50.9,36.8,27.1,0.283107,0.320619,0.530915
deepnlg-transformer,all-cat,"[0, 1]",0.307802,18.020408,37.15,78.0,54.3,39.7,29.3,0.28966,0.315176,0.525031
pkuwriter,all-cat,"[0, 1]",0.327627,20.549409,35.72,71.5,46.5,32.5,23.2,0.323077,0.31066,0.572174
upf-forge,all-cat,"[0, 1]",0.289114,25.367347,35.23,72.5,44.9,27.8,17.0,0.278203,0.382659,0.578311
deepnlg-major,all-cat,"[0, 1]",0.263954,19.176692,34.98,75.5,51.0,35.1,24.3,0.220395,0.324435,0.607571
tilburg-nmt,all-cat,"[0, 1]",0.304221,24.492481,31.88,66.6,39.7,24.9,15.7,0.303469,0.340662,0.62789


In [8]:
df.loc[(slice(None), 'old-cat', '[0, 1]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
adaptcentre,old-cat,"[0, 1]",0.348083,24.530381,55.96,83.3,63.4,48.7,38.1,0.339538,0.43482,0.403129
deepnlg-e2ernn,old-cat,"[0, 1]",0.348508,23.324408,53.26,81.8,61.6,47.3,36.9,0.341144,0.404637,0.423236
deepnlg-transformer,old-cat,"[0, 1]",0.339731,22.323378,51.62,84.9,62.5,46.8,35.4,0.336046,0.406261,0.411886
deepnlg-rnn,old-cat,"[0, 1]",0.345131,22.958805,51.41,84.0,61.2,45.3,34.1,0.340176,0.410603,0.415287
tilburg-smt,old-cat,"[0, 1]",0.303148,24.473738,49.95,79.4,58.2,42.7,31.5,0.295673,0.41168,0.492699
melbourne,old-cat,"[0, 1]",0.317219,22.751802,49.73,83.8,60.6,44.0,32.2,0.310004,0.405084,0.425362
gcn,old-cat,"[0, 1]",0.323632,20.028836,49.67,86.0,65.6,51.2,40.0,0.317817,0.38039,0.421196
BIU_Chimera_v1,old-cat,"[0, 1]",0.310993,26.399588,49.3,78.4,57.1,42.0,31.4,0.309533,0.42901,0.486322
BIU_nmt,old-cat,"[0, 1]",0.338298,24.892894,49.25,80.7,57.8,41.4,30.4,0.329996,0.408112,0.456777
baseline_sorted,old-cat,"[0, 1]",0.353545,22.776519,47.88,78.9,56.9,42.7,32.6,0.343509,0.369529,0.469063


In [9]:
df.loc[(slice(None), 'new-cat', '[0, 1]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
BIU_nmt,new-cat,"[0, 1]",0.348138,23.324355,34.11,69.9,43.4,26.9,16.6,0.332109,0.35254,0.572651
upf-forge,new-cat,"[0, 1]",0.261672,23.294052,32.11,70.9,41.8,24.9,14.4,0.252808,0.363954,0.577105
BIU_Chimera_v1,new-cat,"[0, 1]",0.262603,19.570146,30.54,73.8,42.7,26.1,16.1,0.249781,0.328209,0.581909
melbourne,new-cat,"[0, 1]",0.324205,20.819304,29.75,69.6,40.7,24.2,14.1,0.306252,0.322382,0.581659
tilburg-smt,new-cat,"[0, 1]",0.122253,20.683502,27.03,62.7,38.0,22.1,13.0,0.125485,0.324028,0.635156
tilburg-nmt,new-cat,"[0, 1]",0.286822,24.894501,22.92,57.1,30.4,17.1,9.3,0.286198,0.302276,0.736895
pkuwriter,new-cat,"[0, 1]",0.318204,18.693603,21.94,60.6,32.8,19.7,11.6,0.31945,0.242964,0.695158
deepnlg-rnn,new-cat,"[0, 1]",0.209844,16.189675,21.09,58.0,34.6,23.2,15.6,0.220914,0.213877,0.667034
deepnlg-transformer,new-cat,"[0, 1]",0.249537,13.331089,18.29,65.4,38.8,26.0,16.9,0.23911,0.204699,0.658226
deepnlg-major,new-cat,"[0, 1]",0.231215,11.620651,16.85,75.4,48.9,32.0,20.6,0.158583,0.213673,0.706568


# with template

In [10]:
df.loc[(slice(None), 'with-template', '[0, 1, 2]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,avg_n_stop_words,avg_n_tokens,bleu,bleu_1,bleu_2,bleu_3,bleu_4,macro_avg_n_stop_words,meteor,ter
Unnamed: 0_level_2,subset,references,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
adaptcentre,with-template,"[0, 1, 2]",0.33999,19.0,61.46,85.5,68.0,54.8,44.8,0.337027,0.444883,0.353064
deepnlg-rnn,with-template,"[0, 1, 2]",0.330202,18.094862,60.34,88.4,69.2,54.5,43.6,0.32195,0.449125,0.336007
deepnlg-transformer,with-template,"[0, 1, 2]",0.328844,18.001318,60.31,88.2,69.3,54.8,43.5,0.321,0.445574,0.338919
melbourne,with-template,"[0, 1, 2]",0.304047,17.710145,59.15,88.9,69.4,54.1,42.4,0.294784,0.442866,0.343148
BIU_nmt,with-template,"[0, 1, 2]",0.328694,19.216074,58.47,86.1,66.5,51.2,39.9,0.316553,0.443317,0.367139
BIU_Chimera_v1,with-template,"[0, 1, 2]",0.310513,19.687747,58.06,84.2,65.6,51.0,40.3,0.303128,0.455129,0.39106
deepnlg-e2ernn,with-template,"[0, 1, 2]",0.343346,18.860343,56.89,80.9,63.1,50.2,40.9,0.339616,0.408698,0.409088
tilburg-smt,with-template,"[0, 1, 2]",0.294589,17.750988,56.04,85.2,66.0,50.8,39.4,0.281961,0.427105,0.412763
deepnlg-major,with-template,"[0, 1, 2]",0.278232,18.538867,55.13,84.8,63.4,47.8,36.6,0.273485,0.44178,0.375043
pkuwriter,with-template,"[0, 1, 2]",0.321128,18.027668,53.46,81.0,60.9,47.3,37.2,0.317959,0.389511,0.432038


# Paper

## BLEU

In [4]:
df.loc[(slice(None), 'all-cat', '[0, 1, 2]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)\
[[('value', 'bleu'), ('value', 'meteor'), ('value', 'ter')]]\
.reset_index(level=(1, 2), drop=True)

Unnamed: 0_level_0,value,value,value
metric,bleu,meteor,ter
BIU_Chimera_v1,47.18,0.391898,0.510261
BIU_nmt,46.37,0.391809,0.488673
melbourne,45.13,0.376396,0.474403
tilburg-smt,44.28,0.380463,0.537692
BIU_Random_0,43.56,0.3853,0.556072
BIU_Random_1,43.54,0.384991,0.552885
BIU_Random_2,43.39,0.384722,0.558334
deepnlg-rnn,42.8,0.33041,0.50539
deepnlg-transformer,42.52,0.324066,0.499132
deepnlg-major,40.16,0.332648,0.583826


In [5]:
df.loc[(slice(None), 'old-cat', '[0, 1, 2]'), :].unstack().sort_values(('value', 'bleu'), ascending=False)\
[[('value', 'bleu'), ('value', 'meteor'), ('value', 'ter')]]\
.reset_index(level=(1, 2), drop=True)

Unnamed: 0_level_0,value,value,value
metric,bleu,meteor,ter
delete_4_from_100,97.24,0.718396,0.017056
delete_4_from_200,94.64,0.668297,0.033941
delete100,94.16,0.915135,0.089798
shuffle100,93.36,0.633581,0.064044
repeat100,90.23,0.783842,0.10421
delete_4_from_400,89.26,0.600745,0.067881
delete200,87.63,0.82625,0.181728
shuffle200,85.82,0.574582,0.137127
delete_4_from_600,83.3,0.550446,0.101907
repeat200,82.77,0.742711,0.201043
