In [1]:
import os
import sys

import pandas as pd

# Put ../script at the front of the module search path before importing the
# local `evaluation` module (presumably located there -- confirm).
# `sys` is imported explicitly: `os.sys` only works because the os module
# happens to import sys internally and is not a documented attribute.
sys.path.insert(0, '../script')

from evaluation import evaluate_texts, preprocess_to_evaluate, EVALUATION_SETS

# BLEU

https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/multi-bleu.perl

# METEOR

http://www.cs.cmu.edu/~alavie/METEOR/

Download meteor-1.5 and unpack into **../evaluation/webnlg2017/**

The jar path must be **../evaluation/webnlg2017/meteor-1.5/meteor-1.5.jar**

# TER

http://www.cs.umd.edu/~snover/tercom/

Download tercom-0.7.25 and unpack into **../evaluation/webnlg2017/**

The jar path must be **../evaluation/webnlg2017/tercom-0.7.25/tercom-0.7.25.jar**

<img src="../img/competition_results.jpg" />

# Reference texts

In [4]:
import os
from webnlg_corpus import webnlg

reference_path = '../model/reference_1.txt'

# Generate the reference file only when it is missing: one line per test
# entry, holding the first item of that entry's `lexes`.
if not os.path.isfile(reference_path):
    corpus = webnlg.load('webnlg_challenge_2017')
    test = corpus.subset(datasets=['test'])

    with open(reference_path, 'w', encoding='utf-8') as f:
        for e in test:
            first_lex = list(e.lexes)[0]
            f.write(first_lex)
            f.write('\n')

# Presumably a sanity check of the evaluation pipeline: the reference texts
# are preprocessed and then scored -- TODO confirm against `evaluation`.
preprocess_to_evaluate(reference_path, 'ref-1')
evaluate_texts('../model/ref-1_all-cat.txt')

{'bleu': 100.0, 'meteor': 1.0, 'ter': 0.0}

In [3]:
# Accumulator filled by the per-team cells below:
# results[team][eval_set] holds whatever evaluate_texts returns for that pair.
results = {}

## Melbourne

<a href="../data/webnlg2017/submissions/melbourne/WebNLG Challenge - Bayu Distiawan T.pdf">report</a>

In [4]:
team = 'melbourne'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/GKB_Unimelb_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,45.13,54.52,33.27,63.81,51.25,43.42,42.14,36.72,52.16,48.65
meteor,0.376396,0.414642,0.332213,0.471922,0.421393,0.376657,0.358887,0.327765,0.378531,0.355614
ter,0.474403,0.402598,0.559212,0.27412,0.389833,0.470233,0.52013,0.579703,0.469173,0.467217


In [5]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/GKB_Unimelb_all-cat.txt | tail -10

castle is english language .
eric flint was born in burbank , california .
macmillan publishers is the parent company of farrar , straus and giroux .
john cowper powys was a glastonbury romance .
soho press is in united states .
the secret scripture was published by faber and faber .
asian americans are an ethnic group in united states .
english language is spoken in united states .
weymouth sands was preceded by a glastonbury romance .
the manager of a . c . chievo verona is rolando maran .


## Tilb-SMT

<a href="../data/webnlg2017/submissions/tilburg/report.pdf">report</a>

In [6]:
team = 'tilb-smt'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Tilburg_University-2_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,44.28,54.29,29.88,52.16,48.16,43.42,42.44,38.68,49.71,44.44
meteor,0.380463,0.421025,0.333329,0.412354,0.399773,0.380756,0.372728,0.361045,0.404062,0.390495
ter,0.537692,0.47048,0.617078,0.36707,0.464433,0.535855,0.573584,0.615279,0.565714,0.636231


In [7]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Tilburg_University-2_all-cat.txt | tail -10

novel castle is english language
. eric flint was born in burbank , california .
macmillan publishers is the parent company of farrar , straus and giroux
john cowper powys notablework a glastonbury romance
soho press , united states .
, published by faber and faber
asian americans are an ethnic group .
english is the language of the united states .
weymouth sands was preceded by a glastonbury romance
the manager of a . c . chievo verona is rolando maran


## PKUWriter

<a href="../data/webnlg2017/submissions/pkuwriter/PKUWriter_system.pdf">report</a>

In [8]:
team = 'pkuwriter'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/PKUWriter_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,39.88,51.23,25.36,53.15,45.34,34.95,36.84,34.17,52.89,52.86
meteor,0.317421,0.377294,0.248942,0.415371,0.35551,0.301366,0.292635,0.274453,0.389259,0.381921
ter,0.553001,0.450055,0.67459,0.371388,0.481268,0.601907,0.589416,0.623402,0.425865,0.466246


In [9]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/PKUWriter_all-cat.txt | tail -10

the novel into battle is written by the english language .
john cowper powys was born in burbank , california .
the parent company of the macmillan publishers press is the macmillan publishers company of the macmillan publishers .
the novel owen glendower is a notable work by john cowper powys .
soho press is located in united states .
the polish academy for writtenwork is referencenumber _ in _ the _ national _ register _ of _ historic _ places .
asian americans are an ethnic group in the united states .
english is the language spoken in the united states .
the novel into battle is preceded by a glastonbury romance .
rolando maran manages the a . c . chievo verona .


## UPF-FORGe

<a href="../data/webnlg2017/submissions/upf/WebNLG_V0.2.pdf">report</a>

In [10]:
team = 'upf-forge'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/UPF-TALN_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,38.65,40.88,35.7,60.29,43.24,36.84,35.54,32.66,38.33,37.84
meteor,0.391077,0.405867,0.373621,0.469279,0.4097,0.383582,0.376883,0.367748,0.402804,0.399411
ter,0.559327,0.559126,0.559565,0.270624,0.467734,0.567642,0.622324,0.665456,0.598195,0.654687


In [11]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/UPF-TALN_all-cat.txt | tail -10

the english language is spoken in castle ( novel ) .
eric flint was born in burbank ( california ) .
the parent company of farrar , straus and giroux is macmillan publishers .
a glastonbury romance wrote john cowper powys .
soho press is published in the united states .
the publisher of the secret scripture is faber and faber .
a ethnic group of the united states are asian americans .
the english language is spoken in the united states .
weymouth sands follows a glastonbury romance .
rolando maran is the manager of a . c . chievo verona .


## Tilb-Pipeline

<a href="../data/webnlg2017/submissions/tilburg/report.pdf">report</a>

In [12]:
team = 'tilb-pipeline'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Tilburg_University-3_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,35.29,44.34,20.65,44.81,34.84,33.91,34.05,33.03,39.83,40.68
meteor,0.30834,0.382392,0.216254,0.323192,0.318193,0.307431,0.29762,0.296692,0.358336,0.387253
ter,0.566023,0.488559,0.657518,0.475854,0.515267,0.559991,0.57812,0.62988,0.571128,0.604177


In [13]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Tilburg_University-3_all-cat.txt | tail -10

castle is in english .
eric flint was born in burbank , california .
macmillan publishers owns farrar , straus , and giroux .
a glastonbury romance is a notable work by john cowper powys .
soho press is in united states .
the secret scripture publisher of faber and faber .
asian americans live in united states .
united states is in english .
weymouth sands was preceded by a glastonbury romance .
rolando maran manages a . c . chievo verona .


## Tilb-NMT

<a href="../data/webnlg2017/submissions/tilburg/report.pdf">report</a>

In [14]:
team = 'tilb-nmt'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Tilburg_University-1_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,34.6,43.28,25.12,50.33,37.13,31.85,32.92,28.64,39.88,34.26
meteor,0.348784,0.382624,0.310575,0.447765,0.38422,0.345341,0.330702,0.306137,0.346942,0.338627
ter,0.609501,0.514568,0.721628,0.429996,0.58442,0.646517,0.622947,0.66597,0.585564,0.616804


In [15]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Tilburg_University-1_all-cat.txt | tail -10

english is the language of castle .
eric flint was born in burbank , california .
macmillan publishers is the parent company of farrar , straus , and giroux .
a glastonbury romance is a notable work by john cowper powys .
soho press is located in united states .
th secret scriptures was published by faber and faber .
asian americans are an ethnic group in united states .
english is the language of united states .
weymouth sands was preceded by a glastonbury romance .
the manager of a . c . chievo verona is rolando maran .


## Baseline

In [16]:
team = 'baseline'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Baseline_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,33.24,52.39,6.13,42.36,33.83,30.46,31.11,28.88,53.25,53.81
meteor,0.235436,0.37772,0.075353,0.277388,0.241359,0.222312,0.221627,0.21037,0.368557,0.378776
ter,0.61308,0.448648,0.807292,0.491072,0.575508,0.639728,0.653365,0.669054,0.426767,0.402137


In [17]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/Baseline_all-cat.txt | tail -10

the english language is spoken in the republic of ireland .
author was born in burbank , california .
the parent company of wiley - blackwell is macmillan publishers .
a glastonbury romance was written by author .
lahore is in the united states .
the parent company of wiley - blackwell is parentcompany .
asian americans are an ethnic group in the united states .
english is the language of the united states .
a glastonbury romance was preceded by precededby .
the manager of a . c . chievo verona is rolando maran .


## Adapt

<a href="../data/webnlg2017/submissions/adaptCenter/WebNLG_system.pdf">report</a>

In [18]:
team = 'adapt'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/ADAPT_Centre_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,31.06,60.59,10.53,48.13,33.77,27.78,27.56,26.35,62.03,62.96
meteor,0.31598,0.445389,0.192333,0.378119,0.326372,0.306462,0.296947,0.290825,0.432208,0.435427
ter,0.849266,0.378763,1.404981,0.442952,0.727348,0.935833,0.982271,0.99695,0.370827,0.372997


In [19]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/ADAPT_Centre_all-cat.txt | tail -10

the novel castle is the language spoken in the english language .
eric flint was born in burbank , california .
macmillan publishers is the parent company of farrar , straus and giroux .
a glastonbury romance is the notable work of john cowper powys .
soho press is located in the united states .
the novel the secret scriptures , is published by faber and faber .
asian americans are an ethnic group in the u . s .
english is the language of the united states .
a glastonbury romance is the sequel to weymouth sands .
the manager of a . c . chievo verona is rolando maran .


## UIT-VNU

<a href="../data/webnlg2017/submissions/uit-danglt-clnlp/Phong-Nguyen-Tran_Dang-Tuan-Nguyen.docx">report</a>

In [20]:
team = 'uit-vnu'

# Score this team's submission on every evaluation set. `team_scores` is the
# same dict object as results[team], so entries land in the shared accumulator.
team_scores = results[team] = {}
for eval_set in EVALUATION_SETS:
    submission_path = f'../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/UIT-DANGNT-CLNLP_{eval_set}.txt'
    team_scores[eval_set] = evaluate_texts(submission_path, eval_set)

# One column per evaluation set, one row per metric.
df = pd.DataFrame(team_scores)
df

Unnamed: 0,all-cat,old-cat,new-cat,1size,2size,3size,4size,5size,6size,7size
bleu,7.07,19.87,0.11,44.65,16.52,5.69,0.31,0.01,0.0,0.0
meteor,0.099207,0.153499,0.033486,0.328469,0.156637,0.091948,0.042366,0.021128,0.0,0.0
ter,0.827492,0.785071,0.877597,0.62515,0.764153,0.831528,0.866558,0.896699,0.929323,0.929577


In [21]:
# Peek at the submission: the last 10 of its first 100 lines.
!head -100 ../evaluation/webnlg2017/webnlg-automatic-evaluation/teams/UIT-DANGNT-CLNLP_all-cat.txt | tail -10

english language is one of the languages that is spoken in castle ( novel ) .
the birth place of eric flint is burbank , california .
macmillan publishers is the parent company of the farrar , straus and giroux .
null
soho press is located within the country of the united states .
the book the secret scripture was published by faber and faber .
asian americans are part of one of the ethnic groups in the united states .
english language is one of the languages that is spoken in united states .
a glastonbury romance was written prior to weymouth sands .
rolando maran is the manager of the a . c . chievo verona .


# All results

In [40]:
# One frame per team: the scores are transposed so evaluation sets become
# rows, then wrapped in a single-key concat to add an outer 'team' index level.
dfs = [
    pd.concat([pd.DataFrame(team_scores).T], keys=[team_name], names=['team'])
    for team_name, team_scores in results.items()
]

In [43]:
# Stack the per-team frames into one table; rows are indexed by
# (team, evaluation set) and columns are the metrics.
df = pd.concat(dfs)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,bleu,meteor,ter
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
melbourne,all-cat,45.13,0.376396,0.474403
melbourne,old-cat,54.52,0.414642,0.402598
melbourne,new-cat,33.27,0.332213,0.559212
melbourne,1size,63.81,0.471922,0.274120
melbourne,2size,51.25,0.421393,0.389833
melbourne,3size,43.42,0.376657,0.470233
melbourne,4size,42.14,0.358887,0.520130
melbourne,5size,36.72,0.327765,0.579703
melbourne,6size,52.16,0.378531,0.469173
melbourne,7size,48.65,0.355614,0.467217


In [44]:
# Save the combined table next to the notebook; to_csv writes the index by
# default, so the row labels are kept as leading CSV columns.
df.to_csv('all_evaluation_results.csv')