In [1]:
%matplotlib inline

import spacy
from spacy import displacy
import networkx as nx

%run ../script/webnlg.py

train_dev = WebNLGCorpus.load(['train', 'dev'])

# Makes graphs from dependency trees

In [2]:
def make_graph(doc):
    
    g = nx.DiGraph()
    
    for token in doc:

        if token.dep_ != 'ROOT':

            g.add_edge(token.head, token, dep=token.dep_)
            g.nodes[token.head]['token'] = token.head
            g.nodes[token]['token'] = token
            
    return g

# Calculate graph distances between dependency trees, using word embedding

In [3]:
from itertools import islice
import numpy as np
from networkx.algorithms import similarity

def node_we_distance(x, y):
    
    x, y = x['token'], y['token']
    
    sim = x.similarity(y)
    
    return 1 - sim


def edge_dep_distance(x, y):
    
    return x['dep'] == y['dep']


def calculate_distance(hypothesis_text, g, doc=None, n=10):
    
    hypothesis_doc = nlp(hypothesis_text)
    hypothesis_g = make_graph(hypothesis_doc)    
    
    print(f'doc 1: "{hypothesis_doc}"')
    if doc is not None:
        print(f'doc 2: "{doc}"')

    sim = list(islice(similarity.optimize_graph_edit_distance(hypothesis_g, g, 
                                                              node_subst_cost=node_we_distance,
                                                              edge_subst_cost=edge_dep_distance), 0, n))[-1]

    print(f'Edit distance = {sim}')

    print("\n")
    
    return sim
    
    
def calculate_distances(hypothesis_text, gs, docs, n=10):
    
    sims = [calculate_distance(hypothesis_text, g, doc, n) for g, doc in zip(gs, docs)]
    
    print(f'Mean: {np.mean(sims)}')
    print(f'Min: {min(sims)}')
    print(f'Std: {np.std(sims)}')

# Example

In [4]:
nlp = spacy.load('en_core_web_lg')

In [5]:
# sample
e = train_dev.sample(idx='5_40')

e

Triple info: {'category': 'Food', 'eid': 'Id41', 'idx': '5_40', 'ntriples': 1}

	Modified triples:

Asam_pedas | alternativeName | "Asam padeh"


	Lexicalizations:

The alternative name for asam pedas is asam padeh.
Asam padeh is also known as Asam pedas.
An alternative name for Asam pedas is Asam padeh.

In [6]:
# generate docs for the lexicalizations

docs = [nlp(lex) for lex in e.lexes()]
gs = [make_graph(doc) for doc in docs]

# Calculating graph similarities between dependencies trees and word embeddings

In [13]:
N = 5

In [14]:
%%time
text = "I call Asam pedas as Asam padeh."
calculate_distances(text, gs, docs, n=N)

doc 1: "I call Asam pedas as Asam padeh."
doc 2: "The alternative name for asam pedas is asam padeh."
Edit distance = 14.949914246797562


doc 1: "I call Asam pedas as Asam padeh."
doc 2: "Asam padeh is also known as Asam pedas."
Edit distance = 13.249835312366486


doc 1: "I call Asam pedas as Asam padeh."
doc 2: "An alternative name for Asam pedas is Asam padeh."
Edit distance = 14.96083489060402


Mean: 14.386861483256022
Min: 13.249835312366486
Std: 0.8040112769031199
CPU times: user 3.98 s, sys: 562 ms, total: 4.55 s
Wall time: 3.79 s


In [15]:
%%time
text = "The New York Times is a good online newspaper."
calculate_distances(text, gs, docs, n=N)

doc 1: "The New York Times is a good online newspaper."
doc 2: "The alternative name for asam pedas is asam padeh."
Edit distance = 18.768319211900234


doc 1: "The New York Times is a good online newspaper."
doc 2: "Asam padeh is also known as Asam pedas."
Edit distance = 20.114024467766285


doc 1: "The New York Times is a good online newspaper."
doc 2: "An alternative name for Asam pedas is Asam padeh."
Edit distance = 19.751934356987476


Mean: 19.54475934555133
Min: 18.768319211900234
Std: 0.5685782109431989
CPU times: user 6.81 s, sys: 3.47 s, total: 10.3 s
Wall time: 9.62 s


In [16]:
%%time
text = e.lexes()[0]
calculate_distances(text, gs, docs, n=N)

doc 1: "The alternative name for asam pedas is asam padeh."
doc 2: "The alternative name for asam pedas is asam padeh."
Edit distance = 8.372375667095184


doc 1: "The alternative name for asam pedas is asam padeh."
doc 2: "Asam padeh is also known as Asam pedas."
Edit distance = 9.247735172510147


doc 1: "The alternative name for asam pedas is asam padeh."
doc 2: "An alternative name for Asam pedas is Asam padeh."
Edit distance = 8.302152782678604


Mean: 8.640754540761312
Min: 8.302152782678604
Std: 0.4301565055206515
CPU times: user 3min 12s, sys: 609 ms, total: 3min 12s
Wall time: 3min 15s


In [17]:
%%time
text = e.lexes()[1]
calculate_distances(text, gs, docs, n=N)

doc 1: "Asam padeh is also known as Asam pedas."
doc 2: "The alternative name for asam pedas is asam padeh."
Edit distance = 12.0442223995924


doc 1: "Asam padeh is also known as Asam pedas."
doc 2: "Asam padeh is also known as Asam pedas."
Edit distance = 6.690341472625732


doc 1: "Asam padeh is also known as Asam pedas."
doc 2: "An alternative name for Asam pedas is Asam padeh."
Edit distance = 11.975278481841087


Mean: 10.23661411801974
Min: 6.690341472625732
Std: 2.5077513928552384
CPU times: user 2.78 s, sys: 391 ms, total: 3.17 s
Wall time: 2.57 s


In [18]:
%%time
text = e.lexes()[2]
calculate_distances(text, gs, docs, n=N)

doc 1: "An alternative name for Asam pedas is Asam padeh."
doc 2: "The alternative name for asam pedas is asam padeh."
Edit distance = 8.302152782678604


doc 1: "An alternative name for Asam pedas is Asam padeh."
doc 2: "Asam padeh is also known as Asam pedas."
Edit distance = 9.164589792490005


doc 1: "An alternative name for Asam pedas is Asam padeh."
doc 2: "An alternative name for Asam pedas is Asam padeh."
Edit distance = 8.23193085193634


Mean: 8.56622447570165
Min: 8.23193085193634
Std: 0.4240782704019897
CPU times: user 3min 18s, sys: 562 ms, total: 3min 18s
Wall time: 3min 26s


# Experiment

I'll apply that metric over the texts generated by one of the competitors, and the reference texts. 

Then, I'll analyze the correlation of the calculated values and the scores given by humans.

## Loads Melbourn texts

In [5]:
import pandas as pd

sample_ids = pd.read_csv('../data/webnlg2017/webnlg-human-evaluation-master/sample-ids.txt', header=None)
sample_ids.columns = ['eid']
sample_ids['eid'] = 'Id' + sample_ids.eid.astype('str')

sample_ids.head()

Unnamed: 0,eid
0,Id18
1,Id27
2,Id37
3,Id40
4,Id41


In [6]:
texts_by_melbourn = pd.read_csv('../data/webnlg2017/submissions/melbourne/final_result.txt', sep='&&&&', engine='python', header=None)

texts_by_melbourn.head()

Unnamed: 0,0
0,abilene regional airport serves the city of ab...
1,adolfo suárez madrid–barajas airport is locate...
2,18l/36r is the runway name of adolfo suárez ma...
3,the icao location identifier of afonso pena in...
4,afonso pena international airport serves the c...


In [7]:
df = pd.DataFrame({'eid': sample_ids.eid, 'text': texts_by_melbourn[0]})

df.head()

Unnamed: 0,eid,text
0,Id18,abilene regional airport serves the city of ab...
1,Id27,adolfo suárez madrid–barajas airport is locate...
2,Id37,18l/36r is the runway name of adolfo suárez ma...
3,Id40,the icao location identifier of afonso pena in...
4,Id41,afonso pena international airport serves the c...


## Loads reference texts

In [8]:
test_with_lex = WebNLGCorpus.load('test_with_lex')

## Let's do everythin in a loop 

In [9]:
test_with_lex.edf.head()

Unnamed: 0,category,content,eid,idx,ntriples,size
0,Airport,"b'<entry category=""Airport"" eid=""Id1"" size=""1""...",Id1,0_0,1,1
1,Airport,"b'<entry category=""Airport"" eid=""Id2"" size=""1""...",Id2,0_1,1,1
2,Airport,"b'<entry category=""Airport"" eid=""Id3"" size=""1""...",Id3,0_2,1,1
3,Airport,"b'<entry category=""Airport"" eid=""Id4"" size=""1""...",Id4,0_3,1,1
4,Airport,"b'<entry category=""Airport"" eid=""Id5"" size=""1""...",Id5,0_4,1,1


In [10]:
references = pd.merge(df, pd.merge(test_with_lex.edf, test_with_lex.ldf))

references.head()

Unnamed: 0,eid,text,category,content,idx,ntriples,size,comment,lid,ltext
0,Id18,abilene regional airport serves the city of ab...,Airport,"b'<entry category=""Airport"" eid=""Id18"" size=""1...",0_17,1,1,good,Id1,The Antwerp International Airport is operated ...
1,Id18,abilene regional airport serves the city of ab...,Airport,"b'<entry category=""Airport"" eid=""Id18"" size=""1...",0_17,1,1,good,Id2,The operating organisation of Antwerp Internat...
2,Id18,abilene regional airport serves the city of ab...,Airport,"b'<entry category=""Airport"" eid=""Id18"" size=""1...",0_17,1,1,good,Id3,Antwerp International Airport is operated by t...
3,Id27,adolfo suárez madrid–barajas airport is locate...,Airport,"b'<entry category=""Airport"" eid=""Id27"" size=""1...",0_26,1,1,good,Id1,Denmark demonym is Danes.
4,Id27,adolfo suárez madrid–barajas airport is locate...,Airport,"b'<entry category=""Airport"" eid=""Id27"" size=""1...",0_26,1,1,good,Id2,The inhabitants of Denmark have the demonym of...


In [12]:
eid_ltexts = references.groupby('eid').apply(lambda r: r.ltext.tolist()).to_frame()
eid_ltexts = pd.merge(df, eid_ltexts, left_on='eid', right_index=True)

eid_ltexts.head()

Unnamed: 0,eid,text,0
0,Id18,abilene regional airport serves the city of ab...,[The Antwerp International Airport is operated...
1,Id27,adolfo suárez madrid–barajas airport is locate...,"[Denmark demonym is Danes., The inhabitants of..."
2,Id37,18l/36r is the runway name of adolfo suárez ma...,[The USAF was involved in the 1986 bombing of ...
3,Id40,the icao location identifier of afonso pena in...,"[To the southeast of Adams County, Pennsylvani..."
4,Id41,afonso pena international airport serves the c...,"[Artur Rasizade was an Azerbaijan leader., Art..."


In [None]:
%%time

from tqdm import tqdm_notebook
import numpy as np

def calculate_distance(hypothesis_text, g, doc=None, n=10):
    
    hypothesis_doc = nlp(hypothesis_text)
    hypothesis_g = make_graph(hypothesis_doc)    
    
    sim = list(islice(similarity.optimize_graph_edit_distance(hypothesis_g, g), 0, n))[-1]
    
    print(sim)

    return sim

def calculate_distances(hypothesis_text, gs, docs, n=10):
    
    sims = [calculate_distance(hypothesis_text, g, doc, n) for g, doc in zip(gs, docs)]
    
    return min(sims), np.mean(sims), max(sims) - min(sims)


beleus = []
celeus = []
deleus = []

for idx, row in tqdm_notebook(eid_ltexts.iterrows()):
    
    print(idx)
    
    hypothesis = row['text']
    references = row[0]
    
    docs = [nlp(lex) for lex in references]
    gs = [make_graph(doc) for doc in docs]
    
    print(idx)
    
    min_d, avg_d, ran_d = calculate_distances(hypothesis, gs, docs, n=2)
    
    beleus.append(min_d)
    celeus.append(avg_d)
    deleus.append(ran_d)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

0
0
4.0
14.0
12.0
1
1
40.0
42.0


In [107]:
metric_scores = pd.DataFrame({'eid': eid_ltexts.eid, 'beleu': beleus, 'celeu': celeus, 'deleu': deleus})

metric_scores.head()

Unnamed: 0,beleu,celeu,deleu,eid
0,18.389785,20.944733,5.142508,Id18
1,44.551278,45.135561,1.655182,Id27
2,24.476887,34.309329,15.784468,Id37
3,24.76563,26.001369,3.415968,Id40
4,17.108603,19.673967,4.343514,Id41


## Human scores

In [91]:
human_scores = pd.read_csv('../data/webnlg2017/webnlg-human-evaluation-master/all_data_final_averaged.csv')

human_scores.head()

Unnamed: 0,id,mr,team,text,category,type,bleu,meteor,ter,systemtype,triplesize,fluency,grammar,semantics
0,1,(29075)_1950_DA | discoverer | Carl_A._Wirtanen,adapt,"the 29075 club is the dictcoverer, carl a. wir...",CelestialBody,unseen,0.232,0.185956,90.909,neural,1triple,1.666667,1.666667,1.333333
1,2,(29075)_1950_DA | discoverer | Carl_A._Wirtanen,baseline,the administrative government is governed by t...,CelestialBody,unseen,0.21,0.046764,90.909,neural,1triple,2.75,2.75,1.0
2,3,(29075)_1950_DA | discoverer | Carl_A._Wirtanen,melbourne,1950 da is carl a. wirtanen.,CelestialBody,unseen,0.126,0.32036,81.818,neural,1triple,2.0,2.333333,1.0
3,4,(29075)_1950_DA | discoverer | Carl_A._Wirtanen,pkuwriter,carl a. wirtanen discovered (29075) 1950 da.,CelestialBody,unseen,0.161,0.561305,54.545,neural,1triple,3.0,2.666667,2.333333
4,5,(29075)_1950_DA | discoverer | Carl_A._Wirtanen,tilburg-nmt,the chair of (29075) 1950 da is carl a. wirtanen.,CelestialBody,unseen,0.225,0.430547,81.818,neural,1triple,1.75,2.25,1.5


In [92]:
melbourn_scores = human_scores[human_scores.team == 'melbourne']

melbourn_scores.head()

Unnamed: 0,id,mr,team,text,category,type,bleu,meteor,ter,systemtype,triplesize,fluency,grammar,semantics
2,3,(29075)_1950_DA | discoverer | Carl_A._Wirtanen,melbourne,1950 da is carl a. wirtanen.,CelestialBody,unseen,0.126,0.32036,81.818,neural,1triple,2.0,2.333333,1.0
12,13,"1001_Gaussia | formerName | ""1923 OAA907 XC""",melbourne,1001 gaussia is 1923 oaa907 xc.,CelestialBody,unseen,0.197,0.390154,52.941,neural,1triple,2.0,1.666667,1.666667
22,23,101_Helena | apoapsis | 441092000.0 (kilometres),melbourne,101 helena is 441092000.0.,CelestialBody,unseen,0.077,0.366728,68.571,neural,1triple,2.333333,2.333333,2.0
32,33,101_Helena | discoverer | James_Craig_Watson<b...,melbourne,"101 helena, which is located in madison, wisco...",CelestialBody,unseen,0.129,0.283442,70.787,neural,5triple,2.0,2.666667,1.333333
42,43,101_Helena | discoverer | James_Craig_Watson<b...,melbourne,peritonitis is the parent company of james cra...,CelestialBody,unseen,0.184,0.355345,108.333,neural,2triple,2.333333,2.666667,2.333333


In [95]:
eid_mr = pd.merge(test_with_lex.edf, test_with_lex.mdf).groupby('eid').apply(lambda r: '<br>'.join(r.mtext.tolist())).to_frame().reset_index()
eid_mr.columns = ['eid', 'mr']

eid_mr.head()

Unnamed: 0,eid,mr
0,Id1,Abilene_Regional_Airport | cityServed | Abilen...
1,Id10,Amsterdam_Airport_Schiphol | 1st_runway_Number...
2,Id100,A.C._Chievo_Verona | manager | Rolando_Maran
3,Id1000,"Alberto_Teisaire | profession | ""Rear Admiral ..."
4,Id1001,Alfons_Gorbach | deathPlace | Austria


In [97]:
melbourn_scores = pd.merge(melbourn_scores, eid_mr)

melbourn_scores[melbourn_scores.eid == 'Id18']

Unnamed: 0,id,mr,team,text,category,type,bleu,meteor,ter,systemtype,triplesize,fluency,grammar,semantics,eid
114,1143,Antwerp_International_Airport | operatingOrgan...,melbourne,antwerp international airport is operated by f...,Airport,seen,0.551,0.534247,27.273,neural,1triple,3.0,2.666667,3.0,Id18


## Juntando tudo

In [108]:
df = pd.merge(melbourn_scores, metric_scores)

df.head()

Unnamed: 0,id,mr,team,text,category,type,bleu,meteor,ter,systemtype,triplesize,fluency,grammar,semantics,eid,beleu,celeu,deleu
0,3,(29075)_1950_DA | discoverer | Carl_A._Wirtanen,melbourne,1950 da is carl a. wirtanen.,CelestialBody,unseen,0.126,0.32036,81.818,neural,1triple,2.0,2.333333,1.0,Id1175,19.231125,21.332002,3.967142
1,13,"1001_Gaussia | formerName | ""1923 OAA907 XC""",melbourne,1001 gaussia is 1923 oaa907 xc.,CelestialBody,unseen,0.197,0.390154,52.941,neural,1triple,2.0,1.666667,1.666667,Id1181,28.019888,28.934797,1.561319
2,23,101_Helena | apoapsis | 441092000.0 (kilometres),melbourne,101 helena is 441092000.0.,CelestialBody,unseen,0.077,0.366728,68.571,neural,1triple,2.333333,2.333333,2.0,Id1183,21.628879,22.037831,0.77967
3,33,101_Helena | discoverer | James_Craig_Watson<b...,melbourne,"101 helena, which is located in madison, wisco...",CelestialBody,unseen,0.129,0.283442,70.787,neural,5triple,2.0,2.666667,1.333333,Id1782,51.400418,58.970679,12.561227
4,43,101_Helena | discoverer | James_Craig_Watson<b...,melbourne,peritonitis is the parent company of james cra...,CelestialBody,unseen,0.184,0.355345,108.333,neural,2triple,2.333333,2.666667,2.333333,Id1325,23.524821,24.915103,2.780565


In [109]:
df[['bleu', 'meteor', 'ter', 'fluency', 'grammar', 'semantics', 'beleu', 'celeu', 'deleu']].corr()

Unnamed: 0,bleu,meteor,ter,fluency,grammar,semantics,beleu,celeu,deleu
bleu,1.0,0.65875,-0.801,0.349453,0.299268,0.46216,-0.261783,-0.25516,-0.065897
meteor,0.65875,1.0,-0.663177,0.349041,0.289786,0.473817,-0.330934,-0.352937,-0.25207
ter,-0.801,-0.663177,1.0,-0.363134,-0.291421,-0.504112,0.233977,0.246003,0.155861
fluency,0.349453,0.349041,-0.363134,1.0,0.778241,0.484663,-0.350324,-0.36075,-0.176659
grammar,0.299268,0.289786,-0.291421,0.778241,1.0,0.394372,-0.285349,-0.294406,-0.129498
semantics,0.46216,0.473817,-0.504112,0.484663,0.394372,1.0,-0.202848,-0.208257,-0.096382
beleu,-0.261783,-0.330934,0.233977,-0.350324,-0.285349,-0.202848,1.0,0.986455,0.278091
celeu,-0.25516,-0.352937,0.246003,-0.36075,-0.294406,-0.208257,0.986455,1.0,0.425826
deleu,-0.065897,-0.25207,0.155861,-0.176659,-0.129498,-0.096382,0.278091,0.425826,1.0
