In [1]:
import os
import sys

# Put ../script at the front of the module search path so the project-local
# modules imported below can be found (presumably they live in that directory).
# The path is relative, so it resolves against the kernel's working directory.
# `sys` is imported directly: `os.sys` only works because the os module happens
# to import sys internally, which is not part of the documented os API.
sys.path.insert(0, '../script')
from webnlg import WebNLGCorpus
from evaluation import *
from lexicalization import *
from sentence_generation import *
from content_selection import SelectAllContentSelection
from collections import defaultdict, Counter
from template_extraction import *
from discourse_structuring import DoesntSortDiscourseStructuring
from sentence_aggregation import OneSentenceAggregator
from text_generation import TemplateBasedTextGenerator
import re

In [2]:
# ---------------------------------------------------------------------------
# Data: load the 'dev_1.2' and 'train_1.2' splits, then keep the subset
# selected by ntriples=1 (presumably the single-triple entries).
# ---------------------------------------------------------------------------
v_12 = WebNLGCorpus.load(
    ['dev_1.2', 'train_1.2']
)
v_12_ntriples_1 = v_12.subset(ntriples=1)

# ---------------------------------------------------------------------------
# Stage: content selection
# ---------------------------------------------------------------------------
cs = SelectAllContentSelection()

# ---------------------------------------------------------------------------
# Stage: sentence generation
# Templates are extracted from the ntriples=1 subset and used to fit the
# most-frequent-template generator. That generator is placed first in the
# fallback pipeline, followed by the just-join-triple generator
# (presumably tried in list order -- TODO confirm).
# ---------------------------------------------------------------------------
t = ManualTemplateExtract()
template_db = t.extract(v_12_ntriples_1)

mfe = MostFrequentTemplateSentenceGenerator()
mfe.fit(template_db=template_db)

fsg = FallBackPipelineSentenceGenerator(
    [mfe, JustJoinTripleSentenceGenerator()]
)

# ---------------------------------------------------------------------------
# Stage: lexicalization (fitted on the same ntriples=1 subset)
# ---------------------------------------------------------------------------
la = LexicalizeAsAligned()
la.fit(v_12_ntriples_1)

# ---------------------------------------------------------------------------
# Stage: discourse structuring
# ---------------------------------------------------------------------------
ds = DoesntSortDiscourseStructuring()

# ---------------------------------------------------------------------------
# Stage: sentence aggregation
# ---------------------------------------------------------------------------
sa = OneSentenceAggregator()

# ---------------------------------------------------------------------------
# Assemble the generator from the five components built above.
# ---------------------------------------------------------------------------
nlg = TemplateBasedTextGenerator(
    cs,
    fsg,
    la,
    ds,
    sa,
)

In [3]:
# Evaluate the assembled generator; the recorded output reports 'bleu',
# 'meteor' and 'ter' values.
# NOTE(review): 'ola' is presumably a run/model label consumed by
# evaluate_model -- confirm its meaning against the evaluation module.
evaluate_model(nlg, 'ola')

{'bleu': 41.29, 'meteor': 0.3858469043799935, 'ter': 0.6043062477150711}

# Examples

In [4]:
# Load the 'test_with_lex' split; the example cells below sample entries from it.
test = WebNLGCorpus.load(['test_with_lex'])

## Example 1

In [28]:
# Pick a test entry with ntriples=3 and generate text for it.
# NOTE(review): sample() looks random and no seed is set in the visible cells,
# so a re-run will likely show a different entry than the saved output --
# confirm, and seed or pin the entry if reproducibility matters.
sample = test.sample(ntriples=3)

# The generated text is stored in `text`; the following cell displays it.
text = nlg.predict_entry(sample.get_data())

# Last expression of the cell: display the sampled entry itself.
sample

Triple info: category=Building eid=Id471

	Modified triples:

Alan_B._Miller_Hall | buildingStartDate | "30 March 2007"
Mason_School_of_Business | country | United_States
Alan_B._Miller_Hall | currentTenants | Mason_School_of_Business


	Lexicalizations:

Alan B. Miller Hall was started on March 30,2007 and has The Mason School of Business in the U.S. as a tenant. || 
Alan B. Miller Hall's building opened in 30th March 2007. The Mason School of Business in the United States are the current tenants of Alan B Miller Hall. || 

In [29]:
# Display the text generated for the entry sampled in the previous cell.
text

'The construction of alan b miller hall began in 30th march 2007 . the mason school of business is in the united states . the mason school of business are the current tenants of alan b miller hall .'

## Example 2

In [30]:
# Pick a test entry with no ntriples constraint and generate text for it.
# NOTE(review): sample() looks random and no seed is set in the visible cells,
# so a re-run will likely show a different entry than the saved output --
# confirm, and seed or pin the entry if reproducibility matters.
# This rebinds `sample` and `text`, replacing the values from the earlier example.
sample = test.sample()

# The generated text is stored in `text`; the following cell displays it.
text = nlg.predict_entry(sample.get_data())

# Last expression of the cell: display the sampled entry itself.
sample

Triple info: category=SportsTeam eid=Id125

	Modified triples:

Michele_Marcolini | club | Atalanta_B.C.


	Lexicalizations:

Michele Marcolini has played for Atalanta BC. || 
Michele Marcolini is part of the Atalanta B.C. club. || 
Michele Marcolini plays for Atalanta B.C. || 

In [31]:
# Display the text generated for the entry sampled in the previous cell.
text

'michele marcolini plays for Atalanta B.C. .'