# Testing ideas

In [3]:
import os

os.sys.path.insert(0, '../script')

from template_based import Structure
from webnlg_corpus import webnlg

In [4]:
corpus = webnlg.load('webnlg_challenge_2017')

In [10]:
s = corpus.sample(ntriples=[5])
s

Triple info: Category=Food eid=Id28 idx=train_Food_5_Id28

	Modified Triples:

Arròs_negre | region | Valencian_Community
Valencian_Community | leaderName | Ximo_Puig
Spain | currency | Euro
Arròs_negre | country | Spain
Spain | leaderName | Felipe_VI_of_Spain


	Lexicalizations:

Ximo Puig is a leader in the Valencian Community where Arros negre comes from. It is a traditional dish from Spain, where the currency is the euro and the leader is Felipe VI of Spain.


The Spanish dish Arros negre comes from the region of the Valencian Community, where the leader is Ximo Puig. The leader of Spain is Felipe VI and the currency used in the country is the euro.


In [11]:
Structure.from_triples(s.data)

[Arròs_negre, 

	<region, [
		[Valencian_Community, 

			<leaderName, [Ximo_Puig]>]]>,
	<country, [
		[Spain, 

			<currency, [Euro]>,
			<leaderName, [Felipe_VI_of_Spain]>]]>]

In [12]:
t1 = template_db[Structure.from_triples([{'subject': 'A', 'predicate': 'cityServed', 'object': 'B'}])].most_common(1)[0][0]

t1

NameError: name 'template_db' is not defined

In [10]:
t2 = template_db[Structure.from_triples([{'subject': 'A', 'predicate': 'elevationAboveTheSeaLevel_(in_metres)', 'object': 'B'}])].most_common(1)[0][0]

t2

Structure: <AGENT-1, elevationAboveTheSeaLevel_(in_metres), PATIENT-1>
Text: {AGENT-1} is {PATIENT-1} metres above sea level .

In [19]:
t1.template_text, t2.template_text

('{AGENT-1} serves the city of {PATIENT-1}.',
 '{AGENT-1} is {PATIENT-1} metres above sea level .')

# (AGENT-1, p1, PATIENT-1), (AGENT-1, p2, PATIENT-1) 
# ->  
# [(AGENT-1, p1, PATIENT-1), (AGENT-1, p2, PATIENT2)]

In [48]:
import re

RE_REMOVE_FINAL_DOT = re.compile(r'\.$')
# assumes the sentence is in active voice
RE_REMOVE_AGENT_1 = re.compile(r'^.*?{AGENT-1}')

def make_text(t1, t2):
    
    t1_ = RE_REMOVE_FINAL_DOT.sub('', t1)
    
    t2_ = RE_REMOVE_AGENT_1.sub('', t2).replace('{PATIENT-1}', '{PATIENT-2}')
    
    return '{} and {}'.format(t1_, t2_)

In [31]:
def make_structure(h1, h2):
    
    o1 = Slot('PATIENT-1', [])
    p1 = Predicate(h1.predicates[0].value, [o1])
    o2 = Slot('PATIENT-2', [])
    p2 = Predicate(h2.predicates[0].value, [o2])
    
    s = Slot('AGENT-1', [p1, p2])
    
    return Structure(s)

In [41]:
def make_template(t1, t2):
    
    template_text = make_text(t1.template_text, t2.template_text)
    structure = make_structure(t1.structure.head, t2.structure.head)
    
    return Template(structure, template_text, None)

In [43]:
template1 = make_template(t1, t2)

template1

Structure: <AGENT-1, cityServed, PATIENT-1>
<AGENT-1, elevationAboveTheSeaLevel_(in_metres), PATIENT-2>
Text: {AGENT-1} serves the city of {PATIENT-1} and  is {PATIENT-2} metres above sea level .

In [46]:
template2 = make_template(t2, t1)

template2

Structure: <AGENT-1, elevationAboveTheSeaLevel_(in_metres), PATIENT-1>
<AGENT-1, cityServed, PATIENT-2>
Text: {AGENT-1} is {PATIENT-1} metres above sea level  and  serves the city of {PATIENT-2}.

In [44]:
s = Structure.from_triples([{'subject': 'A', 'predicate': 'cityServed', 'object': 'B'},
                            {'subject': 'A', 'predicate': 'elevationAboveTheSeaLevel_(in_metres)', 'object': 'C'}])

template1.fill(s, lambda x: x)

'A serves the city of B and  is C metres above sea level .'

In [47]:
s = Structure.from_triples([{'subject': 'A', 'predicate': 'elevationAboveTheSeaLevel_(in_metres)', 'object': 'B'},
                            {'subject': 'A', 'predicate': 'cityServed', 'object': 'C'}])

template2.fill(s, lambda x: x)

'A is B metres above sea level  and  serves the city of C.'

# JustJoin

In [51]:
# make a general one
class JustJoinTemplate2:

    def fill(self, data, lexicalization_f):

        s = lexicalization_f(data.head.value)
        p1 = lexicalization_f(data.head.predicates[0].value)
        o1 = lexicalization_f(data.head.predicates[0].objects[0].value)
        p2 = lexicalization_f(data.head.predicates[0].objects[0].predicates[0].value)
        o2 = lexicalization_f(data.head.predicates[0].objects[0].predicates[0].objects[0].value)

        return f'{s} {p1} {o1} {p2} {o2}.'

    def __repr__(self):
        return 'template {s} {p1} {o1} {p2} {o2}.'

In [53]:
s = Structure.from_triples([{'subject': 'A', 'predicate': 'elevationAboveTheSeaLevel_(in_metres)', 'object': 'B'},
                            {'subject': 'B', 'predicate': 'cityServed', 'object': 'C'}])

jjt = JustJoinTemplate2()

jjt.fill(s, lambda x: x)

'A elevationAboveTheSeaLevel_(in_metres) B cityServed C.'

In [54]:
s.head

<A, elevationAboveTheSeaLevel_(in_metres), B>
<B, cityServed, C>