In [2]:
%load_ext autoreload
%autoreload 2
import os

# Put ../script at the front of the module search path before the imports below.
# `os.sys` is simply the standard `sys` module reached through `os`.
# NOTE(review): presumably `evaluation` and `template_based` live in ../script — confirm.
os.sys.path.insert(0, '../script')

from evaluation import evaluate_model
from collections import ChainMap, defaultdict, Counter
from template_based import *
import re

In [3]:
# Group 1: text preceding a parenthesised part; group 2: the text inside the parentheses.
PARENTHESIS_RE = re.compile(r'(.*?)\((.*?)\)')
# A lowercase ASCII letter immediately followed by an uppercase ASCII letter.
CAMELCASE_RE = re.compile(r'([a-z])([A-Z])')

def preprocess_so(so):
    """Turn a raw subject/object string into a more readable surface form.

    Steps, applied in this order:

    1. every ``prefix(inner)`` occurrence is rewritten as ``inner prefix``;
    2. underscores become spaces;
    3. a space is inserted at each lowercase-to-uppercase boundary;
    4. leading/trailing double quotes and spaces are stripped.

    Example: ``'Abilene_(Texas)'`` becomes ``'Texas Abilene'``.

    Args:
        so: the raw string to clean up.

    Returns:
        The cleaned string.
    """

    # The replacement templates must be raw strings: '\g' is not a valid Python
    # string escape, so a plain literal triggers an invalid-escape warning
    # (DeprecationWarning since 3.6, SyntaxWarning since 3.12).
    parenthesis_preprocessed = PARENTHESIS_RE.sub(r'\g<2> \g<1>', so)
    underline_removed = parenthesis_preprocessed.replace('_', ' ')
    camelcase_preprocessed = CAMELCASE_RE.sub(r'\g<1> \g<2>', underline_removed)

    return camelcase_preprocessed.strip('" ')

In [4]:
class TemplateBasedModel:
    """Generate text by structuring each input, selecting templates and rendering them.

    The three stages are delegated to ``StructureData``, ``SelectTemplate`` and
    ``MakeText``, which are defined outside this notebook cell.
    """

    def __init__(self, template_db):
        """Wire up the pipeline around ``template_db``.

        ``template_db`` is consulted first; a ``[]`` lookup that raises
        ``KeyError`` there falls through to a ``defaultdict`` that produces a
        ``Counter`` holding a single ``JustJoinTemplate``.
        """

        def just_join_counter():
            # Default entry used when template_db has nothing for a key.
            return Counter([JustJoinTemplate()])

        self.template_db = ChainMap(template_db, defaultdict(just_join_counter))
        self.ss = StructureData(self.template_db)
        self.st = SelectTemplate()
        self.mt = MakeText(preprocess_so)

    def predict(self, X):
        """Return a list with one generated text per element of ``X``, in order."""

        return [
            self.mt.make_text(
                self.st.select_template(
                    self.ss.structure(entry)
                )
            )
            for entry in X
        ]

In [13]:
# Load the template database used by the model.
# NOTE(review): read_thiagos_templates comes from a project module not shown here;
# judging by the lookup further down, the result maps a Structure to a Counter of
# templates — confirm.
from reading_thiagos_templates import read_thiagos_templates

template_db = read_thiagos_templates()

In [15]:
# Build the model on top of the loaded templates.
tbm = TemplateBasedModel(template_db)

In [17]:
# Evaluate the model; the displayed result holds the 'bleu', 'meteor' and 'ter' scores.
# NOTE(review): the second argument looks like the name under which the generated
# texts are stored (cf. ../model/<name>.txt) — confirm in the evaluation module.
evaluate_model(tbm, 'template-based-model-w-steroids')

{'bleu': 38.05, 'meteor': 0.39508048381289684, 'ter': 0.6373246618306355}

In [20]:
# Show the first 20 lines of the model's text file for a quick manual look.
!head -20 ../model/template-based-model-w-steroids.txt

Abilene Regional Airport serves the city of Abilene, Texas .
Adolfo Suárez Madrid–Barajas Airport is located in Madrid, Paracuellos de Jarama, San Sebastián de los Reyes and Alcobendas .
The runway name of Adolfo Suárez Madrid–Barajas Airport is 18L/36R .
The ICAO Location Identifier of Afonso Pena International Airport is SBCT .
Afonso Pena International Airport serves the city of Curitiba .
Al-Taqaddum Air Base serves the city of Fallujah .
The runway length of Al-Taqaddum Air Base is 3684.0 .
The runway name of Alderney Airport is 14/32 .
The runway length of Allama Iqbal International Airport is 3360.12 .
Amsterdam Airport Schiphol 1st runway is Number 18 .
Amsterdam Airport Schiphol 5th runway Surface Type Asphalt.
The runway name of Amsterdam Airport Schiphol is 06/24 'Kaagbaan' .
Andrews County Airport is 973.0 metres above sea level .
Andrews County Airport is owned by Andrews County, Texas .
The runway length of Andrews County Airport is 896.0 .
The 1st runway a

In [9]:
def get_structure_for_predicate(predicate):
    """Build the ``Structure`` for a single ``predicate`` with unnamed slots.

    The result is a root ``Slot`` (empty value) holding one ``Predicate`` named
    ``predicate``, which in turn holds one childless ``Slot`` (empty value).
    The notebook uses it as a key into ``template_db``.
    """

    leaf_slot = Slot('', [])
    root_slot = Slot('', [Predicate(predicate, [leaf_slot])])

    return Structure(root_slot)

In [18]:
# Inspect the templates (and their counts) stored for the single-predicate
# structure 'birthDate'.
template_db[get_structure_for_predicate('birthDate')]

Counter({<AGENT-1, birthDate, PATIENT-1>
         {AGENT-1} was born on {PATIENT-1} .: 19,
         <AGENT-1, birthDate, PATIENT-1>
         {AGENT-1} was born {PATIENT-1} .: 4,
         <AGENT-1, birthDate, PATIENT-1>
         {AGENT-1} 's birth date was {PATIENT-1} .: 1,
         <AGENT-1, birthDate, PATIENT-1>
         {AGENT-1} 's birth date is {PATIENT-1} .: 1})