In [1]:
import os
import sys

# Put ../script at the front of the import search path so modules stored there
# can be imported by later cells (presumably `data_alignment`, imported in the
# next cell -- confirm against the repository layout).
# NOTE: the path is relative, so it is resolved against the kernel's working
# directory; start the notebook from the directory that contains it.
SCRIPT_DIR = '../script'

# Guard the insert so that re-running this cell does not pile up duplicate
# entries in sys.path.
if SCRIPT_DIR not in sys.path:
    sys.path.insert(0, SCRIPT_DIR)

In [2]:
from data_alignment import RootDataAlignmentModel, NGramDataAlignmentModel, SPODataAlignmentModel
from textacy import similarity
import spacy
from spacy import displacy
import logging

# Route DEBUG-level records from each alignment model to the notebook output.
# The loggers are looked up by class name, which matches the logger name shown
# in the records printed by the cells below.
for clazz in [RootDataAlignmentModel, NGramDataAlignmentModel, SPODataAlignmentModel]:
    logger = logging.getLogger(clazz.__name__)
    logger.setLevel(logging.DEBUG)

    # logging.getLogger returns the same logger object for a given name, so
    # re-running this cell would otherwise attach one more StreamHandler each
    # time and every record would be printed repeatedly. Attach a handler only
    # when the logger does not have one yet.
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(handler)

In [3]:
# Load the 'en_core_web_lg' spaCy pipeline once; the resulting `nlp` object is
# passed to every alignment model constructed in the cells below.
nlp = spacy.load('en_core_web_lg')

In [4]:
# Align the sample sentence with its subject/predicate/object triple using
# RootDataAlignmentModel, with textacy's jaro_winkler as the similarity metric.
# The value returned by render_aligned is the cell's displayed result.
rda = RootDataAlignmentModel(similarity.jaro_winkler, nlp)
rda.render_aligned(
    'Abelardo Vieira Mota is eating apple',
    dict(
        m_subject='Abelardo Vieira Mota',
        m_predicate='eat',
        m_object='apple',
    ),
)

2018-10-27 15:15:13,186 - RootDataAlignmentModel - DEBUG - Initialized with similarity_metric [<function jaro_winkler at 0x7f42c3a29a60>], nlp = [<spacy.lang.en.English object at 0x7f42c3a2d7f0>]
2018-10-27 15:15:13,205 - RootDataAlignmentModel - DEBUG - Aligning [Abelardo Vieira Mota is eating apple] with [{'m_subject': 'Abelardo Vieira Mota', 'm_predicate': 'eat', 'm_object': 'apple'}]
2018-10-27 15:15:13,206 - RootDataAlignmentModel - DEBUG - similarities 
{'m_subject': [(Abelardo Vieira Mota, 1.0), (Mota, 0.43333333333333335), (is, 0.0), (apple, 0.3333333333333333), (Abelardo, 0.96), (Vieira, 0.4388888888888889)], 'm_predicate': [(Abelardo Vieira Mota, 0.4611111111111111), (Mota, 0.5555555555555555), (is, 0.0), (apple, 0.5111111111111111), (Abelardo, 0.4861111111111111), (Vieira, 0.5)], 'm_object': [(Abelardo Vieira Mota, 0.3333333333333333), (Mota, 0.0), (is, 0.0), (apple, 1.0), (Abelardo, 0.3833333333333333), (Vieira, 0.45555555555555555)]}
2018-10-27 15:15:13,207 - RootDataAlign

In [5]:
# Align the same sample sentence and triple using NGramDataAlignmentModel,
# with textacy's jaccard as the similarity metric.
# The value returned by render_aligned is the cell's displayed result.
nda = NGramDataAlignmentModel(
    nlp=nlp,
    similarity_metric=similarity.jaccard,
)
nda.render_aligned(
    'Abelardo Vieira Mota is eating apple',
    dict(
        m_subject='Abelardo Vieira Mota',
        m_predicate='eat',
        m_object='apple',
    ),
)

2018-10-27 15:15:13,240 - NGramDataAlignmentModel - DEBUG - Similarities from m_subject [(Abelardo, (Abelardo, 0.6153846153846154)), (Vieira, (Vieira, 0.38461538461538464)), (Mota, (Mota, 0.3076923076923077)), (is, (is, 0.07142857142857142)), (eating, (eating, 0.26666666666666666)), (apple, (apple, 0.21428571428571427)), (Abelardo Vieira, (Abelardo Vieira, 0.8461538461538461)), (Vieira Mota, (Vieira Mota, 0.6923076923076923)), (Mota is, (Mota is, 0.42857142857142855)), (is eating, (is eating, 0.3125)), (eating apple, (eating apple, 0.375)), (Abelardo Vieira Mota, (Abelardo Vieira Mota, 1.0)), (Vieira Mota is, (Vieira Mota is, 0.6428571428571429)), (Mota is eating, (Mota is eating, 0.4375)), (is eating apple, (is eating apple, 0.35294117647058826)), (Abelardo Vieira Mota is, (Abelardo Vieira Mota is, 0.9285714285714286)), (Vieira Mota is eating, (Vieira Mota is eating, 0.5625)), (Mota is eating apple, (Mota is eating apple, 0.47058823529411764))]
2018-10-27 15:15:13,242 - NGramDataAlign

In [6]:
# Align the same sample sentence and triple using SPODataAlignmentModel, which
# is constructed from the spaCy pipeline alone (no similarity metric is passed).
# The value returned by render_aligned is the cell's displayed result.
spo = SPODataAlignmentModel(nlp=nlp)
spo.render_aligned(
    'Abelardo Vieira Mota is eating apple',
    dict(
        m_subject='Abelardo Vieira Mota',
        m_predicate='eat',
        m_object='apple',
    ),
)

2018-10-27 15:15:13,269 - SPODataAlignmentModel - DEBUG - Identified subject = [Abelardo Vieira Mota], predicate = [is eating], object = [apple]
