### Import reviews

In [1]:
%load_ext autoreload
%autoreload 2

import progressbar

In [2]:
import pickle

DATA_FILE1 = 'data/data.pkl'
DATA_FILE2 = 'data/labels.pkl'
DATA_FILE3 = 'data/pos_adjectives_list.pkl'
DATA_FILE4 = 'data/neu_adjectives_list.pkl'
DATA_FILE5 = 'data/neg_adjectives_list.pkl'

with open(DATA_FILE1, 'rb') as f:
    reviews = pickle.load(f)
    
with open(DATA_FILE2, 'rb') as f:
    labels = pickle.load(f)
   
entity_names = ['X', 'Y', 'Z']

In [3]:
from babble.babbler import BabbleStream

babbler = BabbleStream(reviews,labels,balanced=True, shuffled=True, seed=456)

Grammar construction complete.


In [4]:
from babble.utils import display_candidate

candidate = babbler.next()
print(candidate)
print(candidate.text)
print(candidate.entity_positions)

RelationMention(doc_id=917: entities=("bar"(13:16), "water"(33:38), "place"(71:76))
what kind of bar won't serve you water it 120 degrees outside.... this place should get shutdown.... i spent $100 and they won't give me water to sober up
[(13, 16), (33, 38), (71, 76)]


In [4]:
import re

def get_words_before(quantity,sentence,entity):
    sentence = re.sub(r'[^\w\s]','',sentence)
    words = sentence.split()
    if entity in words:
        index = words.index(entity)
        before = index - min(index, quantity)
        return ' '.join(map(str, words[before:index])) 

In [5]:
import re

def get_words_after(quantity,sentence,entity):
    sentence = re.sub(r'[^\w\s]','',sentence)
    words = sentence.split()
    if entity in words:
        index = words.index(entity) +1
        after = index + min(index, quantity)
        return ' '.join(map(str, words[index:after]))

In [6]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()

pos_adjectives_list=[]
neu_adjectives_list=[]
neg_adjectives_list=[]

def check_sentiment_adjective(adjective):
    if (sid.polarity_scores(adjective)['compound']) >= 0.1:
        pos_adjectives_list.append(adjective)
        return 'positive'
    elif (sid.polarity_scores(adjective)['compound']) <= -0.1:
        neg_adjectives_list.append(adjective)
        return 'negative'
    else:
        neu_adjectives_list.append(adjective)
        return 'neutral' 

In [7]:
def check_adjectives_before_pos(sentence,entity):
    words = get_words_before(2,sentence,entity)
    if words == None or len(words.split(" ")) == 0:
        return None
    else:
        spacy_nlp = spacy.load('en_core_web_sm')
        doc = spacy_nlp(words)
        for token in doc:
            if token.pos_ == 'ADJ':
                return token.text
                break

In [8]:
def check_adjectives_after_verb(sentence,entity):
    words = get_words_after(3,sentence,entity)
    if words == None or len(words.split(" ")) < 3:
        return None
    else:
        spacy_nlp = spacy.load('en_core_web_sm')
        doc = spacy_nlp(words)
        if doc[0].pos_ == 'VERB' and (doc[1].pos_ == 'ADJ' or doc[2].pos_ == 'ADJ'):
            return doc[1].text

In [9]:
import spacy

spacy_stopwords = spacy.lang.en.stop_words.STOP_WORDS

print('spaCy Version: %s' % (spacy.__version__))
spacy_nlp = spacy.load('en_core_web_sm')

spaCy Version: 2.1.8


In [10]:
from babble import Explanation

explanations = []
index = 1


for candidate,label in progressbar.progressbar(zip(reviews[0],labels[0])):
#    print(candidate.text)
    for entity,name in zip(candidate.entities,entity_names):
#        print(entity.entity)
        adjective = check_adjectives_after_verb(candidate.text,entity.entity)
        if adjective != None:
            sentiment_value = check_sentiment_adjective(adjective)
            condition='A ' + sentiment_value + ' word is within 3 words to the right of word ' + name
        else:
            adjective = check_adjectives_before_pos(candidate.text,entity.entity)
            if adjective != None:
                sentiment_value = check_sentiment_adjective(adjective)
                condition='A ' + sentiment_value + ' word is within 2 words to the left of word ' + name
        if adjective != None:
            explanation = Explanation(
            name='LF_' + str(index),
            label=label,
            condition=condition,
            candidate=candidate,
            )
            explanations.append(explanation)
            index = index + 1
#            print(str(explanation).upper())

| |                     #                           | 436 Elapsed Time: 0:06:29


In [12]:
for explanation in explanations:
    print(explanation.candidate)

RelationMention(doc_id=1: entities=("bill"(6:10), "service"(29:36), "avoid"(172:177))
RelationMention(doc_id=1: entities=("bill"(6:10), "service"(29:36), "avoid"(172:177))
RelationMention(doc_id=3: entities=("place"(6:11), "staff"(13:18), "help"(71:75))
RelationMention(doc_id=3: entities=("place"(6:11), "staff"(13:18), "help"(71:75))
RelationMention(doc_id=6: entities=("ice"(88:91), "cream"(92:97), "food"(110:114))
RelationMention(doc_id=7: entities=("credit"(19:25), "lake"(106:110), "place"(149:154))
RelationMention(doc_id=8: entities=("hour"(27:31), "food"(56:60), "bread"(107:112))
RelationMention(doc_id=8: entities=("hour"(27:31), "food"(56:60), "bread"(107:112))
RelationMention(doc_id=9: entities=("experience"(3:13), "place"(44:49), "item"(110:114))
RelationMention(doc_id=10: entities=("credit"(54:60), "meal"(80:84), "reason"(102:108))
RelationMention(doc_id=12: entities=("visit"(58:63), "vegas"(90:95), "place"(121:126))
RelationMention(doc_id=12: entities=("visit"(58:63), "vegas"(

In [14]:
print(explanations)

[Explanation(LF_1: 1, "A neutral word is within 2 words to the left of word X"), Explanation(LF_2: 1, "A negative word is within 2 words to the left of word Y"), Explanation(LF_3: 1, "A negative word is within 2 words to the left of word X"), Explanation(LF_4: 1, "A neutral word is within 3 words to the right of word Y"), Explanation(LF_5: 1, "A neutral word is within 3 words to the right of word X"), Explanation(LF_6: 1, "A neutral word is within 2 words to the left of word Z"), Explanation(LF_7: 1, "A positive word is within 2 words to the left of word X"), Explanation(LF_8: 1, "A neutral word is within 3 words to the right of word Y"), Explanation(LF_9: 1, "A neutral word is within 3 words to the right of word Y"), Explanation(LF_10: 1, "A neutral word is within 2 words to the left of word Y"), Explanation(LF_11: 1, "A neutral word is within 2 words to the left of word X"), Explanation(LF_12: 1, "A neutral word is within 2 words to the left of word Y"), Explanation(LF_13: 1, "A neut

In [11]:
pos_adjectives_list = list(dict.fromkeys(pos_adjectives_list))
neu_adjectives_list = list(dict.fromkeys(neu_adjectives_list))
neg_adjectives_list = list(dict.fromkeys(neg_adjectives_list))

In [12]:
with open(DATA_FILE3, 'wb') as f:
    pickle.dump(pos_adjectives_list,f)
    
with open(DATA_FILE4, 'wb') as f:
    pickle.dump(neu_adjectives_list,f)

with open(DATA_FILE5, 'wb') as f:
    pickle.dump(neg_adjectives_list,f)

In [13]:
from babble.utils import ExplanationIO

FILE = "data/my_explanations.tsv"
exp_io = ExplanationIO()
exp_io.write(explanations, FILE)
explanations = exp_io.read(FILE)

Wrote 482 explanations to data/my_explanations.tsv
Read 482 explanations from data/my_explanations.tsv
