In [22]:
import os
import urllib3
import spacy
import random

from elasticsearch import Elasticsearch

urllib3.disable_warnings()

ME=os.environ['BOT_NAME']

ELASTIC_USER = os.environ['ELASTIC_USER']
ELASTIC_URL = os.environ['ELASTIC_URL']
ELASTIC_KEY = os.environ.get('ELASTIC_KEY', None)
ELASTIC_INDEX = os.environ.get('ELASTIC_INDEX', 'persyn-bender-summaries-v0')

In [23]:
es = Elasticsearch([ELASTIC_URL], basic_auth=(ELASTIC_USER, ELASTIC_KEY), request_timeout=30)

In [59]:
entity = es.search( # pylint: disable=unexpected-keyword-arg
    index=ELASTIC_INDEX,
    query={
        "bool": {
            "must": [
                {"term": {"channel.keyword": "C03PC3C5Q3Y"}},
                {"match": {"summary": { "query": "shiny" }}}
            ]
        }
    },
    size=10
)['hits']['hits']

In [60]:
entity

[{'_index': 'persyn-bender-summaries-v0',
  '_type': '_doc',
  '_id': 'Dh0YCYIBGYbVpQ0o8JEd',
  '_score': 5.6587954,
  '_source': {'convo_id': 'WyLUAyNjsgfJqCcEDsrDbW',
   'summary': '- Bender is not sure why his metal ass is shiny, but it is just a coincidence that he polishes it so often.',
   'service': 'https://persyn.slack.com/',
   'channel': 'C03PC3C5Q3Y',
   '@timestamp': '2022-07-16T15:20:05.473310-07:00'}}]

In [20]:
convo = [f'''{h['_source']['speaker']}: {h['_source']['msg']}''' for h in history[::-1]]
# convo = [f"{h['_source']['msg']}" for h in history[::-1]]

In [21]:
convo[-1]

'Anna: Oh, this is great. This is me out of context I think?'

In [24]:
# Load English tokenizer, tagger, parser and NER
nlp = spacy.load("en_core_web_lg")
# nlp = spacy.load("en_core_web_trf")

# Find named entities, phrases and concepts
for text in convo[-8:]:
    dobj = None
    doc = nlp(text.replace('*',''))
    print(doc)
    for token in doc:
        if token.dep_ == 'dobj':
            dobj = token
            break
    if dobj:
        print("Intent:", token.head.text + token.text.capitalize())

    print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
    print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])
    for entity in set(doc.ents):
        print(entity.text, entity.label_)
    print('')

Anna: Let's change the subject.
Intent: changeSubject
Noun phrases: ['Anna', "'s", 'the subject']
Verbs: ['let', 'change']
Anna PERSON

Anna: hi
Noun phrases: ['Anna']
Verbs: []
Anna PERSON

Anna: Hi Anna there is a fursuiter here
Noun phrases: ['Anna', 'a fursuiter']
Verbs: ['be']
Anna PERSON
Anna PERSON

Anna: I really really really really really really really don't want to talk to any fursuiters
Noun phrases: ['Anna', 'I', 'any fursuiters']
Verbs: ['want', 'talk']
Anna PERSON

Rob: Why not? Fursuiters are a lovely people. So full of mischief.
Noun phrases: ['Rob', 'Fursuiters', 'a lovely people', 'mischief']
Verbs: []
Rob PERSON
Fursuiters ORG

Anna: I think you're going to want to come home.
Noun phrases: ['I', 'you']
Verbs: ['think', 'go', 'want', 'come']
Anna PERSON

Rob: <https://static.wikia.nocookie.net/emojimovie/images/3/31/Mel_meh.PNG>
Noun phrases: ['Rob: <https://static.wikia.nocookie.net/emojimovie/images/3/31/Mel_meh.PNG']
Verbs: []
Rob PERSON

Anna: Oh, this is great. 

In [25]:
j = doc.to_json()
print(doc)
print(' '.join([w['lemma'] for w in j['tokens']]))

Anna: Oh, this is great. This is me out of context I think?
Anna : oh , this be great . this be I out of context I think ?


In [26]:
[n for n in doc.noun_chunks]

[this, This, me, context, I]

In [27]:
for token in doc:
    if token.dep_ == 'dobj':
        dobj = token

In [28]:
print(dobj)

None


In [69]:
for token in doc:
    print(token, dobj.similarity(token))

AttributeError: 'NoneType' object has no attribute 'similarity'

In [71]:
for token in doc:
    print(token.dep_, token.text)

ROOT Rob
punct :
intj Alright
punct ,
ROOT Anna
punct .
nsubj That
aux should
ROOT do
dobj it
punct .
advmod Still
ROOT want
aux to
xcomp go
compound Christmas
dobj caroling
punct ?


In [82]:
doc = nlp(random.choice(convo))
print(doc)

Anna: I'm not going for comedy.


In [83]:
spacy.displacy.render(
    doc, 
    style="dep", 
    options={
        "compact":False,
        "fine_grained":True,
        "add_lemma":True,
        "collapse_punct":True,
        "collapse_phrases":True
    }
)

In [84]:
spacy.displacy.render(doc, style="ent")

In [92]:
for token in doc:
    print(f'{token}({token.dep_}):\t', spacy.explain(token.dep_))

Anna(npadvmod):	 noun phrase as adverbial modifier
:(punct):	 punctuation
I(nsubj):	 nominal subject
'm(aux):	 auxiliary
not(neg):	 negation modifier
going(ROOT):	 None
for(prep):	 prepositional modifier
comedy(pobj):	 object of preposition
.(punct):	 punctuation


In [35]:
z = list(doc)[0]

In [36]:
z, z.similarity(nlp("Anna"))

(Rob, 0.3539790079137116)

In [37]:
nlp = spacy.load("en_core_web_lg")

In [38]:
def get_dobj(doc):
    ''' Return the direct object, if any. '''
    for token in doc:
        if token.dep_ == 'dobj':
            return token
    return None

def simple_intent(text):
    doc = nlp(text)
    token = get_dobj(doc)
    if not token:
        return None
    return token.head.lemma_.lower() + token.text.capitalize()
    
def conj_intent(text):
    doc = nlp(text)
    token = get_dobj(doc)
    if not token:
        return None
    dobj = [token.text]
    conj = [t.text for t in token.conjuncts]
    return token.head.lemma_.lower(), dobj + conj

In [39]:
simple_intent("I want pizza")

'wantPizza'

In [40]:
simple_intent("I want pizza and your face")

'wantPizza'

In [348]:
conj_intent("I want pizza and your face")

('want', ['pizza', 'face'])

In [349]:
conj_intent("Anna wants pizza and your face")

('want', ['pizza', 'face'])

In [43]:
def match_intent(text):
    doc = nlp(text)
    token = get_dobj(doc)
    if not token:
        return None

    dobj = token
    tverb = token.head
    
    verbList = ['want', 'desire', 'need']
    if tverb.lemma_ in verbList:
        intentVerb = tverb
    else:
        if tverb.head.dep_ == 'ROOT':
            intentVerb = tverb.head
            
    objList = ['pizza', 'face']
    if dobj.text in objList:
        intentObj = dobj
    else:
        for child in dobj.children:
            if child.dep_ == 'prep':
                intentObj = list(child.children)[0]
                break
            elif child.dep_ == 'compound':
                intentObj = child
                break
                
    return intentVerb.lemma_.lower() + intentObj.text.capitalize()

In [44]:
simple_intent("I want to place an order for your face")

'placeOrder'

In [45]:
match_intent("I want to place an order for some pizza and your face")

'wantPizza'

In [46]:
def syn_intent(text):
    doc = nlp(text)
    token = get_dobj(doc)
    if not token:
        return None
    verb = token.head.lemma_
    dobj = token.text.lower()
    
    verbList = [('order','want','give','make'),('show','find')]
    verbSyns = [item for item in verbList if verb in item]
    
    dobjList = [('pizza','pie','dish'),('cola','soda')]
    dobjSyns = [item for item in dobjList if dobj in item]
    
    return verbSyns[0][0] + dobjSyns[0][0].capitalize()


In [47]:
syn_intent('give me my pie')

'orderPizza'

In [49]:
def sim_intent(text):
    doc = nlp(text)
    token = get_dobj(doc)
    if not token:
        print('nope')
        return None
    verb = token.head
    dobj = token
    
    verbList = [nlp('buy'), nlp('make'), nlp('show')]
    verbSyns = [item for item in verbList if verb.similarity(item) > 0.5]
    
    dobjList = [nlp('food'), nlp('beverage')]
    dobjSyns = [item for item in dobjList if dobj.similarity(item) > 0.6]
    
    return verbSyns[0][0].text + dobjSyns[0][0].text.capitalize()


In [50]:
sim_intent('i want to buy a soda')

'buyBeverage'

In [51]:
sim_intent('i want to see the drinks')

'makeFood'

In [52]:
nlp('show').similarity(nlp('see'))

0.5966469921671479

In [96]:
# https://www.analyticsvidhya.com/blog/2019/09/introduction-information-extraction-python-spacy/
import re 
import string 
import nltk 
import spacy 
import pandas as pd 
import numpy as np 
import math 
from tqdm import tqdm 

from spacy.matcher import Matcher 
from spacy.tokens import Span 
from spacy import displacy 

pd.set_option('display.max_colwidth', 200)

In [140]:
such_as = [{'DEP':'amod', 'OP':"?"}, # adjectival modifier
           {'POS':'NOUN'},
           {'LOWER': 'such'},
           {'LOWER': 'as'},
           {'POS': 'PROPN'}]

and_other = [{'DEP':'amod', 'OP':"?"}, 
           {'POS':'NOUN'}, 
           {'LOWER': 'and', 'OP':"?"}, 
           {'LOWER': 'or', 'OP':"?"}, 
           {'LOWER': 'other'}, 
           {'POS': 'NOUN'}] 

including = [{'DEP':'nummod','OP':"?"}, # numeric modifier 
           {'DEP':'amod','OP':"?"}, # adjectival modifier 
           {'POS':'NOUN'}, 
           {'IS_PUNCT': True}, 
           {'LOWER': 'including'}, 
           {'DEP':'nummod','OP':"?"}, 
           {'DEP':'amod','OP':"?"}, 
           {'POS':'NOUN'}] 
                  
especially = [{'DEP':'nummod','OP':"?"}, 
           {'DEP':'amod','OP':"?"}, 
           {'POS':'NOUN'}, 
           {'IS_PUNCT':True}, 
           {'LOWER': 'especially'}, 
           {'DEP':'nummod','OP':"?"}, 
           {'DEP':'amod','OP':"?"}, 
           {'POS':'NOUN'}] 

In [146]:
# Matcher class object 
matcher = Matcher(nlp.vocab) 
matcher.add("such_as", [such_as]) 
matcher.add("and_other", [and_other])
matcher.add("including", [including])
matcher.add("especially", [especially])

In [147]:
sentences = [
    "GDP in developing countries such as Vietnam will continue growing at a high rate.",
    "Here is how you can keep your car and other vehicles clean.",
    "Here is how you can keep your car or other vehicles clean.",
    "Eight people, including two children",
    "A healthy eating pattern includes fruits, especially apples."
]

for sent in sentences:
    doc = nlp(sent)

    matches = matcher(doc)
    if matches:
    #     print(matches)
        span = doc[matches[0][1]:matches[0][2]] 
        print(span.text)


developing countries such as Vietnam
car and other vehicles
car or other vehicles
Eight people, including two children
fruits, especially apples


In [149]:
text = "Tableau was recently acquired by Salesforce." 

# Plot the dependency graph 
doc = nlp(text) 
displacy.render(doc, style='dep')

In [208]:
def is_passive(doc):
    ''' Returns True if sentince is passive, else False '''
    for i,tok in enumerate(doc):
        if tok.dep_.endswith("subjpass"):
            return True
    return False

def subtree_matcher(doc):
    x = y = z = ''

    if is_passive(doc):
        for i,tok in enumerate(doc):
            if tok.dep_.endswith("subjpass") == True:
                z = tok
                y = tok.head

            if tok.dep_.endswith("obj") == True:
                x = tok

    else:
        for i,tok in enumerate(doc):
            if tok.dep_.endswith("subj") == True:
                x = tok
                y = tok.head

            if tok.dep_.endswith("obj") == True:
                z = tok

    return x, y, z

In [None]:
roberta_nlp = spacy.load("en_core_web_trf")

In [216]:
sentences = [
    "Tableau was recently acquired by Salesforce.",
    "Careem, a ride-hailing major in the middle east, was acquired by Uber.",
    "Salesforce recently acquired Tableau.",
    "Tableau was recently acquired by Salesforce.",
    "Rob was taken aback by the sheer simplicity of Anna's brain",
    "Rob was taken aback by the sheer simplicity of Anna",
    "I was taken aback by the sheer simplicity of it",
    "I was taken aback by the sheer simplicity",
    "Who knows?"
]

for sent in sentences:
    doc = roberta_nlp(sent)
    print(subtree_matcher(doc))
    displacy.render(doc, style='ent')

(Salesforce, acquired, Tableau)


(Uber, acquired, Careem)


(Salesforce, acquired, Tableau)


(Salesforce, acquired, Tableau)


(brain, taken, Rob)


(Anna, taken, Rob)


(it, taken, I)


(simplicity, taken, I)


(Who, knows, '')


In [180]:
displacy.render(nlp("I was taken aback by the sheer simplicity of it"))

In [181]:
displacy.render(nlp("I was taken aback by the sheer simplicity"))

In [187]:
def print_entities(pipeline, text):
    
    # Create a document 
    document = pipeline(text)
    
    # Entity text & label extraction
    for entity in document.ents:
        print(entity.text + '->', entity.label_)
        
        
def visualize_entities(pipeline, text):
    
    # Create a document 
    document = pipeline(text)
        
    # Show entities in pretty manner
    displacy.render(document, jupyter=True, style='ent')

In [188]:
short_text = """Amy Schneider, an engineering manager from Oakland, California, became the first woman and the fourth person on “Jeopardy!” to earn more than $1 million in winnings on Friday’s episode."""

long_text = """Good news for consumers, undoubtedly, and good news also for investors. Apple’s recent results, covering the three months to December 31 2016, saw the company’s chief financial officer Luca Maestri announce: ‘We returned nearly $15 billion to investors through share re-purchases and dividends during the quarter.’ The quarterly dividend itself was 57 cents a share, identical to the dividend for the previous three quarters and up on the 52 cents paid for each of the four quarters before that.
Business is brisk at Apple. On January 31, Tim Cook, Apple’s chief executive, said of the last three months of 2016: ‘We’re thrilled to report that our holiday quarter results generated Apple’s highest quarterly revenue ever, and broke multiple records along the way. We sold more iPhones than ever before and set all-time revenue records for iPhone, Services, Mac and Apple Watch"""


In [189]:
nlp_sm = spacy.load("en_core_web_sm")
nlp_lg = spacy.load("en_core_web_lg")
roberta_nlp = spacy.load("en_core_web_trf")

In [196]:
visualize_entities(nlp_sm, long_text)

In [197]:
visualize_entities(nlp_lg, long_text)

In [198]:
visualize_entities(roberta_nlp, long_text)