In [7]:
import spacy
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [1]:
# Keyword lists that signal each intent.
patterns = {
    'greet': ['hello', 'hi', 'hey'],
    'goodbye': ['bye', 'farewell'],
    'thankyou': ['thank', 'thx'],
}

# Show every intent next to its trigger keywords.
for intent, keywords in patterns.items():
    print(intent, " : ", keywords)

greet  :  ['hello', 'hi', 'hey']
goodbye  :  ['bye', 'farewell']
thankyou  :  ['thank', 'thx']


In [2]:
# Load the English pipeline.
# The bare 'en' name is a shortcut link that only resolves on older spaCy
# installs; spaCy 3 removed shortcut links and raises OSError for it. Try the
# legacy name first so existing environments keep their linked model, then fall
# back to the explicit package name.
try:
    nlp = spacy.load('en')
except OSError:
    # en_core_web_md ships word vectors, which the embedding cells rely on.
    nlp = spacy.load('en_core_web_md')

In [8]:
sentences = ['Hey', 'Hi', 'How are you']

# Number of rows of the embedding matrix: one per sentence.
n_sentences = len(sentences)
print('n_sentences: ', n_sentences)

# Number of columns: the length of the vectors stored in the pipeline's vocab.
embedding_dim = nlp.vocab.vectors_length
print('embedding_dim: ', embedding_dim)

# Start from an all-zero matrix and fill it row by row.
X = np.zeros((n_sentences, embedding_dim))
for row, text in enumerate(sentences):
    # Run the sentence through the pipeline and store the document vector.
    X[row] = nlp(text).vector

n_sentences:  3
embedding_dim:  300


In [31]:
# Entity labels to keep; every slot starts out empty (None).
include_entities = ['DATE', 'ORG', 'PERSON']
ents = {label: None for label in include_entities}
print(ents)

doc = nlp("My friend Marry is working in Google since 2001")
for entity in doc.ents:
    print(entity.text, entity.label_)
    # Skip labels we are not collecting.
    if entity.label_ not in include_entities:
        continue
    # A later entity with the same label overwrites the earlier one.
    ents[entity.label_] = entity.text
print(ents)
    
# Pick the two city tokens by position and list the ancestors of each one.
doc = nlp('a flight to Shanghai from Singapore')
shanghai = doc[3]
singapore = doc[5]
print(shanghai, ' : ', singapore)
for city in (shanghai, singapore):
    print(list(city.ancestors))


doc = nlp("let's see that jacket in red and some blue pajama")
# Token positions are hard-coded for this exact sentence.
items = [doc[4], doc[10]]   # [jacket, pajama]
colors = [doc[6], doc[9]]   # [red, blue]

# Walk up from each color and stop at the first ancestor that is one of the items.
for color in colors:
    for ancestor in color.ancestors:
        print(color, " : ", ancestor)
        if ancestor not in items:
            continue
        print("color {} belongs to item {}".format(color, ancestor))
        break

{'DATE': None, 'ORG': None, 'PERSON': None}
Marry PERSON
Google ORG
2001 DATE
{'DATE': '2001', 'ORG': 'Google', 'PERSON': 'Marry'}
Shanghai  :  Singapore
[to, flight]
[from, flight]
red  :  in
red  :  jacket
color red belongs to item jacket
blue  :  pajama
color blue belongs to item pajama


In [32]:
# Split the entities of a message into negated and non-negated ones by looking
# for a negation word in the tokens that precede each entity.
doc = nlp('not sushi, maybe pizza?')
indices = [1, 4]  # token positions of the entities in doc
# Compared against whole tokens (not substrings), so words such as "another"
# or "nothing" are not mistaken for a negation. "n't" is listed on the
# assumption that the tokenizer splits contractions like "don't" into
# "do" + "n't" — confirm for the loaded model.
negation_words = {"not", "n't"}

ents, negated_ents = [], []
start = 0
for i in indices:
    # Tokens from the previous entity (inclusive) up to, but excluding, this one.
    window = doc[start:i]
    # lower_ makes the check case-insensitive ("Not sushi" counts as negated).
    if any(tok.lower_ in negation_words for tok in window):
        negated_ents.append(doc[i])
    else:
        ents.append(doc[i])
    start = i

print(negated_ents)
print(ents)

[sushi]
[pizza]


In [None]:
test_message = """
i would like to find a flight from charlotte
to las vegas that makes a stop in st. louis"""

# Embed the message the same way the rows of X were built (document vector).
test_x = nlp(test_message).vector

# cosine_similarity only accepts 2-D inputs, so present the query as a
# single-row matrix and score it against every row of X in one call.
# ravel() flattens the resulting column into one score per row of X.
scores = cosine_similarity(X, test_x.reshape(1, -1)).ravel()

# NOTE(review): labels_train is not defined in the visible cells; it is assumed
# to hold one label per row of X, in the same order — confirm before running.
labels_train[np.argmax(scores)]