In [1]:
import spacy
import nltk

In [2]:
# Load the `en_core_web_sm` spaCy pipeline once; the cells below call it as `nlp`.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Run the pipeline over a sample sentence; the result is a spaCy Doc of tokens.
doc = nlp('I am learning how to build chatbots')

In [4]:
# Show every token next to its coarse-grained part-of-speech tag.
for token in doc:
    print(f"{token.text} {token.pos_}")

I PRON
am AUX
learning VERB
how SCONJ
to PART
build VERB
chatbots NOUN


In [5]:
# A longer sentence (quotes, a proper noun, a number) for inspecting token attributes.
doc = nlp('Google release "Move Mirror" AI experiment that matches your pose from 80,000 images')

In [6]:
# For every token, print one tuple holding: text, lemma, coarse POS tag,
# fine-grained tag, dependency label, word shape, is-alphabetic flag and
# is-stop-word flag.
for token in doc:
    token_info = (
        token.text,
        token.lemma_,
        token.pos_,
        token.tag_,
        token.dep_,
        token.shape_,
        token.is_alpha,
        token.is_stop,
    )
    print(token_info)

('Google', 'Google', 'PROPN', 'NNP', 'compound', 'Xxxxx', True, False)
('release', 'release', 'NOUN', 'NN', 'ROOT', 'xxxx', True, False)
('"', '"', 'PUNCT', '``', 'punct', '"', False, False)
('Move', 'Move', 'PROPN', 'NNP', 'nmod', 'Xxxx', True, True)
('Mirror', 'Mirror', 'PROPN', 'NNP', 'nmod', 'Xxxxx', True, False)
('"', '"', 'PUNCT', "''", 'punct', '"', False, False)
('AI', 'AI', 'PROPN', 'NNP', 'compound', 'XX', True, False)
('experiment', 'experiment', 'NOUN', 'NN', 'appos', 'xxxx', True, False)
('that', 'that', 'PRON', 'WDT', 'nsubj', 'xxxx', True, True)
('matches', 'match', 'VERB', 'VBZ', 'relcl', 'xxxx', True, False)
('your', 'your', 'PRON', 'PRP$', 'poss', 'xxxx', True, True)
('pose', 'pose', 'NOUN', 'NN', 'dobj', 'xxxx', True, False)
('from', 'from', 'ADP', 'IN', 'prep', 'xxxx', True, True)
('80,000', '80,000', 'NUM', 'CD', 'nummod', 'dd,ddd', False, False)
('images', 'image', 'NOUN', 'NNS', 'pobj', 'xxxx', True, False)


In [7]:
# Single-word document used to compare lemmatization with stemming.
doc = nlp('fastest')

In [8]:
# Print the lemma (dictionary base form) spaCy assigns to each token.
for token in doc:
    print(token.lemma_)

fast


In [9]:
from nltk.stem.porter import *

In [10]:
from nltk.stem.snowball import SnowballStemmer

In [11]:
# Instantiate NLTK's Porter stemmer and the English Snowball stemmer so their
# output can be compared with spaCy's lemma for the same word.
porter_stemmer = PorterStemmer()
snowball_stemmer = SnowballStemmer("english")

In [12]:
# Stem the same word with both stemmers (Porter first, then Snowball).
for stemmer in (porter_stemmer, snowball_stemmer):
    print(stemmer.stem("fastest"))

fastest
fastest


In [13]:
# Sample text containing an organisation, two places and a monetary amount.
my_string = "Google has its headquarters in Mountain View, California having revenue amounted to 109.65 billion US dollars"

In [14]:
# Parse the sample text; named entities become available as `doc.ents`.
doc = nlp(my_string)

In [15]:
# `ent.label` (no trailing underscore) is the entity type's integer ID, which
# is why numbers are printed here. Use `ent.label_` to get the readable
# string name (e.g. "GPE") instead.
for ent in doc.ents:
    print(ent.text, ent.label)

Google 383
Mountain View 384
California 384
109.65 billion US dollars 394


In [16]:
# Second NER sample: a person, a date, a place and a nationality.
my_string = "Mark Zuckerberg born May 14, 1984 in New York is an American technology entrepreneur and philanthropist best known for co-founding and leading Facebook as its chairman and CEO."
doc = nlp(my_string)

In [17]:
# Print each entity with its string label (`label_`, with trailing underscore).
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Mark Zuckerberg PERSON
May 14, 1984 DATE
New York GPE
American NORP


In [18]:
from spacy.lang.en.stop_words import STOP_WORDS

In [19]:
# Show spaCy's English stop-word collection imported above.
print(STOP_WORDS)

{'keep', 'about', '’ve', 'itself', 'by', 'through', 'a', 'formerly', 'their', 'as', 'five', 'beyond', 'without', 'anywhere', 'an', 'against', 'afterwards', 'next', 'whoever', 'would', 'quite', 'three', 'in', 'same', 'only', 'twenty', 'must', 'over', 'namely', 'seemed', 'now', 'thereby', 'each', 'seeming', 'nevertheless', 'fifty', 'along', 'his', 'onto', 'from', 'please', 'say', 'who', 'top', 'not', 'of', 'sometimes', 'hereupon', 'nobody', 'often', 'yourself', 'although', 'ourselves', 'whence', 'to', 'yet', 'seem', 'together', 'otherwise', 'used', 'another', 'various', 'even', 'you', 're', 'which', 'put', 'whereas', 'side', 'take', 'still', 'one', 'hereby', 'therein', 'call', 'everyone', 'whose', '’re', 'besides', 'amount', 'nor', 'make', 'where', 'else', 'most', 'it', 'should', 'latter', 'been', 'seems', 'ten', 'him', 'unless', 'until', 'such', 'wherein', 'myself', 'ours', 'six', 'because', '‘s', 'within', 'when', 'nothing', 'before', 'both', 'noone', "n't", 'someone', 'empty', 'alone'

In [20]:
# Sentence used to explore the dependency tree (ancestors of each city token).
doc = nlp('Book me a flight from Bangalore to Goa')

In [21]:
# Token 5 is "Bangalore" and token 7 is "Goa".
blr = doc[5]
goa = doc[7]
# Tokens above "Bangalore" in the dependency tree, nearest first.
list(blr.ancestors)


[from, flight, Book]

In [22]:
# Tokens above "Goa" in the dependency tree.
list(goa.ancestors)

[to, flight, Book]

In [23]:
# doc[3] is "flight" and doc[5] is "Bangalore": check that "flight" sits above
# "Bangalore" in the dependency tree.
doc[3].is_ancestor(doc[5])

True

In [24]:
# Sentence with two bookings, each tied to a different target location.
doc = nlp('Book a table at the restaurant and the taxi to the hotel')

In [25]:
# The things being booked: token 2 ("table") and token 8 ("taxi").
tasks = (doc[2], doc[8])

In [26]:
# Where each booking applies: token 5 ("restaurant") and token 11 ("hotel").
tasks_target = (doc[5], doc[11])

In [27]:
# For each target token, walk up its dependency ancestors and report the
# first ancestor that is one of the task tokens. Note that `task` here
# iterates over the *targets* (restaurant, hotel), not over `tasks`.
for task in tasks_target:
    owner = next((tok for tok in task.ancestors if tok in tasks), None)
    if owner is not None:
        print("Booking of {} belongs to {}".format(owner, task))

Booking of table belongs to restaurant
Booking of taxi belongs to hotel


In [28]:
from spacy import displacy

In [148]:
# Raw query text; kept as a plain str because character offsets are looked up in it below.
string = "What are some places to visit in Berlin and stay in Lubeck"

In [149]:
# Parse the query so its entities and POS tags can be inspected.
doc = nlp(string)

In [150]:
# Keep only geopolitical entities (GPE). Compare the string label `label_`
# rather than the hard-coded integer ID 384: the integer is an internal value
# that is unreadable here and may differ between spaCy versions.
places = [ent.text for ent in doc.ents if ent.label_ == "GPE"]
places

['Berlin', 'Lubeck']

In [151]:
# Map each place name to the character offset of its first occurrence in the
# raw query; the mapping is wrapped in a one-element list.
indexes = [{place: string.find(place) for place in places}]
indexes

[{'Berlin': 33, 'Lubeck': 52}]

In [152]:
# Unwrap the single dict held in the one-element `indexes` list.
mydict= indexes[0]
mydict

{'Berlin': 33, 'Lubeck': 52}

In [153]:
# Collect the text of every token whose coarse POS tag is VERB.
verbs = [token.text for token in doc if token.pos_ == "VERB"]
verbs

['visit', 'stay']

In [154]:
def get_location(places, string):
    """Locate each word of `places` inside `string`.

    Args:
        places: sequence of substrings to look up.
        string: text to search in.

    Returns:
        A one-element list holding a dict that maps every entry of `places`
        to the index of its first occurrence in `string` (as given by
        `str.find`, so -1 when the entry does not occur).
    """
    offsets = {}
    for place in places:
        offsets[place] = string.find(place)
    return [offsets]

In [155]:
def get_lexicon_noun(doc):
    """Return the text of every geopolitical entity (GPE) found in `doc`.

    Entities are selected by their string label (`label_ == "GPE"`) instead
    of a hard-coded integer ID, because the integer is an internal value that
    is unreadable and may differ between spaCy versions.

    Args:
        doc: a parsed document exposing `.ents`, each entity having `.text`
            and `.label_`.

    Returns:
        List of entity texts, in document order.
    """
    return [ent.text for ent in doc.ents if ent.label_ == "GPE"]

In [156]:
def get_lexicon_verb(doc):
    """Return the text of every token in `doc` tagged as a verb.

    Args:
        doc: iterable of tokens, each exposing `.text` and `.pos_`.

    Returns:
        List of token texts whose `pos_` equals "VERB", in document order.
    """
    verbs = []
    for token in doc:
        if token.pos_ == "VERB":
            verbs.append(token.text)
    return verbs

In [160]:
def get(type, string, doc):
    """Map words of the requested kind to their character offsets in `string`.

    Args:
        type: 'noun' or 'verb' (case-insensitive). 'noun' selects the words
            returned by `get_lexicon_noun`, 'verb' those returned by
            `get_lexicon_verb`.
        string: raw text in which the words are located.
        doc: parsed document passed on to the lexicon helpers.

    Returns:
        Dict of word -> offset (first element of `get_location`'s result),
        or None when `type` is neither 'noun' nor 'verb'.
    """
    kind = type.lower()
    if kind == 'noun':
        words = get_lexicon_noun(doc)
    elif kind == 'verb':
        words = get_lexicon_verb(doc)
    else:
        # Unrecognised kind: no lookup is performed.
        return None
    return get_location(words, string)[0]

In [163]:
# Look up the character offset of each verb of the query in the raw text.
result = get('verb', string, doc)

In [164]:
# Display the verb -> offset mapping.
result

{'visit': 24, 'stay': 44}