In [2]:
import spacy

In [3]:
# Load the spaCy pipeline named 'en_core_web_sm' once; later cells call `sp(...)`
# to turn raw text into parsed documents.
# Assumes the `en_core_web_sm` package is installed in the kernel's environment — TODO confirm.
sp = spacy.load('en_core_web_sm')

In [8]:
# Parse a multi-sentence sample paragraph. The literal is split across lines only
# for readability; adjacent string literals are concatenated, so the text handed
# to the pipeline is unchanged (including the periods that have no space after them).
sentence = sp(
    'Text messaging.Texting, is the act of composing and sending electronic messages.'
    'Typically consisting of alphabetic and numeric characters. '
    'Between two or more users of mobile devices, desktops/laptops, '
    'or other type of compatible computer.'
)

In [9]:
# For every token in the parsed text, print its text, its coarse
# part-of-speech tag (pos_) and its dependency label (dep_).
for token in sentence:
    print(f'{token.text} {token.pos_} {token.dep_}')

Text NOUN compound
messaging NOUN ROOT
. PUNCT punct
Texting NOUN nsubj
, PUNCT punct
is AUX ROOT
the DET det
act NOUN attr
of ADP prep
composing VERB pcomp
and CCONJ cc
sending VERB conj
electronic ADJ amod
messages NOUN dobj
. PUNCT punct
Typically ADV advmod
consisting VERB ROOT
of ADP prep
alphabetic ADJ amod
and CCONJ cc
numeric ADJ conj
characters NOUN pobj
. PUNCT punct
Between ADP ROOT
two NUM nummod
or CCONJ cc
more ADJ conj
users NOUN pobj
of ADP prep
mobile ADJ amod
devices NOUN pobj
, PUNCT punct
desktops NOUN nmod
/ SYM punct
laptops NOUN conj
, PUNCT punct
or CCONJ cc
other ADJ amod
type NOUN conj
of ADP prep
compatible ADJ amod
computer NOUN pobj
. PUNCT punct


In [11]:
# Sentence segmentation: iterate over the sentence spans of the parsed
# document and print each one on its own line.
for sentence_span in sentence.sents:
    print(sentence_span)

Text messaging.
Texting, is the act of composing and sending electronic messages.
Typically consisting of alphabetic and numeric characters.
Between two or more users of mobile devices, desktops/laptops, or other type of compatible computer.


In [21]:
# Re-bind `sentence` to a short example containing names and a money amount;
# the NER and noun-chunk cells that follow read this document.
sentence = sp('Manchester United is looking for Ronaldo for $100 million per year contract')

In [22]:
# Named-entity recognition: for each detected entity print its text, its label,
# and spaCy's human-readable description of that label.
# NOTE(review): in the output recorded with this notebook the labels for
# "Manchester United" and "Ronaldo" look swapped (PERSON / ORG), so treat the
# model's predictions as approximate.
for ent in sentence.ents:
    print(f'{ent.text} - {ent.label_} - {spacy.explain(ent.label_)}')

Manchester United - PERSON - People, including fictional
Ronaldo - ORG - Companies, agencies, institutions, etc.
$100 million - MONEY - Monetary values, including unit


In [23]:
# Noun chunks: print the text of every span yielded by `noun_chunks`
# (these are multi-word phrases such as "Manchester United", not single nouns).
for chunk in sentence.noun_chunks:
    print(chunk.text)

Manchester United
Ronaldo
year contract


In [25]:
# Lemmatization: parse four related word forms and print each token
# next to the lemma the pipeline assigns to it.
sentence1 = sp('compute computer computed computing')

for token in sentence1:
    print(f'{token.text} {token.lemma_}')

compute compute
computer computer
computed compute
computing computing


In [28]:
# Look up the human-readable description of the fine-grained tag (tag_)
# assigned to the token at index 2 of `sentence1`.
spacy.explain(sentence1[2].tag_)

'verb, past tense'

In [30]:
# Word, POS tag, fine-grained tag, and the explanation of that tag, as an
# aligned table.
# The text column is sized to the longest token: a fixed width of 5 is narrower
# than tokens such as "computing", which pushed the following columns out of
# alignment. `default=0` keeps this safe for an empty document.
text_width = max((len(token.text) for token in sentence1), default=0)

for token in sentence1:
    print(f'{token.text:<{text_width}} {token.pos_:<5} {token.tag_:<5} {spacy.explain(token.tag_)}')

compute PROPN NNP   noun, proper singular
computer NOUN  NN    noun, singular or mass
computed VERB  VBD   verb, past tense
computing NOUN  NN    noun, singular or mass


In [36]:
# "google" in verb position: parse the question, pick the token at index 2,
# and print its text, POS tag, fine-grained tag and the tag's explanation.
sen1 = sp('Can you google it?')
word = sen1[2]

print(f'{word.text:<5} {word.pos_:<5} {word.tag_:<5} {spacy.explain(word.tag_)}')

google VERB  VB    verb, base form


In [38]:
# "google" in noun position: parse the question, pick the token at index 5,
# and print its text, POS tag, fine-grained tag and the tag's explanation.
sen2 = sp('Can you search it on google?')
word = sen2[5]

print(f'{word.text:<5} {word.pos_:<5} {word.tag_:<5} {spacy.explain(word.tag_)}')

google PROPN NNP   noun, proper singular


In [39]:
# Visualize the dependency parse of a three-sentence example with displaCy,
# rendered inline in the notebook (jupyter=True).
from spacy import displacy

sen = sp('AI is going to take over the world. Rubbish. Do you think elon is AI?')

# `distance` is a displaCy rendering option; presumably it controls the spacing
# between words in the diagram — see the displaCy documentation to confirm.
displacy.render(
    sen,
    style='dep',
    jupyter=True,
    options={'distance': 85},
)

In [48]:
# Re-bind `sen` to a new example and count how many of its detected entities
# carry the PERSON label; the count is the cell's displayed value.
sen = sp('Manchester united is going to buy Harry Kane. Ronaldo 100 million')

sum(1 for ent in sen.ents if ent.label_ == 'PERSON')

1

In [51]:
# Render `sen` with displaCy's entity style (style='ent') inline in the notebook,
# so the entities counted in the previous cell can be inspected visually.
displacy.render(sen, style='ent', jupyter=True)