In [1]:
# Import spaCy and load the language library
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
# Create a string that includes opening and closing quotation marks
mystring = '"We\'re moving to L.A.!"'
print(mystring)

"We're moving to L.A.!"


In [3]:
# Create a Doc object and explore tokens
doc = nlp(mystring)

for token in doc:
    print(token.text, end=' | ')

" | We | 're | moving | to | L.A. | ! | " | 

In [4]:
doc2 = nlp(u"We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")

for t in doc2:
    print(t, t.pos_, t.dep_)

We PRON nsubj
're AUX ROOT
here ADV advmod
to PART aux
help VERB advcl
! PUNCT punct
Send VERB ROOT
snail NOUN compound
- PUNCT punct
mail NOUN dobj
, PUNCT punct
email NOUN compound
support@oursite.com X dobj
or CCONJ cc
visit VERB conj
us PRON dobj
at ADP prep
http://www.oursite.com X pobj
! PUNCT punct


In [5]:
doc3 = nlp(u'A 5km NYC cab ride costs $10.30')

for t in doc3:
    print(t, t.pos_, t.dep_)

A DET det
5 NUM nummod
km NOUN compound
NYC PROPN compound
cab NOUN compound
ride NOUN nsubj
costs VERB ROOT
$ SYM nmod
10.30 NUM dobj


In [6]:
doc4 = nlp(u"Let's visit St. Louis in the U.S. next year.")

for t in doc4:
    print(t, t.dep_, t.pos_)

Let ROOT VERB
's nsubj PRON
visit ccomp VERB
St. compound PROPN
Louis dobj PROPN
in prep ADP
the det DET
U.S. pobj PROPN
next amod ADJ
year npadvmod NOUN
. punct PUNCT


In [8]:
len(doc4), len(doc3), len(doc), len(doc2)

(11, 9, 8, 19)

In [9]:
len(doc.vocab)

794

In [10]:
len(doc4.vocab)

794

In [11]:
doc5 = nlp(u'It is better to give than to receive.')

# Retrieve the third token:
doc5[2]

better

In [13]:
doc5[2:5]

better to give

In [14]:
doc5[-4:]

than to receive.

In [15]:
doc6 = nlp(u'My dinner was horrible.')
doc7 = nlp(u'Your dinner was delicious.')

In [16]:
doc6[3] = doc7[3]

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

# Named Entities

In [24]:
doc8 = nlp(u'Apple to build a Hong Kong factory for $6 million')

for token in doc8:
    print(token.text, end=' | ')

print('\n----')

for ent in doc8.ents:
    print(ent.text+' \n' +ent.label_+' \n '+str(spacy.explain(ent.label_)))

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 
----
Apple 
ORG 
 Companies, agencies, institutions, etc.
Hong Kong 
GPE 
 Countries, cities, states
$6 million 
MONEY 
 Monetary values, including unit


In [22]:
for ent in doc8.ents:
    print(ent)
    print(ent.label_)
    print("\n")

Apple
ORG


Hong Kong
GPE


$6 million
MONEY




In [19]:
dir(doc8)

['_',
 '__bytes__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__pyx_vtable__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '_bulk_merge',
 '_context',
 '_get_array_attrs',
 '_realloc',
 '_vector',
 '_vector_norm',
 'cats',
 'char_span',
 'copy',
 'count_by',
 'doc',
 'ents',
 'extend_tensor',
 'from_array',
 'from_bytes',
 'from_dict',
 'from_disk',
 'from_docs',
 'from_json',
 'get_extension',
 'get_lca_matrix',
 'has_annotation',
 'has_extension',
 'has_unknown_spaces',
 'has_vector',
 'is_nered',
 'is_parsed',
 'is_sentenced',
 'is_tagged',
 'lang',
 'lang_',
 'mem',
 'noun_chunks',
 'noun_chunks_iterator',
 'remove_extension',
 'retokenize',
 'sentiment'

In [20]:
len(doc8.ents)

3

# Noun Chunks

In [23]:
doc9 = nlp(u"Autonomous cars shift insurance liability toward manufacturers.")

for chunk in doc9.noun_chunks:
    print(chunk.text)

Autonomous cars
insurance liability
manufacturers


In [25]:
doc10 = nlp(u"Red cars do not carry higher insurance rates.")

for chunk in doc10.noun_chunks:
    print(chunk.text)

Red cars
higher insurance rates


In [26]:
doc11 = nlp(u"He was a one-eyed, one-horned, flying, purple people-eater.")

for chunk in doc11.noun_chunks:
    print(chunk.text)

He
a one-eyed, one-horned, flying, purple people-eater


# Built-in Visualizers

In [27]:
from spacy import displacy

doc = nlp(u'Apple is going to build a U.K. factory for $6 million.')
displacy.render(doc, style='dep', jupyter=True, options={'distance': 110})

In [28]:
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(doc, style='ent', jupyter=True)

# Creating Visualizations Outside of Jupyter

In [1]:
doc = nlp(u'This is a sentence.')
displacy.serve(doc, style='dep', port=5050)

NameError: name 'nlp' is not defined