In [1]:
import spacy

# Load the small English pipeline once; every later cell reuses this `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Double-quoted literal: the apostrophe needs no escape, the inner quotation marks do.
my_string = "\"We're moving to L.A.!\""

In [3]:
# Bare expression: the notebook echoes the repr, so the outer quotes and the escape are shown.
my_string

'"We\'re moving to L.A.!"'

In [4]:
# print() shows the string's actual contents, without the repr quoting.
print(my_string)

"We're moving to L.A.!"


In [5]:
# Run the pipeline on the raw string to obtain a Doc that can be iterated token by token.
doc = nlp(my_string)

In [6]:
# One token per line: quotes, the contraction and the final "!" become separate tokens,
# while "L.A." stays in one piece.
print(*(tok.text for tok in doc), sep="\n")

"
We
're
moving
to
L.A.
!
"


In [7]:
# Sample text containing a hyphenated word, an e-mail address and a URL.
doc2 = nlp(
    "We are here to help! Send snil-mail, email support@outsite.com "
    "or visit us at http://www.outsite.com!"
)

In [8]:
# The e-mail address and the URL come out as single tokens; the hyphen is split off.
print(*doc2, sep="\n")

We
are
here
to
help
!
Send
snil
-
mail
,
email
support@outsite.com
or
visit
us
at
http://www.outsite.com
!


In [9]:
# Sample text with a distance unit and a currency amount.
doc3 = nlp("A 5km NYC cab ride costs $10.30")

In [10]:
# "5km" and "$10.30" are each split into a number and a unit/symbol token.
for token in doc3:
    print(token)

A
5
km
NYC
cab
ride
costs
$
10.30


In [11]:
# Sample text with abbreviations that end in a period.
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")

In [12]:
# "St." and "U.S." keep their periods; only the sentence-final period is split off.
print(*doc4, sep="\n")

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [13]:
# len() of a Doc is its number of tokens.
len(doc4)

11

### Vocab entry:

A Vocab object contains the full library of items (the vocabulary), so from any Doc object you can access it through the `vocab` attribute.

In [14]:
# The vocabulary is reachable from a Doc through its `vocab` attribute.
doc4.vocab

<spacy.vocab.Vocab at 0x13af3f698b8>

In [15]:
# Number of entries currently in the vocabulary. Uses doc4, the same Doc as the
# previous cell, rather than the unrelated `doc` from the first example.
len(doc4.vocab)

523

In [16]:
# Short sentence used below to demonstrate indexing and slicing a Doc.
doc5 = nlp("It is better to give than to receive.")

In [17]:
doc5[0]      # Index a Doc like a list: position 0 is the first token

It

In [18]:
doc5[0:5]     # Slice of tokens: positions 0-4, i.e. the first five tokens

It is better to give

- Tokens cannot be reassigned.

- Although Doc objects can be thought of as lists of tokens, <b>they do not support item reassignment.</b>

In [19]:
# Item assignment on a Doc raises TypeError. Catch it and show the message so the
# demonstration stays visible without aborting a "Restart & Run All".
try:
    doc5[0] = 'test'
except TypeError as err:
    print("TypeError:", err)

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

## Named Entities

- spaCy can go a step beyond simple tokens: it can also recognize named entities.

- Named entities add another layer of context. The language model loaded at the top of the notebook recognizes that certain words are organization names, others are locations, and still other combinations relate to things like money or dates.

- Named entities are accessible through the `ents` property of a Doc object.

In [20]:
# Sentence mentioning a company, a place and an amount of money.
doc6 = nlp("Apple to build a Hong Kong factory of $6 Million")

In [21]:
# Show all tokens on one line, each followed by " | ", with no trailing newline.
print("".join(tok.text + " | " for tok in doc6), end="")

Apple | to | build | a | Hong | Kong | factory | of | $ | 6 | Million | 

In [22]:
# Entities may span several tokens ("Hong Kong", "$6 Million"); print one per line.
for span in doc6.ents:
    print(span)

Apple
Hong Kong
$6 Million


`These are all named entities:`

Apple 

Hong Kong

$6 Million

In [23]:
# For each entity show its text, its label, and spaCy's description of that label.
for ent in doc6.ents:
    label = ent.label_
    description = str(spacy.explain(label))
    print(ent, "<---", label, "<====", description)

Apple <--- ORG <==== Companies, agencies, institutions, etc.
Hong Kong <--- GPE <==== Countries, cities, states
$6 Million <--- MONEY <==== Monetary values, including unit


## Noun Chunks

In [24]:
# Sentence used to demonstrate noun chunks.
doc7 = nlp("Autonomous cars shifts insurance liablitiy towards manufacturers.")

In [25]:
# Print each noun chunk found in the sentence on its own line.
for noun_chunk in doc7.noun_chunks:
    print(noun_chunk)

Autonomous cars shifts insurance
manufacturers


# Visualization of the Tokens

In [26]:
from spacy import displacy

# Render the dependency parse inline in the notebook.
# The 'distance' option controls the spacing between words in the diagram.
doc = nlp("Apple is going to built a U.K. factory for $6 million.")

displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"distance": 90},
)

In [27]:
# Render the text inline with its named entities highlighted (style "ent").
doc = nlp(
    "Over the last quarter Apple sold nearly 20 thousand iPods "
    "for a profit of $6 million."
)

displacy.render(doc, style="ent", jupyter=True)

In [28]:
# displacy.serve starts a local web server for the diagram (see the output below).
# NOTE: the cell keeps running while the server is up; interrupt the kernel to
# shut it down before running the following cells.
doc = nlp("This is a sentence")

displacy.serve(doc, style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [29]:
# Entity view of a sentence containing a year, a name and an amount of money.
doc = nlp("In 2020 InfinityCodeX made profit of Rs.10 million.")

displacy.render(doc, style="ent", jupyter=True)