In [1]:
import spacy

from spacy import displacy
# Ask spaCy to use a GPU before any pipeline is loaded.
# NOTE(review): per the spaCy docs this falls back to CPU when no GPU is usable — confirm.
spacy.prefer_gpu()
# Load the "en_core_web_sm" English pipeline once; `nlp` is reused by the cells below.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Sample sentence run through the pipeline; `doc` is reused by the next cells.
text = "Apple is looking at buying U.K. startup for $1 billion"
doc = nlp(text)

# Show every named entity as "<surface text> <entity label>".
for span in doc.ents:
    entity_fields = (span.text, span.label_)
    print(*entity_fields)

Apple ORG
U.K. GPE
$1 billion MONEY


In [3]:
# For each token show: text, numeric POS id, POS tag name, dependency label.
for tok in doc:
    columns = (tok.text, tok.pos, tok.pos_, tok.dep_)
    print(*columns)

Apple 96 PROPN nsubj
is 87 AUX aux
looking 100 VERB ROOT
at 85 ADP prep
buying 100 VERB pcomp
U.K. 96 PROPN nsubj
startup 100 VERB ccomp
for 85 ADP prep
$ 99 SYM quantmod
1 93 NUM compound
billion 93 NUM pobj


In [4]:
# Printing the Doc shows the sentence it was built from (compare the output below).
print(doc)

Apple is looking at buying U.K. startup for $1 billion


In [5]:
# Look up the human-readable description of each POS tag seen above.
for tag_name in ('PROPN', 'AUX'):
    print(spacy.explain(tag_name))


proper noun
auxiliary


In [6]:
# Build the Doc from one long sentence, split across adjacent literals for readability.
doc3 = nlp(
    'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    'cartoonist Allen Saunders and published in Reader\'s Digest in 1957, when Lennon was 17.'
)

# Token positions bounding the quoted phrase, quotation marks included (see the output).
quote_start, quote_end = 16, 30
life_quote = doc3[quote_start:quote_end]

# Print the slice, a blank line, then its type to show that slicing a Doc yields a Span.
print(life_quote, end="\n\n")
print(type(life_quote))

"Life is what happens to us while we are making other plans"

<class 'spacy.tokens.span.Span'>


In [7]:
# Three short sentences in one Doc, used to demonstrate sentence segmentation.
doc4 = nlp('This is the first sentence. This is another sentence. This is the last sentence.')

# `Doc.sents` yields one span per detected sentence; print each on its own line.
for sentence_span in doc4.sents:
    print(sentence_span)

This is the first sentence.
This is another sentence.
This is the last sentence.


In [8]:
# Compare a token that opens a sentence (index 6) with one that does not (index 8).
for position in (6, 8):
    tok = doc4[position]
    print(f"word: {tok}, start a sentence: {tok.is_sent_start}")

word: This, start a sentence: True
word: another, start a sentence: False


In [9]:
# A short quoted sentence containing a contraction, an abbreviation and punctuation.
mystring = '"We\'re moving to L.A.!"'

# Show the raw string, followed by a blank separator line.
print(mystring, end="\n\n")

# Run the pipeline on the string (this rebinds `doc`).
doc = nlp(mystring)

# Emit one token per line to show how the text was split.
for tok in doc:
    print(tok.text)

"We're moving to L.A.!"

"
We
're
moving
to
L.A.
!
"


In [10]:
# Sentence reused by the entity cell that follows.
doc8 = nlp("Apple to build a Hong Kong factory for $6 million")

# Print all tokens on a single line, each followed by the separator.
separator = ' | '
for tok in doc8:
    print(tok.text, end=separator)
# Finish the token line and leave an empty line after it.
print('\n')

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 



In [11]:
# For every entity print its text, its label and the label's description,
# one per line, followed by a blank line between entities.
for span in doc8.ents:
    label = span.label_
    print(span, label, spacy.explain(label), sep="\n")
    print()

Apple
ORG
Companies, agencies, institutions, etc.

Hong Kong
GPE
Countries, cities, states

$6 million
MONEY
Monetary values, including unit



In [12]:
# Example 1: a plain everyday sentence.
doc9 = nlp("The quick brown fox jumps over the lazy dog.")
# Example 2: sentence with technical nouns
doc10 = nlp("Autonomous cars shift insurance liability toward manufacturers.")

# Heading plus parsed Doc for each example, in display order.
chunk_sections = (
    ("Noun chunks in first sentence:", doc9),
    ("Noun chunks in second sentence:", doc10),
)

for position, (heading, parsed) in enumerate(chunk_sections):
    # Blank line between sections, but not before the first one.
    if position:
        print()
    print(heading)
    for chunk in parsed.noun_chunks:
        print(chunk.text)

Noun chunks in first sentence:
The quick brown fox
the lazy dog

Noun chunks in second sentence:
Autonomous cars
insurance liability
manufacturers


In [13]:
# spaCy - visualizing the dependency parse with displaCy

In [14]:
# Sentence whose dependency parse is visualized below.
text = "The quick brown fox jumps over the lazy dog."

# Reuse the `nlp` pipeline loaded at the top of the notebook. Loading
# "en_core_web_sm" a second time only repeats an expensive model load and
# scatters setup through the notebook; this cell already relies on the first
# cell for the `displacy` import.
doc = nlp(text)

# Draw the dependency arcs inline in the notebook.
# NOTE(review): per the displaCy docs, "distance" is the spacing between words in pixels — confirm.
displacy.render(doc, style="dep", jupyter=True, options={"distance": 100})

In [15]:
# End of class 20/4