In [1]:
import spacy

In [3]:
# Load the small English pipeline; `nlp` is the callable used by the following cells
# to turn raw text into Doc objects.
nlp = spacy.load('en_core_web_sm')

In [6]:
# NOTE(review): "t" looks like a typo for "at". It is kept unchanged because the
# recorded output of the next cell lists "t" as its own token.
doc = nlp("Tesla is looking t buying U.S. startup for $6 million")

In [10]:
# One line per token: surface text, coarse part-of-speech tag, dependency label.
for tok in doc:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
t NOUN dobj
buying VERB acl
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [11]:
# Inspect the loaded pipeline as (name, component object) pairs, in processing order.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x11415e030>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x11415deb0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x113d3a570>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1142a4a10>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1142aa050>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x113d3a7a0>)]

In [12]:
# Same pipeline, but only the component names.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [13]:
# Sentence containing a contraction ("isn't") to show how it is split into tokens.
doc2 = nlp("Tesla isn't looking into startups anymore.")

In [15]:
# One line per token: surface text, coarse part-of-speech tag, dependency label.
for tok in doc2:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla PROPN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [16]:
# Long example text, split across adjacent string literals (concatenated at
# compile time into one string with no embedded newlines).
doc3 = nlp(
    'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    'cartoonist Allen Saunders and published in Reader\'s Digest in 1957, when Lennon was 17.'
)

In [18]:
# Slice the Doc by token index. Per the recorded output, tokens 16..29 cover the
# quoted sentence including its opening and closing quotation marks.
quote = doc3[16:30]
quote

"Life is what happens to us while we are making other plans"

In [19]:
# Slicing a Doc yields a Span (see the recorded output), not a new Doc or a list.
type(quote)

spacy.tokens.span.Span

In [20]:
# Sentence segmentation: iterate over the sentence spans detected in the Doc.
doc4 = nlp("This is first sentence. This is another sentence. This is last sentence.")
for sent in doc4.sents:
    print(sent)

This is first sentence.
This is another sentence.
This is last sentence.


In [23]:
# Check whether the token at index 5 starts a sentence. Presumably this is the
# "This" opening the second sentence (tokens 0-4 being "This is first sentence .").
doc4[5].is_sent_start

True

In [24]:
# New video section: tokenization examples

In [25]:
import spacy 
# Reload the same small English pipeline for this section; this rebinds `nlp`.
nlp = spacy.load('en_core_web_sm')

In [26]:
# Text that contains both double quotes and an apostrophe.
mystring = "\"We're moving to L.A.!\""
print(mystring)

"We're moving to L.A.!"


In [29]:
# Tokenize the quoted string and list each token's text on its own line.
doc = nlp(mystring)
for tok in doc:
    print(tok.text)

"
We
're
moving
to
L.A.
!
"


In [30]:
# Hyphenated words, an e-mail address and a URL in one text, to show which of
# them the tokenizer splits (see the recorded output).
doc2 = nlp("We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")
for tok in doc2:
    print(tok.text)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!


In [31]:
# Units and currency: per the recorded output "5km" is split into number and
# unit, and "$" is separated from the amount.
doc3 = nlp('A 5km NYC cab ride costs $10.30')

for tok in doc3:
    print(tok)

A
5
km
NYC
cab
ride
costs
$
10.30


In [32]:
# Abbreviations with periods ("St.", "U.S.") stay single tokens in the recorded output.
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")

for tok in doc4:
    print(tok)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [33]:
# Number of tokens in doc4.
len(doc4)

11

In [36]:
# Size of the vocabulary object attached to doc4. This is not the number of
# tokens in the document (compare with len(doc4)).
len(doc4.vocab)

794

In [39]:
# Short sentence used below to demonstrate slicing a Doc.
doc5 = nlp("Better to give than receive")
for tok in doc5:
    print(tok)

Better
to
give
than
receive


In [41]:
# Slice out the first three tokens of doc5.
doc5[0:3]

Better to give

In [45]:
# Print all tokens on one line, separated by " | ", before looking at entities.
doc8 = nlp('Apple to build a Hong Kong factory for $6 million')
for tok in doc8:
    print(tok, end=" | ")

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 

In [50]:
# For every named entity: its text, its label, and spaCy's description of that
# label, followed by a blank line.
for ent in doc8.ents:
    label = ent.label_
    print(ent)
    print(label)
    print(str(spacy.explain(label)), end="\n\n")

Apple
ORG
Companies, agencies, institutions, etc.

Hong Kong
GPE
Countries, cities, states

$6 million
MONEY
Monetary values, including unit



In [52]:
# Noun chunks: print each noun phrase found in the sentence.
doc9 = nlp("Autonomous cars shift insurance liability toward manufacturers.")

for noun_phrase in doc9.noun_chunks:
    print(noun_phrase)

Autonomous cars
insurance liability
manufacturers


In [56]:
from spacy import displacy

doc = nlp('Apple is going to build a U.K. factory for $6 million.')

# Draw the dependency parse inline in the notebook; 'distance' is passed through
# to displaCy as a layout option.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 70},
)

In [57]:
# Highlight the named entities of the sentence inline in the notebook.
doc = nlp('Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(
    doc,
    style='ent',
    jupyter=True,
)

In [61]:
import nltk
from nltk.stem.porter import PorterStemmer

In [68]:
p_stemmer = PorterStemmer()

# Sample words; this list is reused by the Snowball stemmer cell.
words = ['run', 'runner', 'ran', 'runs', 'easily', 'fairly', 'fairness']

# Show each word next to its Porter stem.
for w in words:
    print(f"{w} -----> {p_stemmer.stem(w)}")

run -----> run
runner -----> runner
ran -----> ran
runs -----> run
easily -----> easili
fairly -----> fairli
fairness -----> fair


In [64]:
from nltk.stem.snowball import SnowballStemmer

In [69]:
s_stemmer = SnowballStemmer(language='english')

# Same word list as the Porter cell (`words` must already be defined), so the
# two stemmers' outputs can be compared line by line.
for w in words:
    print(f"{w} -----> {s_stemmer.stem(w)}")

run -----> run
runner -----> runner
ran -----> ran
runs -----> run
easily -----> easili
fairly -----> fair
fairness -----> fair


In [105]:
import spacy
# Reload the same small English pipeline for the lemmatization section; this rebinds `nlp`.
nlp = spacy.load("en_core_web_sm")

In [72]:
# Lemmatization: per token print text, POS tag, the lemma's integer hash
# (`lemma`) and the lemma string (`lemma_`), separated by " \t ".
doc1 = nlp("I am a runner running in a race because I love to run since I ran today")
for tok in doc1:
    columns = (tok.text, tok.pos_, tok.lemma, tok.lemma_)
    print(*columns, sep=' \t ')

I 	 PRON 	 4690420944186131903 	 I
am 	 AUX 	 10382539506755952630 	 be
a 	 DET 	 11901859001352538922 	 a
runner 	 NOUN 	 12640964157389618806 	 runner
running 	 VERB 	 12767647472892411841 	 run
in 	 ADP 	 3002984154512732771 	 in
a 	 DET 	 11901859001352538922 	 a
race 	 NOUN 	 8048469955494714898 	 race
because 	 SCONJ 	 16950148841647037698 	 because
I 	 PRON 	 4690420944186131903 	 I
love 	 VERB 	 3702023516439754181 	 love
to 	 PART 	 3791531372978436496 	 to
run 	 VERB 	 12767647472892411841 	 run
since 	 SCONJ 	 10066841407251338481 	 since
I 	 PRON 	 4690420944186131903 	 I
ran 	 VERB 	 12767647472892411841 	 run
today 	 NOUN 	 11042482332948150395 	 today


In [85]:
# Dump the pipeline's default stop-word set. It is an unordered set, so the
# printed order is arbitrary and the output is long.
print(nlp.Defaults.stop_words)

{'can', 'once', 'back', 'put', 'around', 'yours', 'go', 'yourself', 'who', 'n‘t', 'she', 'most', 'now', 'everything', 'off', 'along', 'only', 'out', 'hence', 'their', 'nobody', 'within', 'towards', 'see', 'your', 'often', 'made', 'whose', 'eight', 'the', 'if', '‘ll', 'do', 'beforehand', 'without', 'amongst', 'must', 'why', 'until', '’ll', 'six', 'through', 'really', 'by', 'please', 'besides', 'amount', 'whereupon', 'thus', 'elsewhere', 'on', 'they', 'am', 'next', 'not', 'how', 'btw', 'neither', 'was', 'whereafter', 'up', 'itself', 'move', 'a', 'during', 'less', 'you', 'everyone', 'serious', 'since', 'hereafter', 'against', 'another', 'ourselves', 'whereby', 'somewhere', 'yet', 'otherwise', 'than', 'somehow', 'every', 'seems', 'whereas', 'what', 'three', 'could', 'keep', 'where', 'more', 'else', 'everywhere', 'many', 'nor', 'four', 'perhaps', '‘d', 'because', 'front', "n't", 'last', 'wherever', 'full', 'beside', 'did', 'whole', 'name', 'into', 'though', 'further', 'are', 'our', 'for', '

In [86]:
# Number of default stop words.
len(nlp.Defaults.stop_words)

326

In [87]:
# Look up a word in the vocabulary and ask whether it is flagged as a stop word.
nlp.vocab['is'].is_stop

True

In [91]:
# Add 'btw' to the default stop-word set (a no-op if it is already present).
# NOTE(review): the execution counts show this cell was last run after the cell
# below it; on a fresh kernel both run top to bottom.
nlp.Defaults.stop_words.add('btw')

In [89]:
# Also set the stop-word flag on the vocabulary entry for 'btw'; this is the
# flag the later `nlp.vocab['btw'].is_stop` check reads.
nlp.vocab['btw'].is_stop = True

In [92]:
# Re-check the size of the stop-word set after adding 'btw'.
# NOTE(review): the recorded value equals the earlier count because 'btw' was
# already in the set when it was printed above (stale kernel state).
len(nlp.Defaults.stop_words)

326

In [93]:
# Confirm that 'btw' is now flagged as a stop word in the vocabulary.
nlp.vocab['btw'].is_stop

True

In [95]:
# Remove 'she' from the default stop-word set.
# discard() is used instead of remove() so that re-running this cell does not
# raise KeyError once the word is already gone.
nlp.Defaults.stop_words.discard('she')
# Clear the flag on the vocabulary entry as well, mirroring how 'btw' was
# flagged when it was added; otherwise the entry may still report is_stop=True.
nlp.vocab['she'].is_stop = False
len(nlp.Defaults.stop_words)

325

In [108]:
#SolarPower
pattern1 = [{'LOWER':'solarpower'}]
#Solar-Power
pattern2 = [{'LOWER':'solar'},{'IS_PUNCT':True},{'LOWER':'power'}]
#Solar Power
pattern3 = [{'LOWER':"solar"},{'LOWER':'power'}]

In [114]:
import spacy
from spacy.matcher import Matcher

# Pipeline and a rule-based matcher that shares the pipeline's vocabulary.
nlp = spacy.load('en_core_web_sm')
matcher = Matcher(nlp.vocab)

# "SolarPower" written as a single token (case-insensitive)
pattern1 = [{'LOWER': 'solarpower'}]
# "Solar-Power": two words joined by exactly one punctuation token
pattern2 = [{'LOWER': 'solar'}, {'IS_PUNCT': True}, {'LOWER': 'power'}]
# "Solar Power": two consecutive word tokens
pattern3 = [{'LOWER': 'solar'}, {'LOWER': 'power'}]

# Register all three spellings under one match key.
matcher.add(
    'SolarPower',
    [pattern1, pattern2, pattern3],
)

# The matcher is applied to a document in a later cell.


In [115]:
# Test text containing all three spellings: "Solar Power", "solarpower" and
# "Solar-power". The adjacent literals form one string with no newline.
doc = nlp(
    'The Solar Power industry continues to grow as demand '
    'for solarpower increases. Solar-power cars are gaining popularity.'
)

In [117]:
# Run the matcher over the document. As the recorded output shows, each match
# is a 3-tuple: an integer match id followed by start and end token indices.
found_matches = matcher(doc)
print(found_matches)

[(8656102463236116519, 1, 3), (8656102463236116519, 10, 11), (8656102463236116519, 13, 16)]


In [118]:
# Unregister the 'SolarPower' patterns from the matcher.
matcher.remove('SolarPower')

In [120]:
from spacy.matcher import PhraseMatcher
# NOTE: this rebinds `matcher`, which previously held a token-pattern Matcher,
# to a PhraseMatcher built on the same vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [124]:
# Read the article from disk (decoded as latin1), then parse it once the file
# has been closed. NOTE: this rebinds `doc3` from the earlier examples.
with open('./TextFiles/reaganomics.txt', encoding='latin1') as f:
    raw_text = f.read()

doc3 = nlp(raw_text)

In [126]:
# Phrases to search for in the article.
phrase_list = ['voodoo economics', 'supply-side economics', 'trickle-down economics', 'free-market economics']

# Pattern Docs only need to be tokenized: the matcher was created without an
# `attr` argument, so it compares token text. nlp.make_doc() runs just the
# tokenizer, whereas nlp(text) would run the tagger, parser, NER and the other
# components on every phrase for no benefit (spaCy warns about that).
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

# Register all phrases under a single match key.
matcher.add('VoodooEconomics', phrase_patterns)

In [128]:
# Run the phrase matcher over the article; each result is a
# (match id, start token index, end token index) tuple, as the output shows.
found_matches = matcher(doc3)
found_matches

[(3473369816841043438, 41, 45),
 (3473369816841043438, 49, 53),
 (3473369816841043438, 54, 56),
 (3473369816841043438, 61, 65),
 (3473369816841043438, 673, 677),
 (3473369816841043438, 2986, 2990)]