# Spacy demo

In [5]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [6]:
doc = nlp(u'Microsoft is trying to buy France based startup at $7 Million')
for token in doc:
    print(token.text)

Microsoft
is
trying
to
buy
France
based
startup
at
$
7
Million


In [7]:
for token in doc:
    print(token, token.pos_)

Microsoft PROPN
is AUX
trying VERB
to PART
buy VERB
France PROPN
based VERB
startup NOUN
at ADP
$ SYM
7 NUM
Million NUM


## Chunking

In [8]:
doc4 = nlp("tesla is a automobile based endorsed with high tech work for implimenting the electric cars")
for chunks in doc4.noun_chunks:
    print(chunks)

tesla
a automobile
high tech work
the electric cars


In [9]:
from spacy import displacy
doc = nlp(u'Tesla to build solar electric startup in gujrat for $70 million')
displacy.render(doc, style='dep', jupyter=True, options = {'distance':100})

In [22]:
from spacy.matcher import Matcher

matcher = Matcher(nlp.vocab)
pattern_1 = [{'LOWER': 'solarpower'}] # ----> SolarPower
pattern_2 = [{'LOWER': 'solar'}, {'IS_PUNCT': True}, {'LOWER': 'power'}] # ---> Solar-Power
pattern_3 = [{'LOWER': 'solar'}, {'LOWER': 'power'}] # ---> Solar Power

matcher.add('SolarPower',[pattern_1, pattern_2, pattern_3])

text = u'''Solar Power is the conversion of energy from sunlight into electricity, 
either directly using photovoltaics (PV), indirectly using concentrated SolarPower, 
or a combination. Concentrated Solar-Power systems use lenses or mirrors and solar 
tracking systems to focus a large area of sunlight into a small beam.
'''
doc = nlp(text)
found_matches = matcher(doc)
print(found_matches)


[(8656102463236116519, 0, 2), (8656102463236116519, 24, 25), (8656102463236116519, 32, 35)]


In [23]:
i = 0
for token in doc:
    print(i, token.text)
    i+=1

0 Solar
1 Power
2 is
3 the
4 conversion
5 of
6 energy
7 from
8 sunlight
9 into
10 electricity
11 ,
12 

13 either
14 directly
15 using
16 photovoltaics
17 (
18 PV
19 )
20 ,
21 indirectly
22 using
23 concentrated
24 SolarPower
25 ,
26 

27 or
28 a
29 combination
30 .
31 Concentrated
32 Solar
33 -
34 Power
35 systems
36 use
37 lenses
38 or
39 mirrors
40 and
41 solar
42 

43 tracking
44 systems
45 to
46 focus
47 a
48 large
49 area
50 of
51 sunlight
52 into
53 a
54 small
55 beam
56 .
57 



In [24]:
for match_id, start, end in found_matches:
    string_id = nlp.vocab.strings[match_id]
    span = doc[start:end]
    print(match_id, string_id, start, end, span.text)

8656102463236116519 SolarPower 0 2 Solar Power
8656102463236116519 SolarPower 24 25 SolarPower
8656102463236116519 SolarPower 32 35 Solar-Power


In [12]:
doc = nlp(u'The quick brown fox, snatch the piece of cube from mouth of black crow')
for token in doc:
    print(f"{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_)}")

The        DET        DT         determiner
quick      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
brown      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
fox        NOUN       NN         noun, singular or mass
,          PUNCT      ,          punctuation mark, comma
snatch     VERB       VB         verb, base form
the        DET        DT         determiner
piece      NOUN       NN         noun, singular or mass
of         ADP        IN         conjunction, subordinating or preposition
cube       NOUN       NN         noun, singular or mass
from       ADP        IN         conjunction, subordinating or preposition
mouth      NOUN       NN         noun, singular or mass
of         ADP        IN         conjunction, subordinating or preposition
black      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
crow       NOUN       NN         noun, singular or mass


In [13]:
pos_counts = doc.count_by(spacy.attrs.POS)
print(pos_counts)
print(doc.vocab[92].text) #check which POS

{90: 2, 84: 3, 92: 5, 97: 1, 100: 1, 85: 3}
NOUN


In [26]:
options = {'distance':110,'compact':'True','color':'#f7f9fa','bg':'#327fa8','font':'arial'}
displacy.render(doc, style='dep', jupyter=True, options=options)

In [15]:
doc3 = nlp(u"Ambani good to go at Gujrat to start a agro based industry in jio Mart for $70 million")
for entity in doc3.ents:
    print(entity)
    print(entity.label_)
    print(str(spacy.explain(entity.label_)))
    print("n")

Ambani
PERSON
People, including fictional
n
jio Mart
ORG
Companies, agencies, institutions, etc.
n
$70 million
MONEY
Monetary values, including unit
n


In [16]:
from spacy import displacy
displacy.render(doc3, style = 'ent', jupyter = True)