In [2]:
import spacy

# Name of the pretrained English pipeline loaded for the examples below.
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

In [1]:
# Sample sentence containing an abbreviation ("Dr."), a currency symbol and a number.
text = "Dr. Strange loves chicken pie as it only costs $2 per plate."

In [4]:
# Run the loaded pipeline over the sample sentence.
doc = nlp(text)

# Token attributes to report, in the order they are printed.
FLAG_NAMES = ("is_alpha", "is_punct", "like_num", "is_currency")

for token in doc:
    # Build the flat "name:", value, "name:", value, ... argument list for print.
    flag_report = []
    for flag_name in FLAG_NAMES:
        flag_report.append(f"{flag_name}:")
        flag_report.append(getattr(token, flag_name))
    print(token, "==>", token.i, *flag_report)

Dr. ==> 0 is_alpha: False is_punct: False like_num: False is_currency: False
Strange ==> 1 is_alpha: True is_punct: False like_num: False is_currency: False
loves ==> 2 is_alpha: True is_punct: False like_num: False is_currency: False
chicken ==> 3 is_alpha: True is_punct: False like_num: False is_currency: False
pie ==> 4 is_alpha: True is_punct: False like_num: False is_currency: False
as ==> 5 is_alpha: True is_punct: False like_num: False is_currency: False
it ==> 6 is_alpha: True is_punct: False like_num: False is_currency: False
only ==> 7 is_alpha: True is_punct: False like_num: False is_currency: False
costs ==> 8 is_alpha: True is_punct: False like_num: False is_currency: False
$ ==> 9 is_alpha: False is_punct: False like_num: False is_currency: True
2 ==> 10 is_alpha: False is_punct: False like_num: True is_currency: False
per ==> 11 is_alpha: True is_punct: False like_num: False is_currency: False
plate ==> 12 is_alpha: True is_punct: False like_num: False is_currency: False

In [8]:
# Customise the tokenizer: register a special case so that "gimme"
# is split into the two tokens "gim" and "me".

from spacy.symbols import ORTH

gimme_pieces = [{ORTH: "gim"}, {ORTH: "me"}]
nlp.tokenizer.add_special_case("gimme", gimme_pieces)

# Tokenize a sentence that contains the special-cased word.
text2 = 'hey! gimme that $2!'
doc2 = nlp(text2)

# Collect the surface text of every token; the bare name displays the list.
words = [piece.text for piece in doc2]
words


['hey', '!', 'gim', 'me', 'that', '$', '2', '!']

In [9]:
# Create a blank English pipeline with spacy.blank("en");
# components are added to it explicitly in the following cell.

nlp2 = spacy.blank("en")

In [16]:
# Add the sentence-splitting component to the blank pipeline.
# add_pipe raises ValueError [E007] when a component with the same name is
# already registered, which happens if this cell is executed more than once
# on the same kernel. Guarding on pipe_names makes the cell safe to re-run.
if "sentencizer" not in nlp2.pipe_names:
    nlp2.add_pipe("sentencizer")

In [17]:
# Process the sample sentence with the sentencizer-only pipeline.
doc = nlp2(text)

# Print the text of every detected sentence on its own line.
for sentence in doc.sents:
    print(sentence.text)

Dr. Strange loves chicken pie as it only costs $2 per plate.


In [20]:
# Explore the components of the "en_core_web_sm" pipeline (POS tags, lemmas, entities).
# NOTE: this rebinds `text`, replacing the sample sentence assigned earlier in the notebook.

text = "Captain America ate 100$ of samosa. Then he said 'I can do this all day'."


In [23]:
# Run the loaded pipeline over the current sample text.
doc = nlp(text)

# One row per token: the token, its coarse part-of-speech tag and its lemma.
for token in doc:
    row = (token, token.pos_, token.lemma_)
    print(*row, sep="  |  ")

Captain  |  PROPN  |  Captain
America  |  PROPN  |  America
ate  |  VERB  |  eat
100  |  NUM  |  100
$  |  NUM  |  $
of  |  ADP  |  of
samosa  |  PROPN  |  samosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
'  |  PUNCT  |  '
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day
'  |  PUNCT  |  '
.  |  PUNCT  |  .


In [24]:
# Run the pipeline on a sentence that mentions a company and a monetary amount.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# One row per named entity: its text, its label and spaCy's description of the label.
for ent in doc.ents:
    label = ent.label_
    print(ent.text, label, spacy.explain(label), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit
