In [1]:
# Load spaCy and the `en_core_web_sm` pipeline once; the cells below reuse `nlp`.
import spacy
nlp = spacy.load("en_core_web_sm")

In [2]:
# Inspect the loaded pipeline: displays a list of (name, component) tuples.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x136614230>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1383513d0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x174d3fd80>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1059c1ad0>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x174ffa090>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x174d3fe60>)]

In [4]:
# Part-of-Speech (POS) and Lemma Example
#
# Run the pipeline on one sentence and print a row per token:
#   token text | coarse POS tag | lemma

doc = nlp("I can't believe some cars cost over $15,000 dollars!")

for tok in doc:
    # Two spaces around each bar reproduce the spacing of a space-separated
    # print() of the fields with " | " between them.
    print(f"{tok}  |  {tok.pos_}  |  {tok.lemma_}  | ")

I  |  PRON  |  I  | 
ca  |  AUX  |  can  | 
n't  |  PART  |  not  | 
believe  |  VERB  |  believe  | 
some  |  DET  |  some  | 
cars  |  NOUN  |  car  | 
cost  |  VERB  |  cost  | 
over  |  ADP  |  over  | 
$  |  SYM  |  $  | 
15,000  |  NUM  |  15,000  | 
dollars  |  NOUN  |  dollar  | 
!  |  PUNCT  |  !  | 


In [10]:
# Named Entity Recognition (NER) Example
#
# Print a row per detected entity:
#   entity text | entity label | human-readable description of the label

doc = nlp("Tesla Inc is going to acquire twitter for $45 billion in the US")

for entity in doc.ents:
    label = entity.label_
    # spacy.explain() turns the short label (e.g. "ORG") into a description.
    print(f"{entity.text}  |  {label}  |  {spacy.explain(label)}")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit
US  |  GPE  |  Countries, cities, states


In [11]:
# Visualize with displaCy
#
# Reuses `doc` from the NER example cell above, so that cell must have been run
# first. style="ent" selects displaCy's named-entity visualizer.

from spacy import displacy

displacy.render(doc, style="ent")

In [14]:
# Customizing Pipeline Components
#
# Build a second pipeline whose only component is "ner", sourced from the
# already-loaded `nlp` pipeline.

nlp2 = spacy.blank("en")  # Starts with an empty pipeline (no components)
nlp2.add_pipe("ner", source=nlp)  # source=nlp: take "ner" from the loaded pipeline
nlp2.pipe_names  # last expression, so the cell displays the component names

['ner']

In [21]:
# Exercise 1
#
# Collect all the proper nouns from a given text in a list and count them.
# A proper noun is a noun that names a particular person, place, or thing.

text = '''Ravi and Raju are the best friends from school days.They wanted to go for a world tour and 
visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.
They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!
'''

# NOTE(review): `spacy` and `nlp` are already set up in the first cell, so this
# repeat import/load looks redundant; kept so the cell still runs on its own.
import spacy
nlp = spacy.load("en_core_web_sm")

doc2 = nlp(text)

# Keep the matching tokens in a list, as the exercise asks, instead of only
# printing them; the count is then simply the length of that list.
proper_nouns = [token for token in doc2 if token.pos_ == "PROPN"]

for token in proper_nouns:
    print(token, " | ", token.pos_)

# NOTE(review): the recorded output lists 7 names and omits "Ravi", the first
# word of the text - presumably the model did not tag it PROPN; confirm.
counter = len(proper_nouns)
print("Number of Proper Nouns Found: ", counter)

Raju  |  PROPN
Paris  |  PROPN
London  |  PROPN
Dubai  |  PROPN
Rome  |  PROPN
Mohan  |  PROPN
Hyderabad  |  PROPN
Number of Proper Nouns Found:  7


In [30]:
# Exercise 2
#
# Get all the company names from a given text, along with how many were found.

text2 = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in 
India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel'''

doc3 = nlp(text2)

# Entities labelled "ORG" are treated as the company names.
org_entities = [entity for entity in doc3.ents if entity.label_ == "ORG"]

for org in org_entities:
    print(org, org.label_)

# NOTE(review): the recorded output shows "Bharti" rather than "Bharti Airtel",
# so the entity span the model returns may be shorter than the full name.
counter = len(org_entities)
print("Number of Orgs Found:", counter)

Tesla ORG
Walmart ORG
Amazon ORG
Microsoft ORG
Google ORG
Infosys ORG
Reliance ORG
HDFC Bank ORG
Hindustan Unilever ORG
Bharti ORG
Number of Orgs Found: 10
