In [1]:
import spacy

In [3]:
# Build a blank English pipeline and run the sample sentence through it.
nlp = spacy.blank("en")
text = "Mr. Anubhav Gupta loves coding. He did a lots of DSA problem solving in this year 2025."
doc = nlp(text)

# Show the tokenizer's output, one token per line.
print(*doc, sep="\n")

Mr.
Anubhav
Gupta
loves
coding
.
He
did
a
lots
of
DSA
problem
solving
in
this
year
2025
.


In [4]:
# Inspect which components the blank pipeline created above contains.
nlp.pipe_names

[]

In [5]:
# Rebind `nlp` to the packaged "en_core_web_sm" pipeline (replaces the blank one).
nlp = spacy.load("en_core_web_sm")

In [6]:
# Component names of the loaded pipeline, in processing order.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Same components as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x202e88e6cf0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x202e88e6210>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x202e7b26880>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x202e7acf950>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x202e808ba10>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x202e7b269d0>)]

In [8]:
# Re-run the sample sentence, this time through the loaded `nlp` pipeline.
text = "Mr. Anubhav Gupta loves coding. He did a lots of DSA problem solving in this year 2025."
doc = nlp(text)

# Print each token next to its coarse part-of-speech tag and its lemma.
bar = " | "
for tok in doc:
    print(tok, bar, tok.pos_, bar, tok.lemma_)

Mr.  |  PROPN  |  Mr.
Anubhav  |  PROPN  |  Anubhav
Gupta  |  PROPN  |  Gupta
loves  |  VERB  |  love
coding  |  VERB  |  cod
.  |  PUNCT  |  .
He  |  PRON  |  he
did  |  VERB  |  do
a  |  DET  |  a
lots  |  NOUN  |  lot
of  |  ADP  |  of
DSA  |  PROPN  |  DSA
problem  |  NOUN  |  problem
solving  |  VERB  |  solve
in  |  ADP  |  in
this  |  DET  |  this
year  |  NOUN  |  year
2025  |  NUM  |  2025
.  |  PUNCT  |  .


In [13]:
# List every named entity with its label and spaCy's description of that label.
doc = nlp("Tesla Inc is going to accurire Twitter Pvt Ltd for $5 billion")
bar = " | "
for entity in doc.ents:
    label = entity.label_
    print(entity.text, bar, label, bar, spacy.explain(label))

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter Pvt Ltd  |  ORG  |  Companies, agencies, institutions, etc.
$5 billion  |  MONEY  |  Monetary values, including unit


In [14]:
# Load the packaged pipeline only as a donor of components.
source_nlp = spacy.load("en_core_web_sm")

# Start from a blank English pipeline and copy in just the `ner` component
# from the donor, then show the resulting component list.
nlp = spacy.blank("en")
nlp.add_pipe("ner", source=source_nlp)
nlp.pipe_names

['ner']

In [15]:
# Run the current `nlp` pipeline and print each entity's text and label.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Tesla Inc ORG
$45 billion MONEY


Exercise

In [16]:
# Reload the full packaged pipeline: `nlp` was rebound to a blank pipeline
# holding only `ner` earlier, and the exercises below read `token.pos_`.
nlp = spacy.load("en_core_web_sm")

In [32]:
# Exercise: pull out every token that the pipeline tags as a proper noun.
text = ''' Ravi and Raju are the best friends from school days.They wanted to go for a world tour and 
visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.
They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!
'''

doc = nlp(text)

# Keep the tokens whose coarse POS tag is PROPN; `count` tracks how many matched.
nouns = [tok for tok in doc if tok.pos_ == 'PROPN']
count = len(nouns)

print("Proper Nouns: " , nouns)
print("Count: ", len(nouns))

Proper Nouns:  [Ravi, Raju, Paris, London, Dubai, Rome, Mohan, Hyderabad]
Count:  8


In [33]:
# Exercise: collect every entity that the NER component labels as an organisation.
text = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in 
India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel'''

doc = nlp(text)

# Filter the recognised entity spans down to ORG.
# The previous loop also ran `count += 1`, but this cell never initialised
# `count`, so it only worked with a leftover variable from another cell (and
# raised NameError on a fresh kernel); the total is taken from `len()` below.
companies = [ent for ent in doc.ents if ent.label_ == 'ORG']

print("Proper Nouns: " , companies)
print("Count: ", len(companies))


Proper Nouns:  [Tesla, Walmart, Amazon, Microsoft, Google, Infosys, Reliance, HDFC Bank, Hindustan Unilever, Bharti Airtel]
Count:  10
