In [1]:
import spacy

In [2]:
## Create a blank English pipeline: it contains only the tokenizer and
## no trained components (no tagger, parser, NER, ...)
nlp = spacy.blank("en")
## pipe_names lists the components that run after the tokenizer,
## so for a blank pipeline it is an empty list
nlp.pipe_names

[]

In [3]:
## Load the packaged small English pipeline "en_core_web_sm"
## NOTE: the package has to be installed separately before spacy.load can find it
newNlp = spacy.load("en_core_web_sm")
## Names of the loaded pipeline components, in the order they are listed in the pipeline
newNlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [4]:
## Same components as pipe_names, but shown as (name, component object) pairs
newNlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x21282eedd60>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x21282f8cfa0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x212831f62e0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x21283401e00>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x2128340a3c0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x212831f6200>)]

In [6]:
## Run the loaded pipeline over two short sentences
doc = newNlp("Captain america ate 100$ of samosa. Then he said I can do this all day.")

## One row per token: the token itself, its part-of-speech tag (pos_)
## and its lemma (lemma_, the token after lemmatisation)
for token in doc:
    print(token, token.pos_, token.lemma_, sep="  |  ")

Captain  |  PROPN  |  Captain
america  |  PROPN  |  america
ate  |  VERB  |  eat
100  |  NUM  |  100
$  |  NUM  |  $
of  |  ADP  |  of
samosa  |  PROPN  |  samosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day
.  |  PUNCT  |  .


In [10]:
## NOTE: the sentence is the model input, so it is kept exactly as written
## (including the spelling "aquire")
info = newNlp("Tesla's founder Elon Musk is going to aquire Twitter at 45 billion $")

## info.ents holds the entities found in the doc; print each entity's
## text next to its label
for ent in info.ents:
    print(ent.text, ent.label_, sep="  |  ")

Tesla  |  ORG
Elon Musk  |  PERSON
Twitter  |  PRODUCT
45 billion $  |  MONEY


In [11]:
## displacy is spaCy's built-in visualizer
## NOTE(review): this import sits mid-notebook; consider moving it next to `import spacy`
from spacy import displacy
## Render `info` with the entity ('ent') style, i.e. the text with its entities highlighted
displacy.render(info, style='ent')

In [13]:
## Run the pipeline over a sentence that lists several company names
text = newNlp('''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in 
India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel which are the biggest companies''')

## Keep only the entities labelled ORG, in the order they appear in the doc
company = [ent.text for ent in text.ents if ent.label_ == 'ORG']
print(company)

['Tesla', 'Walmart', 'Amazon', 'Microsoft', 'Google', 'Infosys', 'Reliance', 'HDFC Bank', 'Hindustan Unilever', 'Bharti Airtel']


In [16]:
## NOTE: this rebinds `text`, replacing the doc built from the companies sentence
text = newNlp('''Ravi and Raju are the best friends from school days.They wanted to go for a world tour and 
visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.
They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!
''')

## Print every token tagged as a proper noun (PROPN) on one line, each followed by " , ".
## The tags come from the model, so a name can be missed (the recorded run does not list "Ravi").
print("".join(f"{token.text} , " for token in text if token.pos_ == 'PROPN'), end="")


Raju , Paris , London , Dubai , Rome , Mohan , Hyderabad , 