In [1]:
import spacy

In [2]:
# Load the "en_core_web_sm" pipeline once; `nlp` is the callable used by the
# cells below to turn raw text into a processed document.
nlp = spacy.load("en_core_web_sm")

In [9]:
# Display the names of the components that make up the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [8]:
# Two short sentences used to inspect the coarse-grained part-of-speech tags.
doc = nlp("Elon flew to mars yesterday. He carried biryani masala with him")

# One line per token: token text | coarse POS tag | spaCy's gloss for that tag.
for tok in doc:
    coarse_pos = tok.pos_
    print(tok, "|", coarse_pos, "|", spacy.explain(coarse_pos))

Elon | PROPN | proper noun
flew | VERB | verb
to | ADP | adposition
mars | NOUN | noun
yesterday | NOUN | noun
. | PUNCT | punctuation
He | PRON | pronoun
carried | VERB | verb
biryani | ADJ | adjective
masala | NOUN | noun
with | ADP | adposition
him | PRON | pronoun


In [14]:
# Sentence mixing an interjection, punctuation, numbers and a symbol, used to
# compare the coarse tag (pos_) with the fine-grained tag (tag_) of each token.
doc = nlp("Wow! Dr. Strange made, 265 million $ on the very first day")

# One line per token: text | pos_ | gloss of pos_ | tag_ | gloss of tag_.
for tok in doc:
    coarse_pos, fine_tag = tok.pos_, tok.tag_
    print(tok, "|", coarse_pos, "|", spacy.explain(coarse_pos), "|", fine_tag, "|", spacy.explain(fine_tag))

Wow | INTJ | interjection | UH | interjection
! | PUNCT | punctuation | . | punctuation mark, sentence closer
Dr. | PROPN | proper noun | NNP | noun, proper singular
Strange | PROPN | proper noun | NNP | noun, proper singular
made | VERB | verb | VBD | verb, past tense
, | PUNCT | punctuation | , | punctuation mark, comma
265 | NUM | numeral | CD | cardinal number
million | NUM | numeral | CD | cardinal number
$ | NUM | numeral | CD | cardinal number
on | ADP | adposition | IN | conjunction, subordinating or preposition
the | DET | determiner | DT | determiner
very | ADV | adverb | RB | adverb
first | ADJ | adjective | JJ | adjective (English), other noun-modifier (Chinese)
day | NOUN | noun | NN | noun, singular or mass


In [15]:
# NOTE(review): "quites" looks like a misspelling of "quits" — confirm whether
# the misspelling is intentional before changing the input text.
doc = nlp("He quites the job")

# Inspect the fine-grained tag of the second token (index 1, the verb).
# The token is bound once and reused; the former bare `doc[1]` statement was
# dropped because a non-final expression in a cell displays nothing.
verb = doc[1]
print(verb.text, "|", verb.tag_, "|", spacy.explain(verb.tag_))

quites | VBZ | verb, 3rd person singular present


In [16]:
doc = nlp("He quit the job")

# Inspect the fine-grained tag of the second token (index 1, the verb).
# The token is bound once and reused; the former bare `doc[1]` statement was
# dropped because a non-final expression in a cell displays nothing.
verb = doc[1]
print(verb.text, "|", verb.tag_, "|", spacy.explain(verb.tag_))

quit | VBD | verb, past tense


In [24]:
# Sample press-release text (two paragraphs separated by a blank line) that the
# following cells pass to `nlp` for token filtering and POS counting.
earnings_text="""Microsoft Corp. will publish fiscal year 2026 first-quarter financial results after the close of the market on Wednesday, Oct. 29, 2025, on the Microsoft Investor Relations website at https://www.microsoft.com/en-us/Investor/. A live webcast of the earnings conference call will be made available at 2:30 p.m. Pacific Time.

Microsoft (Nasdaq “MSFT” @microsoft) creates platforms and tools powered by AI to deliver innovative solutions that meet the evolving needs of our customers, etc. The technology company is committed to making AI available broadly and doing so responsibly, with a mission to empower every person and every organization on the planet to achieve more. """


In [28]:
# Run the pipeline over the press-release text.
doc = nlp(earnings_text)

# Keep every token whose coarse POS tag is not SPACE, X or PUNCT,
# preserving document order.
filtered_tokens = [tok for tok in doc if tok.pos_ not in ["SPACE", "X", "PUNCT"]]

In [29]:
# Preview only the first 20 kept tokens instead of dumping the whole list.
filtered_tokens[:20]

[Microsoft,
 Corp.,
 will,
 publish,
 fiscal,
 year,
 2026,
 first,
 quarter,
 financial,
 results,
 after,
 the,
 close,
 of,
 the,
 market,
 on,
 Wednesday,
 Oct.]

In [30]:
# Tally how often each coarse POS tag occurs in the document. The result is a
# dict keyed by the integer ID of each POS tag, not by its string name.
count = doc.count_by(spacy.attrs.POS)
# Display the raw {pos_id: frequency} mapping.
count

{96: 16,
 87: 4,
 100: 12,
 84: 8,
 92: 22,
 93: 4,
 97: 12,
 85: 12,
 90: 11,
 103: 1,
 89: 3,
 94: 3,
 95: 2,
 101: 2,
 86: 3}

In [32]:
# Translate each integer POS ID back to its tag name through the vocab and
# print it next to its frequency.
for pos_id, freq in count.items():
    pos_name = doc.vocab[pos_id].text
    print(pos_name, "|", freq)

PROPN | 16
AUX | 4
VERB | 12
ADJ | 8
NOUN | 22
NUM | 4
PUNCT | 12
ADP | 12
DET | 11
SPACE | 1
CCONJ | 3
PART | 3
PRON | 2
X | 2
ADV | 3
