In [1]:
import spacy

In [5]:
# Load the `en_core_web_sm` pipeline once; later cells call `nlp(...)` to parse text.
nlp = spacy.load("en_core_web_sm")

In [8]:
# Show the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [9]:
# Print each token with its coarse part-of-speech tag and a readable
# explanation of that tag, one token per line.
doc = nlp("Elon flew to mars yesterday. He carried briyani masala with him")
for token in doc:
    print(token, token.pos_, spacy.explain(token.pos_), sep=" | ")

Elon | PROPN | proper noun
flew | VERB | verb
to | ADP | adposition
mars | NOUN | noun
yesterday | NOUN | noun
. | PUNCT | punctuation
He | PRON | pronoun
carried | VERB | verb
briyani | ADJ | adjective
masala | NOUN | noun
with | ADP | adposition
him | PRON | pronoun


In [13]:
# Print each token with both its coarse tag (`pos_`) and its fine-grained
# tag (`tag_`), each followed by the explanation spaCy gives for it.
doc = nlp("Wow! Dr. Stranger made 265 million $ on the very first day")
for token in doc:
    print(
        token,
        token.pos_,
        spacy.explain(token.pos_),
        token.tag_,
        spacy.explain(token.tag_),
        sep=" | ",
    )

Wow | INTJ | interjection | UH | interjection
! | PUNCT | punctuation | . | punctuation mark, sentence closer
Dr. | PROPN | proper noun | NNP | noun, proper singular
Stranger | PROPN | proper noun | NNP | noun, proper singular
made | VERB | verb | VBD | verb, past tense
265 | NUM | numeral | CD | cardinal number
million | NUM | numeral | CD | cardinal number
$ | NUM | numeral | CD | cardinal number
on | ADP | adposition | IN | conjunction, subordinating or preposition
the | DET | determiner | DT | determiner
very | ADV | adverb | RB | adverb
first | ADJ | adjective | JJ | adjective (English), other noun-modifier (Chinese)
day | NOUN | noun | NN | noun, singular or mass


In [39]:
# Parse an earnings-release excerpt, then collect the tokens worth analysing.
earning = nlp(''' Microsoft Corp. today etc. announced the following results for the quarter ended June 30, 2023, as compared to the corresponding period of last fiscal year:
·        Revenue was $56.2 billion and increased 8% (up 10% in constant currency)
·        Operating income was $24.3 billion and increased 18% (up 21% in constant currency)
·        Net income was $20.1 billion and increased 20% (up 23% in constant currency)
·        Diluted earnings per share was $2.69 and increased 21% (up 23% in constant currency)''')

filtered_data = []
for token in earning:
    # Drop whitespace, punctuation and tokens tagged X (unknown/other);
    # everything else is kept, in document order.
    if token.pos_ in ('SPACE', 'PUNCT', 'X'):
        continue
    filtered_data.append(token)

In [40]:
# Display the tokens that passed the POS filter (SPACE, PUNCT and X removed).
filtered_data

[Microsoft,
 Corp.,
 today,
 announced,
 the,
 following,
 results,
 for,
 the,
 quarter,
 ended,
 June,
 30,
 2023,
 as,
 compared,
 to,
 the,
 corresponding,
 period,
 of,
 last,
 fiscal,
 year,
 Revenue,
 was,
 $,
 56.2,
 billion,
 and,
 increased,
 8,
 %,
 up,
 10,
 %,
 in,
 constant,
 currency,
 Operating,
 income,
 was,
 $,
 24.3,
 billion,
 and,
 increased,
 18,
 %,
 up,
 21,
 %,
 in,
 constant,
 currency,
 Net,
 income,
 was,
 $,
 20.1,
 billion,
 and,
 increased,
 20,
 %,
 up,
 23,
 %,
 in,
 constant,
 currency,
 Diluted,
 earnings,
 per,
 share,
 was,
 $,
 2.69,
 and,
 increased,
 21,
 %,
 up,
 23,
 %,
 in,
 constant,
 currency]

In [41]:
# Tally how many tokens carry each coarse POS tag. The result maps the
# integer ID of a POS tag to the number of tokens with that tag.
count = earning.count_by(spacy.attrs.POS)
count

{103: 9,
 96: 3,
 92: 22,
 101: 2,
 100: 10,
 90: 3,
 85: 8,
 93: 17,
 97: 15,
 98: 1,
 84: 8,
 87: 4,
 99: 4,
 89: 4,
 86: 4}

In [44]:
# The keys of `count` are integer IDs; look one of them (96) up in the
# vocabulary to recover its readable POS label.
earning.vocab[96].text

'PROPN'

In [45]:
# Translate every POS ID in `count` back to its label and print it
# next to the number of tokens that carry it.
for pos_id, freq in count.items():
    print(earning.vocab[pos_id].text, freq, sep=" | ")

SPACE | 9
PROPN | 3
NOUN | 22
X | 2
VERB | 10
DET | 3
ADP | 8
NUM | 17
PUNCT | 15
SCONJ | 1
ADJ | 8
AUX | 4
SYM | 4
CCONJ | 4
ADV | 4
