In [None]:
import spacy


In [6]:
# Load the `en_core_web_sm` pipeline once; the cells below reuse `nlp`.
nlp = spacy.load("en_core_web_sm")

# NOTE(review): "flow" looks like a typo for "flew" -- the recorded output tags
# it as NOUN rather than VERB. Left unchanged so the output below stays valid.
doc = nlp("Elon flow to mars yesterday. He carried biryani masala with him.")

# One row per token: the token, its coarse POS label, and spaCy's explanation
# of that label.
for tok in doc:
    coarse_pos = tok.pos_
    print(tok, coarse_pos, spacy.explain(coarse_pos), sep="  |  ")

Elon  |  PROPN  |  proper noun
flow  |  NOUN  |  noun
to  |  ADP  |  adposition
mars  |  NOUN  |  noun
yesterday  |  NOUN  |  noun
.  |  PUNCT  |  punctuation
He  |  PRON  |  pronoun
carried  |  VERB  |  verb
biryani  |  ADJ  |  adjective
masala  |  NOUN  |  noun
with  |  ADP  |  adposition
him  |  PRON  |  pronoun
.  |  PUNCT  |  punctuation


In [9]:
# NOTE(review): "millioon" is presumably a typo for "million"; kept as-is so
# the recorded output below still matches.
doc = nlp("Wow, Dr. Strange made 265 millioon $ on the very first day")

# The fine-grained tag (token.tag_) adds detail beyond the coarse POS, e.g. the
# tense of a verb: 'made' is reported as VBD ("verb, past tense") below.
for tok in doc:
    row = (
        tok,
        tok.pos_,
        spacy.explain(tok.pos_),
        tok.tag_,
        spacy.explain(tok.tag_),
    )
    print(*row, sep="  |  ")



Wow  |  INTJ  |  interjection  |  UH  |  interjection
,  |  PUNCT  |  punctuation  |  ,  |  punctuation mark, comma
Dr.  |  PROPN  |  proper noun  |  NNP  |  noun, proper singular
Strange  |  PROPN  |  proper noun  |  NNP  |  noun, proper singular
made  |  VERB  |  verb  |  VBD  |  verb, past tense
265  |  NUM  |  numeral  |  CD  |  cardinal number
millioon  |  NOUN  |  noun  |  NN  |  noun, singular or mass
$  |  SYM  |  symbol  |  $  |  symbol, currency
on  |  ADP  |  adposition  |  IN  |  conjunction, subordinating or preposition
the  |  DET  |  determiner  |  DT  |  determiner
very  |  ADV  |  adverb  |  RB  |  adverb
first  |  ADJ  |  adjective  |  JJ  |  adjective (English), other noun-modifier (Chinese)
day  |  NOUN  |  noun  |  NN  |  noun, singular or mass


In [15]:
doc = nlp("He quit the job")

# Look at the second token ("quit") only: print its text, its fine-grained tag
# and spaCy's explanation of that tag. A bare `doc[1]` expression in the middle
# of a cell displays nothing, so the token is bound to a name and printed.
verb = doc[1]
print(verb.text, verb.tag_, spacy.explain(verb.tag_))

quit VBD verb, past tense


In [19]:
# Sample input for the filtering demo below: a multi-line earnings
# announcement containing punctuation, bullet characters, blank lines and
# numbers, so the parsed document includes non-word tokens.
earnings_text = '''
REDMOND, Wash. — January 24, 2023 — Microsoft Corp. today announced the following results for the quarter ended December 31, 2022, as compared to the corresponding period of last fiscal year:

·        Revenue was $52.7 billion and increased 2%  

·        Operating income was $20.4 billion GAAP and $21.6 billion non-GAAP, and decreased 8% and 3%, respectively

·        Net income was $16.4 billion GAAP and $17.4 billion non-GAAP, and decreased 12% and 7%, respectively

·        Diluted earnings per share was $2.20 GAAP and $2.32 non-GAAP, and decreased 11% and 6%, respectively
'''

# Run the text through the pipeline; `doc` is rebound to the parsed earnings text.
doc = nlp(earnings_text)

# What if we want to remove all punctuation so we can do some analysis?
# First pass: print every token with its coarse POS tag to see what the
# document contains before filtering.
for token in doc:
    print(token, " | ", token.pos_, " | ", spacy.explain(token.pos_))

# Second pass: keep only tokens whose POS is not whitespace ("SPACE"),
# other/unknown ("X") or punctuation ("PUNCT"), printing each kept token.
# The whitespace tag must be spelled "SPACE" (as shown in the output of the
# first pass); any other spelling never matches and lets whitespace through.
excluded_pos = {"SPACE", "X", "PUNCT"}
filtered_tokens = []
for token in doc:
    if token.pos_ not in excluded_pos:
        filtered_tokens.append(token)
        print(token, " | ", token.pos_, " | ", spacy.explain(token.pos_))


  |  SPACE  |  space
REDMOND  |  PROPN  |  proper noun
,  |  PUNCT  |  punctuation
Wash.  |  PROPN  |  proper noun
—  |  PUNCT  |  punctuation
January  |  PROPN  |  proper noun
24  |  NUM  |  numeral
,  |  PUNCT  |  punctuation
2023  |  NUM  |  numeral
—  |  PUNCT  |  punctuation
Microsoft  |  PROPN  |  proper noun
Corp.  |  PROPN  |  proper noun
today  |  NOUN  |  noun
announced  |  VERB  |  verb
the  |  DET  |  determiner
following  |  VERB  |  verb
results  |  NOUN  |  noun
for  |  ADP  |  adposition
the  |  DET  |  determiner
quarter  |  NOUN  |  noun
ended  |  VERB  |  verb
December  |  PROPN  |  proper noun
31  |  NUM  |  numeral
,  |  PUNCT  |  punctuation
2022  |  NUM  |  numeral
,  |  PUNCT  |  punctuation
as  |  SCONJ  |  subordinating conjunction
compared  |  VERB  |  verb
to  |  ADP  |  adposition
the  |  DET  |  determiner
corresponding  |  ADJ  |  adjective
period  |  NOUN  |  noun
of  |  ADP  |  adposition
last  |  ADJ  |  adjective
fiscal  |  ADJ  |  adjective
year  | 

In [23]:
# Count how many tokens of each coarse POS type the text contains.
# The counts are keyed by integer attribute IDs, so each key is looked up in
# the document's vocab to recover the readable tag name before printing.
count = doc.count_by(spacy.attrs.POS)
for pos_id, n_tokens in count.items():
    print(doc.vocab[pos_id].text, "|", n_tokens)

SPACE | 10
PROPN | 6
PUNCT | 17
NUM | 23
NOUN | 29
VERB | 10
DET | 3
ADP | 4
SCONJ | 1
ADJ | 4
AUX | 4
SYM | 7
CCONJ | 10
ADV | 3
