In [1]:
import spacy

# Load the "en_core_web_sm" pipeline once and bind it to `nlp`; the cells below
# call `nlp(text)` and read `pos_` / `tag_` from the tokens it yields.
nlp = spacy.load("en_core_web_sm")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Show every token next to its `pos_` label and spaCy's description of that label.
doc = nlp("Wow! Jonas is so excited to eat waffles, he is going to devour them so quickly!")

for token in doc:
    pos_label = token.pos_
    # sep reproduces the "  |  " column divider between the three fields.
    print(token, pos_label, spacy.explain(pos_label), sep="  |  ")

Wow  |  INTJ  |  interjection
!  |  PUNCT  |  punctuation
Jonas  |  PROPN  |  proper noun
is  |  AUX  |  auxiliary
so  |  ADV  |  adverb
excited  |  ADJ  |  adjective
to  |  PART  |  particle
eat  |  VERB  |  verb
waffles  |  NOUN  |  noun
,  |  PUNCT  |  punctuation
he  |  PRON  |  pronoun
is  |  AUX  |  auxiliary
going  |  VERB  |  verb
to  |  PART  |  particle
devour  |  VERB  |  verb
them  |  PRON  |  pronoun
so  |  ADV  |  adverb
quickly  |  ADV  |  adverb
!  |  PUNCT  |  punctuation


In [3]:
# Show every token next to its `tag_` label and spaCy's description of that label.
doc = nlp("Wow! Jonas is so excited to eat waffles, he is going to devour them so quickly!")

for token in doc:
    tag_label = token.tag_
    # sep reproduces the "  |  " column divider between the three fields.
    print(token, tag_label, spacy.explain(tag_label), sep="  |  ")

Wow  |  UH  |  interjection
!  |  .  |  punctuation mark, sentence closer
Jonas  |  NNP  |  noun, proper singular
is  |  VBZ  |  verb, 3rd person singular present
so  |  RB  |  adverb
excited  |  JJ  |  adjective (English), other noun-modifier (Chinese)
to  |  TO  |  infinitival "to"
eat  |  VB  |  verb, base form
waffles  |  NNS  |  noun, plural
,  |  ,  |  punctuation mark, comma
he  |  PRP  |  pronoun, personal
is  |  VBZ  |  verb, 3rd person singular present
going  |  VBG  |  verb, gerund or present participle
to  |  TO  |  infinitival "to"
devour  |  VB  |  verb, base form
them  |  PRP  |  pronoun, personal
so  |  RB  |  adverb
quickly  |  RB  |  adverb
!  |  .  |  punctuation mark, sentence closer


In [7]:
# Keep only the tokens whose `pos_` is not "SPACE", "X" or "PUNCT"
# (drops whitespace, punctuation, and tokens tagged X).
doc = nlp("Wow! Jonas is so excited to eat waffles, he is going to devour them so quickly!")

filtered_tokens = [
    tok
    for tok in doc
    if tok.pos_ not in ("SPACE", "X", "PUNCT")
]

filtered_tokens

[Wow,
 Jonas,
 is,
 so,
 excited,
 to,
 eat,
 waffles,
 he,
 is,
 going,
 to,
 devour,
 them,
 so,
 quickly]

In [10]:
# Tally how many tokens of `doc` carry each POS value. The tally's keys are
# resolved through `doc.vocab` to obtain the printable label text.
# `doc` here is the full parse, so punctuation tokens are counted too.
count = doc.count_by(spacy.attrs.POS)

for pos_id, n_tokens in count.items():
    print(doc.vocab[pos_id].text, n_tokens, sep="  |  ")


INTJ  |  1
PUNCT  |  3
PROPN  |  1
AUX  |  2
ADV  |  3
ADJ  |  1
PART  |  2
VERB  |  3
NOUN  |  1
PRON  |  2
