<h2>Parts of Speech (POS) Tagging</h2>

In [15]:
import spacy
import pandas as pd

In [10]:
# Load the "en_core_web_sm" pipeline once; `nlp` is the callable used below to process text.
# NOTE(review): assumes the "en_core_web_sm" package is installed in the kernel's environment.
nlp = spacy.load("en_core_web_sm")

In [11]:
# Sample passage to tag: a few short sentences mixing names, a year, a score and place names.
doc = "Google's DeepMind released AlphaGo in 2016. The AI, which is based in London, UK, defeated world champion Lee Sedol 4-1. This was a major milestone in deep learning"

In [12]:
# Run the loaded pipeline over the raw text.
# NOTE(review): this rebinds `doc` from the input string to the processed object, so the
# original string is no longer reachable under this name after the cell runs.
doc = nlp(doc)

In [20]:
# Build five parallel lists with one entry per token in `doc`:
#   tokens   - the token's text
#   pos      - the token's `pos_` label
#   pos_exp  - spaCy's human-readable explanation of that label
#   tags     - the token's `tag_` label
#   tags_exp - spaCy's human-readable explanation of that tag
#
# `token.text` (a plain str) is stored instead of the Token object itself, so the
# lists -- and any DataFrame built from them -- hold ordinary strings rather than
# objects tied to the parent Doc. The rendered table looks the same.
tokens = [token.text for token in doc]
pos = [token.pos_ for token in doc]
pos_exp = [spacy.explain(label) for label in pos]
tags = [token.tag_ for token in doc]
tags_exp = [spacy.explain(label) for label in tags]

In [23]:
# Assemble the per-token lists into a single table, one column per list.
# The header/list pairing below fixes both the column names and their order.
column_names = [
    "Tokens",
    "Parts of Speech",
    "Explanations (POS)",
    "Tags",
    "Explanations (TAGS)",
]
column_values = [tokens, pos, pos_exp, tags, tags_exp]
tagged_pos = pd.DataFrame(dict(zip(column_names, column_values)))

In [24]:
# Bare last expression: the notebook renders the table built from the per-token lists.
tagged_pos

Unnamed: 0,Tokens,Parts of Speech,Explanations (POS),Tags,Explanations (TAGS)
0,Google,PROPN,proper noun,NNP,"noun, proper singular"
1,'s,PART,particle,POS,possessive ending
2,DeepMind,PROPN,proper noun,NNP,"noun, proper singular"
3,released,VERB,verb,VBD,"verb, past tense"
4,AlphaGo,NOUN,noun,NN,"noun, singular or mass"
5,in,ADP,adposition,IN,"conjunction, subordinating or preposition"
6,2016,NUM,numeral,CD,cardinal number
7,.,PUNCT,punctuation,.,"punctuation mark, sentence closer"
8,The,DET,determiner,DT,determiner
9,AI,PROPN,proper noun,NNP,"noun, proper singular"


In [29]:
# Keep every token whose `pos_` label is not one of the excluded labels,
# then print the surviving tokens and how many there are.
excluded_pos = ("SPACE", "X", "PUNCT")
filtered_tokens = [tok for tok in doc if tok.pos_ not in excluded_pos]
print(filtered_tokens)
print(f"Length : {len(filtered_tokens)}")

[Google, 's, DeepMind, released, AlphaGo, in, 2016, The, AI, which, is, based, in, London, UK, defeated, world, champion, Lee, Sedol, 4, -, 1, This, was, a, major, milestone, in, deep, learning]
Length : 31


In [30]:
# Tally the tokens of `doc` by their POS attribute. The resulting mapping is keyed by
# an ID (not the label text); the following cell resolves each key through `doc.vocab`.
count = doc.count_by(spacy.attrs.POS)

In [32]:
# Print one "LABEL => frequency" line per entry in `count`, resolving each key
# to its label text through the vocabulary.
for pos_id, occurrences in count.items():
    label = doc.vocab[pos_id].text
    print(f"{label} => {occurrences}")

PROPN => 7
PART => 1
VERB => 3
NOUN => 5
ADP => 3
NUM => 3
PUNCT => 5
DET => 2
PRON => 2
AUX => 2
SYM => 1
ADJ => 2
