# Parts of Speech (POS) Tagging

In [12]:
import nltk

# Fetch every NLTK resource this notebook relies on before anything uses it.
# 'punkt' is downloaded here as well: the very next cell calls
# nltk.word_tokenize(), which needs the tokenizer model and would raise a
# LookupError on a machine where it has never been installed.
# NOTE(review): recent NLTK releases may look for 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' instead - confirm against the installed version.
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\netra\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [13]:
# First end-to-end run: split the sentence into tokens, then attach a POS tag
# to each token. print() keeps the (token, tag) pairs on a single line.
# In the recorded output "brown" comes back as NN, not JJ.
sentence = "The quick brown fox, jumped over the lazy dog."
tokens = nltk.word_tokenize(sentence)
tagged_tokens = nltk.pos_tag(tokens)
print(tagged_tokens)

[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), (',', ','), ('jumped', 'VBD'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


In [14]:
from nltk.tokenize import word_tokenize

In [16]:
# Ensure the tagger model and the 'punkt' tokenizer model are installed.
# The tagger download repeats the one in the first cell; the recorded log
# shows both packages were already up to date when this ran.
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\netra\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\netra\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Tokenize the sentence

In [17]:
# Tokenize a longer sentence and display the token list.
# The recorded output shows the contraction "I'd" split into 'I' and "'d",
# and each parenthesis emitted as its own token.
text = (
    "Sure, I'd be happy to guide you through Parts of Speech (POS) "
    "tagging using the Python NLTK library."
)
tokens = word_tokenize(text)
tokens

['Sure',
 ',',
 'I',
 "'d",
 'be',
 'happy',
 'to',
 'guide',
 'you',
 'through',
 'Parts',
 'of',
 'Speech',
 '(',
 'POS',
 ')',
 'tagging',
 'using',
 'the',
 'Python',
 'NLTK',
 'library',
 '.']

## POS Tagging

In [18]:
# Tag the tokens produced by the previous cell (relies on `tokens` still
# holding that list). In the recorded output "'d" is tagged MD (modal).
tagged_tokens = nltk.pos_tag(tokens)
print(tagged_tokens)

[('Sure', 'JJ'), (',', ','), ('I', 'PRP'), ("'d", 'MD'), ('be', 'VB'), ('happy', 'JJ'), ('to', 'TO'), ('guide', 'VB'), ('you', 'PRP'), ('through', 'IN'), ('Parts', 'NNS'), ('of', 'IN'), ('Speech', 'NNP'), ('(', '('), ('POS', 'NNP'), (')', ')'), ('tagging', 'VBG'), ('using', 'VBG'), ('the', 'DT'), ('Python', 'NNP'), ('NLTK', 'NNP'), ('library', 'NN'), ('.', '.')]


### Examples of POS Tagging 

In [19]:
# Two-sentence example. In the recorded output both occurrences of "data"
# are tagged NNS and the contracted "'s" is tagged VBZ.
text = (
    "Python is a high-level programming language. "
    "It's great for data analysis and data science for the future of AI."
)
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
print(tagged_tokens)

[('Python', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('high-level', 'JJ'), ('programming', 'NN'), ('language', 'NN'), ('.', '.'), ('It', 'PRP'), ("'s", 'VBZ'), ('great', 'JJ'), ('for', 'IN'), ('data', 'NNS'), ('analysis', 'NN'), ('and', 'CC'), ('data', 'NNS'), ('science', 'NN'), ('for', 'IN'), ('the', 'DT'), ('future', 'NN'), ('of', 'IN'), ('AI', 'NNP'), ('.', '.')]


The POS tags are standardized codes. For instance, NNP stands for proper noun, singular, VBZ is a verb, 3rd person singular present, JJ is an adjective, etc. You can look up a full list of POS tags and their meanings in the NLTK documentation or by querying nltk.help.upenn_tagset() if you're curious about a specific tag.

## Tagsets

In [30]:
# Download the 'tagsets' package (presumably the data nltk.help reads from -
# TODO confirm), then print the definition and sample words for the NNP tag.
nltk.download('tagsets')
nltk.help.upenn_tagset('NNP')

NNP: noun, proper, singular
    Motown Venneboerger Czestochwa Ranzer Conchita Trumplane Christos
    Oceanside Escobar Kreisler Sawyer Cougar Yvette Ervin ODI Darryl CTCA
    Shannon A.K.C. Meltex Liverpool ...


[nltk_data] Downloading package tagsets to
[nltk_data]     C:\Users\netra\AppData\Roaming\nltk_data...
[nltk_data]   Package tagsets is already up-to-date!


In [31]:
# Same NNP lookup as the previous cell, this time without the download step.
nltk.help.upenn_tagset('NNP')

NNP: noun, proper, singular
    Motown Venneboerger Czestochwa Ranzer Conchita Trumplane Christos
    Oceanside Escobar Kreisler Sawyer Cougar Yvette Ervin ODI Darryl CTCA
    Shannon A.K.C. Meltex Liverpool ...


In [33]:
# Look up VB (verb, base form) with sample words.
nltk.help.upenn_tagset('VB')

VB: verb, base form
    ask assemble assess assign assume atone attention avoid bake balkanize
    bank begin behold believe bend benefit bevel beware bless boil bomb
    boost brace break bring broil brush build ...


In [34]:
# Look up IN (preposition or subordinating conjunction) with sample words.
nltk.help.upenn_tagset('IN')

IN: preposition or conjunction, subordinating
    astride among uppon whether out inside pro despite on by throughout
    below within for towards near behind atop around if like until below
    next into if beside ...


In [35]:
# Look up VBZ (verb, present tense, 3rd person singular) with sample words.
nltk.help.upenn_tagset('VBZ')

VBZ: verb, present tense, 3rd person singular
    bases reconstructs marks mixes displeases seals carps weaves snatches
    slumps stretches authorizes smolders pictures emerges stockpiles
    seduces fizzes uses bolsters slaps speaks pleads ...


In [36]:
# Look up CC (coordinating conjunction) with sample words.
nltk.help.upenn_tagset('CC')

CC: conjunction, coordinating
    & 'n and both but either et for less minus neither nor or plus so
    therefore times v. versus vs. whether yet


![image.png](attachment:image.png)

# Other Practice Examples

### Simple Statements 

In [37]:
# Multi-sentence passage used by the next cell; the bare `text` at the end
# echoes it so the reader can see exactly what will be tagged.
text = (
    "Social Constructivism: This is based on the work of Lev Vygotsky "
    "who emphasized the influence of culture and social interaction. "
    "Vygotsky believed that learning is a collaborative activity, "
    "and that social interaction plays a critical role in the process "
    "of constructing knowledge."
)
text

'Social Constructivism: This is based on the work of Lev Vygotsky who emphasized the influence of culture and social interaction. Vygotsky believed that learning is a collaborative activity, and that social interaction plays a critical role in the process of constructing knowledge.'

In [38]:
# Tokenize and tag the passage defined in the previous cell (relies on
# `text` still holding it). The bare last expression pretty-prints one
# (token, tag) pair per line.
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Social', 'JJ'),
 ('Constructivism', 'NN'),
 (':', ':'),
 ('This', 'DT'),
 ('is', 'VBZ'),
 ('based', 'VBN'),
 ('on', 'IN'),
 ('the', 'DT'),
 ('work', 'NN'),
 ('of', 'IN'),
 ('Lev', 'NNP'),
 ('Vygotsky', 'NNP'),
 ('who', 'WP'),
 ('emphasized', 'VBD'),
 ('the', 'DT'),
 ('influence', 'NN'),
 ('of', 'IN'),
 ('culture', 'NN'),
 ('and', 'CC'),
 ('social', 'JJ'),
 ('interaction', 'NN'),
 ('.', '.'),
 ('Vygotsky', 'NNP'),
 ('believed', 'VBD'),
 ('that', 'IN'),
 ('learning', 'NN'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('collaborative', 'JJ'),
 ('activity', 'NN'),
 (',', ','),
 ('and', 'CC'),
 ('that', 'IN'),
 ('social', 'JJ'),
 ('interaction', 'NN'),
 ('plays', 'VBZ'),
 ('a', 'DT'),
 ('critical', 'JJ'),
 ('role', 'NN'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('process', 'NN'),
 ('of', 'IN'),
 ('constructing', 'VBG'),
 ('knowledge', 'NN'),
 ('.', '.')]

### Complex sentences

In [39]:
# Conditional sentence: in the recorded output "If" is tagged IN
# (subordinating conjunction) and "will" is tagged MD (modal).
text = "If it rains tomorrow, the picnic will be canceled."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
print(tagged_tokens)

[('If', 'IN'), ('it', 'PRP'), ('rains', 'VBZ'), ('tomorrow', 'NN'), (',', ','), ('the', 'DT'), ('picnic', 'NN'), ('will', 'MD'), ('be', 'VB'), ('canceled', 'VBN'), ('.', '.')]


### Use of Conjunctions

In [40]:
# Two verb phrases joined by "and" (tagged CC in the recorded output).
# NOTE(review): "signs" looks like a typo for "sings"; as written, the
# recorded output tags it NNS (plural noun). Fix the sentence and re-run
# if the verb was intended.
text = "She plays the guitar and signs beautifully."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('She', 'PRP'),
 ('plays', 'VBZ'),
 ('the', 'DT'),
 ('guitar', 'NN'),
 ('and', 'CC'),
 ('signs', 'NNS'),
 ('beautifully', 'RB'),
 ('.', '.')]

In [41]:
# Look up RB (adverb) - the tag given to "beautifully" in the previous cell.
nltk.help.upenn_tagset('RB')

RB: adverb
    occasionally unabatingly maddeningly adventurously professedly
    stirringly prominently technologically magisterially predominately
    swiftly fiscally pitilessly ...


### Use of Future Tense 

In [42]:
# Future tense: the recorded output shows "will" as MD followed by the
# base-form verb "start" as VB.
text = "They will start the construction of the new building next month."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
print(tagged_tokens)

[('They', 'PRP'), ('will', 'MD'), ('start', 'VB'), ('the', 'DT'), ('construction', 'NN'), ('of', 'IN'), ('the', 'DT'), ('new', 'JJ'), ('building', 'NN'), ('next', 'JJ'), ('month', 'NN'), ('.', '.')]


In [43]:
# Look up PRP (personal pronoun) - the tag given to "They" above.
nltk.help.upenn_tagset('PRP')

PRP: pronoun, personal
    hers herself him himself hisself it itself me myself one oneself ours
    ourselves ownself self she thee theirs them themselves they thou thy us


In [44]:
# Look up MD (modal auxiliary) - the tag given to "will" above.
nltk.help.upenn_tagset('MD')

MD: modal auxiliary
    can cannot could couldn't dare may might must need ought shall should
    shouldn't will would


### Past Perfect Tense 

In [45]:
# Past perfect: the recorded output shows the auxiliary "had" as VBD and
# the participle "finished" as VBN.
text = "He had finished the work before I arrived."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('He', 'PRP'),
 ('had', 'VBD'),
 ('finished', 'VBN'),
 ('the', 'DT'),
 ('work', 'NN'),
 ('before', 'IN'),
 ('I', 'PRP'),
 ('arrived', 'VBD'),
 ('.', '.')]

### Present Continuous Tense

In [46]:
# Present continuous: the recorded output shows "am" as VBP and the
# -ing form "working" as VBG.
text = "I am working in the field of data science."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('I', 'PRP'),
 ('am', 'VBP'),
 ('working', 'VBG'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('field', 'NN'),
 ('of', 'IN'),
 ('data', 'NNS'),
 ('science', 'NN'),
 ('.', '.')]

In [47]:
nltk.help.upenn_tagset('VBG')  # VBG: verb, present participle or gerund

VBG: verb, present participle or gerund
    telegraphing stirring focusing angering judging stalling lactating
    hankerin' alleging veering capping approaching traveling besieging
    encrypting interrupting erasing wincing ...


In [48]:
nltk.help.upenn_tagset('VBP')  # VBP: verb, present tense, not 3rd person singular

VBP: verb, present tense, not 3rd person singular
    predominate wrap resort sue twist spill cure lengthen brush terminate
    appear tend stray glisten obtain comprise detest tease attract
    emphasize mold postpone sever return wag ...


### List Items

In [50]:
# Comma-separated list as the subject. In the recorded output the
# sentence-initial "Eggs" is tagged NNP (proper noun), while "milk" and
# "butter" are tagged NN.
text = "Eggs, milk, and butter are essential ingredients for baking a cake."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Eggs', 'NNP'),
 (',', ','),
 ('milk', 'NN'),
 (',', ','),
 ('and', 'CC'),
 ('butter', 'NN'),
 ('are', 'VBP'),
 ('essential', 'JJ'),
 ('ingredients', 'NNS'),
 ('for', 'IN'),
 ('baking', 'VBG'),
 ('a', 'DT'),
 ('cake', 'NN'),
 ('.', '.')]

### Adjective Heavy Sentences 

In [51]:
# Stacked adjectives before a noun: "old" and "wooden" are both tagged JJ
# in the recorded output.
text = "The old wooden cottage stood alone on the hill."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('The', 'DT'),
 ('old', 'JJ'),
 ('wooden', 'JJ'),
 ('cottage', 'NN'),
 ('stood', 'VBD'),
 ('alone', 'RB'),
 ('on', 'IN'),
 ('the', 'DT'),
 ('hill', 'NN'),
 ('.', '.')]

### Technical Content

In [52]:
# Technical sentence with two verbs. In the recorded output "utilizes" is
# tagged JJ even though it is the main verb, while "manages" is tagged VBZ.
text = "Python utilizes dynamic typing and manages memory automatically."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Python', 'NNP'),
 ('utilizes', 'JJ'),
 ('dynamic', 'JJ'),
 ('typing', 'NN'),
 ('and', 'CC'),
 ('manages', 'VBZ'),
 ('memory', 'NN'),
 ('automatically', 'RB'),
 ('.', '.')]

In [53]:
# Look up NN (common noun, singular or mass) with sample words.
nltk.help.upenn_tagset('NN')

NN: noun, common, singular or mass
    common-carrier cabbage knuckle-duster Casino afghan shed thermostat
    investment slide humour falloff slick wind hyena override subhumanity
    machinist ...


### Literary Style

In [54]:
# Literary sentence with a fronted prepositional phrase. In the recorded
# output the main verb "glittered" is tagged VBN (past participle), not VBD.
text = "Beneath the clear sky, the river glittered under the sun."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Beneath', 'IN'),
 ('the', 'DT'),
 ('clear', 'JJ'),
 ('sky', 'NN'),
 (',', ','),
 ('the', 'DT'),
 ('river', 'NN'),
 ('glittered', 'VBN'),
 ('under', 'IN'),
 ('the', 'DT'),
 ('sun', 'NN'),
 ('.', '.')]

### Imperative Sentences 

In [55]:
# Imperative: the sentence opens with a verb. In the recorded output
# "Close" is tagged RB (adverb), not VB.
text = "Close the door quietly."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Close', 'RB'), ('the', 'DT'), ('door', 'NN'), ('quietly', 'RB'), ('.', '.')]

### Interrogative with Modal

In [56]:
# Question opening with a modal: "Can" is tagged MD in the recorded output,
# and the question mark receives the sentence-final '.' tag.
text = "Can you open the window?"
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Can', 'MD'),
 ('you', 'PRP'),
 ('open', 'VB'),
 ('the', 'DT'),
 ('window', 'NN'),
 ('?', '.')]

### Complex Compound Sentences 

In [57]:
# Three clauses joined by "but" and "so". In the recorded output "but" is
# tagged CC whereas "so" is tagged IN.
text = "I ran to the store, but they were out of milk, so I bought eggs instead."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('I', 'PRP'),
 ('ran', 'VBD'),
 ('to', 'TO'),
 ('the', 'DT'),
 ('store', 'NN'),
 (',', ','),
 ('but', 'CC'),
 ('they', 'PRP'),
 ('were', 'VBD'),
 ('out', 'IN'),
 ('of', 'IN'),
 ('milk', 'NN'),
 (',', ','),
 ('so', 'IN'),
 ('I', 'PRP'),
 ('bought', 'VBD'),
 ('eggs', 'NNS'),
 ('instead', 'RB'),
 ('.', '.')]

### Use of Passive Voice

In [58]:
# Passive voice: "was" (VBD) + "written" (VBN) in the recorded output;
# each part of the personal name is tagged NNP.
text = "The book was written by Netra Kumar Manandhar."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('The', 'DT'),
 ('book', 'NN'),
 ('was', 'VBD'),
 ('written', 'VBN'),
 ('by', 'IN'),
 ('Netra', 'NNP'),
 ('Kumar', 'NNP'),
 ('Manandhar', 'NNP'),
 ('.', '.')]

### Historical Facts 

In [59]:
# Sentence with a year and place names: "1991" is tagged CD (cardinal
# number) and the proper nouns are tagged NNP in the recorded output.
text = "In 1991 AD, Kathmandu University was established in Dhulikhel."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('In', 'IN'),
 ('1991', 'CD'),
 ('AD', 'NNP'),
 (',', ','),
 ('Kathmandu', 'NNP'),
 ('University', 'NNP'),
 ('was', 'VBD'),
 ('established', 'VBN'),
 ('in', 'IN'),
 ('Dhulikhel', 'NNP'),
 ('.', '.')]

In [60]:
# Look up CD (cardinal numeral) - the tag given to "1991" above.
nltk.help.upenn_tagset('CD')

CD: numeral, cardinal
    mid-1890 nine-thirty forty-two one-tenth ten million 0.5 one forty-
    seven 1987 twenty '79 zero two 78-degrees eighty-four IX '60s .025
    fifteen 271,124 dozen quintillion DM2,000 ...


### Technical and Scientific statements 

In [61]:
# Present perfect in a scientific statement: "have" (VBP) + "risen" (VBN)
# in the recorded output. The sentence-initial "Carbon" is tagged NNP.
text = "Carbon dioxide levels in the atmosphere have risen dramatically over the past century."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Carbon', 'NNP'),
 ('dioxide', 'NN'),
 ('levels', 'NNS'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('atmosphere', 'NN'),
 ('have', 'VBP'),
 ('risen', 'VBN'),
 ('dramatically', 'RB'),
 ('over', 'IN'),
 ('the', 'DT'),
 ('past', 'JJ'),
 ('century', 'NN'),
 ('.', '.')]

### Conditional Sentences 

In [62]:
# Conditional sentence with a modal in the main clause.
# NOTE(review): "by" looks like a typo for "buy". As written, the recorded
# output tags "by" as IN, and the words after it are tagged oddly
# ("laptop" JJ, "later" NN). Fix the sentence and re-run if "buy" was meant.
text = "If you save money, you can by a new laptop later."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('If', 'IN'),
 ('you', 'PRP'),
 ('save', 'VBP'),
 ('money', 'NN'),
 (',', ','),
 ('you', 'PRP'),
 ('can', 'MD'),
 ('by', 'IN'),
 ('a', 'DT'),
 ('new', 'JJ'),
 ('laptop', 'JJ'),
 ('later', 'NN'),
 ('.', '.')]

### Use of Infinitives 

In [63]:
# Infinitive phrase: "To" (TO) + "understand" (VB) at the start.
# In the recorded output the second clause is tagged differently:
# "first" comes back as VB and the repeated "understand" as JJ.
text = "To understand recursion, you must first understand recursion."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('To', 'TO'),
 ('understand', 'VB'),
 ('recursion', 'NN'),
 (',', ','),
 ('you', 'PRP'),
 ('must', 'MD'),
 ('first', 'VB'),
 ('understand', 'JJ'),
 ('recursion', 'NN'),
 ('.', '.')]

### Statement with Gerunds 

In [64]:
# Gerund as subject: "Swimming" is tagged VBG, and the possessive "'s" is
# split off and tagged POS in the recorded output.
text = "Swimming in the ocean has been Sharon's passion since she was five years old."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Swimming', 'VBG'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('ocean', 'NN'),
 ('has', 'VBZ'),
 ('been', 'VBN'),
 ('Sharon', 'NNP'),
 ("'s", 'POS'),
 ('passion', 'NN'),
 ('since', 'IN'),
 ('she', 'PRP'),
 ('was', 'VBD'),
 ('five', 'CD'),
 ('years', 'NNS'),
 ('old', 'JJ'),
 ('.', '.')]

In [65]:
# Look up POS (genitive marker) - the tag given to "'s" above.
nltk.help.upenn_tagset('POS')

POS: genitive marker
    ' 's


### Complex sentence with multiple clauses 

In [66]:
# Two subordinate clauses before the main clause: "When" is tagged WRB and
# "if" is tagged IN in the recorded output.
text = "When I arrive, if the weather is nice, we will go to the park."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('When', 'WRB'),
 ('I', 'PRP'),
 ('arrive', 'VBP'),
 (',', ','),
 ('if', 'IN'),
 ('the', 'DT'),
 ('weather', 'NN'),
 ('is', 'VBZ'),
 ('nice', 'JJ'),
 (',', ','),
 ('we', 'PRP'),
 ('will', 'MD'),
 ('go', 'VB'),
 ('to', 'TO'),
 ('the', 'DT'),
 ('park', 'NN'),
 ('.', '.')]

### Descriptive Narratives 

In [67]:
# Descriptive narrative with several modifiers. In the recorded output
# "crumbling" (used before "walls") is tagged VBG.
text = "The ancient tree cast a long shadow over the old, crumbling walls."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('The', 'DT'),
 ('ancient', 'JJ'),
 ('tree', 'NN'),
 ('cast', 'VBD'),
 ('a', 'DT'),
 ('long', 'JJ'),
 ('shadow', 'NN'),
 ('over', 'IN'),
 ('the', 'DT'),
 ('old', 'JJ'),
 (',', ','),
 ('crumbling', 'VBG'),
 ('walls', 'NNS'),
 ('.', '.')]

### Simple Questions 

In [68]:
# Wh-question: "Where" is tagged WRB and the possessive "my" is tagged
# PRP$ in the recorded output.
text = "Where are my glasses?"
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Where', 'WRB'),
 ('are', 'VBP'),
 ('my', 'PRP$'),
 ('glasses', 'NNS'),
 ('?', '.')]

In [69]:
# Look up WP (WH-pronoun) with sample words.
nltk.help.upenn_tagset('WP')

WP: WH-pronoun
    that what whatever whatsoever which who whom whosoever


In [70]:
# Look up WRB (Wh-adverb) - the tag given to "Where" above.
nltk.help.upenn_tagset('WRB')

WRB: Wh-adverb
    how however whence whenever where whereby whereever wherein whereof why


### Use of Conjunctions and Commas

In [71]:
# List plus a contrasting clause: both "and" and "but" are tagged CC, and
# every comma is tagged ',' in the recorded output.
text = "She bought apples, oranges, and bananas, but forgot milk."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('She', 'PRP'),
 ('bought', 'VBD'),
 ('apples', 'NNS'),
 (',', ','),
 ('oranges', 'NNS'),
 (',', ','),
 ('and', 'CC'),
 ('bananas', 'NNS'),
 (',', ','),
 ('but', 'CC'),
 ('forgot', 'VBD'),
 ('milk', 'NN'),
 ('.', '.')]

### Simple Future Tense 

In [72]:
# Simple future: "will" (MD) + "go" (VB). In the recorded output the time
# word "tonight" is tagged NN.
text = "They will go to the cinema tonight."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('They', 'PRP'),
 ('will', 'MD'),
 ('go', 'VB'),
 ('to', 'TO'),
 ('the', 'DT'),
 ('cinema', 'NN'),
 ('tonight', 'NN'),
 ('.', '.')]

In [73]:
# Look up TO ("to" as preposition or infinitive marker).
nltk.help.upenn_tagset("TO")

TO: "to" as preposition or infinitive marker
    to


### Complex sentence with adverbial clause 

In [74]:
# Adverbial clause introduced by "Although" (tagged IN in the recorded
# output), followed by a main clause with a to-infinitive.
text = "Although it was raining, we decided to go for a walk."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Although', 'IN'),
 ('it', 'PRP'),
 ('was', 'VBD'),
 ('raining', 'VBG'),
 (',', ','),
 ('we', 'PRP'),
 ('decided', 'VBD'),
 ('to', 'TO'),
 ('go', 'VB'),
 ('for', 'IN'),
 ('a', 'DT'),
 ('walk', 'NN'),
 ('.', '.')]

### Imperative with a direct object 

In [76]:
# Polite imperative. In the recorded output the sentence-initial "Please"
# is tagged NNP (proper noun), while "pass" is tagged VB.
text = "Please pass me the salt."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Please', 'NNP'),
 ('pass', 'VB'),
 ('me', 'PRP'),
 ('the', 'DT'),
 ('salt', 'NN'),
 ('.', '.')]

### Conditional and Future 

In [77]:
# Conditional clause followed by a future main clause: "arrives" is tagged
# VBZ, and "will" (MD) + "start" (VB) in the recorded output.
text = "If she arrives late, we will start without her."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('If', 'IN'),
 ('she', 'PRP'),
 ('arrives', 'VBZ'),
 ('late', 'RB'),
 (',', ','),
 ('we', 'PRP'),
 ('will', 'MD'),
 ('start', 'VB'),
 ('without', 'IN'),
 ('her', 'PRP'),
 ('.', '.')]

### Use of Multiple Adjectives 

In [78]:
# Two adjectives separated by a comma. In the recorded output "tall" is
# tagged NN (noun) while "dark" is tagged JJ - the mis-tag discussed in
# the notes that follow this cell.
text = "The tall, dark figure stood silently in the doorway."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('The', 'DT'),
 ('tall', 'NN'),
 (',', ','),
 ('dark', 'JJ'),
 ('figure', 'NN'),
 ('stood', 'VBD'),
 ('silently', 'RB'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('doorway', 'NN'),
 ('.', '.')]

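**Why was "tall" tagged as a noun (NN) in the example above?** POS taggers are not perfect; the most common reasons for mistakes like this are:

1. Context Sensitivity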
Limited Context: POS taggers typically look at a limited context around each word to determine its tag. If the surrounding context is ambiguous or not indicative of the typical usage of a word, the tagger might assign an incorrect tag.
Model Limitations: Many POS taggers are based on statistical models that use probabilities derived from training data. If the training data doesn't include sufficient examples of a word being used in a particular way, the tagger might not learn to tag it correctly in all contexts.
2. Homonyms and Homographs
Words that are spelled the same but have different meanings in different contexts (homographs) can confuse POS taggers. For example, "lead" (to go in front) vs. "lead" (a metal).
3. Tagging Accuracy
Most POS taggers, including the commonly used models in NLTK (like the averaged_perceptron_tagger), have accuracies in the range of 95-97%. This means they still make mistakes in some cases.
4. Complex Sentences
Sentences with complex structures, multiple clauses, or embedded phrases can challenge taggers, especially if the syntactic structure influences the role of words in unusual ways.
5. Fixed Tag Sets
Most taggers use a fixed set of tags, which might not include all possible uses of a word across different dialects, technical fields, or modern slang.
6. Training Data Bias
The performance of a POS tagger is heavily dependent on the corpus it was trained on. If the training data does not reflect the diversity of language use across different domains or registers, the tagger might perform poorly on text that diverges from the training data.
7. Rare Words and Neologisms
New words or rare words might not be recognized properly by the tagger if they were not present in the training data.

`Corrected Version`

In [79]:
import nltk
from nltk.tokenize import word_tokenize

# Same adjective in a simpler context: with "tall" directly before the noun
# "man", the recorded output tags it JJ as expected.
text = "The tall man walked quickly."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
print(tagged_tokens)


[('The', 'DT'), ('tall', 'JJ'), ('man', 'NN'), ('walked', 'VBD'), ('quickly', 'RB'), ('.', '.')]


`Using Another Model`

In [81]:
import spacy

# Second opinion from spaCy's small English pipeline on the sentence where
# NLTK tagged "tall" as NN. token.pos_ yields labels such as DET / ADJ / NOUN
# (see the recorded output), and "tall" comes back as ADJ here.
nlp = spacy.load('en_core_web_sm')
doc = nlp("The tall, dark figure stood silently in the doorway.")
print([(tok.text, tok.pos_) for tok in doc])

[('The', 'DET'), ('tall', 'ADJ'), (',', 'PUNCT'), ('dark', 'ADJ'), ('figure', 'NOUN'), ('stood', 'VERB'), ('silently', 'ADV'), ('in', 'ADP'), ('the', 'DET'), ('doorway', 'NOUN'), ('.', 'PUNCT')]


### Scientific Explanation 

In [82]:
# Long scientific sentence with a relative clause and a plural possessive.
# In the recorded output the apostrophe of "organisms'" is split off and
# tagged POS, and "light" (in "light energy") is tagged JJ.
text = (
    "Photosynthesis is a process used by plants and other organisms "
    "to convert light energy into chemical energy that can later be "
    "released to fuel the organisms' activities."
)
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('Photosynthesis', 'NN'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('process', 'NN'),
 ('used', 'VBN'),
 ('by', 'IN'),
 ('plants', 'NNS'),
 ('and', 'CC'),
 ('other', 'JJ'),
 ('organisms', 'NNS'),
 ('to', 'TO'),
 ('convert', 'VB'),
 ('light', 'JJ'),
 ('energy', 'NN'),
 ('into', 'IN'),
 ('chemical', 'JJ'),
 ('energy', 'NN'),
 ('that', 'WDT'),
 ('can', 'MD'),
 ('later', 'RB'),
 ('be', 'VB'),
 ('released', 'VBN'),
 ('to', 'TO'),
 ('fuel', 'VB'),
 ('the', 'DT'),
 ('organisms', 'NNS'),
 ("'", 'POS'),
 ('activities', 'NNS'),
 ('.', '.')]

### Formal Announcement

In [83]:
# Formal announcement with time expressions. In the recorded output
# "evening" is tagged VBG (it ends in -ing) although it is a noun here,
# and "o'clock" is kept as a single token tagged NN.
text = "The president will address the nation tomorrow evening at eight o'clock."
tokens = word_tokenize(text)
tagged_tokens = nltk.pos_tag(tokens)
tagged_tokens

[('The', 'DT'),
 ('president', 'NN'),
 ('will', 'MD'),
 ('address', 'VB'),
 ('the', 'DT'),
 ('nation', 'NN'),
 ('tomorrow', 'NN'),
 ('evening', 'VBG'),
 ('at', 'IN'),
 ('eight', 'CD'),
 ("o'clock", 'NN'),
 ('.', '.')]