# NLTK POS Tagger

In [1]:
import nltk
from nltk.tokenize import word_tokenize

# Fetch the tagger and tokenizer resources that the calls below depend on
for resource in ('averaged_perceptron_tagger', 'punkt'):
    nltk.download(resource)

data = "The big black dog barked at the white cat and chased away."

# Split the sentence into tokens first, then tag each token with NLTK
tokens = word_tokenize(data)
nltk_pos_tagged = nltk.pos_tag(tokens)
print("NLTK POS Tagging:", nltk_pos_tagged)


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


NLTK POS Tagging: [('The', 'DT'), ('big', 'JJ'), ('black', 'JJ'), ('dog', 'NN'), ('barked', 'VBD'), ('at', 'IN'), ('the', 'DT'), ('white', 'JJ'), ('cat', 'NN'), ('and', 'CC'), ('chased', 'VBD'), ('away', 'RB'), ('.', '.')]


# TextBlob POS Tagger


In [2]:
from textblob import TextBlob

# Same example sentence as the NLTK cell, so the two outputs can be compared directly
data = "The big black dog barked at the white cat and chased away."

# Creating a TextBlob object and POS Tagging using TextBlob.
# The result is read from the blob's `tags` attribute as (word, tag) pairs.
# Note: the recorded output of this cell has no entry for the trailing '.',
# unlike the NLTK output, which includes ('.', '.').
tb = TextBlob(data)
textblob_pos_tagged = tb.tags
print("TextBlob POS Tagging:", textblob_pos_tagged)

TextBlob POS Tagging: [('The', 'DT'), ('big', 'JJ'), ('black', 'JJ'), ('dog', 'NN'), ('barked', 'VBD'), ('at', 'IN'), ('the', 'DT'), ('white', 'JJ'), ('cat', 'NN'), ('and', 'CC'), ('chased', 'VBD'), ('away', 'RB')]


# Regular Expression Tagger

In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import RegexpTagger

data = "The big black dog barked at the white cat and chased away."

# Regex patterns for POS tagging, given as (pattern, tag) pairs.
# Order matters: a token receives the tag of the first pattern that matches it,
# so the specific rules come first and the catch-all `.*` comes last.
#
# The decimal point in the cardinal-number rule is escaped (`\.`); an unescaped
# `.` would match any character and tag tokens such as "1a2" or "3-4" as CD.
#
# NOTE: there is no rule for punctuation, so a token such as '.' falls through
# to the default 'NN' tag. TODO: add a punctuation rule if that matters here.
patterns = [
    (r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),  # cardinal numbers (integers and decimals)
    (r'(The|the|A|a|An|an)$', 'DT'),   # articles
    (r'.*able$', 'JJ'),                # adjectives
    (r'.*ness$', 'NN'),                # nouns formed from adjectives
    (r'.*ly$', 'RB'),                  # adverbs
    (r'.*\'s$', 'POS'),                # possessive nouns
    (r'.*ment$', 'NN'),                # nouns that end in -ment
    (r'.*town$', 'NN'),                # nouns that end in -town
    (r'.*ing$', 'VBG'),                # gerunds
    (r'.*ed$', 'VBD'),                 # simple past
    (r'.*es$', 'VBZ'),                 # 3rd singular present
    (r'^[A-Z].*$', 'NNP'),             # proper nouns
    (r'.*ould$', 'MD'),                # modals
    (r'.*s$', 'NNS'),                  # plural nouns
    (r'.*', 'NN')                      # nouns (default)
]

# Apply the Regular Expression Tagger to the tokenized sentence
regexp_tagger = RegexpTagger(patterns)
regexp_pos_tagged = regexp_tagger.tag(word_tokenize(data))
print("Regular Expression POS Tagging:", regexp_pos_tagged)


Regular Expression POS Tagging: [('The', 'DT'), ('big', 'NN'), ('black', 'NN'), ('dog', 'NN'), ('barked', 'VBD'), ('at', 'NN'), ('the', 'DT'), ('white', 'NN'), ('cat', 'NN'), ('and', 'NN'), ('chased', 'VBD'), ('away', 'NN'), ('.', 'NN')]
