In [14]:
import nltk
nltk.download("all")

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_rus to
[nltk_data]    |     /root

True

In [15]:
import nltk
import spacy
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.tag import RegexpTagger

# Download necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nlp = spacy.load("en_core_web_sm")

# Monday: Basic POS Tagging with NLTK
def basic_pos_tagging(text):
    tokens = word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    return pos_tags

# Tuesday: Tokenization and POS Tagging for a paragraph
def pos_tag_paragraph(text):
    sentences = sent_tokenize(text)
    tagged_sentences = [nltk.pos_tag(word_tokenize(sent)) for sent in sentences]
    return tagged_sentences

# Wednesday: POS Tagging with spaCy
def spacy_pos_tagging(text):
    doc = nlp(text)
    return [(token.text, token.pos_) for token in doc]

# Thursday: Rule-based POS Tagging using RegexpTagger
patterns = [
    (r'.*ing$', 'VBG'),  # Present participle (e.g., running)
    (r'.*ed$', 'VBD'),   # Past tense (e.g., played)
    (r'.*ly$', 'RB'),    # Adverbs (e.g., quickly)
    (r'^The$', 'DT'),      # Determiner (e.g., The)
    (r'.*', 'NN')        # Default to noun
]
regexp_tagger = RegexpTagger(patterns)

def rule_based_pos_tagging(text):
    tokens = word_tokenize(text)
    return regexp_tagger.tag(tokens)

# Sample input text
test_text = "The quick brown fox jumps over the lazy dog."
test_paragraph = "Natural Language Processing is an interesting field. It helps in text analysis."

# Running all implementations
print("Basic POS Tagging:", basic_pos_tagging(test_text))
print("POS Tagging on Paragraph:", pos_tag_paragraph(test_paragraph))
print("spaCy POS Tagging:", spacy_pos_tagging(test_text))
print("Rule-based POS Tagging:", rule_based_pos_tagging(test_text))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Basic POS Tagging: [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]
POS Tagging on Paragraph: [[('Natural', 'JJ'), ('Language', 'NNP'), ('Processing', 'NNP'), ('is', 'VBZ'), ('an', 'DT'), ('interesting', 'JJ'), ('field', 'NN'), ('.', '.')], [('It', 'PRP'), ('helps', 'VBZ'), ('in', 'IN'), ('text', 'JJ'), ('analysis', 'NN'), ('.', '.')]]
spaCy POS Tagging: [('The', 'DET'), ('quick', 'ADJ'), ('brown', 'ADJ'), ('fox', 'NOUN'), ('jumps', 'VERB'), ('over', 'ADP'), ('the', 'DET'), ('lazy', 'ADJ'), ('dog', 'NOUN'), ('.', 'PUNCT')]
Rule-based POS Tagging: [('The', 'DT'), ('quick', 'NN'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'NN'), ('over', 'NN'), ('the', 'NN'), ('lazy', 'NN'), ('dog', 'NN'), ('.', 'NN')]
