In [None]:
import re

# Improved rule-based POS tagger

# Closed-class lexicon. It is consulted before any suffix heuristic so that a
# listed word can never be mis-tagged because of how it happens to end.
_LEXICON = {
    **dict.fromkeys(("the", "a", "an", "this", "these", "those"), "DT"),
    **dict.fromkeys(("over", "under", "on", "in", "by", "with"), "IN"),
    **dict.fromkeys(
        ("is", "am", "are", "was", "were", "be", "being", "been"), "VB"
    ),
    **dict.fromkeys(("quick", "lazy", "brown"), "JJ"),
}

# Suffix heuristics, tried in this order; the first match wins.
_SUFFIX_TAGS = (("ly", "RB"), ("ing", "VBG"), ("ed", "VBD"), ("s", "VBZ"))

_VOWELS = frozenset("aeiouy")


def _has_real_suffix(word, suffix):
    """Return True if *word* ends with *suffix* and the leftover stem has a vowel.

    Requiring a vowel in the stem keeps short words that merely end in the
    same letters ("bed", "king", "fly") from being treated as inflected forms.
    """
    if not word.endswith(suffix):
        return False
    stem = word[: -len(suffix)]
    return any(ch in _VOWELS for ch in stem)


def _tag_word(lowered):
    """Return the tag for a single, already lower-cased word."""
    tag = _LEXICON.get(lowered)
    if tag is not None:
        return tag
    for suffix, suffix_tag in _SUFFIX_TAGS:
        if not _has_real_suffix(lowered, suffix):
            continue
        # A double "s" ("class", "grass") is part of the stem, not the
        # third-person "-s" inflection.
        if suffix == "s" and lowered.endswith("ss"):
            continue
        return suffix_tag
    # Anything unrecognised is assumed to be a noun.
    return "NN"


def rule_based_pos_tag(tokens):
    """Tag each token with a Penn-Treebank-style part-of-speech tag.

    Matching is case-insensitive; the original spelling of each token is kept
    in the result. Rules are applied in this order:

    1. Lexicon lookup: determiners (DT), prepositions (IN), forms of
       "to be" (VB) and a few known adjectives (JJ).
    2. Suffix heuristics: "-ly" -> RB, "-ing" -> VBG, "-ed" -> VBD,
       "-s" -> VBZ. A suffix only counts when the remaining stem contains a
       vowel, and "-ss" endings are never treated as "-s".
    3. Fallback: NN.

    Known limitation: every remaining word ending in a single "s" is assumed
    to be a present-tense verb, so plural nouns are tagged VBZ as well.

    Args:
        tokens: Iterable of word strings.

    Returns:
        A list of ``(word, tag)`` tuples, one per input token, in input order.
    """
    return [(word, _tag_word(word.lower())) for word in tokens]

# Demo input: a pangram split on whitespace into word tokens
tokens = "The quick brown fox jumps over the lazy dog".split()

# Tag the tokens with the hand-written tagger and show the (word, tag) pairs
pos_tags = rule_based_pos_tag(tokens)
print("POS Tags:", pos_tags, sep="\n")

# Chunk grammar for nltk's RegexpParser:
#   NP = optional determiner, any number of adjectives, then a noun
#   VP = any tag starting with VB, optionally followed by a preposition
chunk_grammar = """
    NP: {<DT>?<JJ>*<NN|NNP>}   # Noun Phrase
    VP: {<VB.*><IN>?}          # Verb Phrase
"""

# Build the regex-based chunker from the grammar above
from nltk.chunk import RegexpParser
chunk_parser = RegexpParser(chunk_grammar)

# Group the tagged tokens into chunks and print the resulting tree
chunked = chunk_parser.parse(pos_tags)
print("\nChunked Output:", chunked, sep="\n")

POS Tags:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'JJ'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN')]

Chunked Output:
(S
  (NP The/DT quick/JJ brown/JJ fox/NN)
  (VP jumps/VBZ over/IN)
  (NP the/DT lazy/JJ dog/NN))
