In [9]:
import nltk
from collections import Counter

nltk.download('brown')
nltk.download('universal_tagset')

def build_hmm():
    sents = nltk.corpus.brown.tagged_sents(tagset='universal')
    tags = Counter()
    emits = Counter()
    trans = Counter()
    vocab = set()
    
    for sent in sents:
        prev = '<s>'
        for w, t in sent:
            w = w.lower()
            vocab.add(w)
            tags[t] += 1
            emits[(w, t)] += 1
            trans[(prev, t)] += 1
            prev = t
        trans[(prev, '</s>')] += 1
        tags['</s>'] += 1
    
    V, T = len(vocab), len(tags)
    # Proper Laplace smoothing
    emit_p = {(w, t): (c + 1) / (tags[t] + V) for (w, t), c in emits.items()}
    trans_p = {(t1, t2): (c + 1) / (tags[t1] + T) for (t1, t2), c in trans.items()}
    return emit_p, trans_p, tags, max(tags, key=tags.get)

def tag_hmm(text, emit_p, trans_p, tags, default):
    words = text.lower().split()
    result, prev = [], '<s>'
    valid_tags = [t for t in tags if t not in ['<s>', '</s>', '.']]  # Exclude punctuation
    for w in words:
        best, score, ep, tp = default, 0, 0, 0
        for t in valid_tags:
            ep = emit_p.get((w, t), 1 / (tags[t] + len(tags)))
            tp = trans_p.get((prev, t), 1 / (tags[prev] + len(tags)))
            if ep * tp > score:
                score, best, ep, tp = ep * tp, t, ep, tp
        result.append((w, best, ep, tp))
        prev = best
    return result

emit_p, trans_p, tags, default = build_hmm()
print("Greedy HMM POS Tagging:")
for w, t, ep, tp in tag_hmm("The sun shines bright", emit_p, trans_p, tags, default):
    print(f"{w}: {t} (Emission: {ep:.4e}, Transition: {tp:.4e})")

[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\divya\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     C:\Users\divya\AppData\Roaming\nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!


Greedy HMM POS Tagging:
the: DET (Emission: 7.8123e-05, Transition: 2.3846e+00)
sun: NOUN (Emission: 7.1480e-04, Transition: 1.4230e-03)
shines: VERB (Emission: 7.1480e-04, Transition: 3.3748e-04)
bright: ADJ (Emission: 7.1480e-04, Transition: 1.9150e-04)
