<a href="https://colab.research.google.com/github/bsvinay9/Assignment0/blob/main/G24AIT154.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

ModuleNotFoundError: No module named 'hmmlearn'

In [3]:
from collections import defaultdict

import numpy as np
import pandas as pd
import hmmlearn

# PoS Tagger Using HMMs - Based on Provided Inline Data (Simulating Penn Treebank-like Structure)

# Inline dataset for testing
# Format: ["sentence string", [list of POS tags]]
data = [
    ["the cat sat", ["DET", "NOUN", "VERB"]],
    ["the dog barked", ["DET", "NOUN", "VERB"]],
    ["a dog sat", ["DET", "NOUN", "VERB"]],
    ["the dog ran", ["DET", "NOUN", "VERB"]],
    ["a cat barked", ["DET", "NOUN", "VERB"]],
    ["a dog barked", ["DET", "NOUN", "VERB"]],  # Additional test case
    ["the cat ran", ["DET", "NOUN", "VERB"]]     # Additional test case
]

# Split into sentences and tags
sentences = [entry[0].split() for entry in data]
tags = [entry[1] for entry in data]

# Train/Test Split
train_size = int(0.8 * len(sentences))
train_sentences, test_sentences = sentences[:train_size], sentences[train_size:]
train_tags, test_tags = tags[:train_size], tags[train_size:]

# Count tag and word-tag frequencies
transition_counts = defaultdict(lambda: defaultdict(int))
transition2_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
emission_counts = defaultdict(lambda: defaultdict(int))
context_counts = defaultdict(int)
word_tag_prevword_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

for sent, tags in zip(train_sentences, train_tags):
    prev_tag = "<s>"
    prev_word = "<s>"
    for i in range(len(sent)):
        word = sent[i]
        tag = tags[i]
        context_counts[tag] += 1
        emission_counts[tag][word] += 1
        transition_counts[prev_tag][tag] += 1
        word_tag_prevword_counts[prev_word][tag][word] += 1
        if i > 0:
            prev_prev_tag = tags[i-2] if i > 1 else "<s>"
            transition2_counts[prev_prev_tag][tags[i-1]][tag] += 1
        prev_tag = tag
        prev_word = word

tag_set = list(context_counts.keys())

# Smoothing function
def smoothed_prob(numerator, denominator):
    return (numerator + 1) / (denominator + len(context_counts))

# First Order HMM (P(word | tag))
def predict_first_order(sent):
    prediction = []
    for word in sent:
        best_tag = max(tag_set, key=lambda tag: smoothed_prob(emission_counts[tag][word], context_counts[tag]))
        prediction.append(best_tag)
    return prediction

# Second Order HMM (P(tag | prev_tag) and Viterbi)
def predict_second_order(sent):
    V = [{}]
    path = {}

    for tag in tag_set:
        V[0][tag] = smoothed_prob(transition_counts['<s>'][tag], sum(transition_counts['<s>'].values())) * \
                    smoothed_prob(emission_counts[tag][sent[0]], context_counts[tag])
        path[tag] = [tag]

    for i in range(1, len(sent)):
        V.append({})
        new_path = {}
        for curr_tag in tag_set:
            (prob, prev_tag) = max((V[i-1][pt] * smoothed_prob(transition_counts[pt][curr_tag], context_counts[pt]) * \
                                      smoothed_prob(emission_counts[curr_tag][sent[i]], context_counts[curr_tag]), pt)
                                     for pt in tag_set)
            V[i][curr_tag] = prob
            new_path[curr_tag] = path[prev_tag] + [curr_tag]
        path = new_path

    final_tag = max(V[-1], key=V[-1].get)
    return path[final_tag]

# First Order HMM with Previous Word (P(word | tag, prev_word))
def predict_with_prev_word(sent):
    prediction = []
    prev_word = "<s>"
    for word in sent:
        best_tag = max(tag_set, key=lambda tag: smoothed_prob(word_tag_prevword_counts[prev_word][tag][word], context_counts[tag]))
        prediction.append(best_tag)
        prev_word = word
    return prediction

# Evaluation function
def evaluate(predict_func):
    correct = total = 0
    for sent, true_tags in zip(test_sentences, test_tags):
        pred_tags = predict_func(sent)
        for pt, tt in zip(pred_tags, true_tags):
            if pt == tt:
                correct += 1
            total += 1
    return correct / total if total > 0 else 0

# Run Evaluations
print("First Order HMM Accuracy:", evaluate(predict_first_order))
print("Second Order HMM Accuracy:", evaluate(predict_second_order))
print("First Order + Prev Word Accuracy:", evaluate(predict_with_prev_word))



First Order HMM Accuracy: 1.0
Second Order HMM Accuracy: 1.0
First Order + Prev Word Accuracy: 0.8333333333333334


In [2]:
pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Downloading hmmlearn-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (165 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.9/165.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: hmmlearn
Successfully installed hmmlearn-0.3.3
