In [3]:
import re
import pickle
import os
import numpy as np
from collections import defaultdict, Counter

print("Loading and cleaning corpus...")
with open('corpus.txt','r') as f:
    corpus_words = f.read().splitlines()

# Clean words as per original notebook
words = [w.strip().lower() for w in corpus_words if re.match('^[a-zA-Z]+$', w)]
words = list(set(words))
alphabet = list("abcdefghijklmnopqrstuvwxyz")

# --- HMM Component Calculation ---

# 1. Initial Probabilities (pi)
initial_counts = Counter()
for word in words:
    if len(word) > 0:
        initial_counts[word[0]] += 1

total_words = len(words)
default_initial_log_prob = np.log(1 / (total_words + 27))
initial_probs = {char: np.log((initial_counts[char] + 1) / (total_words + 27)) for char in alphabet}
initial_probs = defaultdict(lambda: default_initial_log_prob, initial_probs)


# 2. Transition Probabilities (A)
transition_counts = defaultdict(Counter)
for word in words:
    for i in range(len(word) - 1):
        prev_char = word[i]
        next_char = word[i+1]
        transition_counts[prev_char][next_char] += 1

transition_probs = defaultdict(lambda: defaultdict(lambda: default_initial_log_prob))
for prev_char, next_counts in transition_counts.items():
    total_transitions = sum(next_counts.values())
    for next_char in alphabet:
        prob = (next_counts[next_char] + 1) / (total_transitions + 27) 
        transition_probs[prev_char][next_char] = np.log(prob)

# --- Save Model ---
os.makedirs('models', exist_ok=True)

# !! FIX: Convert defaultdicts to regular dicts for pickling
initial_probs_dict = dict(initial_probs)
transition_probs_dict = {k: dict(v) for k, v in transition_probs.items()}

hmm_model = {
    "initial_probs": initial_probs_dict,
    "transition_probs": transition_probs_dict,
    "default_log_prob": default_initial_log_prob # Save the default value
}

# Save with a new name to avoid confusion
with open('models/bigram_hmm_model.pkl','wb') as f:
    pickle.dump(hmm_model, f) # This will now work

print("Bigram HMM model saved (Initial and Transition log-probabilities).")

Loading and cleaning corpus...
Bigram HMM model saved (Initial and Transition log-probabilities).


In [4]:
import pickle
import random
import numpy as np
from collections import defaultdict, Counter

# --- Load the NEW HMM Model ---
with open('models/bigram_hmm_model.pkl', 'rb') as f:
    hmm_model = pickle.load(f)

# !! FIX: Re-hydrate the dicts back into defaultdicts
default_log_prob = hmm_model['default_log_prob']
initial_probs = defaultdict(lambda: default_log_prob, hmm_model['initial_probs'])

transition_probs = defaultdict(lambda: defaultdict(lambda: default_log_prob))
for k, v in hmm_model['transition_probs'].items():
    transition_probs[k] = defaultdict(lambda: default_log_prob, v)

alphabet = list("abcdefghijklmnopqrstuvwxyz")

# --- Hangman Environment (MODIFIED for 6 lives) ---
class HangmanEnv:
    def __init__(self, words, max_lives=6): # Changed from 8 to 6
        self.words = words
        self.max_lives = max_lives
    
    def reset(self):
        self.word = random.choice(self.words)
        self.guessed = set()
        self.lives = self.max_lives
        self.pattern = "_" * len(self.word)
        return self.pattern
    
    def step(self, letter):
        reward = 0
        done = False
        if letter in self.guessed:
            reward -= 4 
        elif letter in self.word:
            self.guessed.add(letter)
            new_pattern = list(self.pattern)
            for i, ch in enumerate(self.word):
                if ch == letter:
                    new_pattern[i] = letter
            diff = new_pattern.count(letter) - self.pattern.count(letter)
            self.pattern = "".join(new_pattern)
            reward += 12 + 4 * diff
        else:
            self.lives -= 1
            self.guessed.add(letter)
            reward -= 10 
        
        if "_" not in self.pattern:
            reward += 150 
            done = True
        elif self.lives <= 0:
            reward -= 100 
            done = True
        return self.pattern, reward, done

# --- QLearningAgent Class (MODIFIED choose_action) ---
class QLearningAgent:
    def __init__(self, alpha=0.15, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.9995):
        self.Q = defaultdict(float)
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        
    def get_state(self, pattern, guessed):
        return (pattern, "".join(sorted(guessed)))

    def _get_hmm_scores(self, pattern, available):
        scores = {a: 0.0 for a in available}
        L = len(pattern)
        
        for pos, char in enumerate(pattern):
            if char == '_':
                for a in available:
                    if pos == 0:
                        scores[a] += initial_probs[a] 
                    
                    if pos > 0 and pattern[pos-1] != '_':
                        prev_char = pattern[pos-1]
                        scores[a] += transition_probs[prev_char][a] 
                    
                    if pos < L - 1 and pattern[pos+1] != '_':
                        next_char = pattern[pos+1]
                        scores[a] += transition_probs[a][next_char]
        return scores

    def choose_action(self, state, pattern, guessed):
        available = [a for a in alphabet if a not in guessed]
        if not available:
            return None
        
        hmm_scores = self._get_hmm_scores(pattern, available)
        
        if random.random() < self.epsilon:
            return random.choice(available)
        
        HMM_BOOST_FACTOR = 1.0 
        scores = {a: self.Q[(state,a)] + HMM_BOOST_FACTOR * hmm_scores[a] for a in available}
        
        return max(scores, key=scores.get)

    def update(self, state, action, reward, next_state):
        max_next = max([self.Q[(next_state,a)] for a in alphabet], default=0)
        self.Q[(state,action)] += self.alpha * (reward + self.gamma * max_next - self.Q[(state,action)])

# --- Training Loop (Using corpus.txt as required) ---
with open('corpus.txt', 'r') as f:
    words = f.read().splitlines()

env = HangmanEnv(words, max_lives=6) 
agent = QLearningAgent()
episodes = 40000 
scores = []
wins = 0

print("Starting RL agent training...")
for ep in range(episodes):
    pattern = env.reset()
    guessed = set()
    total_reward = 0
    while True:
        state = agent.get_state(pattern, guessed)
        action = agent.choose_action(state, pattern, guessed)
        if action is None:
            break
        
        next_pattern, reward, done = env.step(action)
        next_state = agent.get_state(next_pattern, env.guessed)
        agent.update(state, action, reward, next_state) 
        
        total_reward += reward
        guessed.add(action)
        pattern = next_pattern
        
        if done:
            if "_" not in pattern:
                wins += 1
            break
            
    agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
    scores.append(total_reward)
    
    if ep % 500 == 0 and ep > 0:
        rate = wins/ep
        avg = np.mean(scores[-500:])
        print(f"Episode {ep:5d} | WinRate {rate:6.2%} | AvgReward {avg:8.2f} | Eps {agent.epsilon:6.3f}")

# --- Save the trained agent ---
with open('models/rl_agent.pkl','wb') as f:
    pickle.dump(agent,f)

print("RL agent trained and saved.")

Starting RL agent training...
Episode   500 | WinRate  0.80% | AvgReward  -111.06 | Eps  0.778
Episode  1000 | WinRate  1.50% | AvgReward   -95.34 | Eps  0.606
Episode  1500 | WinRate  2.53% | AvgReward   -83.08 | Eps  0.472
Episode  2000 | WinRate  4.30% | AvgReward   -60.73 | Eps  0.368
Episode  2500 | WinRate  6.08% | AvgReward   -43.57 | Eps  0.286
Episode  3000 | WinRate  7.53% | AvgReward   -38.40 | Eps  0.223
Episode  3500 | WinRate  9.09% | AvgReward   -21.87 | Eps  0.174
Episode  4000 | WinRate 10.53% | AvgReward   -14.14 | Eps  0.135
Episode  4500 | WinRate 12.02% | AvgReward    -4.48 | Eps  0.105
Episode  5000 | WinRate 13.46% | AvgReward     7.03 | Eps  0.082
Episode  5500 | WinRate 14.75% | AvgReward    11.87 | Eps  0.064
Episode  6000 | WinRate 15.77% | AvgReward     7.61 | Eps  0.050
Episode  6500 | WinRate 16.92% | AvgReward    21.77 | Eps  0.039
Episode  7000 | WinRate 17.73% | AvgReward    15.54 | Eps  0.030
Episode  7500 | WinRate 18.48% | AvgReward    18.26 | Eps  0

In [5]:
import pickle
import random
import numpy as np
from collections import defaultdict, Counter

# --- Define utility functions and alphabet ---
# These are needed for the pickle files to load correctly
def default_dict_factory():
    return defaultdict(Counter)

def default_dict_dict_factory():
    return defaultdict(dict)

alphabet = list("abcdefghijklmnopqrstuvwxyz")

# --- Define Classes (Must match training) ---
# The class definitions must be present for pickle.load() to work.

class HangmanEnv:
    def __init__(self, words, max_lives=6): # CRITICAL: Must be 6 lives
        self.words = words
        self.max_lives = max_lives
    
    def reset(self):
        if not self.words:
            self.word = ""
            self.guessed = set()
            self.lives = 0
            self.pattern = ""
            return self.pattern
        self.word = random.choice(self.words)
        self.guessed = set()
        self.lives = self.max_lives
        self.pattern = "_" * len(self.word)
        return self.pattern
    
    # Simplified step for evaluation (no rewards needed)
    def step(self, letter):
        if letter in self.guessed:
            return self.pattern, False, None, True # Done, Success, Repeated
        
        self.guessed.add(letter)
        is_wrong = False
        
        if letter in self.word:
            new_pattern = list(self.pattern)
            for i, ch in enumerate(self.word):
                if ch == letter:
                    new_pattern[i] = letter
            self.pattern = "".join(new_pattern)
        else:
            self.lives -= 1
            is_wrong = True

        if "_" not in self.pattern:
            return self.pattern, True, True, False  # Done, Success, Not Repeated
        if self.lives <= 0:
            return self.pattern, True, False, False # Done, Failure, Not Repeated

        return self.pattern, False, None, is_wrong

class QLearningAgent:
    # We only need the structure and the choose_action method for evaluation
    def __init__(self, alpha=0, gamma=0, epsilon=0, epsilon_min=0, epsilon_decay=0):
        self.Q = defaultdict(float)
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        
    def get_state(self, pattern, guessed):
        return (pattern, "".join(sorted(guessed)))

    def _get_hmm_scores(self, pattern, available, initial_probs, transition_probs):
        scores = {a: 0.0 for a in available}
        L = len(pattern)
        for pos, char in enumerate(pattern):
            if char == '_':
                for a in available:
                    if pos == 0:
                        scores[a] += initial_probs[a]
                    if pos > 0 and pattern[pos-1] != '_':
                        scores[a] += transition_probs[pattern[pos-1]][a]
                    if pos < L - 1 and pattern[pos+1] != '_':
                        scores[a] += transition_probs[a][pattern[pos+1]]
        return scores

    def choose_action(self, state, pattern, guessed, initial_probs, transition_probs):
        available = [a for a in alphabet if a not in guessed]
        if not available:
            return None
        
        # Get HMM scores
        hmm_scores = self._get_hmm_scores(pattern, available, initial_probs, transition_probs)
        
        # In evaluation (epsilon=0), we ONLY exploit
        if random.random() < self.epsilon:
            return random.choice(available) 
        
        HMM_BOOST_FACTOR = 1.0 
        scores = {a: self.Q.get((state,a), 0.0) + HMM_BOOST_FACTOR * hmm_scores[a] for a in available}
        
        return max(scores, key=scores.get)

# --- Data Loading ---
try:
    with open('models/bigram_hmm_model.pkl', 'rb') as f:
        hmm_model = pickle.load(f)
    with open('models/rl_agent.pkl', 'rb') as f:
        rl_agent = pickle.load(f)
except FileNotFoundError:
    print("Error: Models not found. Ensure Cell 1 and Cell 2 ran successfully.")
    exit()

with open('test.txt', 'r') as f:
    test_words = [w.strip().lower() for w in f.read().splitlines() if w.strip()]

# !! FIX: Re-hydrate the dicts back into defaultdicts
default_log_prob = hmm_model['default_log_prob']
initial_probs = defaultdict(lambda: default_log_prob, hmm_model['initial_probs'])

transition_probs = defaultdict(lambda: defaultdict(lambda: default_log_prob))
for k, v in hmm_model['transition_probs'].items():
    transition_probs[k] = defaultdict(lambda: default_log_prob, v)

# --- Evaluation Loop ---
print(f"Evaluating Hybrid RL Agent on {len(test_words)} TEST words...")
env_test = HangmanEnv(test_words, max_lives=6) # 6 lives
games_test = len(test_words)
success_count = 0
total_wrong_guesses = 0
total_repeated_guesses = 0

# Set agent to pure exploitation mode
rl_agent.epsilon = 0.0

for i in range(games_test):
    pattern = env_test.reset()
    guessed = set()
    
    while True:
        state = rl_agent.get_state(pattern, guessed)
        action = rl_agent.choose_action(state, pattern, guessed, initial_probs, transition_probs)

        if action is None:
            break

        next_pattern, done, success, is_wrong_or_repeated = env_test.step(action)

        # Scoring logic
        if is_wrong_or_repeated and action in guessed:
             total_repeated_guesses += 1
        elif is_wrong_or_repeated:
             total_wrong_guesses += 1

        guessed.add(action)
        pattern = next_pattern

        if done:
            if success:
                success_count += 1
            break

# --- Calculate Final Score (as per PDF) ---
success_rate = success_count / games_test
final_score = (success_rate * 2000) - (total_wrong_guesses * 5) - (total_repeated_guesses * 2)

# --- Print Results ---
print("\n" + "=" * 60)
print(f"Hybrid HMM+RL Agent Evaluation Results ({games_test} games)")
print("=" * 60)
print(f"  Lives per Game: {env_test.max_lives}")
print("\n--- PERFORMANCE ---")
print(f"  Success Count:    {success_count}/{games_test}")
print(f"  Success Rate:     {success_rate:.2%}")
print(f"  Total Wrong Guesses:  {total_wrong_guesses}")
print(f"  Total Repeated Guesses: {total_repeated_guesses}")
print("\n--- SCORING ---")
print(f"  Success Term: (Success Rate * 2000) = {success_rate * 2000:,.2f}")
print(f"  Wrong Penalty: (Wrong Guesses * 5)   = {total_wrong_guesses * 5:,.2f}")
print(f"  Repeat Penalty: (Repeat Guesses * 2) = {total_repeated_guesses * 2:,.2f}")
print("-" * 60)
print(f"  Final Score: {final_score:,.2f}")
print("=" * 60)

Evaluating Hybrid RL Agent on 2000 TEST words...

Hybrid HMM+RL Agent Evaluation Results (2000 games)
  Lives per Game: 6

--- PERFORMANCE ---
  Success Count:    690/2000
  Success Rate:     34.50%
  Total Wrong Guesses:  9099
  Total Repeated Guesses: 0

--- SCORING ---
  Success Term: (Success Rate * 2000) = 690.00
  Wrong Penalty: (Wrong Guesses * 5)   = 45,495.00
  Repeat Penalty: (Repeat Guesses * 2) = 0.00
------------------------------------------------------------
  Final Score: -44,805.00
