In [7]:
import re
import pickle
import os
from collections import defaultdict, Counter

with open('corpus.txt','r') as f:
    corpus_words = f.read().splitlines()

# Combine corpus and test words for better generalization
all_words = corpus_words
words = [w.strip().lower() for w in all_words if re.match('^[a-zA-Z]+$', w)]
words = list(set(words))
alphabet = list("abcdefghijklmnopqrstuvwxyz")

def default_dict_factory():
    return defaultdict(Counter)

def default_dict_dict_factory():
    return defaultdict(dict)

position_counts = defaultdict(default_dict_factory)
length_counts = Counter()

for word in words:
    L = len(word)
    length_counts[L] += 1
    for pos, ch in enumerate(word):
        position_counts[L][pos][ch] += 1

position_probs = defaultdict(default_dict_dict_factory)
for L, pos_dict in position_counts.items():
    for pos, cnt in pos_dict.items():
        total = sum(cnt.values()) + 1e-9
        for ch in alphabet:
            position_probs[L][pos][ch] = cnt[ch]/total

os.makedirs('models', exist_ok=True)
hmm_model = {"position_probs": position_probs, "length_counts": length_counts}

with open('models/hmm_model.pkl','wb') as f:
    pickle.dump(hmm_model, f)

print("hmm saved")


hmm saved


In [8]:
import pickle
import random
import numpy as np
from collections import defaultdict, Counter

def default_dict_factory():
    return defaultdict(Counter)

def default_dict_dict_factory():
    return defaultdict(dict)

with open('models/hmm_model.pkl', 'rb') as f:
    hmm_model = pickle.load(f)

position_probs = hmm_model['position_probs']
alphabet = list("abcdefghijklmnopqrstuvwxyz")

class HangmanEnv:
    def __init__(self, words, max_lives=8):
        self.words = words
        self.max_lives = max_lives
    def reset(self):
        self.word = random.choice(self.words)
        self.guessed = set()
        self.lives = self.max_lives
        self.pattern = "_" * len(self.word)
        return self.pattern
    def step(self, letter):
        reward = 0
        done = False
        if letter in self.guessed:
            reward -= 4
        elif letter in self.word:
            self.guessed.add(letter)
            new_pattern = list(self.pattern)
            for i, ch in enumerate(self.word):
                if ch == letter:
                    new_pattern[i] = letter
            diff = new_pattern.count(letter) - self.pattern.count(letter)
            self.pattern = "".join(new_pattern)
            reward += 12 + 4 * diff
        else:
            self.lives -= 1
            self.guessed.add(letter)
            reward -= 10
        if "_" not in self.pattern:
            reward += 150
            done = True
        elif self.lives <= 0:
            reward -= 100
            done = True
        return self.pattern, reward, done

class QLearningAgent:
    def __init__(self, alpha=0.15, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.9995):
        self.Q = defaultdict(float)
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
    def get_state(self, pattern, guessed):
        return (pattern, "".join(sorted(guessed)))
    def choose_action(self, state, pattern, guessed):
        available = [a for a in alphabet if a not in guessed]
        if not available:
            return None
        L = len(pattern)
        hmm_probs = {a: 0 for a in available}
        if L in position_probs:
            for pos, ch in enumerate(pattern):
                if ch == "_":
                    for a in available:
                        hmm_probs[a] += position_probs[L][pos].get(a, 0)
        # Normalize HMM probabilities
        total_hmm = sum(hmm_probs.values()) + 1e-9
        hmm_probs = {a: hmm_probs[a]/total_hmm for a in available}
        if random.random() < self.epsilon:
            return random.choice(available)
        # Increased weight for HMM probabilities (10 instead of 5)
        scores = {a: self.Q[(state,a)] + 10*hmm_probs.get(a,0) for a in available}
        return max(scores, key=scores.get)
    def update(self, state, action, reward, next_state):
        max_next = max([self.Q[(next_state,a)] for a in alphabet], default=0)
        self.Q[(state,action)] += self.alpha * (reward + self.gamma * max_next - self.Q[(state,action)])

with open('corpus.txt', 'r') as f:
    words = f.read().splitlines()

env = HangmanEnv(words)
agent = QLearningAgent()
episodes = 40000  # Increased from 12000 to 40000 for better learning
scores = []
wins = 0

for ep in range(episodes):
    pattern = env.reset()
    guessed = set()
    total_reward = 0
    while True:
        state = agent.get_state(pattern, guessed)
        action = agent.choose_action(state, pattern, guessed)
        if action is None:
            break
        next_pattern, reward, done = env.step(action)
        next_state = agent.get_state(next_pattern, env.guessed)
        agent.update(state, action, reward, next_state)
        total_reward += reward
        guessed.add(action)
        pattern = next_pattern
        if done:
            if "_" not in pattern:
                wins += 1
            break
    agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
    scores.append(total_reward)
    if ep % 500 == 0 and ep > 0:
        rate = wins/ep
        avg = np.mean(scores[-500:])
        print(f"Episode {ep:5d} | WinRate {rate:6.2%} | AvgReward {avg:8.2f} | Eps {agent.epsilon:6.3f}")

with open('models/rl_agent.pkl','wb') as f:
    pickle.dump(agent,f)

print("RL agent trained and saved.")


Episode   500 | WinRate  2.00% | AvgReward  -109.38 | Eps  0.778
Episode  1000 | WinRate  4.10% | AvgReward   -84.17 | Eps  0.606
Episode  1500 | WinRate  5.47% | AvgReward   -69.22 | Eps  0.472
Episode  2000 | WinRate  7.90% | AvgReward   -44.95 | Eps  0.368
Episode  2500 | WinRate  9.72% | AvgReward   -35.69 | Eps  0.286
Episode  3000 | WinRate 11.73% | AvgReward   -20.12 | Eps  0.223
Episode  3500 | WinRate 13.60% | AvgReward    -6.21 | Eps  0.174
Episode  4000 | WinRate 15.55% | AvgReward     5.52 | Eps  0.135
Episode  4500 | WinRate 17.24% | AvgReward    14.02 | Eps  0.105
Episode  5000 | WinRate 18.76% | AvgReward    16.00 | Eps  0.082
Episode  5500 | WinRate 19.73% | AvgReward     7.37 | Eps  0.064
Episode  6000 | WinRate 20.95% | AvgReward    23.20 | Eps  0.050
Episode  6500 | WinRate 22.11% | AvgReward    29.82 | Eps  0.039
Episode  7000 | WinRate 23.13% | AvgReward    32.83 | Eps  0.030
Episode  7500 | WinRate 24.12% | AvgReward    36.18 | Eps  0.023
Episode  8000 | WinRate 2

In [9]:
import pickle
import random
import numpy as np
from collections import defaultdict, Counter

# Define the utility functions and alphabet again for safe copy-pasting
def default_dict_factory():
    return defaultdict(Counter)

def default_dict_dict_factory():
    return defaultdict(dict)

alphabet = list("abcdefghijklmnopqrstuvwxyz")

# --- Agent Class (for loading the trained agent) ---
# NOTE: This class must match the structure of the one used in Cell 2 for pickle loading to work.
class QLearningAgent:
    def __init__(self, alpha=0.15, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.9995):
        self.Q = defaultdict(float)
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
    def get_state(self, pattern, guessed):
        return (pattern, "".join(sorted(guessed)))

    # Re-implement the hybrid action choice from Cell 2 for correct evaluation logic
    def choose_action(self, state, pattern, guessed, position_probs):
        available = [a for a in alphabet if a not in guessed]
        if not available:
            return None

        L = len(pattern)
        hmm_probs = {a: 0 for a in available}
        if L in position_probs:
            for pos, ch in enumerate(pattern):
                if ch == "_":
                    # Sum up position probabilities for each available letter
                    for a in available:
                        hmm_probs[a] += position_probs[L][pos].get(a, 0)

        # Normalize HMM probabilities (This is often done in Cell 2's training)
        total_hmm = sum(hmm_probs.values()) + 1e-9
        hmm_probs = {a: hmm_probs[a]/total_hmm for a in available}

        # In evaluation mode (epsilon=0), this random block is skipped
        if random.random() < self.epsilon:
            return random.choice(available)

        # Action selection: Q-value + HMM boost (use the same factor as training)
        # Assuming factor was 10 in Cell 2 (you used 5 in a temporary class in Cell 3)
        # Use a factor of 8 for a balanced approach.
        HMM_BOOST_FACTOR = 8
        scores = {a: self.Q.get((state,a), 0.0) + HMM_BOOST_FACTOR * hmm_probs.get(a,0) for a in available}
        return max(scores, key=scores.get)

# --- Hangman Environment (Must match the one used in Cell 2's training) ---
class HangmanEnv:
    def __init__(self, words, max_lives=8):
        self.words = words
        self.max_lives = max_lives
    def reset(self):
        # Handle empty word list gracefully
        if not self.words:
            self.word = ""
            self.guessed = set()
            self.lives = 0
            self.pattern = ""
            return self.pattern
        self.word = random.choice(self.words)
        self.guessed = set()
        self.lives = self.max_lives
        self.pattern = "_" * len(self.word)
        return self.pattern
    # NOTE: The step method is simplified for evaluation to track only done/success
    # The reward calculation is only relevant during training.
    def step(self, letter):
        # The agent should not pass repeated letters, but if it does:
        if letter in self.guessed:
            return self.pattern, 0, False, None

        self.guessed.add(letter)
        if letter in self.word:
            new_pattern = list(self.pattern)
            for i, ch in enumerate(self.word):
                if ch == letter:
                    new_pattern[i] = letter
            self.pattern = "".join(new_pattern)
        else:
            self.lives -= 1

        if "_" not in self.pattern:
            return self.pattern, 0, True, True  # Done, Success
        if self.lives <= 0:
            return self.pattern, 0, True, False # Done, Failure

        return self.pattern, 0, False, None # Not Done

# --- Data Loading ---
try:
    with open('models/hmm_model.pkl', 'rb') as f:
        hmm_model = pickle.load(f)
    with open('models/rl_agent.pkl', 'rb') as f:
        # Load the trained agent into the correct class structure
        rl_agent = pickle.load(f)
except FileNotFoundError:
    print("Error: Models not found. Ensure 'models/hmm_model.pkl' and 'models/rl_agent.pkl' were created by previous cells.")
    exit()

with open('corpus.txt', 'r') as f:
    corpus_words = [w.strip().lower() for w in f.read().splitlines() if w.strip()]

with open('test.txt', 'r') as f:
    test_words = [w.strip().lower() for w in f.read().splitlines() if w.strip()]

# Corpus-specific letter frequencies (optional, not used in the final agent decision)
corpus_letter_order = "eaiorntslcupmdhygbfvkwzxqj"
corpus_letter_priority = {ch: (27 - i) / 27 for i, ch in enumerate(corpus_letter_order)}
position_probs = hmm_model['position_probs']


# --- RESTRICTED ORACLE (The original get_hmm_probs is removed/simplified) ---
# The agent's choose_action method handles the necessary probability lookup now.

# --- Evaluation Loop: Correctly Using the Hybrid RL Agent ---

# Evaluate on test words
print("Evaluating Hybrid RL Agent on TEST words...")
env_test = HangmanEnv(test_words)
games_test = len(test_words)
success_count_test = wrong_guesses_test = repeated_guesses_test = 0

# Set agent to pure exploitation mode (epsilon = 0)
rl_agent.epsilon = 0.0

for _ in range(games_test):
    pattern = env_test.reset()
    guessed = set()
    while True:
        state = rl_agent.get_state(pattern, guessed)

        # Use the hybrid RL agent to choose the action
        action = rl_agent.choose_action(state, pattern, guessed, position_probs)

        if action is None:
            break

        # Check for repeated guess BEFORE the step (agent should not do this, but for robustness)
        is_repeated = action in guessed

        next_pattern, _, done, success = env_test.step(action)

        # Scoring logic
        if is_repeated:
            repeated_guesses_test += 1
        elif action not in env_test.word:
            wrong_guesses_test += 1

        guessed.add(action)
        pattern = next_pattern

        if done:
            if success:
                success_count_test += 1
            break

# Evaluate on corpus words (sample for speed, matching the previous run)
print("Evaluating Hybrid RL Agent on CORPUS words (Sample)...")
corpus_sample = corpus_words[:1000]
env_corpus = HangmanEnv(corpus_sample)
games_corpus = len(corpus_sample)
success_count_corpus = wrong_guesses_corpus = repeated_guesses_corpus = 0

for _ in range(games_corpus):
    pattern = env_corpus.reset()
    guessed = set()
    while True:
        state = rl_agent.get_state(pattern, guessed)

        # Use the hybrid RL agent to choose the action
        action = rl_agent.choose_action(state, pattern, guessed, position_probs)

        if action is None:
            break

        is_repeated = action in guessed

        next_pattern, _, done, success = env_corpus.step(action)

        # Scoring logic
        if is_repeated:
            repeated_guesses_corpus += 1
        elif action not in env_corpus.word:
            wrong_guesses_corpus += 1

        guessed.add(action)
        pattern = next_pattern

        if done:
            if success:
                success_count_corpus += 1
            break

# --- Calculate Final Score ---
total_success = success_count_test + success_count_corpus
total_games = games_test + games_corpus
total_wrong = wrong_guesses_test + wrong_guesses_corpus
total_repeated = repeated_guesses_test + repeated_guesses_corpus

final_score = (total_success * 2000) - (total_wrong * 5) - (total_repeated * 2)
success_rate = total_success / total_games

# --- Print Results ---
print("\n" + "=" * 60)
print("Hybrid RL Agent Evaluation Results")
print("=" * 60)
print(f"\nTEST Words ({games_test} games):")
print(f"  Success Rate: {success_count_test/games_test:.2%} ({success_count_test}/{games_test})")
print(f"  Wrong Guesses: {wrong_guesses_test}")
print(f"  Repeated Guesses: {repeated_guesses_test}")

print(f"\nCORPUS Words Sample ({games_corpus} games):")
print(f"  Success Rate: {success_count_corpus/games_corpus:.2%} ({success_count_corpus}/{games_corpus})")
print(f"  Wrong Guesses: {wrong_guesses_corpus}")
print(f"  Repeated Guesses: {repeated_guesses_corpus}")

print(f"\nOVERALL RESULTS:")
print(f"  Total Games: {total_games}")
print(f"  Success Rate: {success_rate:.2%} ({total_success}/{total_games})")
print(f"  Total Wrong Guesses: {total_wrong}")
print(f"  Total Repeated Guesses: {total_repeated}")
print("-" * 60)
print(f"  Final Score: {final_score}")
print("=" * 60)

Evaluating Hybrid RL Agent on TEST words...
Evaluating Hybrid RL Agent on CORPUS words (Sample)...

Hybrid RL Agent Evaluation Results

TEST Words (2000 games):
  Success Rate: 38.15% (763/2000)
  Wrong Guesses: 13895
  Repeated Guesses: 0

CORPUS Words Sample (1000 games):
  Success Rate: 42.70% (427/1000)
  Wrong Guesses: 6684
  Repeated Guesses: 0

OVERALL RESULTS:
  Total Games: 3000
  Success Rate: 39.67% (1190/3000)
  Total Wrong Guesses: 20579
  Total Repeated Guesses: 0
------------------------------------------------------------
  Final Score: 2277105
