In [1]:
import numpy as np
import pickle
from collections import defaultdict, Counter
import re

class HangmanHMM:
    """
    Hidden Markov Model for Hangman letter prediction.
    Trains separate models for each word length.

    Each per-length model holds Laplace-smoothed letter-at-position,
    first-letter and letter-bigram probabilities over the 26 lowercase
    ASCII letters. Characters outside that alphabet (in corpus words or in
    a masked word) are ignored rather than raising ``KeyError``.
    """

    def __init__(self):
        self.models = {}  # word length -> model dict built by _train_length_model
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
        self.letter_to_idx = {letter: idx for idx, letter in enumerate(self.alphabet)}
        self.idx_to_letter = {idx: letter for idx, letter in enumerate(self.alphabet)}

    def train(self, corpus_file):
        """Train one model per word length from a corpus file.

        Args:
            corpus_file: path to a text file with one word per line. Lines
                are stripped and lowercased; entries that are not purely
                alphabetic and words shorter than 2 letters are skipped.
        """
        print("Loading corpus...")
        # Explicit encoding so the result does not depend on the platform locale.
        with open(corpus_file, 'r', encoding='utf-8') as f:
            words = [line.strip().lower() for line in f if line.strip()]

        # Filter only alphabetic words
        words = [w for w in words if w.isalpha()]

        print(f"Loaded {len(words)} words")

        # Group words by length
        words_by_length = defaultdict(list)
        for word in words:
            words_by_length[len(word)].append(word)

        print(f"Training models for {len(words_by_length)} different word lengths...")

        # Train a model for each word length
        for length, word_list in words_by_length.items():
            if length < 2:  # Skip very short words
                continue
            print(f"Training length {length}: {len(word_list)} words")
            self.models[length] = self._train_length_model(word_list, length)

        print("Training complete!")

    def _train_length_model(self, words, length):
        """Build the probability tables for words of one length.

        Args:
            words: iterable of words, each expected to have ``length`` letters.
            length: the common word length.

        Returns:
            dict with keys ``'length'``, ``'position_probs'`` (length x 26),
            ``'transition_probs'`` (26 x 26, row = previous letter),
            ``'initial_probs'`` (26,) and ``'word_count'``. Every
            distribution is add-one (Laplace) smoothed.
        """
        n_letters = len(self.alphabet)
        position_counts = np.zeros((length, n_letters))
        transition_counts = np.zeros((n_letters, n_letters))
        initial_counts = np.zeros(n_letters)

        for word in words:
            # None marks a character outside a-z; such characters contribute
            # to no count (previously the first-letter count raised KeyError).
            indices = [self.letter_to_idx.get(ch) for ch in word]

            # Initial letter
            if indices and indices[0] is not None:
                initial_counts[indices[0]] += 1

            # Position-specific counts
            for pos, idx in enumerate(indices):
                if idx is not None:
                    position_counts[pos][idx] += 1

            # Transition counts (bigrams)
            for curr_idx, next_idx in zip(indices, indices[1:]):
                if curr_idx is not None and next_idx is not None:
                    transition_counts[curr_idx][next_idx] += 1

        # Add-one smoothing: +1 per cell, +26 on each row total.
        return {
            'length': length,
            # P(letter | position)
            'position_probs': (position_counts + 1)
            / (position_counts.sum(axis=1, keepdims=True) + n_letters),
            # P(letter_t | letter_t-1)
            'transition_probs': (transition_counts + 1)
            / (transition_counts.sum(axis=1, keepdims=True) + n_letters),
            # P(first letter)
            'initial_probs': (initial_counts + 1) / (initial_counts.sum() + n_letters),
            'word_count': len(words),
        }

    def _zero_guessed(self, scores, guessed_letters):
        """Zero, in place, the entries of ``scores`` for guessed letters."""
        for letter in guessed_letters:
            idx = self.letter_to_idx.get(letter)
            if idx is not None:
                scores[idx] = 0
        return scores

    def get_letter_probabilities(self, masked_word, guessed_letters):
        """
        Get probability distribution over letters given current game state.

        Args:
            masked_word: str, e.g., "_ppl_" ('_' marks an unrevealed letter)
            guessed_letters: set of already guessed letters

        Returns:
            numpy array of probabilities for each letter (26 dimensions).
            Guessed letters get probability 0. If no model exists for this
            word length, general English letter frequencies are used.
        """
        length = len(masked_word)

        model = self.models.get(length)
        if model is None:
            # Fallback to general letter frequency
            return self._get_frequency_based_probs(guessed_letters)

        position_probs = model['position_probs']
        transition_probs = model['transition_probs']
        letter_scores = np.zeros(26)

        # Aggregate probabilities from each blank position
        for pos, char in enumerate(masked_word):
            if char != '_':
                continue

            # Add position-based probability
            letter_scores += position_probs[pos]

            # Transition from a revealed left neighbour. A blank or any
            # character outside a-z maps to None and is skipped.
            if pos > 0:
                prev_idx = self.letter_to_idx.get(masked_word[pos - 1])
                if prev_idx is not None:
                    letter_scores += transition_probs[prev_idx] * 0.5

            # Revealed right neighbour: use the column P(next | candidate)
            # as an approximation of the reverse transition.
            if pos < length - 1:
                next_idx = self.letter_to_idx.get(masked_word[pos + 1])
                if next_idx is not None:
                    letter_scores += transition_probs[:, next_idx] * 0.3

        # Zero out already guessed letters
        self._zero_guessed(letter_scores, guessed_letters)

        # Normalize
        total = letter_scores.sum()
        if total > 0:
            return letter_scores / total

        # Uniform distribution over unguessed letters
        letter_scores = self._zero_guessed(np.ones(26), guessed_letters)
        total = letter_scores.sum()
        if total > 0:
            letter_scores = letter_scores / total
        return letter_scores

    def _get_frequency_based_probs(self, guessed_letters):
        """Fallback: general English letter frequency (a..z), renormalized
        after zeroing the already guessed letters."""
        freq = np.array([
            0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015,
            0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749,
            0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758,
            0.00978, 0.02360, 0.00150, 0.01974, 0.00074
        ])

        self._zero_guessed(freq, guessed_letters)

        total = freq.sum()
        if total > 0:
            freq = freq / total

        return freq

    def save(self, filename):
        """Save trained model to file (pickle of the ``models`` dict)."""
        with open(filename, 'wb') as f:
            pickle.dump(self.models, f)
        print(f"Model saved to {filename}")

    def load(self, filename):
        """Load trained model from file.

        NOTE: uses ``pickle.load``, which can execute arbitrary code; only
        load model files from a trusted source.
        """
        with open(filename, 'rb') as f:
            self.models = pickle.load(f)
        print(f"Model loaded from {filename}")


# Training script
if __name__ == "__main__":
    # Train on the corpus, persist the model, then print sample predictions.
    rule = "=" * 50

    print(rule)
    print("Hangman HMM Training")
    print(rule)

    # Build and fit the per-length models
    hmm = HangmanHMM()
    hmm.train('corpus.txt')

    # Persist the fitted models
    hmm.save('hangman_hmm_model.pkl')

    # Spot-check a few game states
    print("\n" + rule)
    print("Testing HMM")
    print(rule)

    # (masked word, letters already guessed)
    test_cases = [
        ("_ppl_", {'e', 's'}),
        ("h_ll_", {'e'}),
        ("_____", set()),
        ("pro___m", {'a', 'e', 'i'}),
    ]

    for masked_word, guessed in test_cases:
        probs = hmm.get_letter_probabilities(masked_word, guessed)

        # Five highest-probability letters, best first
        ranked = np.argsort(probs)[::-1][:5]
        top_letters = [(hmm.idx_to_letter[idx], probs[idx]) for idx in ranked]

        print(f"\nMasked word: {masked_word}")
        print(f"Guessed: {guessed}")
        print(f"Top 5 predictions: {[(l, f'{p:.4f}') for l, p in top_letters]}")

Hangman HMM Training
Loading corpus...
Loaded 49979 words
Training models for 24 different word lengths...
Training length 11: 5452 words
Training length 6: 3755 words
Training length 9: 6787 words
Training length 16: 698 words
Training length 14: 2019 words
Training length 10: 6465 words
Training length 8: 6348 words
Training length 12: 4292 words
Training length 13: 3094 words
Training length 5: 2340 words
Training length 18: 174 words
Training length 4: 1169 words
Training length 3: 388 words
Training length 7: 5111 words
Training length 15: 1226 words
Training length 17: 375 words
Training length 22: 8 words
Training length 19: 88 words
Training length 2: 84 words
Training length 20: 40 words
Training length 21: 16 words
Training length 23: 3 words
Training length 24: 1 words
Training complete!
Model saved to hangman_hmm_model.pkl

Testing HMM

Masked word: _ppl_
Guessed: {'e', 's'}
Top 5 predictions: [('a', '0.1192'), ('y', '0.0804'), ('o', '0.0784'), ('l', '0.0713'), ('t', '0.062

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque, defaultdict
import pickle

class DQN(nn.Module):
    """Fully connected Q-network for Hangman.

    Maps a state vector to one Q-value per action through three
    ReLU-activated hidden layers (128, 128, 64 units) and a linear output.
    """

    def __init__(self, state_size=55, action_size=26):
        super().__init__()
        # Layers are created in fc1..fc4 order so that parameter
        # initialisation and state_dict keys stay as before.
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, action_size)

    def forward(self, x):
        """Return the Q-values for the input state tensor ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        # Output layer is linear: Q-values are unbounded.
        return self.fc4(hidden)


class ReplayMemory:
    """Fixed-capacity buffer of past transitions.

    Backed by a ``deque`` with ``maxlen=capacity``, so the oldest entry is
    dropped when a new one is pushed into a full buffer.
    """

    def __init__(self, capacity=10000):
        self.memory = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Append one (state, action, reward, next_state, done) tuple."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)


class HangmanDQNAgent:
    """DQN Agent for Hangman.

    Holds an online network, a periodically synced target network, an Adam
    optimizer and a replay memory. States are dicts that are flattened to
    55-dimensional vectors by ``_state_to_vector``.
    """

    def __init__(self, state_size=55, action_size=26, learning_rate=0.0005,
                 gamma=0.95, epsilon=1.0, epsilon_min=0.05, epsilon_decay=0.9995,
                 memory_size=10000, batch_size=64):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Current exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size

        # Main network
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = DQN(state_size, action_size).to(self.device)

        # Target network (starts as a copy of the main network)
        self.target_model = DQN(state_size, action_size).to(self.device)
        self.update_target_model()

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()

        # Experience replay
        self.memory = ReplayMemory(memory_size)

        self.steps = 0  # Number of completed replay() updates
        self.target_update_freq = 100  # Sync target net every N updates

    def update_target_model(self):
        """Copy weights from main model to target model."""
        self.target_model.load_state_dict(self.model.state_dict())

    def act(self, state, valid_actions, use_hmm_probs=True):
        """
        Choose action using epsilon-greedy policy.

        Args:
            state: Current game state (dict with 'hmm_probs' and other info)
            valid_actions: List of valid action letters
            use_hmm_probs: Whether to combine Q-values with HMM probabilities

        Returns:
            action: Letter to guess
        """
        # Epsilon-greedy exploration
        if random.random() < self.epsilon:
            return random.choice(valid_actions)

        state_vector = self._state_to_vector(state)

        # Get Q-values from model
        with torch.no_grad():
            state_tensor = torch.tensor(
                state_vector, dtype=torch.float32, device=self.device
            ).unsqueeze(0)
            q_values = self.model(state_tensor).cpu().numpy()[0]

        # Combine with HMM probabilities if available
        if use_hmm_probs and 'hmm_probs' in state:
            hmm_probs = np.asarray(state['hmm_probs'])
            # Min-max normalize Q-values to [0, 1]; epsilon avoids 0/0 when
            # all Q-values are equal.
            q_norm = (q_values - q_values.min()) / (q_values.max() - q_values.min() + 1e-8)
            # Weighted combination
            combined = 0.7 * q_norm + 0.3 * hmm_probs * 100
        else:
            combined = q_values

        # Mask invalid actions with -inf so argmax can only pick a valid one
        valid_indices = [ord(a) - ord('a') for a in valid_actions]
        masked_values = np.full(26, -np.inf)
        masked_values[valid_indices] = combined[valid_indices]

        # Choose best action
        action_idx = np.argmax(masked_values)
        return chr(action_idx + ord('a'))

    def _state_to_vector(self, state):
        """Convert state dict to vector for neural network.

        Layout: 26 guessed-letter flags, lives / 6, 26 HMM probabilities
        (zeros if absent), word_length / 20, num_blanks / word_length.
        """
        # Guessed letters (26 binary features)
        guessed_vec = np.zeros(26)
        for letter in state['guessed_letters']:
            idx = ord(letter) - ord('a')
            guessed_vec[idx] = 1

        # Lives remaining (normalized)
        lives_vec = np.array([state['lives_remaining'] / 6.0])

        # HMM probabilities (26 features)
        hmm_vec = state.get('hmm_probs', np.zeros(26))

        # Word length (normalized)
        length_vec = np.array([state['word_length'] / 20.0])

        # Number of blanks (normalized)
        blanks_vec = np.array([state['num_blanks'] / state['word_length']])

        # Concatenate
        return np.concatenate([
            guessed_vec,
            lives_vec,
            hmm_vec,
            length_vec,
            blanks_vec
        ])

    def remember(self, state, action, reward, next_state, done):
        """Store experience in replay memory (states as vectors, action as index)."""
        state_vec = self._state_to_vector(state)
        next_state_vec = self._state_to_vector(next_state)
        action_idx = ord(action) - ord('a')
        self.memory.push(state_vec, action_idx, reward, next_state_vec, done)

    def replay(self):
        """Train on batch from replay memory.

        Returns:
            The batch loss as a float, or 0.0 when the memory holds fewer
            than ``batch_size`` transitions and no update was made.
        """
        if len(self.memory) < self.batch_size:
            return 0.0

        # Sample batch
        batch = self.memory.sample(self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        # Convert to tensors
        states = torch.tensor(np.array(states), dtype=torch.float32, device=self.device)
        actions = torch.tensor(actions, dtype=torch.long, device=self.device).unsqueeze(1)
        rewards = torch.tensor(rewards, dtype=torch.float32, device=self.device)
        next_states = torch.tensor(np.array(next_states), dtype=torch.float32, device=self.device)
        dones = torch.tensor(dones, dtype=torch.float32, device=self.device)

        # Current Q-values. squeeze(1) drops only the gathered action axis;
        # a bare squeeze() would also drop the batch axis when
        # batch_size == 1 and mismatch the target's shape.
        current_q = self.model(states).gather(1, actions).squeeze(1)

        # Target Q-values (no bootstrap term for terminal transitions)
        with torch.no_grad():
            next_q = self.target_model(next_states).max(1)[0]
            target_q = rewards + (1 - dones) * self.gamma * next_q

        # Compute loss
        loss = self.criterion(current_q, target_q)

        # Optimize
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Update target network periodically
        self.steps += 1
        if self.steps % self.target_update_freq == 0:
            self.update_target_model()

        return loss.item()

    def decay_epsilon(self):
        """Decay exploration rate, never going below ``epsilon_min``."""
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def save(self, filename):
        """Save model to file (networks, optimizer, epsilon and step count)."""
        checkpoint = {
            'model_state_dict': self.model.state_dict(),
            'target_model_state_dict': self.target_model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'epsilon': self.epsilon,
            'steps': self.steps
        }
        torch.save(checkpoint, filename)
        print(f"DQN model saved to {filename}")

    def load(self, filename):
        """Load model from file.

        NOTE: ``torch.load`` is pickle-based; only load checkpoints from a
        trusted source.
        """
        checkpoint = torch.load(filename, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.target_model.load_state_dict(checkpoint['target_model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.epsilon = checkpoint.get('epsilon', self.epsilon_min)
        self.steps = checkpoint.get('steps', 0)
        print(f"DQN model loaded from {filename}")


class SimpleQLearningAgent:
    """Tabular Q-learning agent for Hangman.

    Q-values live in a nested ``defaultdict``: state key -> action letter ->
    value, with unseen entries defaulting to 0.0.
    """

    def __init__(self, alpha=0.1, gamma=0.95, epsilon=1.0,
                 epsilon_min=0.05, epsilon_decay=0.9995):
        self.alpha = alpha  # Step size of the Q update
        self.gamma = gamma  # Discount applied to the next state's value
        self.epsilon = epsilon  # Current exploration probability
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

        # state_key -> {action letter -> Q-value}
        self.q_table = defaultdict(lambda: defaultdict(float))

        self.action_space = 'abcdefghijklmnopqrstuvwxyz'

    def _get_state_key(self, state):
        """Build the hashable table key "<masked>:<sorted guesses>:<lives>"."""
        guessed = ''.join(sorted(state['guessed_letters']))
        return "{}:{}:{}".format(
            state['masked_word'], guessed, state['lives_remaining']
        )

    def act(self, state, valid_actions, use_hmm_probs=True):
        """Pick a letter epsilon-greedily, optionally blending in HMM scores."""
        state_key = self._get_state_key(state)

        # Explore with probability epsilon
        if random.random() < self.epsilon:
            return random.choice(valid_actions)

        # Table values of the candidate letters (reading through the
        # defaultdict also creates the entries, as before)
        scores = {}
        for letter in valid_actions:
            scores[letter] = self.q_table[state_key][letter]

        # Blend in the HMM probabilities when the state carries them
        if use_hmm_probs and 'hmm_probs' in state:
            prior = state['hmm_probs']
            base = ord('a')
            for letter in valid_actions:
                hmm_score = prior[ord(letter) - base] * 100
                scores[letter] = 0.7 * scores[letter] + 0.3 * hmm_score

        # Greedy choice; ties go to the earliest letter in valid_actions
        return max(scores, key=lambda letter: scores[letter])

    def learn(self, state, action, reward, next_state, done):
        """Apply one Q-learning update for the given transition."""
        state_key = self._get_state_key(state)
        next_state_key = self._get_state_key(next_state)

        old_value = self.q_table[state_key][action]

        # Best known value of the successor; 0 at episode end or when the
        # successor has no recorded actions yet
        best_next = 0
        if not done:
            next_row = self.q_table[next_state_key]
            if next_row:
                best_next = max(next_row.values())

        td_error = reward + self.gamma * best_next - old_value
        self.q_table[state_key][action] = old_value + self.alpha * td_error

    def decay_epsilon(self):
        """Multiply epsilon by its decay factor, floored at ``epsilon_min``."""
        decayed = self.epsilon * self.epsilon_decay
        self.epsilon = max(self.epsilon_min, decayed)

    def save(self, filename):
        """Pickle the Q-table together with epsilon, alpha and gamma."""
        # The outer defaultdict has a lambda factory, so it is converted to
        # a plain dict before being pickled.
        payload = {
            'q_table': dict(self.q_table),
            'epsilon': self.epsilon,
            'alpha': self.alpha,
            'gamma': self.gamma
        }
        with open(filename, 'wb') as f:
            pickle.dump(payload, f)
        print(f"Q-Learning model saved to {filename}")

    def load(self, filename):
        """Restore the Q-table and hyperparameters written by ``save``."""
        with open(filename, 'rb') as f:
            payload = pickle.load(f)

        # Rebuild the nested defaultdict entry by entry
        self.q_table = defaultdict(lambda: defaultdict(float))
        for state_key, row in payload['q_table'].items():
            for letter, value in row.items():
                self.q_table[state_key][letter] = value

        self.epsilon = payload.get('epsilon', self.epsilon_min)
        self.alpha = payload.get('alpha', self.alpha)
        self.gamma = payload.get('gamma', self.gamma)
        print(f"Q-Learning model loaded from {filename}")


# Test the agents
if __name__ == "__main__":
    # Smoke test: build both agents and ask each for one action.
    print("Testing DQN Agent...")
    dqn_agent = HangmanDQNAgent()

    # Hand-made game state with random stand-in HMM probabilities
    test_state = dict(
        masked_word='_pp__',
        guessed_letters={'e', 's'},
        lives_remaining=5,
        hmm_probs=np.random.rand(26),
        word_length=5,
        num_blanks=3,
    )

    valid_actions = list('abcdlo')
    action = dqn_agent.act(test_state, valid_actions)
    print(f"DQN chose action: {action}")

    print("\nTesting Q-Learning Agent...")
    ql_agent = SimpleQLearningAgent()
    action = ql_agent.act(test_state, valid_actions)
    print(f"Q-Learning chose action: {action}")

    print("\nAgents initialized successfully!")

Testing DQN Agent...
DQN chose action: l

Testing Q-Learning Agent...
Q-Learning chose action: c

Agents initialized successfully!


In [None]:
import numpy as np
import random
from collections import defaultdict

class HangmanEnvironment:
    """
    Hangman game environment for Reinforcement Learning.

    A game picks a random target word, starts fully masked, and ends when
    every letter is revealed (win) or ``max_lives`` wrong guesses have been
    made (loss).
    """

    def __init__(self, word_list, hmm_model, max_lives=6):
        """
        Args:
            word_list: List of words to use for games
            hmm_model: Trained HMM model for probability predictions; must
                provide ``get_letter_probabilities(masked_word, guessed_letters)``
            max_lives: Maximum number of wrong guesses allowed
        """
        self.word_list = word_list
        self.hmm = hmm_model
        self.max_lives = max_lives
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'

        # Current game state (populated by reset())
        self.target_word = None
        self.masked_word = None  # list of characters, '_' for hidden letters
        self.guessed_letters = None
        self.lives_remaining = None
        self.wrong_guesses = 0
        self.repeated_guesses = 0

    def reset(self):
        """Start a new game with a random word and return the initial state."""
        self.target_word = random.choice(self.word_list).lower()
        self.masked_word = ['_'] * len(self.target_word)
        self.guessed_letters = set()
        self.lives_remaining = self.max_lives
        self.wrong_guesses = 0
        self.repeated_guesses = 0

        return self._get_state()

    def _get_state(self):
        """
        Get current state representation.

        Returns dictionary containing:
        - masked_word: string representation
        - guessed_letters: set of guessed letters (a copy)
        - lives_remaining: int
        - hmm_probs: probability distribution from HMM
        - word_length: int
        - num_blanks: number of letters still hidden
        """
        masked_str = ''.join(self.masked_word)
        hmm_probs = self.hmm.get_letter_probabilities(masked_str, self.guessed_letters)

        return {
            'masked_word': masked_str,
            # Copy so later guesses do not mutate states already handed out
            'guessed_letters': self.guessed_letters.copy(),
            'lives_remaining': self.lives_remaining,
            'hmm_probs': hmm_probs,
            'word_length': len(self.target_word),
            'num_blanks': masked_str.count('_')
        }

    def get_state_vector(self):
        """
        Convert state to fixed-size vector for neural network input.

        Returns:
            numpy array suitable for DQN (55 features)
        """
        state = self._get_state()

        # Components:
        # 1. Guessed letters (26 binary features)
        guessed_vec = np.zeros(26)
        for letter in state['guessed_letters']:
            idx = ord(letter) - ord('a')
            guessed_vec[idx] = 1

        # 2. Lives remaining (normalized)
        lives_vec = np.array([state['lives_remaining'] / self.max_lives])

        # 3. HMM probabilities (26 features)
        hmm_vec = state['hmm_probs']

        # 4. Word length (normalized)
        length_vec = np.array([state['word_length'] / 20.0])  # Assuming max length ~20

        # 5. Number of blanks (normalized)
        blanks_vec = np.array([state['num_blanks'] / state['word_length']])

        # Concatenate all features
        return np.concatenate([
            guessed_vec,    # 26
            lives_vec,      # 1
            hmm_vec,        # 26
            length_vec,     # 1
            blanks_vec      # 1
        ])  # Total: 55 features

    def step(self, action):
        """
        Take an action (guess a letter).

        Args:
            action: letter to guess (string) or its 0-25 index (Python int
                or NumPy integer, e.g. the result of ``np.argmax``)

        Returns:
            next_state, reward, done, info
        """
        # Convert action to letter if it's an index. np.integer is checked
        # explicitly because NumPy integer scalars are not instances of int.
        if isinstance(action, (int, np.integer)):
            letter = chr(int(action) + ord('a'))
        else:
            letter = action.lower()

        # Repeated guess: penalized, costs no life, game state unchanged
        if letter in self.guessed_letters:
            self.repeated_guesses += 1
            reward = -50  # Heavy penalty for repeated guess
            done = False
            info = {'repeated': True, 'wrong': False}
            return self._get_state(), reward, done, info

        # Add to guessed letters
        self.guessed_letters.add(letter)

        # Check if letter is in the word
        if letter in self.target_word:
            # Correct guess - reveal every occurrence
            count = 0
            for i, char in enumerate(self.target_word):
                if char == letter:
                    self.masked_word[i] = letter
                    count += 1

            # Check if word is complete
            if '_' not in self.masked_word:
                reward = 100 + (self.lives_remaining * 10)  # Win bonus + life bonus
                done = True
                info = {'won': True, 'wrong': False, 'letters_revealed': count}
            else:
                reward = 10 * count  # Reward proportional to letters revealed
                done = False
                info = {'won': False, 'wrong': False, 'letters_revealed': count}
        else:
            # Wrong guess
            self.lives_remaining -= 1
            self.wrong_guesses += 1

            if self.lives_remaining <= 0:
                reward = -100  # Loss penalty
                done = True
                info = {'won': False, 'wrong': True, 'lost': True}
            else:
                reward = -20  # Wrong guess penalty
                done = False
                info = {'won': False, 'wrong': True, 'lost': False}

        next_state = self._get_state()
        return next_state, reward, done, info

    def get_valid_actions(self):
        """Get list of letters that haven't been guessed yet."""
        return [letter for letter in self.alphabet
                if letter not in self.guessed_letters]

    def get_valid_action_indices(self):
        """Get indices of valid actions (for neural network output)."""
        return [i for i, letter in enumerate(self.alphabet)
                if letter not in self.guessed_letters]

    def render(self):
        """Print current game state."""
        print(f"Word: {' '.join(self.masked_word)}")
        print(f"Guessed: {sorted(self.guessed_letters)}")
        print(f"Lives: {self.lives_remaining}/{self.max_lives}")
        print(f"Wrong guesses: {self.wrong_guesses}")


def load_corpus(filename):
    """Load word list from corpus file.

    Args:
        filename: path to a UTF-8 text file with one word per line.

    Returns:
        List of lowercased words, in file order, that have at least 2
        letters and consist only of the ASCII letters a-z. Words with other
        alphabetic characters (e.g. accented letters) are dropped because
        the rest of the pipeline indexes letters as ``ord(letter) - ord('a')``
        into 26-element arrays.
    """
    allowed = frozenset('abcdefghijklmnopqrstuvwxyz')

    # Explicit encoding so the result does not depend on the platform locale.
    with open(filename, 'r', encoding='utf-8') as f:
        words = [line.strip().lower() for line in f if line.strip()]

    # Keep only words of 2+ letters drawn entirely from a-z
    return [w for w in words if len(w) >= 2 and allowed.issuperset(w)]


# Test the environment
if __name__ == "__main__":
    # Demo: play one game, always guessing the HMM's most probable letter.
    from hangman_hmm import HangmanHMM

    print("Loading HMM model...")
    hmm = HangmanHMM()
    hmm.load('hangman_hmm_model.pkl')

    print("Loading corpus...")
    words = load_corpus('corpus.txt')
    print(f"Loaded {len(words)} words")

    print("\nTesting environment...")
    env = HangmanEnvironment(words, hmm)

    # Play a test game
    state = env.reset()
    print(f"\nTarget word (hidden): {env.target_word}")
    env.render()

    done = False
    while not done:
        # Get HMM recommendation
        hmm_probs = state['hmm_probs']
        valid_actions = env.get_valid_actions()

        # Choose letter with highest probability
        best_idx = np.argmax(hmm_probs)
        guess = chr(best_idx + ord('a'))

        # Fall back to a random unguessed letter if the top letter was
        # already guessed
        if guess not in valid_actions:
            guess = random.choice(valid_actions)

        print(f"\nGuessing: {guess}")
        state, reward, done, info = env.step(guess)
        print(f"Reward: {reward}, Info: {info}")
        env.render()

    # The emoji below were mis-decoded (mojibake) in the original source.
    if info.get('won'):
        print("\n🎉 Won the game!")
    else:
        print(f"\n❌ Lost! Word was: {env.target_word}")

In [6]:
"""
Training script for Hangman RL Agent.
Supports both DQN and Q-Learning agents.
"""

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import json
from datetime import datetime
from hangman_hmm import HangmanHMM
from hangman_environment import HangmanEnvironment, load_corpus
from hangman_agent import HangmanDQNAgent, SimpleQLearningAgent

def smooth_curve(values, window=100):
    """Return a centred moving average of ``values``.

    Point ``i`` is the mean of ``values[i - window // 2 : i + window // 2]``,
    clipped to the sequence bounds. Input shorter than ``window`` is
    returned unchanged (the same object).
    """
    n = len(values)
    if n < window:
        return values

    half = window // 2
    return [
        np.mean(values[max(0, i - half):min(n, i + half)])
        for i in range(n)
    ]

def plot_training_results(history, filename='training_results.png'):
    """Draw a 2x2 grid of training metrics and save it to ``filename``.

    Panels: per-episode reward (raw and smoothed), win rate, average wrong
    guesses and average repeated guesses. The last three are plotted at
    x = 0, 500, 1000, ... with one point per entry in the history lists.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    rewards = history['rewards']
    episodes = range(len(rewards))

    # Panel 1: per-episode reward, raw trace under a smoothed trace
    reward_ax = axes[0, 0]
    reward_ax.plot(episodes, rewards, alpha=0.3, label='Raw')
    reward_ax.plot(episodes, smooth_curve(rewards), label='Smoothed', linewidth=2)
    reward_ax.set_xlabel('Episode')
    reward_ax.set_ylabel('Total Reward')
    reward_ax.set_title('Episode Rewards Over Time')
    reward_ax.legend()
    reward_ax.grid(True, alpha=0.3)

    # Panels 2-4 share an x-axis: one point per 500-episode interval
    win_rate_episodes = range(0, len(history['win_rates']) * 500, 500)
    periodic_panels = [
        (axes[0, 1], 'win_rates', 'Win Rate (%)',
         'Win Rate Over Time (per 500 episodes)',
         {'marker': 'o'}),
        (axes[1, 0], 'avg_wrong_guesses', 'Avg Wrong Guesses',
         'Wrong Guesses Over Time (per 500 episodes)',
         {'marker': 's', 'color': 'orange'}),
        (axes[1, 1], 'avg_repeated_guesses', 'Avg Repeated Guesses',
         'Repeated Guesses Over Time (per 500 episodes)',
         {'marker': '^', 'color': 'red'}),
    ]
    for ax, key, ylabel, title, line_style in periodic_panels:
        ax.plot(win_rate_episodes, history[key],
                linewidth=2, markersize=4, **line_style)
        ax.set_xlabel('Episode')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)

    # Win rate is a percentage, so pin its y-range
    axes[0, 1].set_ylim([0, 100])

    plt.tight_layout()
    plt.savefig(filename, dpi=150, bbox_inches='tight')
    print(f"\nTraining plots saved to {filename}")
    plt.close()

def train_dqn(env, agent, num_episodes=5000, save_freq=1000):
    """Run the DQN training loop and return the collected metrics.

    Every step stores the transition via ``agent.remember`` and, once the
    replay memory holds more than ``agent.batch_size`` entries, performs one
    ``agent.replay()`` update. Epsilon is decayed once per episode.

    Args:
        env: Environment exposing ``reset``, ``step``, ``get_valid_actions``,
            ``wrong_guesses`` and ``repeated_guesses``.
        agent: Agent exposing ``act``, ``remember``, ``replay``,
            ``decay_epsilon``, ``save``, ``memory``, ``batch_size`` and
            ``epsilon``.
        num_episodes: Number of episodes to play.
        save_freq: A checkpoint is written every ``save_freq`` episodes.

    Returns:
        dict: 'rewards' and 'epsilons' (one entry per episode), 'losses'
        (one entry per replay call) and 'win_rates', 'avg_wrong_guesses',
        'avg_repeated_guesses' (one entry per 500 episodes).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Training DQN Agent")
    print(rule)

    history = {
        'rewards': [],
        'losses': [],
        'win_rates': [],
        'avg_wrong_guesses': [],
        'avg_repeated_guesses': [],
        'epsilons': []
    }

    # Per-episode outcomes, only used to compute the windowed statistics.
    wins, wrongs, repeats = [], [], []
    recent = slice(-500, None)

    for ep_idx in tqdm(range(num_episodes), desc="Training"):
        ep_number = ep_idx + 1
        state = env.reset()
        ep_reward = 0
        ep_losses = []

        # An episode always contains at least one step.
        while True:
            candidates = env.get_valid_actions()
            action = agent.act(state, candidates, use_hmm_probs=True)

            next_state, reward, done, info = env.step(action)
            ep_reward += reward

            agent.remember(state, action, reward, next_state, done)

            # Learn only once the memory can supply a full batch.
            if len(agent.memory) > agent.batch_size:
                ep_losses.append(agent.replay())

            state = next_state
            if done:
                break

        agent.decay_epsilon()

        history['rewards'].append(ep_reward)
        history['losses'].extend(ep_losses)
        history['epsilons'].append(agent.epsilon)

        wins.append(1 if info.get('won') else 0)
        wrongs.append(env.wrong_guesses)
        repeats.append(env.repeated_guesses)

        # Windowed progress report over the last 500 episodes.
        if ep_number % 500 == 0:
            win_rate = np.mean(wins[recent]) * 100
            avg_wrong = np.mean(wrongs[recent])
            avg_repeated = np.mean(repeats[recent])
            avg_reward = np.mean(history['rewards'][recent])

            history['win_rates'].append(win_rate)
            history['avg_wrong_guesses'].append(avg_wrong)
            history['avg_repeated_guesses'].append(avg_repeated)

            print(f"\nEpisode {ep_number}/{num_episodes}")
            print(f"  Win rate (last 500): {win_rate:.2f}%")
            print(f"  Avg wrong guesses: {avg_wrong:.2f}")
            print(f"  Avg repeated guesses: {avg_repeated:.3f}")
            print(f"  Avg reward: {avg_reward:.2f}")
            print(f"  Epsilon: {agent.epsilon:.4f}")

        # Periodic checkpoint.
        if ep_number % save_freq == 0:
            agent.save(f'hangman_dqn_checkpoint_{ep_number}.pth')

    return history

def train_qlearning(env, agent, num_episodes=10000):
    """Run the tabular Q-Learning training loop and return the metrics.

    Every step is fed straight to ``agent.learn``; epsilon is decayed once
    per episode.

    Args:
        env: Environment exposing ``reset``, ``step``, ``get_valid_actions``,
            ``wrong_guesses`` and ``repeated_guesses``.
        agent: Agent exposing ``act``, ``learn``, ``decay_epsilon``,
            ``epsilon`` and ``q_table``.
        num_episodes: Number of episodes to play.

    Returns:
        dict: 'rewards' and 'epsilons' (one entry per episode) and
        'win_rates', 'avg_wrong_guesses', 'avg_repeated_guesses' (one entry
        per 500 episodes).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Training Q-Learning Agent")
    print(rule)

    history = {
        'rewards': [],
        'win_rates': [],
        'avg_wrong_guesses': [],
        'avg_repeated_guesses': [],
        'epsilons': []
    }

    # Per-episode outcomes, only used to compute the windowed statistics.
    wins, wrongs, repeats = [], [], []
    recent = slice(-500, None)

    for ep_idx in tqdm(range(num_episodes), desc="Training"):
        ep_number = ep_idx + 1
        state = env.reset()
        ep_reward = 0

        # An episode always contains at least one step.
        while True:
            candidates = env.get_valid_actions()
            action = agent.act(state, candidates, use_hmm_probs=True)

            next_state, reward, done, info = env.step(action)
            ep_reward += reward

            agent.learn(state, action, reward, next_state, done)

            state = next_state
            if done:
                break

        agent.decay_epsilon()

        history['rewards'].append(ep_reward)
        history['epsilons'].append(agent.epsilon)

        wins.append(1 if info.get('won') else 0)
        wrongs.append(env.wrong_guesses)
        repeats.append(env.repeated_guesses)

        # Windowed progress report over the last 500 episodes.
        if ep_number % 500 == 0:
            win_rate = np.mean(wins[recent]) * 100
            avg_wrong = np.mean(wrongs[recent])
            avg_repeated = np.mean(repeats[recent])
            avg_reward = np.mean(history['rewards'][recent])

            history['win_rates'].append(win_rate)
            history['avg_wrong_guesses'].append(avg_wrong)
            history['avg_repeated_guesses'].append(avg_repeated)

            print(f"\nEpisode {ep_number}/{num_episodes}")
            print(f"  Win rate (last 500): {win_rate:.2f}%")
            print(f"  Avg wrong guesses: {avg_wrong:.2f}")
            print(f"  Avg repeated guesses: {avg_repeated:.3f}")
            print(f"  Avg reward: {avg_reward:.2f}")
            print(f"  Epsilon: {agent.epsilon:.4f}")
            print(f"  Q-table size: {len(agent.q_table)}")

    return history

def main():
    """Interactive entry point: train a Hangman agent and save its artifacts.

    Loads the HMM and the corpus, asks which agent to train and for how many
    episodes, runs the matching training loop, then writes the final model,
    a JSON training history and a PNG with the training curves. Returns
    early (after printing a message) when the HMM model file or the corpus
    file is missing.
    """
    rule = "=" * 60
    print(rule)
    print("HANGMAN RL AGENT - TRAINING")
    print(rule)

    # 1. Load HMM
    print("\n1. Loading HMM model...")
    try:
        hmm = HangmanHMM()
        hmm.load('hangman_hmm_model.pkl')
    except FileNotFoundError:
        print("   ‚ùå HMM model not found!")
        print("   Please run 'python hangman_hmm.py' first to train the HMM.")
        return
    print("   ‚úì HMM loaded successfully")

    # 2. Load corpus
    print("\n2. Loading corpus...")
    try:
        words = load_corpus('corpus.txt')
    except FileNotFoundError:
        print("   ‚ùå corpus.txt not found!")
        return
    print(f"   ‚úì Loaded {len(words)} words")

    # 3. Create environment
    print("\n3. Creating Hangman environment...")
    env = HangmanEnvironment(words, hmm, max_lives=6)
    print("   ‚úì Environment created")

    # 4. Choose agent type (re-prompt until the answer is '1' or '2')
    print("\n4. Choose agent type:")
    print("   1. DQN (Deep Q-Network) - More powerful, slower")
    print("   2. Q-Learning (Table-based) - Simpler, faster")

    choice = input("\nEnter choice (1 or 2): ").strip()
    while choice not in ('1', '2'):
        print("Invalid choice. Please enter 1 or 2.")
        choice = input("\nEnter choice (1 or 2): ").strip()

    use_dqn = (choice == '1')

    # 5. Initialize agent
    if use_dqn:
        print("\n5. Initializing DQN agent...")
        agent = HangmanDQNAgent(
            state_size=55,
            action_size=26,
            learning_rate=0.0005,
            gamma=0.95,
            epsilon=1.0,
            epsilon_min=0.05,
            epsilon_decay=0.9995,
            memory_size=10000,
            batch_size=64
        )
        agent_name, default_episodes = "DQN", 5000
    else:
        print("\n5. Initializing Q-Learning agent...")
        agent = SimpleQLearningAgent(
            alpha=0.1,
            gamma=0.95,
            epsilon=1.0,
            epsilon_min=0.05,
            epsilon_decay=0.9995
        )
        agent_name, default_episodes = "Q-Learning", 10000

    print(f"   ‚úì {agent_name} agent initialized")

    # 6. Get training parameters (empty answer selects the default)
    print("\n6. Training configuration:")
    print(f"   Recommended episodes for {agent_name}: {default_episodes}")

    while True:
        raw_episodes = input(f"   Enter number of episodes (or press Enter for {default_episodes}): ").strip()
        if not raw_episodes:
            num_episodes = default_episodes
            break
        try:
            num_episodes = int(raw_episodes)
        except ValueError:
            print("   Invalid input. Please enter a number.")
            continue
        if num_episodes > 0:
            break
        print("   Please enter a positive number.")

    print(f"\n   Training for {num_episodes} episodes...")
    print(f"   Estimated time: {num_episodes * 0.2 / 60:.1f} minutes")

    # 7. Train agent (the timestamp is taken before training starts)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    if use_dqn:
        history = train_dqn(env, agent, num_episodes, save_freq=1000)
        model_file = 'hangman_dqn_final.pth'
    else:
        history = train_qlearning(env, agent, num_episodes)
        model_file = 'hangman_qlearning_final.pkl'

    # 8. Save final model
    print("\n8. Saving final model...")
    agent.save(model_file)

    # 9. Save training history
    run_tag = f'{agent_name.lower()}_{timestamp}'
    history_file = f'training_history_{run_tag}.json'
    with open(history_file, 'w') as f:
        # Plain floats are required because numpy scalars are not
        # JSON-serializable.
        history_json = {}
        for metric, values in history.items():
            history_json[metric] = list(map(float, values))
        json.dump(history_json, f, indent=2)
    print(f"   Training history saved to {history_file}")

    # 10. Plot results
    print("\n9. Generating training plots...")
    plot_file = f'training_results_{run_tag}.png'
    plot_training_results(history, plot_file)

    # Final summary
    print("\n" + rule)
    print("TRAINING COMPLETE!")
    print(rule)
    print("\nFinal Statistics:")
    win_rates = history['win_rates']
    if win_rates:
        # Windowed metrics only exist once at least one window completed.
        print(f"  Final win rate: {win_rates[-1]:.2f}%")
        print(f"  Final avg wrong guesses: {history['avg_wrong_guesses'][-1]:.2f}")
        print(f"  Final avg repeated guesses: {history['avg_repeated_guesses'][-1]:.3f}")
    print(f"  Final epsilon: {history['epsilons'][-1]:.4f}")

    print("\nSaved files:")
    print(f"  - Model: {model_file}")
    print(f"  - History: {history_file}")
    print(f"  - Plots: {plot_file}")

    print("\nNext step: Evaluate your agent")
    print("  Run: python hangman_evaluation.py")

# Run the interactive training workflow only when this code is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()

HANGMAN RL AGENT - TRAINING

1. Loading HMM model...
Model loaded from hangman_hmm_model.pkl
   ‚úì HMM loaded successfully

2. Loading corpus...
   ‚úì Loaded 49933 words

3. Creating Hangman environment...
   ‚úì Environment created

4. Choose agent type:
   1. DQN (Deep Q-Network) - More powerful, slower

5. Initializing DQN agent...
   ‚úì DQN agent initialized

6. Training configuration:
   Recommended episodes for DQN: 5000

   Training for 5000 episodes...
   Estimated time: 16.7 minutes

Training DQN Agent


Training:  10%|‚ñà         | 502/5000 [00:14<03:25, 21.90it/s]


Episode 500/5000
  Win rate (last 500): 0.60%
  Avg wrong guesses: 5.99
  Avg repeated guesses: 0.000
  Avg reward: -162.88
  Epsilon: 0.7788


Training:  20%|‚ñà‚ñà        | 1002/5000 [00:38<03:04, 21.67it/s]


Episode 1000/5000
  Win rate (last 500): 2.40%
  Avg wrong guesses: 5.95
  Avg repeated guesses: 0.000
  Avg reward: -146.88
  Epsilon: 0.6065
DQN model saved to hangman_dqn_checkpoint_1000.pth


Training:  30%|‚ñà‚ñà‚ñà       | 1505/5000 [01:00<02:03, 28.31it/s]


Episode 1500/5000
  Win rate (last 500): 9.00%
  Avg wrong guesses: 5.82
  Avg repeated guesses: 0.000
  Avg reward: -120.04
  Epsilon: 0.4723


Training:  40%|‚ñà‚ñà‚ñà‚ñà      | 2002/5000 [01:17<01:53, 26.38it/s]


Episode 2000/5000
  Win rate (last 500): 7.40%
  Avg wrong guesses: 5.84
  Avg repeated guesses: 0.000
  Avg reward: -119.94
  Epsilon: 0.3678
DQN model saved to hangman_dqn_checkpoint_2000.pth


Training:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 2503/5000 [01:42<01:52, 22.18it/s]


Episode 2500/5000
  Win rate (last 500): 12.00%
  Avg wrong guesses: 5.76
  Avg repeated guesses: 0.000
  Avg reward: -104.94
  Epsilon: 0.2864


Training:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3005/5000 [02:03<00:51, 38.73it/s]


Episode 3000/5000
  Win rate (last 500): 18.40%
  Avg wrong guesses: 5.62
  Avg repeated guesses: 0.000
  Avg reward: -87.74
  Epsilon: 0.2230
DQN model saved to hangman_dqn_checkpoint_3000.pth


Training:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 3503/5000 [02:24<00:57, 25.90it/s]


Episode 3500/5000
  Win rate (last 500): 18.80%
  Avg wrong guesses: 5.62
  Avg repeated guesses: 0.000
  Avg reward: -83.14
  Epsilon: 0.1737


Training:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4003/5000 [02:44<00:48, 20.52it/s]


Episode 4000/5000
  Win rate (last 500): 21.60%
  Avg wrong guesses: 5.52
  Avg repeated guesses: 0.000
  Avg reward: -74.50
  Epsilon: 0.1353
DQN model saved to hangman_dqn_checkpoint_4000.pth


Training:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 4504/5000 [03:06<00:17, 28.06it/s]


Episode 4500/5000
  Win rate (last 500): 24.60%
  Avg wrong guesses: 5.50
  Avg repeated guesses: 0.000
  Avg reward: -67.02
  Epsilon: 0.1053


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5000/5000 [03:26<00:00, 24.17it/s]



Episode 5000/5000
  Win rate (last 500): 24.00%
  Avg wrong guesses: 5.44
  Avg repeated guesses: 0.000
  Avg reward: -65.22
  Epsilon: 0.0820
DQN model saved to hangman_dqn_checkpoint_5000.pth

8. Saving final model...
DQN model saved to hangman_dqn_final.pth
   Training history saved to training_history_dqn_20251103_154929.json

9. Generating training plots...

Training plots saved to training_results_dqn_20251103_154929.png

TRAINING COMPLETE!

Final Statistics:
  Final win rate: 24.00%
  Final avg wrong guesses: 5.44
  Final avg repeated guesses: 0.000
  Final epsilon: 0.0820

Saved files:
  - Model: hangman_dqn_final.pth
  - History: training_history_dqn_20251103_154929.json
  - Plots: training_results_dqn_20251103_154929.png

Next step: Evaluate your agent
  Run: python hangman_evaluation.py


In [4]:
"""
Evaluation script for Hangman RL Agent.
Tests agent performance and generates detailed analysis.
"""

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import json
from datetime import datetime
from collections import defaultdict
from hangman_hmm import HangmanHMM
from hangman_environment import HangmanEnvironment, load_corpus
from hangman_agent import HangmanDQNAgent, SimpleQLearningAgent

def evaluate_agent(env, agent, num_games=2000, verbose=False):
    """
    Play ``num_games`` games with ``agent`` on ``env`` and collect statistics.

    The agent is queried through ``agent.act`` exactly as during training;
    this function does not change the agent's exploration settings itself.
    With ``verbose`` set, the first five games are traced turn by turn.

    Returns:
        dict: Counters ('games_played', 'games_won', 'games_lost',
        'perfect_games', 'total_wrong_guesses', 'total_repeated_guesses'),
        per-game lists ('wrong_guesses_per_game',
        'repeated_guesses_per_game', 'game_lengths') and 'difficult_words',
        one record per lost game.
    """
    wrong_per_game = []
    repeated_per_game = []
    turns_per_game = []
    lost_games = []  # One record per game the agent failed to win
    won_count = 0
    perfect_count = 0  # Wins without a single wrong guess

    for game_num in tqdm(range(num_games), desc="Evaluating"):
        trace = verbose and game_num < 5
        state = env.reset()

        if trace:
            print(f"\n=== Game {game_num + 1} ===")
            print(f"Target word: {env.target_word}")

        num_turns = 0
        done = False
        while not done:
            num_turns += 1

            candidates = env.get_valid_actions()
            action = agent.act(state, candidates, use_hmm_probs=True)
            state, reward, done, info = env.step(action)

            if trace:
                print(f"Turn {num_turns}: Guessed '{action}' -> {env.masked_word}")

        # Snapshot the per-game counters once the episode has ended.
        wrong = env.wrong_guesses
        repeated = env.repeated_guesses
        won = info.get('won')

        if won:
            won_count += 1
            if wrong == 0:
                perfect_count += 1
        else:
            lost_games.append({
                'word': env.target_word,
                'wrong_guesses': wrong,
                'repeated_guesses': repeated,
                'turns': num_turns
            })

        wrong_per_game.append(wrong)
        repeated_per_game.append(repeated)
        turns_per_game.append(num_turns)

        if trace:
            if won:
                print(f"‚úì Won! ({wrong} wrong guesses)")
            else:
                print(f"‚úó Lost! Word was: {env.target_word}")

    return {
        'games_played': len(turns_per_game),
        'games_won': won_count,
        'games_lost': len(lost_games),
        'total_wrong_guesses': sum(wrong_per_game),
        'total_repeated_guesses': sum(repeated_per_game),
        'wrong_guesses_per_game': wrong_per_game,
        'repeated_guesses_per_game': repeated_per_game,
        'game_lengths': turns_per_game,
        'difficult_words': lost_games,
        'perfect_games': perfect_count,
    }

def plot_evaluation_results(results, filename='evaluation_results.png'):
    """Draw the 2x2 evaluation figure and save it to ``filename``.

    Panels: histogram of wrong guesses, histogram of repeated guesses,
    win/loss pie chart, and a text box with summary statistics. ``results``
    is the dict produced by ``evaluate_agent``.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    (ax_wrong, ax_repeated), (ax_pie, ax_summary) = axes

    # 1. Wrong Guesses Distribution
    ax_wrong.hist(results['wrong_guesses_per_game'], bins=range(8),
                  edgecolor='black', alpha=0.7)
    ax_wrong.set_xlabel('Wrong Guesses per Game')
    ax_wrong.set_ylabel('Frequency')
    ax_wrong.set_title('Distribution of Wrong Guesses')
    ax_wrong.grid(True, alpha=0.3)

    # 2. Repeated Guesses Distribution (bin edges run one past the maximum)
    repeated_counts = results['repeated_guesses_per_game']
    ax_repeated.hist(repeated_counts,
                     bins=range(max(repeated_counts) + 2),
                     edgecolor='black', alpha=0.7, color='orange')
    ax_repeated.set_xlabel('Repeated Guesses per Game')
    ax_repeated.set_ylabel('Frequency')
    ax_repeated.set_title('Distribution of Repeated Guesses')
    ax_repeated.grid(True, alpha=0.3)

    # 3. Win/Loss Pie Chart
    ax_pie.pie([results['games_won'], results['games_lost']],
               explode=(0.05, 0), labels=['Won', 'Lost'],
               colors=['#90EE90', '#FFB6C6'],
               autopct='%1.1f%%', shadow=True, startangle=90)
    ax_pie.set_title('Win/Loss Ratio')

    # 4. Performance Metrics Summary (text only, so hide the axes)
    ax_summary.axis('off')

    played = results['games_played']
    success_rate = results['games_won'] / played * 100
    avg_wrong = results['total_wrong_guesses'] / played
    avg_repeated = results['total_repeated_guesses'] / played
    perfect_pct = results['perfect_games'] / played * 100
    avg_length = np.mean(results['game_lengths'])

    pad = "    "
    summary_text = "\n".join([
        "",
        "    Performance Summary",
        "    ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê",
        pad,
        f"    Games Played:           {played:,}",
        f"    Games Won:              {results['games_won']:,}",
        f"    Games Lost:             {results['games_lost']:,}",
        pad,
        f"    Success Rate:           {success_rate:.2f}%",
        f"    Perfect Games:          {results['perfect_games']} ({perfect_pct:.1f}%)",
        pad,
        f"    Total Wrong Guesses:    {results['total_wrong_guesses']:,}",
        f"    Avg Wrong/Game:         {avg_wrong:.3f}",
        pad,
        f"    Total Repeated Guesses: {results['total_repeated_guesses']:,}",
        f"    Avg Repeated/Game:      {avg_repeated:.3f}",
        pad,
        f"    Avg Game Length:        {avg_length:.1f} turns",
        pad,
    ])

    ax_summary.text(0.1, 0.5, summary_text, fontsize=12,
                    verticalalignment='center', fontfamily='monospace',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

    plt.tight_layout()
    plt.savefig(filename, dpi=150, bbox_inches='tight')
    print(f"\nEvaluation plots saved to {filename}")
    plt.close()

def analyze_difficult_words(results, top_n=20):
    """Print a report on the games the agent lost.

    The report lists up to ``top_n`` failed words ordered by wrong-guess
    count (highest first, ties kept in their recorded order), followed by
    the number of failures and mean wrong guesses per word length. Prints a
    short notice and returns when ``results['difficult_words']`` is empty.
    """
    failures = results['difficult_words']
    if not failures:
        print("\nNo failed games to analyze!")
        return

    rule = "=" * 60

    # Rank a copy so the caller's list keeps its original order.
    ranked = list(failures)
    ranked.sort(key=lambda entry: entry['wrong_guesses'], reverse=True)

    print("\n" + rule)
    print(f"TOP {top_n} MOST DIFFICULT WORDS")
    print(rule)

    rank = 0
    for entry in ranked[:top_n]:
        rank += 1
        print(f"\n{rank}. '{entry['word'].upper()}'")
        print(f"   Wrong guesses: {entry['wrong_guesses']}")
        print(f"   Repeated guesses: {entry['repeated_guesses']}")
        print(f"   Total turns: {entry['turns']}")

    # Group the wrong-guess counts of all failures by word length.
    wrong_by_length = defaultdict(list)
    for entry in failures:
        wrong_by_length[len(entry['word'])].append(entry['wrong_guesses'])

    print("\n" + rule)
    print("DIFFICULTY BY WORD LENGTH")
    print(rule)

    for length, wrong_counts in sorted(wrong_by_length.items()):
        mean_wrong = np.mean(wrong_counts)
        print(f"Length {length:2d}: {len(wrong_counts):3d} failed games, "
              f"avg {mean_wrong:.2f} wrong guesses")

def calculate_final_score(results):
    """Return the competition score for an evaluation run.

    The score is ``win fraction * 2000`` minus 5 points per wrong guess and
    2 points per repeated guess, using the totals stored in ``results``.
    """
    win_fraction = results['games_won'] / results['games_played']
    win_points = win_fraction * 2000
    wrong_penalty = results['total_wrong_guesses'] * 5
    repeated_penalty = results['total_repeated_guesses'] * 2
    return win_points - wrong_penalty - repeated_penalty

def print_evaluation_summary(results):
    """Print the evaluation report and the score breakdown to stdout.

    ``results`` is the dict produced by ``evaluate_agent``. All derived
    values are computed before anything is printed.
    """
    played = results['games_played']
    total_wrong = results['total_wrong_guesses']
    total_repeated = results['total_repeated_guesses']
    perfect = results['perfect_games']

    success_rate = results['games_won'] / played * 100  # Percentage
    avg_wrong = total_wrong / played
    avg_repeated = total_repeated / played
    final_score = calculate_final_score(results)

    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print("\n" + heavy_rule)
    print("EVALUATION RESULTS")
    print(heavy_rule)

    print(f"\nGames Played: {played}")
    print(f"Games Won: {results['games_won']}")
    print(f"Games Lost: {results['games_lost']}")

    print(f"\nSuccess Rate: {success_rate:.2f}%")
    print(f"Perfect Games (0 wrong): {perfect} "
          f"({perfect / played * 100:.1f}%)")

    print(f"\nTotal Wrong Guesses: {total_wrong}")
    print(f"Avg Wrong Guesses per Game: {avg_wrong:.3f}")

    print(f"\nTotal Repeated Guesses: {total_repeated}")
    print(f"Avg Repeated Guesses per Game: {avg_repeated:.3f}")

    print(f"\nAvg Game Length: {np.mean(results['game_lengths']):.1f} turns")

    # Score breakdown: one line per term of the scoring formula.
    print("\n" + light_rule)
    print("FINAL SCORE CALCULATION")
    print(light_rule)
    print(f"Success Rate √ó 2000 = {success_rate/100 * 2000:.2f}")
    print(f"Wrong Guesses √ó 5 = {total_wrong * 5:.2f}")
    print(f"Repeated Guesses √ó 2 = {total_repeated * 2:.2f}")
    print()
    print(f"FINAL SCORE: {final_score:.2f}")
    print(heavy_rule)

def main():
    """Interactive entry point: evaluate a trained Hangman agent.

    Loads the HMM, the last 20% of the corpus and a saved agent, plays the
    requested number of games with epsilon forced to 0, prints the summary
    and the failed-word analysis, and writes a JSON results file and a PNG
    with the evaluation plots. Returns early (after printing a message) when
    the HMM model, the corpus or the agent model file is missing.
    """
    print("="*60)
    print("HANGMAN RL AGENT - EVALUATION")
    print("="*60)

    # 1. Load HMM
    print("\n1. Loading HMM model...")
    try:
        hmm = HangmanHMM()
        hmm.load('hangman_hmm_model.pkl')
        print("   ‚úì HMM loaded")
    except FileNotFoundError:
        print("   ‚ùå HMM model not found!")
        return

    # 2. Load corpus
    print("\n2. Loading corpus...")
    try:
        all_words = load_corpus('corpus.txt')
        # Use last 20% for testing.
        # NOTE(review): the training script in this file builds its
        # environment from the full corpus, so these words do not appear to
        # be held out from training - confirm whether that is intended.
        test_start = int(len(all_words) * 0.8)
        test_words = all_words[test_start:]
        print(f"   ‚úì Loaded {len(test_words)} test words")
    except FileNotFoundError:
        print("   ‚ùå corpus.txt not found!")
        return

    # 3. Create environment
    print("\n3. Creating evaluation environment...")
    env = HangmanEnvironment(test_words, hmm, max_lives=6)
    print("   ‚úì Environment created")

    # 4. Choose agent type (re-prompt until the answer is '1' or '2')
    print("\n4. Choose agent to evaluate:")
    print("   1. DQN")
    print("   2. Q-Learning")

    while True:
        choice = input("\nEnter choice (1 or 2): ").strip()
        if choice in ['1', '2']:
            break
        print("Invalid choice.")

    use_dqn = (choice == '1')

    # 5. Load agent
    print(f"\n5. Loading {'DQN' if use_dqn else 'Q-Learning'} agent...")

    if use_dqn:
        agent = HangmanDQNAgent()
        default_file = 'hangman_dqn_final.pth'
    else:
        agent = SimpleQLearningAgent()
        default_file = 'hangman_qlearning_final.pkl'

    model_file = input(f"   Model file (or press Enter for '{default_file}'): ").strip()
    if not model_file:
        model_file = default_file

    try:
        agent.load(model_file)
        agent.epsilon = 0.0  # Greedy evaluation, no exploration
        print(f"   ‚úì Agent loaded (epsilon set to 0 for evaluation)")
    except FileNotFoundError:
        print(f"   ‚ùå Model file '{model_file}' not found!")
        return

    # 6. Evaluation parameters
    print("\n6. Evaluation settings:")
    # Re-prompt instead of crashing on non-numeric input, and reject zero or
    # negative counts: every summary statistic divides by the number of
    # games played, so an empty run would fail with ZeroDivisionError.
    while True:
        num_games_input = input("   Number of games (press Enter for 2000): ").strip()
        if not num_games_input:
            num_games = 2000
            break
        try:
            num_games = int(num_games_input)
        except ValueError:
            print("   Invalid input. Please enter a number.")
            continue
        if num_games > 0:
            break
        print("   Please enter a positive number.")

    verbose_input = input("   Verbose output for first 5 games? (y/n, default n): ").strip().lower()
    verbose = (verbose_input == 'y')

    # 7. Run evaluation
    print(f"\n7. Evaluating agent on {num_games} games...")
    results = evaluate_agent(env, agent, num_games, verbose)

    # 8. Print summary
    print_evaluation_summary(results)

    # 9. Analyze difficult words
    if results['games_lost'] > 0:
        analyze_difficult_words(results, top_n=20)

    # 10. Save results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    agent_name = "dqn" if use_dqn else "qlearning"

    results_file = f'evaluation_results_{agent_name}_{timestamp}.json'

    # The score is a pure function of `results`; compute it once and reuse it
    # for both the JSON file and the final message.
    final_score = calculate_final_score(results)

    # Convert results to JSON-serializable format
    results_json = {
        'games_played': results['games_played'],
        'games_won': results['games_won'],
        'games_lost': results['games_lost'],
        'success_rate': results['games_won'] / results['games_played'] * 100,
        'total_wrong_guesses': results['total_wrong_guesses'],
        'total_repeated_guesses': results['total_repeated_guesses'],
        'avg_wrong_guesses': results['total_wrong_guesses'] / results['games_played'],
        'avg_repeated_guesses': results['total_repeated_guesses'] / results['games_played'],
        'perfect_games': results['perfect_games'],
        'final_score': float(final_score),
        # First 50 lost games in the order they were played (not ranked).
        'difficult_words': results['difficult_words'][:50]
    }

    with open(results_file, 'w') as f:
        json.dump(results_json, f, indent=2)
    print(f"\nResults saved to {results_file}")

    # 11. Generate plots
    print("\n11. Generating evaluation plots...")
    plot_file = f'evaluation_results_{agent_name}_{timestamp}.png'
    plot_evaluation_results(results, plot_file)

    # Final message
    print("\n" + "="*60)
    print("EVALUATION COMPLETE!")
    print("="*60)

    # Map the score onto the coarse grade bands used by this script.
    if final_score > 1500:
        grade = "A+"
    elif final_score > 1000:
        grade = "A"
    elif final_score > 500:
        grade = "B"
    elif final_score > 0:
        grade = "C"
    else:
        grade = "D/F"

    print(f"\nYour estimated grade: {grade}")
    print(f"Final Score: {final_score:.2f}")

    print(f"\nGenerated files:")
    print(f"  - Results: {results_file}")
    print(f"  - Plots: {plot_file}")
# Run the interactive evaluation workflow only when this code is executed as
# a script, not when it is imported.
if __name__ == "__main__":
    main()

HANGMAN RL AGENT - EVALUATION

1. Loading HMM model...
Model loaded from hangman_hmm_model.pkl
   ‚úì HMM loaded

2. Loading corpus...
   ‚úì Loaded 9987 test words

3. Creating evaluation environment...
   ‚úì Environment created

4. Choose agent to evaluate:
   1. DQN
   2. Q-Learning

5. Loading DQN agent...
DQN model loaded from hangman_dqn_final.pth
   ‚úì Agent loaded (epsilon set to 0 for evaluation)

6. Evaluation settings:


  checkpoint = torch.load(filename, map_location=self.device)



7. Evaluating agent on 2000 games...


Evaluating:   1%|          | 21/2000 [00:00<00:09, 198.66it/s]


=== Game 1 ===
Target word: jovicentrical
Turn 1: Guessed 'i' -> ['_', '_', '_', 'i', '_', '_', '_', '_', '_', 'i', '_', '_', '_']
Turn 2: Guessed 'o' -> ['_', 'o', '_', 'i', '_', '_', '_', '_', '_', 'i', '_', '_', '_']
Turn 3: Guessed 'n' -> ['_', 'o', '_', 'i', '_', '_', 'n', '_', '_', 'i', '_', '_', '_']
Turn 4: Guessed 't' -> ['_', 'o', '_', 'i', '_', '_', 'n', 't', '_', 'i', '_', '_', '_']
Turn 5: Guessed 'e' -> ['_', 'o', '_', 'i', '_', 'e', 'n', 't', '_', 'i', '_', '_', '_']
Turn 6: Guessed 's' -> ['_', 'o', '_', 'i', '_', 'e', 'n', 't', '_', 'i', '_', '_', '_']
Turn 7: Guessed 'r' -> ['_', 'o', '_', 'i', '_', 'e', 'n', 't', 'r', 'i', '_', '_', '_']
Turn 8: Guessed 'c' -> ['_', 'o', '_', 'i', 'c', 'e', 'n', 't', 'r', 'i', 'c', '_', '_']
Turn 9: Guessed 'l' -> ['_', 'o', '_', 'i', 'c', 'e', 'n', 't', 'r', 'i', 'c', '_', 'l']
Turn 10: Guessed 'a' -> ['_', 'o', '_', 'i', 'c', 'e', 'n', 't', 'r', 'i', 'c', 'a', 'l']
Turn 11: Guessed 'p' -> ['_', 'o', '_', 'i', 'c', 'e', 'n', 't', '

Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2000/2000 [00:10<00:00, 192.90it/s]



EVALUATION RESULTS

Games Played: 2000
Games Won: 519
Games Lost: 1481

Success Rate: 25.95%
Perfect Games (0 wrong): 6 (0.3%)

Total Wrong Guesses: 10760
Avg Wrong Guesses per Game: 5.380

Total Repeated Guesses: 0
Avg Repeated Guesses per Game: 0.000

Avg Game Length: 11.3 turns

------------------------------------------------------------
FINAL SCORE CALCULATION
------------------------------------------------------------
Success Rate √ó 2000 = 519.00
Wrong Guesses √ó 5 = 53800.00
Repeated Guesses √ó 2 = 0.00

FINAL SCORE: -53281.00

TOP 20 MOST DIFFICULT WORDS

1. 'JOVICENTRICAL'
   Wrong guesses: 6
   Repeated guesses: 0
   Total turns: 15

2. 'FRACTIONIZE'
   Wrong guesses: 6
   Repeated guesses: 0
   Total turns: 14

3. 'PROHOLIDAY'
   Wrong guesses: 6
   Repeated guesses: 0
   Total turns: 13

4. 'WALLACH'
   Wrong guesses: 6
   Repeated guesses: 0
   Total turns: 8

5. 'KEXY'
   Wrong guesses: 6
   Repeated guesses: 0
   Total turns: 7

6. 'PREVACCINATE'
   Wrong guesses: 6
 