In [1]:
import torch
import numpy as np
from tqdm.notebook import tqdm
import time

import config
import utils
from hmm_model import HangmanHMM
from hangman_env import HangmanEnv
from rl_agent import HangmanAgent

In [2]:
# --- Load everything needed for evaluation ---
# Builds the corpus, the trained HMM, the environment and the RL agent, then
# switches the agent to pure-exploitation mode for the evaluation loop.

# Load corpus
corpus = utils.load_corpus(config.CORPUS_PATH)

# Load the trained HMM
# NOTE(review): the model path appears to be a pickle file; if HangmanHMM.load
# unpickles it, only load model files from a trusted source.
print("Loading trained HMM...")
hmm = HangmanHMM.load(config.HMM_MODEL_PATH)

# Initialize the Environment
env = HangmanEnv(corpus)

# Initialize the Agent
print("Loading trained RL Agent...")
agent = HangmanAgent(config.STATE_SIZE, config.ACTION_SIZE)

# Load the *trained weights* into the agent's network.
# map_location="cpu" lets a checkpoint that was saved on a GPU be read on a
# CPU-only machine; load_state_dict() then copies each tensor onto whatever
# device the network's parameters already live on.
# NOTE(review): torch.load can unpickle arbitrary objects on older PyTorch
# versions - only load checkpoints from a trusted source.
trained_weights = torch.load(config.AGENT_MODEL_PATH, map_location="cpu")
agent.q_network.load_state_dict(trained_weights)

# CRITICAL: set the agent to evaluation mode (inference behaviour for layers
# such as dropout / batch-norm, if the network has any).
agent.q_network.eval()
agent.epsilon = 0.0  # No more exploration, only exploitation

Loaded 49979 valid words from corpus.txt.
Loading trained HMM...
HMM model loaded from hmm_model.pkl
Loading trained RL Agent...


In [3]:
# --- Final evaluation loop ---
# Plays config.NUM_EPISODES_EVAL games with the loaded agent and accumulates
# the aggregate counters (total_wins, total_wrong_guesses,
# total_repeated_guesses) that the report cell reads afterwards.
print(f"Running final evaluation for {config.NUM_EPISODES_EVAL} games...")

total_wins = 0
total_wrong_guesses = 0
total_repeated_guesses = 0
# perf_counter() is monotonic, so the elapsed time cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()

# Evaluation never backpropagates, so run it without autograd bookkeeping.
# This does not change which actions are selected.
with torch.no_grad():
    for episode in tqdm(range(config.NUM_EPISODES_EVAL)):
        env_state = env.reset()
        done = False

        episode_wrong_guesses = 0
        episode_repeated_guesses = 0

        while not done:
            # 1. Get HMM letter probabilities for the current masked word
            hmm_probs = hmm.get_letter_probabilities(
                env_state['masked_word'],
                env_state['guessed_letters'],
            )

            # 2. Build the state vector from the env state and HMM output
            state_vec = agent.state_to_vector(env_state, hmm_probs)

            # 3. Select the action (agent.epsilon is expected to be 0.0 here).
            # The guessed letters are passed along - presumably so the agent
            # can exclude them; verify in rl_agent.
            action_idx = agent.select_action(state_vec, env_state['guessed_letters'])
            action_letter = config.ALPHABET[action_idx]

            # 4. Step environment (the reward is not used during evaluation)
            next_env_state, reward, done, info = env.step(action_letter)

            # 5. Track stats reported by the environment for this step
            if info.get('wrong_guess'):
                episode_wrong_guesses += 1
            if info.get('repeated_guess'):
                episode_repeated_guesses += 1

            # A win is only counted on the step that ends the episode
            if done and info.get('win'):
                total_wins += 1

            env_state = next_env_state

        # Add episode stats to totals
        total_wrong_guesses += episode_wrong_guesses
        total_repeated_guesses += episode_repeated_guesses

end_time = time.perf_counter()
print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")

Running final evaluation for 2000 games...


  0%|          | 0/2000 [00:00<?, ?it/s]


Evaluation finished in 0.83 seconds.


In [4]:
# --- Final report ---
# Turns the aggregate counters from the evaluation loop (total_wins,
# total_wrong_guesses, total_repeated_guesses) into per-game averages and the
# final score, then prints a summary.

# Scoring weights (from problem statement):
# Score = (Success Rate * 2000) - (Total Wrong Guesses * 5) - (Total Repeated Guesses * 2)
SUCCESS_BONUS_SCALE = 2000
WRONG_GUESS_PENALTY = 5
REPEATED_GUESS_PENALTY = 2

# --- Calculate Final Metrics ---
num_games = config.NUM_EPISODES_EVAL
if num_games > 0:
    success_rate = total_wins / num_games
    avg_wrong_guesses = total_wrong_guesses / num_games
    avg_repeated_guesses = total_repeated_guesses / num_games
else:
    # No games were configured: report zeros instead of dividing by zero.
    success_rate = 0.0
    avg_wrong_guesses = 0.0
    avg_repeated_guesses = 0.0

# --- Calculate Final Score ---
# Each component is computed once so the printed breakdown below cannot
# drift from the score itself.
success_bonus = success_rate * SUCCESS_BONUS_SCALE
wrong_guess_penalty = total_wrong_guesses * WRONG_GUESS_PENALTY
repeated_guess_penalty = total_repeated_guesses * REPEATED_GUESS_PENALTY
final_score = success_bonus - wrong_guess_penalty - repeated_guess_penalty

# --- Print Final Report ---
print("\n" + "="*30)
print(" 🤖 HACKMAN AGENT FINAL REPORT 🤖")
print("="*30)
print(f"Total Games Played: {config.NUM_EPISODES_EVAL}")
print("\n--- PERFORMANCE ---")
print(f"Success Rate:         {success_rate * 100:.2f}%  ({total_wins} wins)")
print(f"Avg. Wrong Guesses:   {avg_wrong_guesses:.2f}")
print(f"Avg. Repeated Guesses: {avg_repeated_guesses:.2f}")

print("\n--- RAW COUNTS ---")
print(f"Total Wrong Guesses:    {total_wrong_guesses}")
print(f"Total Repeated Guesses: {total_repeated_guesses}")

print("\n--- SCORING ---")
print(f"Success Bonus:          +{success_bonus:.0f}")
print(f"Wrong Guess Penalty:    -{wrong_guess_penalty:.0f}")
print(f"Repeated Guess Penalty: -{repeated_guess_penalty:.0f}")
print("-----------------------------------")
print(f"🏆 FINAL SCORE:          {final_score:.2f} 🏆")
print("="*30)


 ü§ñ HACKMAN AGENT FINAL REPORT ü§ñ
Total Games Played: 2000

--- PERFORMANCE ---
Success Rate:         7.70%  (154 wins)
Avg. Wrong Guesses:   5.84
Avg. Repeated Guesses: 0.00

--- RAW COUNTS ---
Total Wrong Guesses:    11689
Total Repeated Guesses: 0

--- SCORING ---
Success Bonus:          +154
Wrong Guess Penalty:    -58445
Repeated Guess Penalty: -0
-----------------------------------
üèÜ FINAL SCORE:          -58291.00 üèÜ
