In [1]:
import config
import utils
from hmm_model import HangmanHMM
import pickle

In [6]:
import config
import utils
from collections import defaultdict

# Load the word corpus from the path configured in config.CORPUS_PATH
print("Loading corpus...")
corpus = utils.load_corpus(config.CORPUS_PATH)
print(f"Successfully loaded {len(corpus)} words.")

# Fail early with a readable message: min()/max() below would otherwise raise
# an opaque "arg is an empty sequence" ValueError on an empty corpus.
if len(corpus) == 0:
    raise ValueError(f"No words were loaded from {config.CORPUS_PATH}")

# Group words by their length (key: word length, value: list of words)
words_by_length = defaultdict(list)
for word in corpus:
    words_by_length[len(word)].append(word)

# Iterating a dict yields its keys, so these are the shortest/longest lengths present
min_len = min(words_by_length)
max_len = max(words_by_length)

print(f"Corpus contains words from length {min_len} to {max_len}.")

# Use .get() instead of indexing: indexing a defaultdict silently inserts an
# empty list for a missing key, and a later `length in words_by_length` check
# would then mistake that length for one that has data.
example_len = 5
print(f"Example: {len(words_by_length.get(example_len, []))} words of length {example_len}.")

Loading corpus...
Loaded 49979 valid words from corpus.txt.
Successfully loaded 49979 words.
Corpus contains words from length 1 to 24.
Example: 2340 words of length 5.


In [7]:
from hmm_model import HangmanHMM
from tqdm.notebook import tqdm

# Trained models are collected in a dictionary keyed by word length:
# hmms = { 5: <HMM_obj_for_len_5>, 6: <HMM_obj_for_len_6>, ... }
hmms = {}

print(f"Starting HMM training for all word lengths from {min_len} to {max_len}...")

# tqdm renders a progress bar over every candidate length in the range
for length in tqdm(range(min_len, max_len + 1)):
    # .get() never inserts a key, and the falsy check covers both a missing
    # length and a length whose bucket is an empty list, so no model is ever
    # trained (and stored) on zero words.
    words = words_by_length.get(length)
    if not words:
        continue

    # A fresh model per length, trained only on words of that length
    hmm = HangmanHMM(n_states=config.HMM_N_STATES)

    # NOTE(review): the recorded output of this cell prints
    # "This is the part you need to build!", which suggests train() is still
    # a placeholder in hmm_model.py -- confirm before relying on these models.
    hmm.train(words)

    hmms[length] = hmm

print("\nAll HMM training complete.")

Starting HMM training for all word lengths from 1 to 24...


  0%|          | 0/24 [00:00<?, ?it/s]

HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part you need to build!)
HMM Training Complete.
HMM initialized with 10 hidden states.
Training HMM... (This is the part

In [4]:
# Smoke-test the probability oracle on one hand-written game state
print("Testing HMM probability calculation...")
test_mask = "_p_l_"
test_guessed = {'p', 'l', 'e'}

# Models are stored per word length, so select the one that matches the mask.
# The bare `hmm` name only holds whatever the training loop assigned last
# (the model for the longest word length), which is the wrong model here.
# A KeyError on this line means no model was trained for this mask length.
test_hmm = hmms[len(test_mask)]
probs = test_hmm.get_letter_probabilities(test_mask, test_guessed)

# Rank the letters that have not been guessed yet, most probable first
sorted_probs = sorted(
    [(l, p) for l, p in probs.items() if l not in test_guessed],
    key=lambda x: x[1],
    reverse=True
)

print(f"State: '{test_mask}', Guessed: {test_guessed}")
print("Top 5 HMM predictions:")
for letter, prob in sorted_probs[:5]:
    print(f"  {letter}: {prob:.4f}")

Testing HMM probability calculation...
State: '_p_l_', Guessed: {'e', 'l', 'p'}
Top 5 HMM predictions:
  a: 0.0435
  b: 0.0435
  c: 0.0435
  d: 0.0435
  f: 0.0435


In [5]:


# Persist every trained per-length model in a single pickle file
import pickle

model_path = config.HMM_MODEL_PATH

# The whole `hmms` dictionary is serialized as one object
with open(model_path, 'wb') as model_file:
    pickle.dump(hmms, model_file)

print(f"HMM model dictionary saved to {model_path}")

HMM model saved to hmm_model.pkl
