In [None]:
import random
import string
import numpy as np
import pandas as pd
from collections import deque, Counter, defaultdict
from tqdm import tqdm

# Seed Python's `random` module, which drives word sampling and guess selection below.
random.seed(24)

# Hyperparameters
# NOTE(review): MAX_LENGTH, UNGUESSED_CHAR and PADDING_CHAR are not referenced in the
# visible cells; HangmanEnv.reset uses the literals 25, 27 and 0, which presumably
# correspond to them — confirm and keep them in sync.
MAX_LENGTH = 25
# Number of lives HangmanEnv.reset gives every new game.
MAX_LIVES = 6
# Threshold compared against random.random() in DQNAgent.select_action: below it the
# agent picks a letter that is still hidden in the secret word.
BIAS = 0.6
# NOTE(review): TARGET_UPDATE_FREQ and TRAIN_FREQ are not used in the visible cells.
TARGET_UPDATE_FREQ = 1
TRAIN_FREQ = 40
UNGUESSED_CHAR = 27
PADDING_CHAR = 0

# Parallel sample lists filled by HangmanEnv.step on every correct guess: the secret
# word, the integer-encoded board before the guess, the guessed-letter flags before
# the guess, and the guessed letter as an index (ord(letter) - 97).
training_data_secretword = []
training_data_intguessed_word = []
training_data_guessed_letters_flags = []
training_data_action = []

# One word per line of the word list file.
with open('cleaned_word_list.txt', 'r') as f:
    WORDS = f.read().splitlines()

def rank_letters_by_frequency(words, min_len=3, max_len=25):
    """Count, per word length, how many words contain each letter.

    Args:
        words: iterable of words.
        min_len: shortest word length to include (inclusive).
        max_len: longest word length to include (inclusive).

    Returns:
        A plain dict mapping word length -> Counter of letters, where each
        letter is counted at most once per word.
    """
    counts_by_length = {}
    for candidate in words:
        size = len(candidate)
        if size < min_len or size > max_len:
            continue
        bucket = counts_by_length.get(size)
        if bucket is None:
            bucket = counts_by_length[size] = Counter()
        # A set, so a letter repeated inside one word is counted once.
        bucket.update(set(candidate))
    return counts_by_length

# Per-length letter counts for the whole word list; read by weighted_random_choice.
letter_freqs = rank_letters_by_frequency(WORDS)

def weighted_random_choice(ug_letters, length, freqs=None):
    """Pick one unguessed letter, weighted by how common it is for this word length.

    Args:
        ug_letters: collection of letters that have not been guessed yet.
        length: length of the secret word; selects the frequency table to use.
        freqs: optional mapping ``length -> {letter: count}``. Defaults to the
            module-level ``letter_freqs``.

    Returns:
        A single letter from ``ug_letters``.

    Raises:
        IndexError: if ``ug_letters`` is empty.
    """
    if freqs is None:
        freqs = letter_freqs

    # Sort the candidates: set iteration order for strings depends on hash
    # randomisation, so without a fixed order a seeded run is not repeatable.
    candidates = sorted(ug_letters)

    # Missing lengths and missing letters both count as weight 0.
    counts = freqs.get(length, {})
    weights = [counts.get(letter, 0) for letter in candidates]

    # random.choices rejects an all-zero weight vector; fall back to a uniform pick
    # when there is no frequency information for these letters at this length.
    if sum(weights) <= 0:
        return random.choice(candidates)
    return random.choices(candidates, weights=weights, k=1)[0]




# Hangman Environment
class HangmanEnv:
    """Hangman game that records a training sample on every correct guess.

    Samples are appended to the module-level ``training_data_*`` lists: the
    secret word, the integer-encoded board before the guess, the guessed-letter
    flags before the guess, and the guessed letter as an index in 0-25.
    """

    def __init__(self, words):
        """Store the pool of secret words and start the first game."""
        self.words = words
        self.reset()

    def reset(self):
        """Start a new game with a randomly chosen word and return its state.

        Words are expected to be at most MAX_LENGTH characters long.
        """
        self.secret_word = random.choice(self.words)
        self.length_word = len(self.secret_word)
        self.guessed_word = ['_'] * self.length_word
        # Integer board: UNGUESSED_CHAR for every hidden position, followed by
        # PADDING_CHAR up to the fixed width MAX_LENGTH.
        self.intguessed_word = np.pad(
            [UNGUESSED_CHAR] * self.length_word,
            (0, MAX_LENGTH - self.length_word),
            'constant',
            constant_values=PADDING_CHAR,
        )
        self.lives = MAX_LIVES
        self.guessed_letters = set()
        self.guessed_letters_flags = np.zeros(26, dtype=int)
        # Letters of the secret word that are still hidden (with multiplicity).
        self.correct_guesses = Counter(self.secret_word)
        self.unguessed_letters = set(string.ascii_lowercase)
        return self._get_state()

    def step(self, action):
        """Apply one guess and return ``(state, reward, done)``.

        Reward is +1.0 per revealed position or -1.0 for a miss, plus +6.0 when
        the word is completed or -6.0 when the last life is lost. A letter that
        was already guessed is treated as a miss.

        Args:
            action: the guessed letter, a single character in 'a'-'z'.

        Raises:
            ValueError: if ``action`` is not a single lowercase ASCII letter.
        """
        letter = action
        # Validate before touching any state, so a bad action cannot leave the
        # game half-updated (lives already decremented, flags not yet set).
        if not (isinstance(letter, str) and len(letter) == 1 and 'a' <= letter <= 'z'):
            raise ValueError(f"action must be a single lowercase letter a-z, got {letter!r}")

        letter_index = ord(letter) - 97
        reward = 0
        done = False

        if letter in self.correct_guesses:
            # Record the sample BEFORE the board and flags are updated, so it
            # captures the state in which this guess was made.
            training_data_secretword.append(self.secret_word)
            training_data_intguessed_word.append(self.intguessed_word.copy())
            training_data_guessed_letters_flags.append(self.guessed_letters_flags.copy())
            training_data_action.append(letter_index)

            for i, char in enumerate(self.secret_word):
                if char == letter:
                    reward += 1.0
                    self.guessed_word[i] = letter
                    # Letters are encoded 1-26 on the integer board.
                    self.intguessed_word[i] = letter_index + 1
            del self.correct_guesses[letter]
        else:
            self.lives -= 1
            reward -= 1.0

        # discard(): a repeated guess is no longer in the set and must not raise.
        self.unguessed_letters.discard(letter)
        self.guessed_letters.add(letter)
        self.guessed_letters_flags[letter_index] = 1

        if '_' not in self.guessed_word:
            reward += 6.0
            done = True
        elif self.lives <= 0:
            reward -= 6.0
            done = True

        return self._get_state(), reward, done

    def _get_state(self):
        """Return ``(secret_word, guessed_letters, remaining_letters, unguessed_letters)``.

        ``remaining_letters`` is a copy of the Counter of still-hidden letters;
        the two sets are the environment's live objects, not copies.
        """
        return self.secret_word, self.guessed_letters, self.correct_guesses.copy(), self.unguessed_letters


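# Data-generation agent (keeps the DQNAgent name, but it only applies a guessing heuristic; no Q-network is defined or trained here)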
class DQNAgent:
    """Heuristic guesser used to generate training data.

    The first guess of an episode is always a letter of the secret word (a
    vowel when the word has one). Later guesses are, with probability BIAS, a
    still-hidden letter of the secret word, and otherwise a frequency-weighted
    pick among all unguessed letters.
    """

    def __init__(self):
        # Number of guesses made in the current episode; the caller resets it.
        self.step_count = 0
        self.train_count = 0
        self.vowels = set('aeiouy')

    def select_action(self, state):
        """Choose the next letter to guess.

        Args:
            state: tuple ``(secret_word, guessed_letters, remaining_letters,
                unguessed_letters)`` as returned by the environment, where
                ``remaining_letters`` is a Counter of still-hidden letters.

        Returns:
            The letter to guess.
        """
        self.step_count += 1
        secret_word, _guessed_letters, remaining_letters, ug_letters = state
        # elements() repeats each letter by its count, so letters occurring more
        # often in the word are proportionally more likely to be picked.
        hidden_letters = list(remaining_letters.elements())

        if self.step_count == 1:
            hidden_vowels = [ch for ch in hidden_letters if ch in self.vowels]
            # Words without any vowel would leave nothing to choose from; fall
            # back to any letter of the word so the opening guess is still correct.
            return random.choice(hidden_vowels or hidden_letters)

        if random.random() < BIAS:
            return random.choice(hidden_letters)
        # Otherwise sample among all unguessed letters by per-length letter frequency.
        return weighted_random_choice(ug_letters, len(secret_word))

# Episode loop: plays full games so the environment can record training samples (no model is trained in this loop)
def train_episodes(env, agent, num_episodes=5000000):
    """Play ``num_episodes`` complete games of ``agent`` against ``env``.

    Nothing is returned; each ``env.step`` call records training samples as a
    side effect.

    Args:
        env: environment exposing ``reset()`` and ``step(action)``.
        agent: agent exposing ``select_action(state)`` and a ``step_count``
            attribute, which is reset to 0 at the start of every episode.
        num_episodes: number of games to play.
    """
    for _episode in tqdm(range(num_episodes), desc="Generating Data"):
        # The agent uses step_count to detect the first guess of a game.
        agent.step_count = 0
        state = env.reset()
        done = False

        while not done:
            action = agent.select_action(state)
            # The reward is not needed for data generation.
            state, _reward, done = env.step(action)

# Build the environment and agent, then play the episodes that generate the training data.
# (WORDS is loaded earlier in this cell; train_episodes runs with its default episode count.)


env = HangmanEnv(WORDS)
agent = DQNAgent()
train_episodes(env, agent)


Generating Data: 100%|██████████| 5000000/5000000 [12:53<00:00, 6467.10it/s]


In [None]:
# Total number of recorded samples (one is appended per correct guess).
len(training_data_secretword)

32538887

In [None]:
idx = 3
# Show the first `idx` entries of each parallel sample list, in the order:
# secret word, encoded board, guessed-letter flags, action index.
for preview_column in (
    training_data_secretword,
    training_data_intguessed_word,
    training_data_guessed_letters_flags,
    training_data_action,
):
    print(preview_column[:idx])


['overconfute', 'overconfute', 'overconfute']
[array([27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0]), array([27, 27, 27, 27, 27, 27, 27, 27, 21, 27, 27,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0]), array([27, 22, 27, 27, 27, 27, 27, 27, 21, 27, 27,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0])]
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0])]
[20, 21, 14]


In [None]:
# Convert the parallel Python lists into NumPy arrays ready for saving.
training_data_secretword_np = np.array(training_data_secretword)
training_data_intguessed_word_np = np.array(training_data_intguessed_word)
training_data_guessed_letters_flags_np = np.array(training_data_guessed_letters_flags)
training_data_action_np = np.array(training_data_action)

# Report the dtype of each array, in the same order as above.
# NOTE(review): the integer arrays come out as int64 in the recorded run although the
# stored values are small (0-27); a narrower dtype may save memory — confirm what the
# downstream loader expects before changing it.
for converted_array in (
    training_data_secretword_np,
    training_data_intguessed_word_np,
    training_data_guessed_letters_flags_np,
    training_data_action_np,
):
    print(converted_array.dtype)

<U25
int64
int64
int64


In [None]:
# Write the four arrays into one compressed .npz archive.
# NOTE(review): the arrays are passed positionally, so they are presumably stored under
# NumPy's default keys (arr_0 .. arr_3) in this order: secret words, encoded boards,
# guessed-letter flags, actions — confirm against the loader before switching to keywords.
np.savez_compressed('Training_Data.npz', training_data_secretword_np, training_data_intguessed_word_np, training_data_guessed_letters_flags_np, training_data_action_np)
print("Successfully saved!")

Successfully saved!


In [None]:
# Distinct secret words that contributed at least one recorded sample.
secret_word_set = set(training_data_secretword)

In [None]:
# Number of distinct words covered by the generated data.
len(secret_word_set)

226110

In [None]:
# Size of the full word list, for comparison with the number of distinct words in the generated data.
len(WORDS)

226110