In [3]:
import numpy as np
import bisect 
import math
import random
import gensim
from constants import PATH_TO_DATASET
from answers import secretWords as answers
from secondLevelClusters import secondLevelClusters
import queue
import heuristicrnn
import torch
import torch.nn as nn
import torch.optim as optim


In [4]:
gnews_model = gensim.models.KeyedVectors.load_word2vec_format(PATH_TO_DATASET, binary=True)

In [6]:
class Environment:
    MAX_NUM_PREV_BIN_SCORES = 4
    ALPHA = 0.2
    def __init__(self, mystery_word, clusters):
        """
        Initialize the game environment.

        Parameters:
        - mystery_word: the word to be guessed
        - clusters: a list of clusters for the game state space
        """
        self.mystery_word = mystery_word
        self.clusters = clusters
        self.sub_clusters = secondLevelClusters
        sub_clusters = secondLevelClusters
        self.guessed_words = []
        self.state = [0] * len(clusters)
        self.previous_state = [0] * len(clusters)
        self.sub_state = [[0] * len(sub_clusters[i]) for i in range(len(clusters))]  # New line
        self.previous_sub_state = [[0] * len(sub_clusters[i]) for i in range(len(clusters))]  # New line
        self.previous_binned_scores = [[]] * len(clusters)
        self.previous_sub_binned_scores = [[[] for _ in sub_clusters[i]] for i in range(len(clusters))]  # New line
        #self.weights = calculate_weights(self.MAX_NUM_PREV_BIN_SCORES, self.ALPHA)
        
    def bin_similarity_score(self, similarity_score):
        """
        Assign a similarity score to one of four bins based on its value.

        Parameters:
        - similarity_score: the similarity score between a guessed word and the mystery word

        Returns:
        - the index of the bin for the similarity score
        """
        bins = [0, 15, 25, 50]
        return bisect.bisect_left(bins, similarity_score)
    def add_score_to_history(self, cluster_number, binned_score):
        """
        Add a binned score of a certain cluster number to its previous binned
        scores, and remove the least recent score if the list is too long.
        """
        self.previous_binned_scores[cluster_number].append(binned_score)
        if len(self.previous_binned_scores[cluster_number]) > self.MAX_NUM_PREV_BIN_SCORES:
            self.previous_binned_scores[cluster_number].pop(0)

    def get_state_from_weighted_sum(self, cluster_number):
        """
        Get the weighted score of the previous scores of the specified cluster.
        """
        prev_binned_scores = self.previous_binned_scores[cluster_number].copy()

        weights = self.weights if len(prev_binned_scores) == self.MAX_NUM_PREV_BIN_SCORES \
                    else calculate_weights(len(prev_binned_scores), self.ALPHA)
        
        res_array = [prev_binned_scores[i] * weights[i] \
                     for i in range(len(prev_binned_scores))]
        
        return (int)(round(sum(res_array)))

    def guess_word(self, word, top_action, sub_action):
        """
        Guess a word and update the game state and guessed words accordingly.

        Parameters:
        - word: the word to guess
        - top_action: the index of the top-level cluster for the guessed word
        - sub_action: the index of the sub-level cluster for the guessed word

        Returns:
        - the similarity score between the guessed word and the mystery word
        """
        if word in [gw[0] for gw in self.guessed_words]:
            return -100

        if word == self.mystery_word:
            return 100

        similarity_score = round(100 * gnews_model.similarity(word, self.mystery_word), 6)
        self.guessed_words.append((word, similarity_score))
        self.guessed_words = sorted(self.guessed_words, key=lambda x: x[1], reverse=True)
        binned_score = self.bin_similarity_score(similarity_score)
        self.previous_state = self.state.copy()
        self.previous_sub_state[top_action] = self.sub_state[top_action].copy()  
        self.state[top_action] = binned_score
        self.sub_state[top_action][sub_action] = binned_score
        return similarity_score

    # def get_reward(self, similarity_score, num_guesses, max_guesses, guess):
    #     """
    #     Calculate the reward for the current game state and action.

    #     Parameters:
    #     - similarity_score: the similarity score between the guessed word and the mystery word
    #     - num_guesses: the current number of guesses
    #     - max_guesses: the maximum number of guesses allowed
    #     - guess: the guessed word

    #     Returns:
    #     - the total reward for the current game state and action
    #     """
    #     # Reward for guessing the mystery word correctly
    #     if similarity_score == 100:
    #         return 500

    #     # Penalty for reaching the maximum number of guesses
    #     if num_guesses >= max_guesses - 1:
    #         return -100

    #     # Reward based on the similarity score of the guess
    #     similarity_reward = similarity_score * 2

    #     # Penalty for making a duplicate guess
    #     duplicate_penalty = -10 if guess in [gw[0] for gw in self.guessed_words[:-1]] else 0

    #     # Penalty for taking more than the optimal number of guesses
    #     optimal_guesses = math.ceil(math.log2(len(self.clusters))) + 1
    #     inefficiency_penalty = -10 * (num_guesses - optimal_guesses) if num_guesses > optimal_guesses else 0

    #     # Calculate the total reward
    #     total_reward = similarity_reward + duplicate_penalty + inefficiency_penalty
    #     return total_reward

    def get_reward(self, similarity_score, num_guesses, max_guesses, guess, cluster):
        """
        Calculate the reward for the current game state and action.

        Parameters:
        - similarity_score: the similarity score between the guessed word and the mystery word
        - num_guesses: the current number of guesses
        - max_guesses: the maximum number of guesses allowed
        - guess: the guessed word

        Returns:
        - the total reward for the current game state and action
        """
        # Reward for guessing the mystery word correctly
        if similarity_score == 100:
            return 250

        # Penalty for reaching the maximum number of guesses
        if num_guesses >= max_guesses - 1:
            return -100

        #Reward for picking right cluster
        if self.mystery_word in cluster:
            clusterReward = 200
        else:
            clusterReward = -200

        # Penalty for taking more than the optimal number of guesses
        optimal_guesses = math.ceil(math.log2(len(self.clusters))) + 1
        inefficiency_penalty = -10 * (num_guesses - optimal_guesses) if num_guesses > optimal_guesses else 0

        # Calculate the total reward
        total_reward = clusterReward + inefficiency_penalty
        return total_reward

    def get_state(self):
        """
        Get the current game state.

        Returns:
        - a list representing the current game state
        """
        return self.state
    def get_sub_state(self, clusterNum):
        return self.sub_state[clusterNum]

In [7]:
class QAgent:
    def __init__(self, num_clusters, num_bins, learning_rate, discount_rate, epsilon_decay):
        self.epsilon = 1
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate
        self.epsilon_decay = epsilon_decay
        self.num_bins = num_bins
        self.num_clusters = num_clusters
        self.q_table = self.initialize_q_table()

    def initialize_q_table(self):
        """
        Initialize the Q-table for the agent with all zeros.

        Returns:
        - a dictionary representing the Q-table
        """
        q_table = {}
        for a in range(self.num_bins):
            for b in range(self.num_bins):
                for c in range(self.num_bins):
                    for d in range(self.num_bins):
                        state = (a, b, c, d)
                        q_table[state] = [0] * self.num_clusters
        return q_table
      
    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.epsilon:
            action = np.random.randint(0, self.num_clusters)
        else:
            action = np.argmax(self.q_table[state])
        return action

    def update_q_table(self, state, action, reward, next_state):
        """
        Update the Q-table using the Q-learning algorithm.

        Parameters:
        - state: the current state of the game
        - action: the chosen action for the current state
        - reward: the reward obtained for the current state and action
        - next_state: the next state of the game after taking the chosen action
        """
        current_q = self.q_table[state][action]
        max_next_q = np.max(self.q_table[next_state])
        new_q = current_q + self.learning_rate * (reward + self.discount_rate * max_next_q - current_q)
        self.q_table[state][action] = new_q

    def decay_epsilon(self, game_num):
        self.epsilon = self.epsilon * np.exp(-self.epsilon_decay * game_num)


In [8]:
def weight_init(m):
	if isinstance(m, nn.Linear):
		nn.init.xavier_uniform_(m.weight)
		nn.init.constant_(m.bias, 0.)

In [10]:
def choose_word_from_cluster(cluster, guessed_words, similarity_threshold=30):
    """
    This function chooses a word from a given cluster of unguessed words based on their similarity
    to previously guessed words.

    Parameters:
    - cluster: a list of unguessed words
    - guessed_words: a list of tuples, each containing a guessed word and its similarity score
    - similarity_threshold: a threshold for the similarity score below which the function considers
    the unguessed word not similar enough to the best guessed word

    Returns:
    - the chosen unguessed word from the cluster
    """

    if not guessed_words:
        # If no word has been guessed yet, choose randomly from the cluster
        return random.choice(cluster)

    # Get the best and worst guessed words and their similarity scores
    best_guessed_word, best_similarity_score = guessed_words[0]
    worst_guessed_word, worst_similarity_score = guessed_words[-1]

    # Create a set of the guessed words for faster lookup
    guessed_words_set = set([word for word, _ in guessed_words])

    # Get unguessed words in the cluster
    unguessed_words = [word for word in cluster if word not in guessed_words_set]

    if not unguessed_words:
        # If all words in the cluster have been guessed, choose from all words
        unguessed_words = cluster
    

    # Get word vectors for unguessed words in the cluster
    unguessed_vectors = np.array([gnews_model[word] for word in unguessed_words])

    # Calculate similarity to the best and worst words
    best_word_vec = gnews_model[best_guessed_word]
    worst_word_vec = gnews_model[worst_guessed_word]

    similarity_to_best = 100 * np.dot(unguessed_vectors, best_word_vec) / (np.linalg.norm(unguessed_vectors, axis=1) * np.linalg.norm(best_word_vec))
    similarity_to_worst = 100 * np.dot(unguessed_vectors, worst_word_vec) / (np.linalg.norm(unguessed_vectors, axis=1) * np.linalg.norm(worst_word_vec))

    adjusted_similarity = similarity_to_best - similarity_to_worst

    # Get the index of the word with the highest adjusted similarity
    best_index = np.argmax(adjusted_similarity)
    return unguessed_words[best_index]

In [30]:
def choose_word_from_cluster_rnn(cluster, guessed_words, model, similarity_threshold=30, target_word=None, optimizer=None):
    """
    This function chooses a word from a given cluster of unguessed words based on their similarity
    to previously guessed words.

    Parameters:
    - cluster: a list of unguessed words
    - guessed_words: a list of tuples, each containing a guessed word and its similarity score

    Returns:
    - the chosen unguessed word from the cluster
    """

    if not guessed_words:
        # If no word has been guessed yet, choose randomly from the cluster
        return random.choice(cluster)

    # Get the best and worst guessed words and their similarity scores
    best_guessed_word, best_similarity_score = guessed_words[0]
    worst_guessed_word, worst_similarity_score = guessed_words[-1]

    # Create a set of the guessed words for faster lookup
    guessed_words_set = set([word for word, _ in guessed_words])

    # Get unguessed words in the cluster
    unguessed_words = [word for word in cluster if word not in guessed_words_set]

    if not unguessed_words:
        # If all words in the cluster have been guessed, choose from all words
        unguessed_words = cluster

    # Get word vectors for unguessed words in the cluster
    unguessed_vectors = np.array([gnews_model[word] for word in unguessed_words])

    # Calculate similarity to the best and worst words
    best_word_vec = gnews_model[best_guessed_word]
    worst_word_vec = gnews_model[worst_guessed_word]

    similarity_to_best = 100 * np.dot(unguessed_vectors, best_word_vec) / (np.linalg.norm(unguessed_vectors, axis=1) * np.linalg.norm(best_word_vec))
    similarity_to_worst = 100 * np.dot(unguessed_vectors, worst_word_vec) / (np.linalg.norm(unguessed_vectors, axis=1) * np.linalg.norm(worst_word_vec))

    
    adjusted_similarity = similarity_to_best - similarity_to_worst

    # Get the index of the word with the highest adjusted similarity
    # best_index = np.argmax(adjusted_similarity)
    # return unguessed_words[best_index]
    top_indices = np.argsort(adjusted_similarity)[-5:][::-1]
    top_unguessed_vecs = unguessed_vectors[top_indices]
    top_unguessed_vecs_tensor = torch.tensor(unguessed_vectors[top_indices])

    guessed_vectors = torch.tensor(np.array([gnews_model[word] for word, _ in guessed_words]))
    guessed_scores = torch.tensor(np.array([[score] for _, score in guessed_words]))


    prev_5_guesses = torch.cat((guessed_vectors, guessed_scores), dim=1)[:5]

    # append 0 to the end of each unguessed vector
    A = torch.cat((top_unguessed_vecs_tensor, torch.zeros((top_unguessed_vecs_tensor.size(0), 1))), dim=1)
    B = prev_5_guesses

    # Reshape tensor A to have an additional dimension
    reshaped_A = A.unsqueeze(1)  # Shape: (x, 1, 301)

    # Repeat tensor B along the first dimension to match the number of sublists in A
    repeated_B = B.unsqueeze(0).repeat(reshaped_A.size()[0], 1, 1)  # Shape: (x, y, 301)

    # Concatenate tensors B and A along the second dimension
    model_input = torch.cat((repeated_B, reshaped_A), dim=1).float()  # Shape: (x, y+1, 301)

    model_output = model(model_input)
    best_index = torch.argmax(model_output)
    if target_word is not None:
        target_vec = gnews_model[target_word]
        target_word_similarity = torch.tensor(np.dot(top_unguessed_vecs, target_vec) / (np.linalg.norm(top_unguessed_vecs, axis=1) * np.linalg.norm(target_vec)))
        optimizer.zero_grad()
        loss = model.compute_loss(model_output, target_word_similarity.unsqueeze(1))
        loss.backward()
        optimizer.step()
        return unguessed_words[top_indices[best_index]]

    
    return unguessed_words[top_indices[best_index]]

In [33]:
# RUN THIS CODE BLOCK FOR TESTING WITH GREEDY HEURISTIC

from clusters import clusters
import time

# Configuration
NUM_CLUSTERS = len(clusters)
NUM_SUBCLUSTERS = len(secondLevelClusters[0])
NUM_BINS = 5
LEARNING_RATE = 0.01
DISCOUNT_RATE = 0.90
EPSILON_DECAY = 0.001
NUM_GAMES = 3000
MAX_GUESSES = 50

# Initialize environment and agent
mystery_word = answers[random.randint(0, len(answers) - 1)]
env = Environment(mystery_word, secondLevelClusters)
top_q_agent = QAgent(NUM_CLUSTERS, NUM_BINS, LEARNING_RATE, DISCOUNT_RATE, EPSILON_DECAY)
sub_q_agents = [QAgent(NUM_SUBCLUSTERS, NUM_BINS, LEARNING_RATE, DISCOUNT_RATE, EPSILON_DECAY) for _ in range(NUM_CLUSTERS)]
top_agent_attempts = 0
top_agent_successes = 0
sub_agent_attempts = [0] * NUM_CLUSTERS
sub_agent_successes = [0] * NUM_CLUSTERS
# Initialize game statistics
game_wins = 0
total_moves = 0
similarity_score = 0

game_nums = []
win_percents = []
start_time = time.time()
# Run games
for game_num in range(NUM_GAMES):
    game_start_time = time.time()

    if game_num % 100 == 0:
        print(f"Game Number: {game_num}")
        game_nums.append(game_num)
        if game_wins:
            print(f"Num of games won in games {game_num - 100} through {game_num} is {game_wins} in an average of {total_moves/game_wins} moves with a max of {MAX_GUESSES} moves")
            win_percents.append(game_wins / 100)
            print(f"time per round: {(time.time() - start_time) / game_num}")
            print(f"Cumulative game win percentage: {np.average(win_percents[1:])}")
        else:
            win_percents.append(0)
        game_wins = 0
        total_moves = 0
        top_agent_accuracy = round(top_agent_successes / top_agent_attempts * 100, 2) if top_agent_attempts != 0 else 0
        sub_agent_accuracy = [round(sub_agent_successes[i] / sub_agent_attempts[i] * 100, 2) if sub_agent_attempts[i] != 0 else 0 for i in range(NUM_CLUSTERS)]
        print(f"Top agent accuracy: {top_agent_accuracy}%")
        print(f"Sub-agent accuracies: {sub_agent_accuracy}")
        top_agent_attempts = 0
        top_agent_successes = 0
        sub_agent_attempts = [0] * NUM_CLUSTERS
        sub_agent_successes = [0] * NUM_CLUSTERS
    state = tuple(env.get_state())
    mystery_word = answers[random.randint(0, len(answers) - 1)]
    env.mystery_word = mystery_word

    for guess in range(MAX_GUESSES):
        if similarity_score != 100:
            top_action = top_q_agent.choose_action(state)
            sub_state = tuple(env.get_sub_state(top_action))
            sub_q_agent = sub_q_agents[top_action]
            sub_action = sub_q_agent.choose_action(sub_state)

            word = choose_word_from_cluster(env.clusters[top_action][sub_action], env.guessed_words)
            similarity_score = env.guess_word(word, top_action, sub_action)

            next_state = tuple(env.get_state())
            next_sub_state = tuple(env.get_sub_state(top_action))
            top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, clusters[top_action])
            #top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_q_agent.update_q_table(state, top_action, top_reward, next_state)
            sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, secondLevelClusters[top_action][sub_action])
            #sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            sub_q_agent.update_q_table(sub_state, sub_action, sub_reward, next_sub_state)
            top_agent_attempts += 1
            if mystery_word in clusters[top_action]:
                top_agent_successes += 1
                sub_agent_attempts[top_action] += 1
                if mystery_word in secondLevelClusters[top_action][sub_action]:
                    sub_agent_successes[top_action] += 1
            state = next_state
            sub_state = next_sub_state
        else:
            #top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, clusters[top_action])
            sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, secondLevelClusters[sub_action])
            #sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_q_agent.update_q_table(state, top_action, top_reward, next_state)
            sub_q_agent.update_q_table(sub_state, sub_action, sub_reward, next_sub_state)
            game_wins += 1
            total_moves += guess
            similarity_score = 0
            break
    env.guessed_words = []
    top_q_agent.decay_epsilon(game_num)
    sub_q_agent.decay_epsilon(game_num/4)

Game Number: 0
Top agent accuracy: 0%
Sub-agent accuracies: [0, 0, 0, 0]
Game Number: 100
Num of games won in games 0 through 100 is 60 in an average of 27.783333333333335 moves with a max of 50 moves
time per round: 0.059901490211486816
Cumulative game win percentage: 0.6
Top agent accuracy: 36.51%
Sub-agent accuracies: [29.34, 26.09, 32.12, 25.75]


KeyboardInterrupt: 

In [32]:
# RUN THIS CODE BLOCK FOR TESTING WITH RNN HEURISTIC

from clusters import clusters
import time

# Configuration
NUM_CLUSTERS = len(clusters)
NUM_SUBCLUSTERS = len(secondLevelClusters[0])
NUM_BINS = 5
LEARNING_RATE = 0.01
DISCOUNT_RATE = 0.90
EPSILON_DECAY = 0.001
NUM_GAMES = 3000
MAX_GUESSES = 50

model = heuristicrnn.RNN(301, 200, 1, 2)
model.load_model("heuristic-model-qtrained")

# Initialize environment and agent
mystery_word = answers[random.randint(0, len(answers) - 1)]
env = Environment(mystery_word, secondLevelClusters)
top_q_agent = QAgent(NUM_CLUSTERS, NUM_BINS, LEARNING_RATE, DISCOUNT_RATE, EPSILON_DECAY)
sub_q_agents = [QAgent(NUM_SUBCLUSTERS, NUM_BINS, LEARNING_RATE, DISCOUNT_RATE, EPSILON_DECAY) for _ in range(NUM_CLUSTERS)]
top_agent_attempts = 0
top_agent_successes = 0
sub_agent_attempts = [0] * NUM_CLUSTERS
sub_agent_successes = [0] * NUM_CLUSTERS
# Initialize game statistics
game_wins = 0
total_moves = 0
similarity_score = 0

rnn_win_percents = []
start_time = time.time()
# Run games
for game_num in range(NUM_GAMES):
    game_start_time = time.time()

    if game_num % 100 == 0:
        print(f"Game Number: {game_num}")
        if game_wins:
            print(f"Num of games won in games {game_num - 100} through {game_num} is {game_wins} in an average of {total_moves/game_wins} moves with a max of {MAX_GUESSES} moves")
            rnn_win_percents.append(game_wins / 100)
            print(f"Cumulative time per round: {(time.time() - start_time) / game_num}")
            print(f"Cumulative game win percentage: {np.average(rnn_win_percents[1:])}")
        else:
            rnn_win_percents.append(0)
        game_wins = 0
        total_moves = 0
        top_agent_accuracy = round(top_agent_successes / top_agent_attempts * 100, 2) if top_agent_attempts != 0 else 0
        sub_agent_accuracy = [round(sub_agent_successes[i] / sub_agent_attempts[i] * 100, 2) if sub_agent_attempts[i] != 0 else 0 for i in range(NUM_CLUSTERS)]
        print(f"Top agent accuracy: {top_agent_accuracy}%")
        print(f"Sub-agent accuracies: {sub_agent_accuracy}")
        top_agent_attempts = 0
        top_agent_successes = 0
        sub_agent_attempts = [0] * NUM_CLUSTERS
        sub_agent_successes = [0] * NUM_CLUSTERS
    state = tuple(env.get_state())
    mystery_word = answers[random.randint(0, len(answers) - 1)]
    env.mystery_word = mystery_word

    for guess in range(MAX_GUESSES):
        if similarity_score != 100:
            top_action = top_q_agent.choose_action(state)
            sub_state = tuple(env.get_sub_state(top_action))
            sub_q_agent = sub_q_agents[top_action]
            sub_action = sub_q_agent.choose_action(sub_state)

            word = choose_word_from_cluster_rnn(env.clusters[top_action][sub_action], env.guessed_words, model)
            similarity_score = env.guess_word(word, top_action, sub_action)

            next_state = tuple(env.get_state())
            next_sub_state = tuple(env.get_sub_state(top_action))
            top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, clusters[top_action])
            #top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_q_agent.update_q_table(state, top_action, top_reward, next_state)
            sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, secondLevelClusters[top_action][sub_action])
            #sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            sub_q_agent.update_q_table(sub_state, sub_action, sub_reward, next_sub_state)
            top_agent_attempts += 1
            if mystery_word in clusters[top_action]:
                top_agent_successes += 1
                sub_agent_attempts[top_action] += 1
                if mystery_word in secondLevelClusters[top_action][sub_action]:
                    sub_agent_successes[top_action] += 1
            state = next_state
            sub_state = next_sub_state
        else:
            #top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, clusters[top_action])
            sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, secondLevelClusters[sub_action])
            #sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_q_agent.update_q_table(state, top_action, top_reward, next_state)
            sub_q_agent.update_q_table(sub_state, sub_action, sub_reward, next_sub_state)
            game_wins += 1
            total_moves += guess
            similarity_score = 0
            break
    env.guessed_words = []
    top_q_agent.decay_epsilon(game_num)
    sub_q_agent.decay_epsilon(game_num/4)

Game Number: 0
Top agent accuracy: 0%
Sub-agent accuracies: [0, 0, 0, 0]
Game Number: 100
Num of games won in games 0 through 100 is 45 in an average of 28.822222222222223 moves with a max of 50 moves
Cumulative time per round: 0.22368453979492187
Cumulative game win percentage: 0.45
Top agent accuracy: 38.25%
Sub-agent accuracies: [26.17, 19.56, 27.99, 24.88]


KeyboardInterrupt: 

In [31]:
# RUN THIS CODE BLOCK TO TRAIN RNN DURING Q-LEARNING

from clusters import clusters

# Configuration
NUM_CLUSTERS = len(clusters)
NUM_SUBCLUSTERS = len(secondLevelClusters[0])
NUM_BINS = 5
LEARNING_RATE = 0.01
DISCOUNT_RATE = 0.90
EPSILON_DECAY = 0.001
NUM_GAMES = 3000
MAX_GUESSES = 50


# Initialize environment and agent
mystery_word = answers[random.randint(0, len(answers) - 1)]
env = Environment(mystery_word, secondLevelClusters)
top_q_agent = QAgent(NUM_CLUSTERS, NUM_BINS, LEARNING_RATE, DISCOUNT_RATE, EPSILON_DECAY)
sub_q_agents = [QAgent(NUM_SUBCLUSTERS, NUM_BINS, LEARNING_RATE, DISCOUNT_RATE, EPSILON_DECAY) for _ in range(NUM_CLUSTERS)]
top_agent_attempts = 0
top_agent_successes = 0
sub_agent_attempts = [0] * NUM_CLUSTERS
sub_agent_successes = [0] * NUM_CLUSTERS
# Initialize game statistics
game_wins = 0
total_moves = 0
similarity_score = 0

model = heuristicrnn.RNN(301, 200, 1, 2)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=.01)

# Run games
for game_num in range(NUM_GAMES):
    model.train()

    if game_num % 100 == 0:
        print(f"Game Number: {game_num}")
        if game_wins:
            print(f"Num of games won in games {game_num - 100} through {game_num} is {game_wins} in an average of {total_moves/game_wins} moves with a max of {MAX_GUESSES} moves")
            print(f"time per round: {(time.time() - start_time) / game_num}")

        game_wins = 0
        total_moves = 0
        top_agent_accuracy = round(top_agent_successes / top_agent_attempts * 100, 2) if top_agent_attempts != 0 else 0
        sub_agent_accuracy = [round(sub_agent_successes[i] / sub_agent_attempts[i] * 100, 2) if sub_agent_attempts[i] != 0 else 0 for i in range(NUM_CLUSTERS)]
        print(f"Top agent accuracy: {top_agent_accuracy}%")
        print(f"Sub-agent accuracies: {sub_agent_accuracy}")
        top_agent_attempts = 0
        top_agent_successes = 0
        sub_agent_attempts = [0] * NUM_CLUSTERS
        sub_agent_successes = [0] * NUM_CLUSTERS
    state = tuple(env.get_state())
    mystery_word = answers[random.randint(0, len(answers) - 1)]
    env.mystery_word = mystery_word

    for guess in range(MAX_GUESSES):
        if similarity_score != 100:
            top_action = top_q_agent.choose_action(state)
            sub_state = tuple(env.get_sub_state(top_action))
            sub_q_agent = sub_q_agents[top_action]
            sub_action = sub_q_agent.choose_action(sub_state)

            word = choose_word_from_cluster_rnn(env.clusters[top_action][sub_action], env.guessed_words, model, optimizer=optimizer, target_word=mystery_word)
            similarity_score = env.guess_word(word, top_action, sub_action)

            next_state = tuple(env.get_state())
            next_sub_state = tuple(env.get_sub_state(top_action))
            top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, clusters[top_action])
            #top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_q_agent.update_q_table(state, top_action, top_reward, next_state)
            sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, secondLevelClusters[top_action][sub_action])
            #sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            sub_q_agent.update_q_table(sub_state, sub_action, sub_reward, next_sub_state)
            top_agent_attempts += 1
            if mystery_word in clusters[top_action]:
                top_agent_successes += 1
                sub_agent_attempts[top_action] += 1
                if mystery_word in secondLevelClusters[top_action][sub_action]:
                    sub_agent_successes[top_action] += 1
            state = next_state
            sub_state = next_sub_state
        else:
            #top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, clusters[top_action])
            sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word, secondLevelClusters[sub_action])
            #sub_reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            top_q_agent.update_q_table(state, top_action, top_reward, next_state)
            sub_q_agent.update_q_table(sub_state, sub_action, sub_reward, next_sub_state)
            game_wins += 1
            total_moves += guess
            similarity_score = 0
            break


    env.guessed_words = []
    top_q_agent.decay_epsilon(game_num)
    sub_q_agent.decay_epsilon(game_num/4)

Game Number: 0
Top agent accuracy: 0%
Sub-agent accuracies: [0, 0, 0, 0]
Game Number: 100
Num of games won in games 0 through 100 is 46 in an average of 30.97826086956522 moves with a max of 50 moves
time per round: 2.474470360279083
Top agent accuracy: 38.55%
Sub-agent accuracies: [24.85, 24.56, 30.23, 24.75]


KeyboardInterrupt: 

In [19]:
# Save model after training it
model.save_model("heuristic-model-qtrained")

In [20]:
import matplotlib.pyplot as plt

# Plotting the graph
plt.plot(game_nums, win_percents, marker='o', label="Greedy Word Selection")
plt.plot(game_nums, rnn_win_percents, marker='o', label="Neural Network Word Selection")

# Adding labels and title
plt.xlabel('Game Number')
plt.ylabel('Win Percentage')
plt.title('Win Percentage by Game')

plt.ylim(0, 1)

plt.legend()

# Displaying the graph
plt.show()

In [None]:
from clusters import clusters
import time
# Configuration — .01 .90 .001 best parameters so far
NUM_CLUSTERS = len(clusters)
NUM_SUBCLUSTERS = len(secondLevelClusters[0])
NUM_BINS = 5
LEARNING_RATE = 0.01
DISCOUNT_RATE = 0.90
EPSILON_DECAY = 0.001
NUM_GAMES = 10000
MAX_GUESSES = 35

# Initialize environment and agent
mystery_word = answers[random.randint(0, len(answers) - 1)]
env = Environment(mystery_word, clusters)
agents = [QAgent(NUM_SUBCLUSTERS, NUM_BINS, LEARNING_RATE, DISCOUNT_RATE, EPSILON_DECAY) for _ in range(NUM_CLUSTERS)]

# Initialize game statistics
game_wins = 0
total_moves = 0
similarity_score = 0

cumulative_game_wins = 0

optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=.01)
start_time = time.time()

# Run games
for game_num in range(NUM_GAMES):
    total_loss = 0

    if game_num % 100 == 0:
        if game_wins:
            print(f"Num of games won in games {game_num - 100} through {game_num} is {game_wins} in an average of {total_moves/game_wins} moves with a max of {MAX_GUESSES} moves")
            cumulative_game_wins += game_wins
            print(f"Total number of games won through {game_num} is {cumulative_game_wins}")
            print(f"Average time per game: {(time.time() - start_time) / game_num}")


        game_wins = 0
        total_moves = 0


    state = tuple(env.get_state())
    mystery_word = answers[random.randint(0, len(answers) - 1)]
    env.mystery_word = mystery_word

    # Vectors for each expected/model output for loss calculation
    game_expected_out = torch.tensor([], requires_grad=True)
    game_model_output = torch.tensor([], requires_grad=True)

    for guess in range(MAX_GUESSES):
        game_loss = 0
        if similarity_score != 100:
            action = q_agent.choose_action(state)
            word, expected_out, model_output = choose_word_from_cluster_rnn(env.clusters[action], env.guessed_words, target_word=mystery_word)
            game_expected_out = torch.cat((game_expected_out, expected_out), dim=0)
            game_model_output = torch.cat((game_model_output, model_output), dim=0)

            similarity_score = env.guess_word(word, action)

            next_state = tuple(env.get_state())
            reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            q_agent.update_q_table(state, action, reward, next_state)
            state = next_state
            game_loss += loss
            total_loss += loss
        else:
            reward = env.get_reward(similarity_score, guess, MAX_GUESSES, word)
            q_agent.update_q_table(state, action, reward, next_state)
            game_wins += 1
            total_moves += guess
            similarity_score = 0
            break

    optimizer.zero_grad()
    loss = model.compute_loss(game_expected_out, game_model_output)
    loss.backward()
    optimizer.step()



    env.guessed_words = []
    q_agent.decay_epsilon(game_num)

if game_wins:
    print(f"Num of games won in games {game_num - 10000} through {game_num} is {game_wins} in an average of {total_moves/game_wins} moves with a max of {MAX_GUESSES} moves")

4


NameError: name 'q_agent' is not defined