In [1]:
import bisect
import concurrent.futures
import itertools
import math
import random
from multiprocessing import Manager

import gensim
import numpy as np

from answers import secretWords as answers
from constants import PATH_TO_DATASET
gnews_model = gensim.models.KeyedVectors.load_word2vec_format(PATH_TO_DATASET, binary=True)


In [2]:
class environment:
    """Word-guessing environment with one binned score slot per cluster.

    ``state`` is a list holding, for every cluster, the bin index of the most
    recent similarity score obtained by guessing a word from that cluster.
    ``previous_state`` is a snapshot of ``state`` taken just before the latest
    non-winning guess was recorded.
    """

    def __init__(self, mystery_word, clusters):
        """Remember the target word and start every cluster slot at bin 0."""
        slot_count = len(clusters)
        self.mystery_word = mystery_word
        self.clusters = clusters
        # (word, score) pairs, kept ordered from highest to lowest score.
        self.guessed_words = []
        self.state = [0] * slot_count
        self.previous_state = [0] * slot_count

    def binSimilarityScore(self, similarityScore):
        """Return the bin index (0..4) for a similarity score.

        The bin edges are 0, 10, 25 and 50. Because a left bisection is used,
        a score exactly equal to an edge lands in the lower bin.
        """
        edges = (0, 10, 25, 50)
        return bisect.bisect_left(edges, similarityScore)

    def guess_word(self, word, clusterNumber):
        """Score ``word`` against the mystery word and record the outcome.

        Returns 100 immediately (recording nothing) when the guess is the
        mystery word. Otherwise returns the model similarity scaled by 100 and
        rounded to six decimals, after storing the guess and writing its bin
        into the slot ``clusterNumber`` of the state.
        """
        if word == self.mystery_word:
            return 100

        raw_similarity = gnews_model.similarity(word, self.mystery_word)
        score = round(100 * raw_similarity, 6)

        # Keep the guess history ordered best-first.
        self.guessed_words.append((word, score))
        self.guessed_words = sorted(
            self.guessed_words, key=lambda pair: pair[1], reverse=True
        )

        # Snapshot the state before overwriting this cluster's slot.
        self.previous_state = self.state.copy()
        self.state[clusterNumber] = self.binSimilarityScore(score)
        return score

    def get_reward(self, similarity_score, num_guesses, max_guesses, guess):
        """Compute the reward for the latest guess.

        * 500 when ``similarity_score`` is 100 (the word was found).
        * -100 when ``num_guesses`` has reached ``max_guesses - 1``.
        * Otherwise: 10 per state point, minus 10 per guess made so far, plus
          25 per point gained over the previous state (only when positive).

        ``guess`` is accepted but does not influence the result.
        """
        if similarity_score == 100:
            return 500
        if num_guesses >= max_guesses - 1:
            return -100

        state_total = sum(self.state)
        gain = state_total - sum(self.previous_state)

        reward = 10 * state_total - 10 * num_guesses
        if gain > 0:
            reward += 25 * gain
        return reward

    def get_state(self):
        """Return the live state list (not a copy)."""
        return self.state


In [3]:
class QAgent():
    """Tabular epsilon-greedy Q-learning agent whose actions are cluster indices.

    The Q-table maps a state (a hashable key, e.g. a tuple of bin indices) to
    a list with one action value per cluster.
    """

    def __init__(self, num_clusters, num_bins, learning_rate, discount_rate, epsilon_decay, shared_q_table=None):
        """Configure the agent.

        Args:
            num_clusters: number of available actions (one per cluster).
            num_bins: number of score bins per state slot (stored only).
            learning_rate: step size used in the Q-value update.
            discount_rate: discount applied to the best next-state value.
            epsilon_decay: exponent rate used by ``decayEpsilon``.
            shared_q_table: optional dict-like table to read and write. When
                omitted, the agent starts with an empty private table whose
                rows are created on first access.
        """
        self.num_clusters = num_clusters
        self.num_bins = num_bins
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate
        # Almost purely greedy until decayEpsilon() is called for the first time.
        self.epsilon = 0.0001
        self.epsilon_decay = epsilon_decay
        # `is None` (not truthiness) so an explicitly passed empty table is kept.
        self.q_table = {} if shared_q_table is None else shared_q_table

    def _q_row(self, state):
        """Return the action-value row for ``state``, creating a zero row if absent."""
        try:
            return self.q_table[state]
        except KeyError:
            row = [0] * self.num_clusters
            self.q_table[state] = row
            return row

    def choose_action(self, state):
        """Pick a cluster index: random with probability epsilon, else greedy.

        Always returns a plain ``int``.
        """
        if random.random() < self.epsilon:
            return random.randint(0, self.num_clusters - 1)
        return int(np.argmax(self._q_row(state)))

    def update(self, state, action, reward, next_state):
        """Apply the one-step Q-learning update for (state, action)."""
        row = self._q_row(state)
        current_q = row[action]
        max_next_q = np.max(self._q_row(next_state))
        new_q = current_q + self.learning_rate * (reward + self.discount_rate * max_next_q - current_q)
        row[action] = new_q
        # Write the row back: a no-op for a plain dict, but needed for
        # dict-like proxies that hand out copies of their values.
        self.q_table[state] = row

    def decayEpsilon(self, game_number):
        """Set epsilon to 0.5 at game 0, decaying exponentially towards 0.01."""
        self.epsilon = 0.01 + (0.5 - 0.01) * math.exp(-self.epsilon_decay * game_number)


In [4]:
def choose_word_from_cluster(cluster, guessed_words, similarity_threshold=30):
    """Pick the next word to guess from ``cluster``.

    With no guesses so far, a uniformly random word of the cluster is
    returned. Otherwise every not-yet-guessed word of the cluster (or the
    whole cluster when all of it has been guessed) is ranked by cosine
    similarity to the first and last entries of ``guessed_words``:

    * first entry's score below ``similarity_threshold``: rank by the mean of
      the similarities to both anchor words;
    * otherwise: rank by similarity to the first entry minus similarity to
      the last entry.

    ``guessed_words`` is a sequence of (word, score) pairs; the caller is
    expected to keep it ordered best-first.
    """
    if not guessed_words:
        pick = random.randint(0, len(cluster) - 1)
        return cluster[pick]

    anchor_best = guessed_words[0][0]
    anchor_worst = guessed_words[-1][0]
    top_score = guessed_words[0][1]

    already_guessed = {entry[0] for entry in guessed_words}

    # Fall back to the full cluster once every word in it has been tried.
    candidates = [w for w in cluster if w not in already_guessed] or cluster

    candidate_matrix = np.array([gnews_model[w] for w in candidates])
    candidate_norms = np.linalg.norm(candidate_matrix, axis=1)

    best_vec = gnews_model[anchor_best]
    worst_vec = gnews_model[anchor_worst]

    def _cosine_to(vec):
        # Cosine similarity of every candidate row against one anchor vector.
        return np.dot(candidate_matrix, vec) / (candidate_norms * np.linalg.norm(vec))

    toward_best = _cosine_to(best_vec)
    toward_worst = _cosine_to(worst_vec)

    if top_score < similarity_threshold:
        ranking = (toward_best + toward_worst) / 2
    else:
        ranking = toward_best - toward_worst

    return candidates[np.argmax(ranking)]


In [5]:
from clusters import clusters
from runSingle import run_single_game

# --- Hyper-parameters and run configuration ---------------------------------
num_clusters = len(clusters)
num_bins = 5
learning_rate = 0.01
discount_rate = 0.90
epsilon_decay = 0.001
similarity_score = 0

num_games = 20000
max_guesses = 35
game_wins = 0
total_moves = 0

# One zero-initialised row of action values for every possible state, i.e.
# every tuple of `num_clusters` bin indices. Sized from num_clusters rather
# than a fixed nesting depth so the keys always match the environment's state.
shared_q_table = {
    state: [0] * num_clusters
    for state in itertools.product(range(num_bins), repeat=num_clusters)
}

num_parallel_games = 8  # Set to the number of cores of CPU
q_agent = QAgent(num_clusters, num_bins, learning_rate, discount_rate, epsilon_decay, shared_q_table)
# NOTE(review): this rebinds shared_q_table to a fresh all-zero dict, so the
# table held by q_agent and the one passed to run_single_game are different
# objects. Kept as-is because run_single_game is not visible here - confirm
# which table it is meant to use.
shared_q_table = {key: [0] * num_clusters for key in shared_q_table}  # Convert the shared_q_table to a regular dictionary

# NOTE(review): ProcessPoolExecutor pickles the arguments of every submitted
# call, so workers operate on copies of q_agent and shared_q_table and only
# (game_win, total_move) is returned. Confirm how learned Q-values are meant
# to reach this process.
with concurrent.futures.ProcessPoolExecutor(max_workers=num_parallel_games) as executor:
    for num_game in range(0, num_games, num_parallel_games):
        print(num_game)

        # Play one batch of independent games in parallel.
        mystery_words = [random.choice(answers) for _ in range(num_parallel_games)]
        futures = [
            executor.submit(run_single_game, word, q_agent, max_guesses, shared_q_table)
            for word in mystery_words
        ]

        game_wins = 0
        total_moves = 0
        for future in concurrent.futures.as_completed(futures):
            game_win, total_move = future.result()
            game_wins += game_win
            total_moves += total_move

        # Report right after the batch finishes so the last batch is included;
        # skipped when nothing was won to avoid dividing by zero.
        if game_wins != 0:
            print("Num of games won in games " + str(num_game) + " through " + str(num_game + num_parallel_games) + " is " + str(game_wins) + " in an average of " + str(total_moves/game_wins) + " moves with a max of " + str(max_guesses) + " moves")


0


from clusters import clusters

# --- Hyper-parameters and run configuration ---------------------------------
num_clusters = len(clusters)
num_bins = 5
learning_rate = 0.01
discount_rate = 0.90
epsilon_decay = 0.0025
similarity_score = 0

num_games = 20000
max_guesses = 35
report_every = 10000  # print a win summary after this many games
game_wins = 0
total_moves = 0

# One zero-initialised row of action values for every possible state (every
# tuple of `num_clusters` bin indices).
q_table = {
    state: [0] * num_clusters
    for state in itertools.product(range(num_bins), repeat=num_clusters)
}
q_agent = QAgent(num_clusters, num_bins, learning_rate, discount_rate, epsilon_decay, q_table)

for num_game in range(num_games):
    # Fresh environment per game: new target word, zeroed state, no guesses.
    mystery_word = random.choice(answers)
    env = environment(mystery_word, clusters)
    state = tuple(env.get_state())

    for i in range(max_guesses):
        action = q_agent.choose_action(state)
        word = choose_word_from_cluster(env.clusters[action], env.guessed_words)
        similarity_score = env.guess_word(word, action)
        next_state = tuple(env.get_state())
        reward = env.get_reward(similarity_score, i, max_guesses, word)
        q_agent.update(state, action, reward, next_state)
        state = next_state

        # Detect the win on the guess that produced it, so a win on the very
        # last allowed guess is counted too.
        if similarity_score == 100:
            game_wins += 1
            total_moves += i + 1  # number of guesses used, including this one
            break

    # Exploration decays once per game, not once per guess.
    q_agent.decayEpsilon(num_game)

    games_played = num_game + 1
    if games_played % report_every == 0:
        # Skipped when nothing was won to avoid dividing by zero.
        if game_wins != 0:
            print("Num of games won in games " + str(games_played - report_every) + " through " + str(games_played) + " is " + str(game_wins) + " in an average of " + str(total_moves/game_wins) + " moves with a max of " + str(max_guesses) + " moves")
        game_wins = 0
        total_moves = 0

        



# Grid search over the Q-learning hyper-parameters. Relies on `clusters`,
# `num_clusters` and `num_bins` being defined by an earlier cell.
learning_rates = [0.03, 0.05, 0.1, 0.12, 0.15, 0.18]
discount_rates = [0.85, 0.9, 0.95, 0.99]
epsilon_decays = [0.08, 0.1, 0.12, 0.14, 0.16]

num_games = 20000
max_guesses = 50

for learning_rate, discount_rate, epsilon_decay in itertools.product(
    learning_rates, discount_rates, epsilon_decays
):
    # Every parameter combination starts from its own all-zero Q-table with
    # one row per possible state (tuple of `num_clusters` bin indices).
    q_table = {
        state: [0] * num_clusters
        for state in itertools.product(range(num_bins), repeat=num_clusters)
    }
    q_agent = QAgent(num_clusters, num_bins, learning_rate, discount_rate, epsilon_decay, q_table)
    game_wins = 0
    total_moves = 0

    for num_game in range(num_games):
        # Fresh environment per game so the new mystery word is actually used
        # and neither state nor guess history leaks from the previous game.
        mystery_word = random.choice(answers)
        env = environment(mystery_word, clusters)
        state = tuple(env.get_state())

        for i in range(max_guesses):
            action = q_agent.choose_action(state)
            word = choose_word_from_cluster(env.clusters[action], env.guessed_words)
            similarity_score = env.guess_word(word, action)
            next_state = tuple(env.get_state())
            # Log-scaled reward; scores at or below 0.01 are clamped so the
            # logarithm stays defined.
            reward = math.log(max(0.01, similarity_score))
            q_agent.update(state, action, reward, next_state)
            state = next_state

            # Count the win on the guess that produced it, so a win on the
            # last allowed guess is not lost.
            if similarity_score == 100:
                game_wins += 1
                total_moves += i + 1  # number of guesses used, including this one
                break

        # Exploration decays once per game, not once per guess.
        q_agent.decayEpsilon(num_game)

    print("Parameters: learning_rate={}, discount_rate={}, epsilon_decay={}".format(learning_rate, discount_rate, epsilon_decay))
    print("Num of games won: {}, average moves per win: {}, max moves: {}".format(game_wins, total_moves / game_wins if game_wins > 0 else 0, max_guesses))
    print()
