In [1]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import gensim.downloader as api
from gensim.models.word2vec import Word2Vec
from gensim.models import KeyedVectors
from nltk import FreqDist, download
from nltk.corpus import brown
import nltk
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import kmeans_plusplus
import numpy as np
import scipy

In [2]:
# Fetch the Brown corpus (nltk reports it as up-to-date when already present).
download('brown')

# Frequency table over tokens whose cased characters are all lowercase.
# Tokens that carry punctuation (e.g. possessives) still pass `islower()`.
frequency_list = FreqDist(filter(str.islower, brown.words()))

# Guessable vocabulary: the 10,000 most frequent such tokens, most common first.
all_words = [entry[0] for entry in frequency_list.most_common(10000)]

[nltk_data] Downloading package brown to
[nltk_data]     /Users/francisfurnelli/nltk_data...
[nltk_data]   Package brown is already up-to-date!


In [None]:
""" This class is a Gymnasium Environment for Semantle, similar to Thomas' MDP """
import scipy.special


class SemantleEnv(gym.Env):
    """Gymnasium environment for a Semantle-style word-guessing game.

    Every episode hides one target word. The agent does not pick words
    directly; it picks one of ``N_ACTIONS`` guessing strategies:

    * action ``0`` (explore): guess a uniformly random unguessed word.
    * action ``a`` in ``1..10`` (exploit): guess a random word among the
      ``2 ** (10 - a)`` nearest unguessed neighbours of the best guess so far
      (or of a random vector when nothing has been guessed yet).

    The observation is a dict with ``'max_score'`` (best similarity so far,
    ``-1`` before the first guess) and ``'number_of_guesses'``; both are
    float32 arrays of shape ``(1,)``. Embedding-valued observation keys that
    were experimented with are currently disabled.

    The reward is ``(10 * similarity) ** 3`` on the winning guess and ``0``
    otherwise. An episode terminates when the similarity to the target
    reaches ``WIN_SIMILARITY`` and is truncated after ``MAX_GUESSES`` guesses.

    All randomness is drawn from ``self.np_random`` so that
    ``reset(seed=...)`` makes episodes reproducible.

    Args:
        target_words: Iterable of candidate target words. Words unknown to
            the embedding model are dropped.
        embedding_model: Object supporting ``word in model``, ``model[word]``,
            ``model[list_of_words]`` and ``model.similarity(a, b)``. Defaults
            to gensim's ``'word2vec-google-news-300'``.
        target_word: Optional initial target; ignored unless it is one of the
            usable target words. Note that ``reset`` picks a new target.
        vocabulary: Optional iterable of guessable words. Defaults to the
            notebook-level ``all_words`` list.
        verbose: When true (default), print targets, guesses and rewards.

    Raises:
        ValueError: If no target word is known to the embedding model.
    """

    N_ACTIONS = 11          # action 0 explores, actions 1..10 exploit
    MAX_GUESSES = 200       # truncate the episode after this many guesses
    WIN_SIMILARITY = 0.99   # similarity at or above this counts as solved

    def __init__(self, target_words, embedding_model=None, target_word=None,
                 vocabulary=None, verbose=True):
        super().__init__()
        if embedding_model is None:
            embedding_model = api.load('word2vec-google-news-300')
        self.embedding_model = embedding_model
        self.verbose = verbose

        # A target the model cannot embed would raise KeyError mid-training,
        # so filter those out once, up front.
        self.target_words = [word for word in target_words
                             if word in self.embedding_model]
        if not self.target_words:
            raise ValueError(
                "target_words contains no word known to the embedding model")

        # Filter the guessable vocabulary against the model once; every
        # episode starts from a fresh copy of this list.
        source_vocabulary = all_words if vocabulary is None else vocabulary
        self._vocabulary = [word for word in dict.fromkeys(source_vocabulary)
                            if word in self.embedding_model]
        self.word_list = list(self._vocabulary)

        if target_word is None or target_word not in self.target_words:
            target_word = self.sample_target_word()
        self.target_word = target_word
        self.target_vector = self.get_embedding(target_word)

        self.action_space = spaces.Discrete(self.N_ACTIONS)
        self.observation_space = spaces.Dict({
            'max_score': spaces.Box(low=-1., high=1., shape=(1,),
                                    dtype=np.float32),
            'number_of_guesses': spaces.Box(low=0, high=300, shape=(1,),
                                            dtype=np.float32),
        })

        self.knn = NearestNeighbors(n_neighbors=1, algorithm='auto')
        self.knn.fit(self.embedding_model[self.word_list])

        self.state = self._make_observation(-1.0, 0)
        self.guess_history = []   # (word, similarity) for every guess so far
        self.best = None          # (word, similarity) of the best guess so far

    def _make_observation(self, max_score, n_guesses):
        """Build an observation dict whose dtypes match ``observation_space``."""
        return {
            'max_score': np.array([max_score], dtype=np.float32),
            'number_of_guesses': np.array([n_guesses], dtype=np.float32),
        }

    def _choice(self, sequence):
        """Return a uniformly random element using the env's seeded RNG."""
        return sequence[int(self.np_random.integers(len(sequence)))]

    def get_embedding(self, word):
        """Return the embedding vector of ``word``."""
        return self.embedding_model[word]

    def sample_target_word(self):
        """Return a uniformly random word from the usable target words."""
        return self._choice(self.target_words)

    def compute_similarity(self, vec1, vec2):
        """Return the cosine similarity between two vectors."""
        return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

    def reset(self, seed=None, options=None):
        """Start a new episode with a fresh target and full vocabulary.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``; it reseeds
                ``self.np_random``, which drives every random choice here.
            options: Optional dict. ``options['target_word']`` pins the
                target for this episode when it is a usable target word;
                otherwise a target is sampled.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)
        requested = (options or {}).get('target_word')
        if requested is not None and requested in self.target_words:
            self.target_word = requested
        else:
            self.target_word = self.sample_target_word()
        self.target_vector = self.get_embedding(self.target_word)

        self.guess_history = []
        self.best = None
        self.word_list = list(self._vocabulary)

        if self.verbose:
            print("\n======================")
            print(self.target_word)

        self.state = self._make_observation(-1.0, 0)
        return self.state, {}

    def _exploit_guess(self, action):
        """Pick a random word among the nearest neighbours of the best guess."""
        if self.best is not None:
            anchor = self.get_embedding(self.best[0])
        else:
            # Nothing guessed yet: search around a random point in [-1, 1)^d.
            anchor = (self.np_random.random(self.target_vector.shape) - 0.5) * 2

        # Guessed words are removed from word_list, so the index has to be
        # rebuilt on the remaining words before it is queried.
        self.knn.fit(self.embedding_model[self.word_list])
        n_neighbors = min(2 ** (self.N_ACTIONS - 1 - action), len(self.word_list))
        indices = self.knn.kneighbors(anchor.reshape(1, -1),
                                      n_neighbors=n_neighbors,
                                      return_distance=False)
        return self.word_list[int(self._choice(indices[0]))]

    def step(self, action):
        """Make one guess using the strategy selected by ``action``.

        Args:
            action: Integer in ``[0, N_ACTIONS)``; ``0`` explores, higher
                values exploit with a progressively narrower neighbourhood.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.

        Raises:
            ValueError: If ``action`` is outside the action space.
        """
        action = int(np.asarray(action).item())
        if not 0 <= action < self.N_ACTIONS:
            raise ValueError(
                f"action must be in [0, {self.N_ACTIONS}), got {action}")
        if self.verbose:
            print(f"Explore {action}" if action == 0 else f"Exploit {action}")

        if action == 0:
            guess = self._choice(self.word_list)
        else:
            guess = self._exploit_guess(action)

        similarity = self.embedding_model.similarity(guess, self.target_word)
        if self.verbose:
            print(f'guess: {guess}, sim: {similarity}')

        self.guess_history.append((guess, similarity))
        if self.best is None or similarity > self.best[1]:
            self.best = (guess, similarity)
        self.word_list.remove(guess)   # a word can be guessed once per episode

        n_guesses = len(self.guess_history)
        self.state = self._make_observation(self.best[1], n_guesses)

        terminated = bool(similarity >= self.WIN_SIMILARITY)
        # Only report truncation when the episode was not actually solved.
        truncated = (not terminated) and (
            n_guesses >= self.MAX_GUESSES or not self.word_list)

        # Sparse reward: paid only on the winning guess.
        reward = float((similarity * 10) ** 3) if terminated else 0.0
        if self.verbose:
            print(reward)

        return self.state, reward, terminated, truncated, {}

    def render(self):
        """Print the five most recent ``(word, similarity)`` guesses, if any."""
        if self.guess_history:
            print(f"Guesses: {self.guess_history[-5:]}")

    def close(self):
        """Nothing to release."""
        pass

In [4]:
import pandas
# Pool of target words: the 'answer' column of the Semantle answers file.
df = pandas.read_csv("semantle.csv")
target_words = df['answer'].tolist()

# Pre-trained 'word2vec-google-news-300' vectors, loaded once here so the
# environment does not have to load them itself.
embedding_model = api.load('word2vec-google-news-300')

In [84]:
from stable_baselines3 import PPO, DQN, A2C, DDPG, TD3, SAC
from stable_baselines3.common.noise import NormalActionNoise
# Environment built from the shared target list and the pre-loaded embeddings.
env = SemantleEnv(target_words=target_words, embedding_model=embedding_model)

# PPO with the dict-observation policy. gamma=0.9 is the current setting;
# gamma=0.99 was the previously tried alternative.
model = PPO(
    policy="MultiInputPolicy",
    env=env,
    verbose=1,
    gamma=0.9,
)
model.learn(total_timesteps=10000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

shut
Exploit 8
guess: consumers, sim: 0.03743299841880798
0
Exploit 3
guess: nevertheless, sim: 0.007771952077746391
0
Explore 0
guess: aerator, sim: 0.027457095682621002
0
Exploit 2
guess: commercial, sim: 0.059483036398887634
0
Exploit 2
guess: meant, sim: 0.07909299433231354
0
Exploit 10
guess: intended, sim: -0.02407711185514927
0
Exploit 3
guess: years', sim: 0.03247057646512985
0
Explore 0
guess: ties, sim: 0.050247278064489365
0
Exploit 6
guess: means, sim: 0.05104690045118332
0
Exploit 8
guess: supposed, sim: 0.14632992446422577
0
Exploit 1
guess: certain, sim: 0.026086170226335526
0
Exploit 4
guess: then, sim: 0.1034623309969902
0
Exploit 8
guess: not, sim: 0.18968282639980316
0
Exploit 8
guess: anyway, sim: 0.17973627150058746
0
Exploit 4
guess: this, sim: 0.11681153625249863
0
Exploit 2
guess: people's, sim: 0.0686803013086319
0
Explore 0
guess: humans, sim: -0.005973674822598696
0

<stable_baselines3.ppo.ppo.PPO at 0x1d4442810>

In [85]:
# Roll the trained policy forward for a fixed number of environment steps and
# record how many guesses each finished episode needed.
EVAL_STEPS = 1000

results = []   # number of guesses used in each finished episode
wins = 0       # episodes that ended by reaching the target, not by the guess limit
obs, info = env.reset()
print(f"Target: {env.target_word}")
for _ in range(EVAL_STEPS):
    action, _states = model.predict(obs, deterministic=False)
    obs, reward, terminated, truncated, info = env.step(action)
    # `env` is the raw environment here, so it must be reset manually.
    if terminated or truncated:
        env.render()
        print(f'Guesses {len(env.guess_history)}')
        results.append(len(env.guess_history))
        # Count wins from the termination flag: an episode solved on the very
        # last allowed guess has the same length as a truncated one.
        wins += int(bool(terminated))
        obs, info = env.reset()
        print("\n==============")
        print(f"Target: {env.target_word}")
if results:
    print(f"Average episode len: {sum(results)/len(results)}")
else:
    print("No episode finished during evaluation")
print(f"Wins: {wins} out of {len(results)}")
env.close()


individual
Target: individual
Exploit 8
guess: organic, sim: 0.08997227996587753
0
Exploit 1
guess: larger, sim: 0.2640749514102936
0
Exploit 5
guess: more, sim: 0.17309418320655823
0
Exploit 1
guess: regular, sim: 0.20730353891849518
0
Exploit 7
guess: smaller, sim: 0.26476043462753296
0
Exploit 4
guess: because, sim: 0.1539265513420105
0
Exploit 3
guess: presumably, sim: 0.1405324786901474
0
Exploit 7
guess: small, sim: 0.292633980512619
0
Explore 0
guess: guiding, sim: 0.16850116848945618
0
Exploit 4
guess: handful, sim: 0.22131435573101044
0
Exploit 10
guess: large, sim: 0.26140064001083374
0
Exploit 4
guess: kid's, sim: 0.02465800568461418
0
Exploit 10
guess: tiny, sim: 0.12595602869987488
0
Exploit 7
guess: child's, sim: 0.14355559647083282
0
Exploit 9
guess: the, sim: 0.21763989329338074
0
Exploit 8
guess: in, sim: 0.11233267188072205
0
Exploit 7
guess: tetrachloride, sim: 0.006650602910667658
0
Exploit 9
guess: only, sim: 0.23672492802143097
0
Exploit 7
guess: doctor's, sim: 0