In [12]:
from wordle_env import WordleEnv, WordleTokens
from stable_baselines3 import PPO
from gymnasium.wrappers import RecordEpisodeStatistics
from stable_baselines3.common.logger import configure
from datetime import datetime

log_dir = "runs/ppo"
new_logger = configure(folder=log_dir, format_strings=["stdout", "csv", "tensorboard"])

env = RecordEpisodeStatistics(
    WordleEnv(calendar=None, eval_mode=False),  # training
)

model = PPO(
    "MlpPolicy", 
    env, 
    tensorboard_log=log_dir,
    device='cuda', 
    verbose=1, 
    policy_kwargs=dict(
        features_extractor_class=WordleTokens,
        net_arch=[256,256]
    )
)

model.set_logger(new_logger)
model.learn(total_timesteps=1_000_000, tb_log_name=f"ppo_{datetime.now()}")

AssertionError: tensorboard is not installed, you can use `pip install tensorboard` to do so

In [10]:
eval_env = WordleEnv(target='acorn', eval_mode=True)
obs, _ = eval_env.reset()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, truncated, info = eval_env.step(action)

In [11]:
import string
symbols = { -1: "·", 0: "⬜", 1: "🟨", 2: "🟩" }
IDX2LETTER = {i: c for i, c in enumerate(string.ascii_uppercase)}
output = ""
for i in obs:
    word = ""
    tiles = ""
    for j in i:
        if j[1] == -1:
            break
        word += IDX2LETTER[j[0]]
        tiles += symbols[j[1]]
    output += f"{word} {tiles}\n" if tiles != "" else ""
print(output)

ABODE 🟩⬜🟩⬜⬜
ABASE 🟩⬜⬜⬜⬜
AGONY 🟩⬜🟩🟨⬜
ACORN 🟩🟩🟩🟩🟩

