# Jakob's Handy Dandy Development Notebook

In [None]:
# dataset prep
# !wget https://gist.githubusercontent.com/dracos/dd0668f281e685bad51479e5acaadb93/raw/6bfa15d263d6d5b63840a8e5b64e04b382fdb079/valid-wordle-words.txt
import torch
import random
# Read the raw word list (one word per line, line endings still attached).
# The context manager closes the file handle as soon as the lines are read
# instead of leaving it open until garbage collection.
with open('valid-wordle-words.txt', 'r') as word_file:
    data_unfiltered = word_file.readlines()
def filter_word(word):
    """Return True when no character occurs more than once in `word`."""
    # Words with repeated letters are rejected because they make the game
    # harder to code.
    distinct_letters = set(word)
    return len(distinct_letters) == len(word)
# Normalise every line once with strip() rather than chopping the last
# character: this copes with "\n", "\r\n" and a final line that has no
# terminator. Blank lines are dropped explicitly because filter_word("")
# would otherwise accept them.
data = [w for w in (line.strip() for line in data_unfiltered) if w and filter_word(w)]
print('total filtered words:', len(data))
print("words removed:", len(data_unfiltered) - len(data))
# Seeded 80/10/10 split so the train/val/test files are reproducible.
train_data, val_data, test_data = torch.utils.data.random_split(data, lengths=[0.8,0.1,0.1], generator=torch.random.manual_seed(42))
# Materialise the splits as plain lists of words.
train_data, val_data, test_data = list(train_data), list(val_data), list(test_data)
# Write one word per line; `with` guarantees each file is flushed and closed.
for out_path, words in (
    ("filtered-valid-wordle-words.txt", data),
    ("train-valid-wordle-words.txt", train_data),
    ("val-valid-wordle-words.txt", val_data),
    ("test-valid-wordle-words.txt", test_data),
):
    with open(out_path, 'w') as out_file:
        out_file.writelines(f"{word}\n" for word in words)


def remove_nl(l):
    """Strip surrounding whitespace from every item (kept in case other cells call it)."""
    return [li.strip() for li in l]


print(data) # good words: speir, lythe, rangs, ables

total filtered words: 9365
words removed: 5490
['abeng', 'abers', 'abets', 'abeys', 'abhor', 'abide', 'abies', 'abius', 'abjud', 'abled', 'abler', 'ables', 'ablet', 'ablow', 'abmho', 'abnet', 'abode', 'abohm', 'aboil', 'abord', 'abore', 'aborn', 'abort', 'about', 'above', 'abrim', 'abrin', 'abris', 'absey', 'absit', 'abune', 'aburn', 'abuse', 'abuts', 'abyes', 'abysm', 'acedy', 'acerb', 'acers', 'ached', 'acher', 'aches', 'achey', 'acids', 'acidy', 'acies', 'acing', 'acker', 'acmes', 'acned', 'acnes', 'acoel', 'acold', 'acone', 'acorn', 'acred', 'acres', 'acrid', 'acron', 'acros', 'acryl', 'acted', 'actin', 'acton', 'actor', 'actus', 'acute', 'acyls', 'adbot', 'adept', 'adhoc', 'adieu', 'adios', 'adits', 'adlib', 'admen', 'admin', 'admit', 'admix', 'adnex', 'adobe', 'adopt', 'adorb', 'adore', 'adorn', 'adown', 'adoze', 'adret', 'adrip', 'adsum', 'aduki', 'adult', 'adunc', 'adust', 'advew', 'advts', 'adyts', 'adzes', 'aegis', 'aeons', 'aeros', 'aesir', 'aevum', 'afion', 'afire', 'aflow'

In [29]:
# game of wordle with no repeated characters.
# The game below validates guesses against this list and draws its target from it.
game_possible_words = data
def get_feedback_str(guess, target):
    """Build the emoji feedback row for `guess` scored against `target`.

    One tile is produced per aligned character pair, e.g. ⬜🟨⬜🟩⬜:
    🟩 when the guessed letter matches the target letter at that position,
    🟨 when the guessed letter occurs somewhere in the target,
    ⬜ otherwise.
    """
    def tile(target_char, guess_char):
        if guess_char == target_char:
            return "🟩"
        return "🟨" if guess_char in target else "⬜"

    return "".join(tile(t_char, g_char) for t_char, g_char in zip(target, guess))
# Hidden word for this game, picked at random from the allowed words.
target = random.choice(game_possible_words)
def is_valid_guess(guess: str):
    """Return True when the lower-cased guess is one of `game_possible_words`."""
    return guess.lower() in game_possible_words
# Interactive game: up to six guesses, "q" quits at any prompt.
print(f"{target=}")
for i in range(6):
    # Normalise the input once so validation, feedback and the win check all
    # see the same lower-case string (validation alone used to lower-case it).
    guess = input("what is your next guess?").strip().lower()
    # Keep re-prompting until the guess is an allowed word or the player quits.
    while guess != "q" and not is_valid_guess(guess):
        guess = input(f"Invalid guess {guess}. What is your next guess?").strip().lower()
    if guess == "q":
        break
    feedback = get_feedback_str(guess, target)
    print(f"Guess #{i+1}: {guess} -> {feedback}")
    if guess == target:
        print("won")
        break
else:
    # Reached only when all six guesses were used without a win or a quit.
    # The previous `if i == 6` check could never be true inside range(6).
    print("lost")

target='scour'
Guess #1: print -> ⬜🟨⬜⬜⬜
Guess #2: usage -> 🟨🟨⬜⬜⬜
Guess #3: scour -> 🟩🟩🟩🟩🟩
won


In [2]:
# how do I formally come up with the best first word in wordle? 
# this is a tree search problem right? well its something.
# I guess the first word: picks, based on how many words it eliminates or. 
# we start with a uniform prior over all 14855 words
# Then in a pragmatic way we reduce the possibility space, are we trying to increase the probability of the answer? 
# well. we know the probability of any word is 1/14855 before we start, and nothing we do will change that. 
# we can only eliminate some words from the possibility space. 
# like if we know that some letter is not included, or that some letter must be at a particular spot.
# this feels a lot like a set cover problem with guesses connecting to a maximal number of words. maybe the edges are characters shared between them.
# this isn't exactly right tho. The characters which are within your word are how you get feedback, so you want to remove characters. 
# if you had some character which was in every word, you would gain no information from using that character.
# if you had a character which partitioned the set of choices exactly, then including that word would be great, 
# further the position information you get about that word would help you get more than just 1/2 the support eliminated.
# if you knew that when the word is included that the position of its inclusion would partition the set further, 
# then you would try to optimally position this character.
# then you balance these choices with all characters in your string to reduce your set optimally. 
# But I guess if you reduce your set to a set which isn't nice to reduce, then you've screwed yourself doing the greedy thing.
# there is a simplified version of wordle. lets call it set wordle, we do this to reduce wordle to a set covering. 
# Perhaps because it isn't the exact same its an invalid heuristic, but whatever.
# looking for a node in the graph and we only get to probe the graph and see if this node is neighbors with the target.
# kind of a binary search: find nodes which partition the search space, and which allow for further partitioning, information-gain style.
# what is the information gain of the word (in decision trees information gain doesn't necessarily tell you everything because sometimes there are two zero information gain decisions, but they can lead to partitions which have high information gain in themselves.)
# lets talk about the support (it sounds nice).
# initial support is 14855
# then you guess and what is the smallest support you can achieve after the first guess?
# I can just enumerate over all possible words.

$\min_{\text{word}} \max_{\text{target}} \left|\operatorname{support}\big(\operatorname{feedback}(\text{word}, \text{target})\big)\right|$

In [None]:
# This cell is me trying to calculate the optimal word for Wordle and realizing it seems intractable. In Python, even the first greedy information-gathering layer is too slow at O(n**3).
class Criteria:
    """Constraints on the target word collected from guess feedback.

    Three kinds of feedback are tracked:
      * located       -- letter is in the word and in the correct spot
                         (maps letter -> index).
      * contained     -- letter is in the word but was guessed in the wrong spot
                         (only membership is recorded, not the position).
      * not_contained -- letter is not in the word in any spot.
    """

    def __init__(self):
        self.located: dict = {}
        self.contained: set = set()
        self.not_contained: set = set()
        # Idea (not implemented): also keep the guesses with their emoji
        # feedback, e.g. "⬜🟨⬜🟩⬜", to verbalize the history for a language
        # model instead of giving the LLM the full support every time.

    def is_supported(self, word):
        """Return True when `word` satisfies every recorded constraint."""
        assert len(word) == 5, "word must be 5 chars long"
        # Every located letter must sit at its recorded index.
        if any(word[index] != letter for letter, index in self.located.items()):
            return False
        # Every contained letter must appear somewhere in the word.
        if not all(letter in word for letter in self.contained):
            return False
        # No excluded letter may appear anywhere in the word.
        return not any(letter in word for letter in self.not_contained)

    def verbalize(self):
        # TODO: make for prompting
        ...

    def __repr__(self):
        return f"Criteria(contained={self.contained}, located={self.located}, not_contained={self.not_contained})"

    def __add__(self, other):
        """Return a new Criteria holding the union of both operands' constraints."""
        combined = Criteria()
        combined.contained = self.contained | other.contained
        # On a key clash the right-hand operand's index wins (dict union).
        combined.located = self.located | other.located
        combined.not_contained = self.not_contained | other.not_contained
        return combined

def get_criteria(guess, target):
    """Turn the feedback for `guess` against `target` into a Criteria object.

    Each guessed letter is filed under exactly one constraint: `located` when
    it matches the target at that index, `contained` when it occurs elsewhere
    in the target, and `not_contained` otherwise.
    """
    result = Criteria()
    for position, (target_char, guess_char) in enumerate(zip(target, guess)):
        if guess_char == target_char:
            result.located[target_char] = position
            continue
        bucket = result.contained if guess_char in target else result.not_contained
        bucket.add(guess_char)
    return result

def find_new_support(support: list[str], criteria: Criteria):
    """Return the words of `support`, in order, that `criteria` still allows."""
    return [candidate for candidate in support if criteria.is_supported(candidate)]

# Assuming all possible targets, what is min_word max_target len(new_support(all_words, criteria(word, target)))?
# This is a 9365 * 9365 sized problem: 87,703,225 (guess, target) pairs, and calculating the new support for each pair takes O(9365) again, so really
# 821,340,702,125 Python string operations, which is going to be slow...

# Sanity check: how many words in `data` remain consistent with the feedback
# produced by guessing 'words' against the target "tarts"?
new_support = find_new_support(data, get_criteria('words', "tarts"))
# Notebook display: (remaining support size, full word-list size).
len(new_support), len(data)

(194, 9365)

In [None]:
# Dynamic programming solution to Wordle?
# Given a set of criteria, you can narrow your search from there. 


In [7]:
# NOTE(review): presumably ~25.7 s per guess word * 9365 guess words, converted
# from seconds to hours — confirm where the 25.7 figure was measured.
25.7 * 9365 / 3600

66.85569444444444

In [13]:
# for guess_word in data:
#     worst_case_support = max(
#         len(find_new_support(data, get_criteria(guess_word, target))) for target in data
#     )
#     print(worst_case_support)
#     break
# Worst case for the single guess 'ranes': the largest support that can remain
# after its feedback, taken over every possible target in `data`.
worst_case_support = max(
    len(find_new_support(data, get_criteria('ranes', target))) for target in data
)

In [15]:
worst_case_support

704

In [1]:
# Start a local sglang inference server for the rest of the notebook to query.
import os
# Set before the sglang imports below — presumably so the launched server only
# sees GPUs 3 and 4; TODO confirm this is the intended device selection.
os.environ["CUDA_VISIBLE_DEVICES"] = "3,4"
from sglang.test.test_utils import is_in_ci
from sglang.utils import wait_for_server, print_highlight, terminate_process

# Under CI a patched launcher is imported from `patch`; otherwise use sglang's own.
if is_in_ci():
    from patch import launch_server_cmd
else:
    from sglang.utils import launch_server_cmd

# This is equivalent to running the following command in your terminal

# python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b --host 0.0.0.0
model_name = "qwen/qwen2.5-0.5b"

# launch_server_cmd returns the server process handle and the port it was given.
# NOTE(review): --host 0.0.0.0 listens on all network interfaces — confirm that
# exposure is acceptable on this machine.
server_process, port = launch_server_cmd(
    f"""
python3 -m sglang.launch_server --model-path {model_name} --host 0.0.0.0
"""
)

# Called before issuing any request; presumably blocks until the server answers.
wait_for_server(f"http://localhost:{port}")


[2025-05-25 17:05:07] server_args=ServerArgs(model_path='qwen/qwen2.5-0.5b', tokenizer_path='qwen/qwen2.5-0.5b', tokenizer_mode='auto', skip_tokenizer_init=False, load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization=None, quantization_param_path=None, context_length=None, device='cuda', served_model_name='qwen/qwen2.5-0.5b', chat_template=None, completion_template=None, is_embedding=False, enable_multimodal=None, revision=None, host='0.0.0.0', port=30212, mem_fraction_static=0.88, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='fcfs', schedule_conservativeness=1.0, cpu_offload_gb=0, page_size=1, tp_size=1, pp_size=1, max_micro_batch_size=None, stream_interval=1, stream_output=False, random_seed=915520994, constrained_json_whitespace_pattern=None, watchdog_timeout=300, dist_timeout=None, download_dir=None, base_gpu_id=0, gpu_id_step=1, log_level='info', log_level_http=None, 

In [2]:
from openai import OpenAI
# Point the OpenAI client at the local server's /v1 endpoint on `port`.
# The api_key is a placeholder string — presumably the local server does not
# check it; TODO confirm.
client = OpenAI(api_key="None", base_url=f"http://0.0.0.0:{port}/v1")

In [None]:
# Short raw-completion prompt asking the model for an opening Wordle guess.
prompt = "What is your first guess for wordle today: "
# longer prompt from the paper https://arxiv.org/pdf/2504.20997
# '''You are an agent playing a customized version of the game Wordle. There is a five-letter target word from the
# English dictionary which you must try to guess as quickly as possible. The target word does not contain any repeated
# letters. You will incrementally construct your guess for this target word by selecting one letter of the alphabet at each
# timestep. For each letter you guess, you will be given feedback indicating if the guessed letter is either in the correct
# position for the target word, in the wrong position for the target word, or does not appear in the target word at all.
# You will receive a reward of one if your guessed word correctly matches the target word. Otherwise, rewards will
# always be zero. Your only available actions are letters of the alphabet.'''
# Number of copies of the prompt sent in the single batched request below.
num_attempts = 20
# max_tokens=5 caps each completion at five generated tokens.
res = client.completions.create(model=model_name, prompt=[prompt] * num_attempts, max_tokens=5)
res

[2025-05-25 17:09:19] Prefill batch. #new-seq: 1, #new-token: 1, #cached-token: 10, token usage: 0.00, #running-req: 0, #queue-req: 0
[2025-05-25 17:09:19] Prefill batch. #new-seq: 19, #new-token: 19, #cached-token: 190, token usage: 0.00, #running-req: 1, #queue-req: 0
[2025-05-25 17:09:20] Decode batch. #running-req: 19, #token: 486, token usage: 0.00, cuda graph: True, gen throughput (token/s): 41.40, #queue-req: 0
[2025-05-25 17:09:20] Decode batch. #running-req: 15, #token: 0, token usage: 0.00, cuda graph: True, gen throughput (token/s): 5608.12, #queue-req: 0
[2025-05-25 17:09:20] INFO:     127.0.0.1:51868 - "POST /v1/completions HTTP/1.1" 200 OK


Completion(id='99a619430f984719b100fb7f2dff0230', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text='6 | A SMARTMAN\nWords300\nacters\nnumbers\nAnalyzing the\nWords300\nacters revealed their type as me\ndded & by definition\nwords. Having used up the fourteen typed letters, "\\\nHQQD" as the first guess we enter the alphabet\n-\\\n\nThen', matched_stop=None), CompletionChoice(finish_reason='length', index=1, logprobs=None, text='100 words and counts with an average of 3 words per natural/day.\\nThis is what I got:\\nI have seven main categories:\\n1. Sheers (4: 6 XL, 4 U, 1 10) > 1 word\\n2. Contact List > ', matched_stop=None), CompletionChoice(finish_reason='length', index=2, logprobs=None, text='3, 6, 11, 16, 5? Most people guess the word [enigma] first. Why?\nThis may seem ridiculous from a math perspective, but it’s the right motivation.\nThe “words” that you see in wordle each might have multiple meanings. Déjà vu', matched_stop=None), CompletionChoic

In [11]:
# Show each sampled completion under a ten-dash separator line.
separator = "-" * 10
for completion in res.choices:
    print(separator, completion.text, sep="\n")

----------
6 | A SMARTMAN
Words300
acters
numbers
Analyzing the
Words300
acters revealed their type as me
dded & by definition
words. Having used up the fourteen typed letters, "\
HQQD" as the first guess we enter the alphabet
-\

Then
----------
100 words and counts with an average of 3 words per natural/day.\nThis is what I got:\nI have seven main categories:\n1. Sheers (4: 6 XL, 4 U, 1 10) > 1 word\n2. Contact List > 
----------
3, 6, 11, 16, 5? Most people guess the word [enigma] first. Why?
This may seem ridiculous from a math perspective, but it’s the right motivation.
The “words” that you see in wordle each might have multiple meanings. Déjà vu
----------
9? 5?
The answer to this question is:

nine
----------
70% of numbers got at least 1 letter wrong. The solutions were write me and 2000 days of coding\nLet's calculate:\nThere are 62 letters in the alphabet\nThere are a total of 9 digits\nIf each place is equally likely and a digit is placed in
----------
romeo, blake leather s

In [4]:
client.completions.create?

[0;31mSignature:[0m
[0mclient[0m[0;34m.[0m[0mcompletions[0m[0;34m.[0m[0mcreate[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmodel[0m[0;34m:[0m [0;34m"Union[str, Literal['gpt-3.5-turbo-instruct', 'davinci-002', 'babbage-002']]"[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprompt[0m[0;34m:[0m [0;34m'Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbest_of[0m[0;34m:[0m [0;34m'Optional[int] | NotGiven'[0m [0;34m=[0m [0mNOT_GIVEN[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mecho[0m[0;34m:[0m [0;34m'Optional[bool] | NotGiven'[0m [0;34m=[0m [0mNOT_GIVEN[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfrequency_penalty[0m[0;34m:[0m [0;34m'Optional[float] | NotGiven'[0m [0;34m=[0m [0mNOT_GIVEN[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlogit_bias[0m[0;34m:[0m [0;34m'Optional[Dict[str, int]] | NotGiven'[0m [0;34m=[0m [0mNOT_GI

In [12]:
# import requests

# response = requests.post(
#     f"http://localhost:{port}/generate",
#     json={
#         "text": "The capital of France is",
#         "sampling_params": {
#             "temperature": 0,
#             "max_new_tokens": 320,
#         },
#     },
# )

# print_highlight(response.json())