In [1]:
import os

from predibase import (
    Predibase,
    GRPOConfig,
    RewardFunctionsConfig,
    RewardFunctionsRuntimeConfig,
    SFTConfig,
    SamplingParamsConfig,
)
from datasets import load_dataset
from dotenv import load_dotenv

import warnings
warnings.filterwarnings('ignore') # i think this is because the Predibase API sometimes gives warnings for version-related stuff (it does it in the course videos too)

# Lesson eight: Actually do RFT via Predibase to train a model that plays Wordle, better

## First, train a fine-tuned model using RFT 

In [2]:
# Populate os.environ from the local .env file. override=True lets values from
# the file replace variables that are already set, which is fine for this notebook.
_ = load_dotenv(override=True)

# Predibase client used by every cell below; a missing PREDIBASE_API_TOKEN
# environment variable raises KeyError right here.
pb = Predibase(api_token=os.environ['PREDIBASE_API_TOKEN'])

Load the training data that shows how to think about answers - they said this is from strong models like more expensive versions of Claude, but w/ the actual results/answers removed - I think to help train on the 'thinking' processes. They uploaded this data to HF, from where we'll get it.

In [3]:
# Fetch the 'train' split of the course dataset from the Hugging Face Hub.
dataset = load_dataset('predibase/wordle-grpo', split='train')
# Convert to a pandas DataFrame, the form the upload cell below hands to Predibase.
dataset = dataset.to_pandas()

In [6]:
# Upload the training DataFrame to Predibase under `dataset_name`
# (presumably stored with the account that owns the API token - TODO confirm).
dataset_name = 'wordle-grpo-data-tryagain'

try:
    dataset = pb.datasets.from_pandas_dataframe(dataset, name=dataset_name)
except Exception as upload_error:
    # Best-effort fallback: the usual reason for a failure is that a dataset with
    # this name was already uploaded (e.g. the cell is being re-run). Report the
    # error instead of swallowing it so a genuine problem (auth, network, bad
    # data) is visible; if the dataset does not exist, the get() below raises.
    print(
        f"Upload of {dataset_name!r} failed ({upload_error!r}); "
        "using the existing Predibase dataset with that name instead."
    )
    dataset = pb.datasets.get(dataset_name)

In [7]:
# Create a Predibase 'repo' named 'wordle' as the container for the adapters
# trained below. exists_ok=True presumably makes the call a no-op when the repo
# already exists, so the cell can be re-run - TODO confirm against the SDK docs.
repo = pb.repos.create(name='wordle', exists_ok=True)

The reward functions - more complicated than what we did in previous lessons, but still pretty straightforward. I got these from reward_functions.py.

In [8]:
def output_format_check(prompt: str, completion: str, example: dict) -> float:
    """Score how closely a completion follows the required <think>/<guess> layout.

    Reward tiers:
        0.0 - the completion does not match the expected layout, or scoring
              failed for any reason (e.g. the word list could not be read)
        0.1 - layout is correct but the guess is not exactly 5 characters long
        0.5 - layout and length are correct but the guess is not in the word list
        1.0 - layout is correct and the guess is a 5-character word from the list

    Args:
        prompt: The prompt given to the model (not used by this function).
        completion: The model output, expected to start right after the
            "<think>" tag that the prompt pre-fills for the assistant.
        example: Dataset row; example["word_list"] must be something
            pandas.read_csv can load, with a "Word" column.

    Returns:
        The reward: one of 0.0, 0.1, 0.5 or 1.0.
    """
    # Imports live inside the function body so the function is self-contained
    # (the job output shows each reward function shipped as its own encoded source).
    import re
    import pandas as pd

    try:
        # Add synthetic <think> as it's already part of the prompt and prefilled
        # for the assistant to more easily match the regex
        completion = "<think>" + completion

        # Expected pattern:
        # <think> content </think>, a newline, then <guess> content </guess>
        # with nothing after the closing tag. The first group forbids a nested
        # <think> or </think> inside the reasoning.
        regex = (
            r"^<think>\s*([^<]*(?:<(?!/?think>)[^<]*)*)\s*<\/think>\n"
            r"<guess>\s*([\s\S]*?)\s*<\/guess>$"
        )

        match = re.search(regex, completion, re.DOTALL)
        if match is None:
            return 0.0

        guess = match.group(2).strip()

        # Well-formed answer, but not a 5-character guess: small partial credit.
        if len(guess) != 5:
            return 0.1

        # Check the guess against the predefined list of valid words.
        word_list = pd.read_csv(str(example["word_list"]))
        if guess not in word_list["Word"].values:
            return 0.5

        return 1.0
    except Exception:
        # Deliberate best-effort: a scoring failure must not abort training,
        # so any error is treated as "no reward".
        return 0.0


# Reward function that checks if the guess uses the previous feedback for its next guess
def uses_previous_feedback(prompt: str, completion: str, example: dict) -> float:
    """Score how well the new guess respects feedback from earlier guesses.

    Each of the five guess letters contributes, by position:
        +0.2   letter reused in a position where it was confirmed correct
        +0.1   letter known to be in the word, tried in a new position
        -0.2   letter known to be in the word, repeated in an already-rejected position
        -0.5   letter previously marked as not in the word
        +0.05  letter with no previous feedback (exploration)

    Args:
        prompt: The prompt given to the model (not used by this function).
        completion: The model output, expected to end with
            "<guess> WORD </guess>".
        example: Dataset row; example["past_guess_history"] is the string form
            of a list of (guess, feedback) pairs, where feedback looks like
            "C(-) R(✓) A(x) N(x) E(x)".

    Returns:
        0.0 if no 5-character guess can be extracted or scoring fails,
        0.1 if there is no guess history yet, otherwise the sum of the
        per-letter contributions above.
    """
    # Imports live inside the function body so the function is self-contained
    # (the job output shows each reward function shipped as its own encoded source).
    import re
    import ast

    try:
        # Add synthetic <think> as it's already part of the prompt and prefilled
        # for the assistant to more easily match the regex
        completion = "<think>" + completion

        # Extract the guess from the completion
        regex = r"<guess>\s*([\s\S]*?)\s*<\/guess>$"
        match = re.search(regex, completion, re.DOTALL)
        if match is None:
            return 0.0

        guess = match.group(1).strip()
        if len(guess) != 5:
            return 0.0

        past_guess_history = ast.literal_eval(example["past_guess_history"])
        if len(past_guess_history) == 0:
            print("Uses previous feedback reward: 0.1 (No past guesses)")
            return 0.1

        # letter -> set of positions at which that verdict was observed
        correct_letter_to_position = {}
        valid_letter_to_position = {}
        wrong_letter_to_position = {}
        for _, past_feedback in past_guess_history:
            # split() (not split(" ")) so doubled whitespace cannot produce empty
            # tokens, which would make fb[0] raise and zero out the whole reward.
            for i, fb in enumerate(past_feedback.split()):
                if '✓' in fb:
                    bucket = correct_letter_to_position
                elif '-' in fb:
                    bucket = valid_letter_to_position
                else:
                    bucket = wrong_letter_to_position
                bucket.setdefault(fb[0], set()).add(i)

        reward = 0.0
        for idx, letter in enumerate(guess):
            # Positive reward if guess reuses letter in confirmed correct position
            if letter in correct_letter_to_position and idx in correct_letter_to_position[letter]:
                reward += 0.2
            # Reward if letter known to be in word is used in a new position
            elif letter in valid_letter_to_position and idx not in valid_letter_to_position[letter]:
                reward += 0.1
            # Penalize reuse of known-in-word letter in same position (not exploring)
            elif letter in valid_letter_to_position and idx in valid_letter_to_position[letter]:
                reward -= 0.2
            # Penalize use of known-absent letter
            elif letter in wrong_letter_to_position:
                reward -= 0.5
            else:
                # Reward unknown letters with partial credit for exploration
                reward += 0.05

        return reward
    except Exception:
        # Deliberate best-effort: malformed history or completion earns no reward.
        return 0.0


# Reward function that computes normalized information gain of the guess, i.e.,
# does the new guess reduce the uncertainty of the secret word the most
def guess_value(prompt: str, completion: str, example: dict) -> float:
    """Reward a guess by its normalized expected information gain.

    The word list is first narrowed to the candidates consistent with all past
    feedback. The guess is then scored by how much, on average, its feedback
    would shrink that candidate set, divided by the current uncertainty.

    Args:
        prompt: The prompt given to the model (not used by this function).
        completion: The model output, expected to end with
            "<guess> WORD </guess>".
        example: Dataset row with
            - "word_list": something pandas.read_csv can load, with a "Word" column
            - "past_guess_history": string form of a list of (guess, feedback)
              pairs, where feedback looks like "C(-) R(✓) A(x) N(x) E(x)".

    Returns:
        A value in [0, 1]. It is 0.0 when no 5-character guess can be extracted,
        the guess is not in the word list, no candidate or only one candidate
        remains, or scoring fails for any reason.
    """
    # Imports live inside the function body so the function is self-contained
    # (the job output shows each reward function shipped as its own encoded source).
    import math
    import re
    import ast
    import pandas as pd

    def validate_guess(secret: str, guess: str, raw_feedback: bool = False):
        """Return Wordle feedback for `guess` against `secret`.

        With raw_feedback=False the result is one string such as
        "C(✓) R(✓) A(x) N(x) E(x)"; with raw_feedback=True it is the list of
        per-letter tokens (each token keeps its trailing space).
        """
        feedback = []
        secret_list = list(secret)

        # Pass 1: exact matches. Matched secret letters are consumed (set to None)
        # so they cannot also satisfy a misplaced letter in pass 2.
        for i, (g_char, s_char) in enumerate(zip(guess, secret)):
            if g_char == s_char:
                feedback.append(f"{g_char}(✓) ")
                secret_list[i] = None
            else:
                feedback.append(None)

        # Pass 2: misplaced letters, each consuming one unmatched secret letter.
        for i, g_char in enumerate(guess):
            if feedback[i] is None:
                if g_char in secret_list:
                    feedback[i] = f"{g_char}(-) "
                    secret_list[secret_list.index(g_char)] = None
                else:
                    feedback[i] = f"{g_char}(x) "

        if raw_feedback:
            return feedback
        return "".join(feedback).strip()

    def filter_candidates(all_candidate_words, past_guesses):
        """Keep the words that would have produced every recorded feedback string."""
        filtered = []
        for word in all_candidate_words:
            # Compare the feedback 'word' would give as the secret with what was observed.
            if all(
                validate_guess(word, past_guess) == past_feedback
                for past_guess, past_feedback in past_guesses
            ):
                filtered.append(word)
        return filtered

    def compute_normalized_information_gain(all_candidate_words, past_guesses, guess):
        """Return (normalized expected gain, normalized best-case gain) for `guess`."""
        # First, filter the candidate words based on past guesses.
        candidates = filter_candidates(all_candidate_words, past_guesses)
        total_candidates = len(candidates)

        # If no candidates remain, return zeros.
        if total_candidates == 0:
            return 0.0, 0.0

        # Current uncertainty (entropy) before the guess, treating candidates as equally likely.
        current_entropy = math.log2(total_candidates)

        # Partition candidates by the feedback pattern the guess would produce.
        feedback_groups = {}
        for word in candidates:
            # Raw feedback list (e.g., ['B(✓) ', 'R(✓) ', 'A(x) ', ...])
            feedback = validate_guess(word, guess, raw_feedback=True)
            # '1' for correct position, '0' for wrong position, 'x' for letter not in word.
            feedback_pattern = "".join(
                '1' if "✓" in fb else ('0' if "-" in fb else 'x')
                for fb in feedback
            )
            feedback_groups.setdefault(feedback_pattern, []).append(word)

        expected_entropy = 0
        max_info_gain = 0
        for group in feedback_groups.values():
            # Groups are created on first append, so they are never empty.
            group_size = len(group)
            p = group_size / total_candidates
            # Entropy left if this feedback is received.
            group_entropy = math.log2(group_size)
            expected_entropy += p * group_entropy
            # Information gain for this particular feedback outcome.
            max_info_gain = max(max_info_gain, current_entropy - group_entropy)

        # The expected gain is the average reduction in entropy.
        expected_gain = current_entropy - expected_entropy

        # Normalize by the maximum possible gain, which is current_entropy
        # (reducing to a single candidate). With one candidate the entropy is 0.
        if current_entropy > 0:
            return expected_gain / current_entropy, max_info_gain / current_entropy
        return 0, 0

    try:
        # Add synthetic <think> as it's already part of the prompt and prefilled
        # for the assistant to more easily match the regex
        completion = "<think>" + completion

        # Extract the guess from the completion
        regex = r"<guess>\s*([\s\S]*?)\s*<\/guess>$"
        match = re.search(regex, completion, re.DOTALL)
        if match is None:
            return 0.0

        guess = match.group(1).strip()
        if len(guess) != 5:
            return 0.0

        # Load the word list
        word_list = pd.read_csv(str(example["word_list"]))
        if guess not in word_list["Word"].values:
            return 0.0

        # Extract past guesses and feedback
        past_guess_history = ast.literal_eval(example["past_guess_history"])

        # Only the expected gain is used as the reward; the best-case gain is ignored.
        normalized_expected_gain, _ = compute_normalized_information_gain(
            word_list["Word"].values,
            past_guess_history,
            guess
        )
        return float(normalized_expected_gain)
    except Exception:
        # Deliberate best-effort: a scoring failure earns no reward.
        return 0.0


And, ok, I should be able to actually train the updated model w/ just one call. The only things I need to provide are the base model's name, to specify the reward functions (the three functions just above), give a few config params, and the dataset on which to train (the one I uploaded above). Then Predibase just goes to town, apparently.

In [9]:
# Submit the GRPO fine-tuning job to Predibase.
# NOTE: this is a billed, long-running job (see the cost notes further down);
# re-running this cell presumably requests another job - TODO confirm.
pb.finetuning.jobs.create(
    config=GRPOConfig(
        base_model="qwen2-5-7b-instruct",
        reward_fns=RewardFunctionsConfig(
            runtime=RewardFunctionsRuntimeConfig(
                # third-party package imported inside the reward functions
                packages=["pandas"]
            ),
            # name -> callable for the three reward functions defined above
            functions={
                "output_format_check": output_format_check,
                "uses_previous_feedback": uses_previous_feedback,
                "guess_value": guess_value,
            }
        ),
        sampling_params=SamplingParamsConfig(max_tokens=4096),
        # presumably the number of completions sampled per prompt - TODO confirm
        num_generations=4
    ),
    # the Predibase dataset object obtained in the upload cell
    dataset=dataset,
    # same name as the repo created earlier with pb.repos.create
    repo="wordle",
    description="Wordle GRPO"
)

Successfully requested finetuning of qwen2-5-7b-instruct from base as `wordle/1`. (Job UUID: 91feeade-8a29-431c-ab6a-a83b4a64021e).



FinetuningJob(uuid='91feeade-8a29-431c-ab6a-a83b4a64021e', base_model='qwen2-5-7b-instruct', dataset='wordle-grpo-data-tryagain', description='Wordle GRPO', target_repo='wordle', target_version_tag=1, accelerator='', status='pending', params=GRPOConfig(base_model='qwen2-5-7b-instruct', adapter='lora', task='grpo', epochs=None, learning_rate=None, rank=None, target_modules=None, enable_early_stopping=None, apply_chat_template=None, lr_scheduler=None, optimizer=None, lora_alpha=None, lora_dropout=None, warmup_ratio=None, effective_batch_size=None, beta=None, num_generations=4, sampling_params=SamplingParamsConfig(temperature=None, top_p=None, top_k=None, max_tokens=4096), reward_fns=RewardFunctionsConfig(runtime=RewardFunctionsRuntimeConfig(packages=['pandas']), functions={'guess_value': RewardFunction(encoded_fn='ZGVmIGd1ZXNzX3ZhbHVlKHByb21wdD...'), 'output_format_check': RewardFunction(encoded_fn='ZGVmIG91dHB1dF9mb3JtYXRfY2hlY2...'), 'uses_previous_feedback': RewardFunction(encoded_fn=

The lecture just basically ends here - it doesn't include any code or specific examples of using the trained model. (It shows some graphs that they generated w/ this code.) I came back as below and wrote my own code/used the code from lesson three to actually use the fine-tuned model.

So, uploading the data didn't work the first time - I think it should have taken at most a few mins, and it ran for 30m+ and judging from the Data UI on the Predibase site, is still running. I let it sit for a while and checked again before calling it a day - it actually ran for more than 24 hours. I contacted support, and they got back to me w/i that day and said they'd fixed the problem. I tried it myself and it succeeded, in less than a minute.

Then, I ran the above code to do a training run and build a fine-tuned model. After ~30m of queuing time, Predibase started training. I let it run for ~3.5 hours on the default A100 hardware and got 237 training steps, of the 1000 that is the default for GRPOConfig. Then I stopped it, as that much of a run had cost ~$20 for the 1.85M tokens the training process had used (at the $10/1M tokens that Predibase charges for this kind of fine tuning) and so I'd used most of the $25 in free credits Predibase provides. I stopped it using the web UI. 

I didn't know if the resulting model would be usable, but it does at least run without giving me errors, as the text below shows. It doesn't do particularly well.

## Then use the fine-tuned model to play Wordle

I use the code from lesson three - in that lecture the code uses a non-fine-tuned model - a base Qwen model - to play Wordle. Here I instead used the fine-tuned model that I trained above. 

This runs, so that's good. However, it's not great - for example, it looks like it doesn't understand feedback that tells it a letter is correct but in the wrong position. I don't know if this is a result of me stopping the fine-tuning before it ran its specified number of steps, or if there's another issue. For now, I won't worry about spending more or digging deeper.

In [24]:
_ = load_dotenv(override=True) # populate env from .env file, reload of file - 'override' - ok here

In [25]:
from openai import OpenAI # lesson 3 uses OpenAI completions code, but with a base URL on the Predibase site, as below

# Predibase provides an OpenAI-compatible (v1) endpoint, which is what the tutorial uses.
# The model URL differs per user; mine was built following the docs at
# https://docs.predibase.com/user-guide/inference/migrate-openai
# Both values come from the environment (.env); a missing variable raises KeyError.
client = OpenAI(
    base_url=os.environ['PREDIBASE_QWEN_MODEL_URL'], 
    api_key=os.environ['PREDIBASE_API_TOKEN'],
)

In [26]:
from transformers import AutoTokenizer

# Tokenizer of the base model; render_prompt below uses its chat template to turn
# the message list into the raw prompt string sent to the completions endpoint.
base_model_id = 'Qwen/Qwen2.5-7B-Instruct'
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

In [27]:
# System message used for every turn: states the Wordle rules, the three feedback
# symbols (✓ / - / x), a worked example, and the required <think>/<guess> reply format.
# NOTE(review): the example lines use "→" while GuessWithFeedback.__repr__ renders
# past guesses with "->" - confirm this mismatch is intentional.
SYSTEM_PROMPT = """
You are playing Wordle, a word-guessing game.

### Game Rules:
- You have **6 tries** to guess a secret **5-letter** word.
- Each guess must be a valid **5-letter English word**.
- After each guess, you will receive feedback indicating how close 
your guess was.

### Feedback Format:
Each letter in your guess will receive one of three symbols:
1. ✓ : The letter is in the word and in the CORRECT position.
2. - : The letter is in the word but in the WRONG position.
3. x : The letter is NOT in the word.

### Example:
Secret Word: BRISK

Guess 1: STORM → Feedback: S(-) T(x) O(x) R(-) M(x)
Guess 2: BRAVE → Feedback: B(✓) R(✓) A(x) V(x) E(x)
Guess 3: BRISK → Feedback: B(✓) R(✓) I(✓) S(✓) K(✓)

### Response Format:
Think through the problem and feedback step by step. Make sure to 
first add your step-by-step thought process within <think> </think> 
tags. Then, return your guessed word in the following format: 
<guess> guessed-word </guess>.
"""

In [28]:
from dataclasses import dataclass
from enum import Enum
from typing import List

class LetterFeedback(Enum):
    """Per-letter verdict; each value is the symbol shown to the player/LLM."""
    CORRECT = "✓"       # letter is in the word and in the correct position
    WRONG_POS = "-"     # letter is in the word but in the wrong position
    WRONG_LETTER = "x"  # letter is not in the word

@dataclass
class GuessWithFeedback:
    """A guessed word together with one LetterFeedback per letter."""
    guess: str
    feedback: List[LetterFeedback]

    def __repr__(self) -> str:
        """Return e.g. 'CRANE -> Feedback: C(✓) R(-) A(x) N(x) E(x)' (readable by humans and the LLM)."""
        annotated = []
        for char, verdict in zip(self.guess, self.feedback):
            annotated.append("{}({})".format(char, verdict.value))
        return "{} -> Feedback: {}".format(self.guess, " ".join(annotated))

In [29]:
def render_user_prompt(past_guesses: List[GuessWithFeedback]) -> str:
    """Build the user-turn text: the request for a new guess, followed (when there
    is history) by one numbered line per past guess with its feedback."""
    sections = ["Make a new 5-letter word guess."]
    if past_guesses:
        sections.append("\n\nHere is some previous feedback:")
        # Guesses are numbered from 1 in the order they were made.
        sections.extend(
            f"\nGuess {turn}: {entry}"
            for turn, entry in enumerate(past_guesses, start=1)
        )
    return "".join(sections)

In [30]:
def render_prompt(past_guesses: List[GuessWithFeedback]) -> str:
    """Return the full chat-formatted prompt string for the next guess.

    The conversation is the system prompt, the user prompt built from
    `past_guesses`, and a partial assistant message ending in "<think>" that
    starts the model's step-by-step reasoning.
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": render_user_prompt(past_guesses)}
    # Assistant preamble: the model's completion picks up right after "<think>".
    assistant_prefill = {
        "role": "assistant",
        "content": "Let me solve this step by step.\n<think>",
    }

    # tokenize=False yields the prompt as text rather than token ids.
    return tokenizer.apply_chat_template(
        [system_turn, user_turn, assistant_prefill],
        tokenize=False,
        continue_final_message=True,
    )

In [31]:
def generate_stream(prompt: str, adapter_id: str = "") -> str:
    """Stream a completion for `prompt`, echoing text as it arrives.

    Args:
        prompt: The fully rendered prompt string (see render_prompt).
        adapter_id: Passed as the `model` of the completions request; the
            notebook uses the fine-tuned adapter id, e.g. 'wordle/1'.

    Returns:
        The complete generated text (all streamed pieces concatenated).
    """
    response = client.completions.create(
        model=adapter_id,
        prompt=prompt,
        temperature=0.0, # deterministic, for evaluation
        max_tokens=2048,
        stream=True,
    )

    pieces = []
    for chunk in response:
        # Defensive: skip any chunk that carries no choices (e.g. a usage-only chunk).
        if not chunk.choices:
            continue
        content = chunk.choices[0].text
        if content is not None:
            print(content, end="", flush=True)
            pieces.append(content)
    # Finish the streamed line. The original bare `print` (no call) was a no-op.
    print()

    return "".join(pieces)

And check that I can use the above code to get one response, from the fine-tuned model.

In [32]:
# Hand-built game state for a single smoke-test request. The feedback below is
# written out by hand for the secret word CRAFT; secret_word itself is not read
# anywhere in this cell.
secret_word = "CRAFT"

past_guesses = [
    GuessWithFeedback(
        "CRANE", [
            LetterFeedback.CORRECT,
            LetterFeedback.CORRECT,
            LetterFeedback.CORRECT,
            LetterFeedback.WRONG_LETTER,
            LetterFeedback.WRONG_LETTER,
        ]),
    GuessWithFeedback(
        "CRASH", [
            LetterFeedback.CORRECT,
            LetterFeedback.CORRECT,
            LetterFeedback.CORRECT,
            LetterFeedback.WRONG_LETTER,
            LetterFeedback.WRONG_LETTER,
        ]),
]

# Render the full chat-formatted prompt and print it for inspection.
prompt = render_prompt(past_guesses)
print(prompt)

<|im_start|>system

You are playing Wordle, a word-guessing game.

### Game Rules:
- You have **6 tries** to guess a secret **5-letter** word.
- Each guess must be a valid **5-letter English word**.
- After each guess, you will receive feedback indicating how close 
your guess was.

### Feedback Format:
Each letter in your guess will receive one of three symbols:
1. ✓ : The letter is in the word and in the CORRECT position.
2. - : The letter is in the word but in the WRONG position.
3. x : The letter is NOT in the word.

### Example:
Secret Word: BRISK

Guess 1: STORM → Feedback: S(-) T(x) O(x) R(-) M(x)
Guess 2: BRAVE → Feedback: B(✓) R(✓) A(x) V(x) E(x)
Guess 3: BRISK → Feedback: B(✓) R(✓) I(✓) S(✓) K(✓)

### Response Format:
Think through the problem and feedback step by step. Make sure to 
first add your step-by-step thought process within <think> </think> 
tags. Then, return your guessed word in the following format: 
<guess> guessed-word </guess>.
<|im_end|>
<|im_start|>user
Make

In [34]:
# Adapter produced by the training job above, which Predibase registered as `wordle/1`.
finetuned_adapter_id = 'wordle/1'

# Stream one completion from the fine-tuned adapter for the prompt built above.
ft_completion = generate_stream(prompt, adapter_id=finetuned_adapter_id)

 
- From Guess 1 (CRANE), we know that C, R, and A are correct letters in the word.
- From Guess 2 (CRASH), we can confirm C and R are correct, but they are in the right positions as well.
- The letter A is also correct and in the right position.
- N and E from CRANE are not in the word.
- S and H from CRASH are not in the word.
- This leaves us with the letters B, D, G, I, L, M, P, T, U, V, W, X, Y, Z to choose from for our next guess.
- We need to include C, R, and A in our next guess and avoid S, H, N, E.
</think>
<guess> BRICK </guess>

And then add in the code, again from lesson three, to play a full game.

In [35]:
import re

def get_feedback(guess: str, secret_word: str) -> List[LetterFeedback]:
    """Score `guess` against `secret_word` with standard Wordle rules.

    Scoring uses two passes so repeated letters are handled correctly: exact
    matches are marked first and consume their secret letter; each remaining
    guess letter is WRONG_POS only while an unconsumed copy of it is left in
    the secret, otherwise WRONG_LETTER. This matches the `validate_guess`
    helper inside the `guess_value` reward function used for training.

    Args:
        guess: The guessed word.
        secret_word: The word to be found (same letter case as `guess`).

    Returns:
        One LetterFeedback per compared position. If the two words differ in
        length, only the overlapping positions are scored.
    """
    # Secret letters still available for a WRONG_POS match; None means consumed.
    unmatched = list(secret_word)
    feedback = []

    # Pass 1: exact position matches.
    for i, (letter, secret_letter) in enumerate(zip(guess, secret_word)):
        if letter == secret_letter:
            feedback.append(LetterFeedback.CORRECT)
            unmatched[i] = None
        else:
            feedback.append(None)

    # Pass 2: misplaced or absent letters among the positions not yet decided.
    for i, letter in enumerate(guess[:len(feedback)]):
        if feedback[i] is not None:
            continue
        if letter in unmatched:
            feedback[i] = LetterFeedback.WRONG_POS
            unmatched[unmatched.index(letter)] = None
        else:
            feedback[i] = LetterFeedback.WRONG_LETTER

    return feedback

In [36]:
def next_turn(
    past_guesses: List[GuessWithFeedback],
    secret_word: str,
    adapter_id: str = ""
) -> None:
    """Play one full turn of the game.

    Builds a prompt from the previous guesses, asks the LLM for a new guess,
    scores it against the secret word, and prints the game state.

    Args:
        past_guesses: Guess history so far. It is modified in place: the new
            guess and its feedback are appended.
        secret_word: The word to be found; compared case-insensitively.
        adapter_id: Model/adapter to query (forwarded to generate_stream).

    Raises:
        RuntimeError: If the completion contains no <guess>...</guess> tag.
    """
    prompt = render_prompt(past_guesses)
    completion = generate_stream(prompt, adapter_id)

    match = re.search(r"<guess>\s*(.*?)\s*</guess>", completion, re.DOTALL)
    if not match:
        print(completion)
        raise RuntimeError("Invalid guess - no guess found in response.")
    # Normalize both sides to upper case; previously only the guess was
    # upper-cased, so a lower-case secret could never be matched or scored.
    guess = match.group(1).upper()
    secret_word = secret_word.upper()

    feedback = get_feedback(guess, secret_word)
    past_guesses.append(GuessWithFeedback(guess, feedback))

    print("\n\n")
    print(("-" * 100) + "\n")
    for past_guess in past_guesses:
        print(past_guess)

    if guess == secret_word:
        print("SUCCESS - win")
    elif len(past_guesses) >= 6:
        print("LOSS - ran out of guesses")

In [37]:
# Start a fresh game: a new secret word and an empty history (next_turn appends to it).
secret_word = "BRICK"
past_guesses = []

# Play with the fine-tuned adapter.
adapter_id = finetuned_adapter_id

In [38]:
next_turn(past_guesses, secret_word, adapter_id)

 Considering the rules and feedback format, I need to make an initial guess that covers a wide range of letters. A common starting word is "CRANE" as it includes vowels and consonants which are frequently used in English words.</think>
<guess> CRANE </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(-) R(✓) A(x) N(x) E(x)


In [39]:
next_turn(past_guesses, secret_word, adapter_id)

 Given the feedback from the first guess, we know:
- R is in the correct position (3rd letter).
- C, A, N, and E are not part of the word.
The possible letters are now limited to the ones not mentioned (B, D, F, G, H, I, J, K, L, M, P, Q, S, T, U, V, W, X, Y, Z).

Since R is already placed correctly, we need to find a new combination of letters that fits the criteria.
Considering common 5-letter words, let's try a word that includes R and avoids C, A, N, and E.</think>
<guess> STRAP </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(-) R(✓) A(x) N(x) E(x)
STRAP -> Feedback: S(x) T(x) R(-) A(x) P(x)


In [40]:
next_turn(past_guesses, secret_word, adapter_id)

 
From Guess 1 (CRANE):
- R is correct and in the right position (✓).
- C, A, N, and E are not in the word.

From Guess 2 (STRAP):
- A is correct and in the right position (✓).
- S, T, R, and P are not in the word.

The word must contain R and A. It cannot contain C, N, E, S, T, or P.
</think>
<guess> BRICK </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(-) R(✓) A(x) N(x) E(x)
STRAP -> Feedback: S(x) T(x) R(-) A(x) P(x)
BRICK -> Feedback: B(✓) R(✓) I(✓) C(✓) K(✓)
SUCCESS - win


In [49]:
secret_word = "SALES"
past_guesses = []

next_turn(past_guesses, secret_word, adapter_id)

 
Since this is the first guess, I'll choose a common 5-letter word that has a mix of vowels and consonants to maximize the chances of getting some correct positions or letters. A good starting word could be "CRANE".
</think>
<guess> CRANE </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(x) R(x) A(-) N(x) E(-)


In [50]:
next_turn(past_guesses, secret_word, adapter_id)

 Given the feedback from the first guess, none of the letters C, R, A, N, or E appear in the secret word. This means we need to choose a completely different set of letters for our next guess. I'll consider common 5-letter words that don't include these letters. A good choice could be a word like "FLUTE" which is a common English word.</think>
<guess> FLUTE </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(x) R(x) A(-) N(x) E(-)
FLUTE -> Feedback: F(x) L(-) U(x) T(x) E(-)


In [51]:
next_turn(past_guesses, secret_word, adapter_id)

 From the feedback, we know that none of the letters in CRANE or FLUTE are correct. This means we need to choose a completely different set of letters for our next guess.
I'll consider common 5-letter words that don't include any of these letters (C, R, A, N, E, F, L, U). Some good options could be words like 'SWAMP', 'BREAD', 'GHOST', etc.
I'll go with 'SWAMP' as it's a common 5-letter word and doesn't include any of the disallowed letters.</think>
<guess> SWAMP </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(x) R(x) A(-) N(x) E(-)
FLUTE -> Feedback: F(x) L(-) U(x) T(x) E(-)
SWAMP -> Feedback: S(✓) W(x) A(-) M(x) P(x)


In [52]:
next_turn(past_guesses, secret_word, adapter_id)

 
From the feedback:
- Guess 1 (CRANE): No letters are correct.
- Guess 2 (FLUTE): Only 'T' is correct.
- Guess 3 (SWAMP): 'S' is correct.

The word must contain 'S' and 'T'. Let's try to form a new word that includes these letters and avoids the incorrect ones from previous guesses.
</think>
<guess> STAND </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(x) R(x) A(-) N(x) E(-)
FLUTE -> Feedback: F(x) L(-) U(x) T(x) E(-)
SWAMP -> Feedback: S(✓) W(x) A(-) M(x) P(x)
STAND -> Feedback: S(✓) T(x) A(-) N(x) D(x)


In [53]:
next_turn(past_guesses, secret_word, adapter_id)

 
From the feedback:
- Guess 1 (CRANE): No letters are correct.
- Guess 2 (FLUTE): Only 'T' is correct.
- Guess 3 (SWAMP): 'S' is correct.
- Guess 4 (STAND): 'S' is correct.

The word must contain 'S' and no other letters from these guesses can be used again. The remaining letters are A, C, E, F, L, M, N, P, R, W, and T. 

Since 'S' is already confirmed, we need to avoid using any of the letters from the previous guesses that were incorrect or not in the right place. This leaves us with A, C, E, F, L, M, N, P, R, W, and T.

Let's try a new word that includes 'S' and avoids the letters from the previous guesses.
</think>
<guess> SMILE </guess>


----------------------------------------------------------------------------------------------------

CRANE -> Feedback: C(x) R(x) A(-) N(x) E(-)
FLUTE -> Feedback: F(x) L(-) U(x) T(x) E(-)
SWAMP -> Feedback: S(✓) W(x) A(-) M(x) P(x)
STAND -> Feedback: S(✓) T(x) A(-) N(x) D(x)
SMILE -> Feedback: S(✓) M(x) I(x) L(-) E(-)


In [54]:
next_turn(past_guesses, secret_word, adapter_id)

 
- From guess 1 (CRANE), we know that none of the letters C, R, A, N, E are in the correct positions.
- From guess 2 (FLUTE), we know that none of the letters F, L, U are in the correct positions.
- From guess 3 (SWAMP), we know that S is in the first position, and W, A, M, P are not in the correct positions.
- From guess 4 (STAND), we know that S is in the first position, and T, A, N, D are not in the correct positions.
- From guess 5 (SMILE), we know that S is in the first position, and M, I, L, E are not in the correct positions.

Combining all this information, we can deduce that the first letter must be S. The second letter cannot be T, A, N, D, or E, so it could be any other letter. The third letter cannot be A, M, P, I, L, E, so it could be any other letter. The fourth letter cannot be T, A, N, D, or E, so it could be any other letter. The fifth letter cannot be M, I, L, E, so it could be any other letter.

A good guess would be a word starting with S, containing no repeated le