In [None]:
import numpy as np

import torch

import warnings
# Silences all warnings from this point on. I think this is needed because the Predibase API
# sometimes emits version-related warnings (it does so in the course videos too).
warnings.filterwarnings('ignore')



# Lesson four: Reward functions for Wordle

In [None]:
# Identifier of the base model (Qwen 2.5 7B Instruct).
# NOTE(review): not referenced in the cells visible here - presumably used further down; confirm.
model_id = "Qwen/Qwen2.5-7B-Instruct"

In [None]:
def create_deployment(name: str):
    """Create a private Predibase deployment called ``name`` (best effort).

    Adapted from the course's utils.py; needed since I'm running this locally
    and can't use the deployment the course created, I think.

    Args:
        name: Name to give the new deployment.

    Returns:
        None. A failed creation is reported on stdout instead of being raised,
        so re-running the cell when the deployment already exists is harmless.

    Raises:
        KeyError: If the PREDIBASE_API_TOKEN environment variable is not set
            (the lookup happens before the guarded call).
    """
    pb = Predibase(api_token=os.environ["PREDIBASE_API_TOKEN"])
    try:
        pb.deployments.create(
            name=name,
            config=DeploymentConfig(
                # Cheapest option ($2.60/hr). I added this to the code from utils.py,
                # thinking I'd avoid it 'helpfully' picking a more expensive option.
                accelerator='a10_24gb_100',
                base_model="qwen2-5-7b-instruct",
                min_replicas=0,
                max_replicas=1,
                cooldown_time=1200,
            )
        )
    except Exception as exc:
        # "Already exists" is only one of the ways creation can fail (bad token,
        # network error, quota, invalid config, ...). Show the real error so the
        # other causes are not silently mislabelled.
        print(f"Deployment {name} was not created (it may already exist): {exc!r}")

# Attempt to create the deployment when this cell runs.
# NOTE(review): `private_deployment_name` is not defined in any cell visible here -
# confirm it is assigned before this cell executes on a fresh kernel.
create_deployment(private_deployment_name)

In [None]:
# Predibase handle for the notebook, authenticated with the PREDIBASE_API_TOKEN environment variable.
pb = Predibase(api_token=os.environ['PREDIBASE_API_TOKEN'])
# Client object for the deployment named by `private_deployment_name`.
client = pb.deployments.client(private_deployment_name)

The cells above create a custom deployment of the Qwen 2.5 7B Instruct model through Predibase's API. We'll use it for inference here and then fine-tune it, also through Predibase's API, below.

First, I'll try a simple 0/1 reward function.

In [None]:
def wordle_reward(guess: str, secret_word: str) -> int:
    """Binary reward for a single Wordle guess.

    Args:
        guess: The guessed word.
        secret_word: The word to be found.

    Returns:
        1 if the guess equals the secret word when both are upper-cased, otherwise 0.
    """
    is_correct = guess.upper() == secret_word.upper()
    return int(is_correct)

And then I'll define a function from utils.py that gets a new guess from the LLM, by generating a prompt from the template/previous guesses combo.

In [None]:
# From the course's utils.py.

# OpenAI client that generate() (defined below) sends its chat-completion requests to.
# The endpoint URL and API key come from the PREDIBASE_MODEL_QWEN_URL and
# PREDIBASE_API_TOKEN environment variables.
# Author's note: in Lesson 4 this is defined identically to `client` above - I think
# the definition might change in later lessons.
best_of_client = OpenAI(
    base_url=os.environ["PREDIBASE_MODEL_QWEN_URL"],
    api_key=os.environ["PREDIBASE_API_TOKEN"],
)

def generate(
    messages: List[dict],
    adapter_id: str = "",
    num_guesses: int = 1,
    temperature: float = 0.7,
    max_tokens: int = 1024,
) -> List[str]:
    """Request chat completions from ``best_of_client`` and return their text.

    Args:
        messages: Chat messages forwarded unchanged as ``messages``.
        adapter_id: Forwarded as the ``model`` argument (empty string by default).
        num_guesses: How many completion texts to return.
        temperature: Forwarded as ``temperature``; also selects the request
            strategy described under Returns.
        max_tokens: Forwarded as ``max_tokens``.

    Returns:
        The ``message.content`` of each completion. With a positive temperature
        a single request asks for ``num_guesses`` choices; otherwise
        ``num_guesses`` separate single-choice requests are issued
        (presumably because the backend will not return several choices when
        not sampling - TODO confirm).
    """
    def _request(n_choices: int):
        # Every call shares the same arguments except the number of choices.
        return best_of_client.chat.completions.create(
            model=adapter_id,
            messages=messages,
            n=n_choices,
            temperature=temperature,
            max_tokens=max_tokens,
        )

    if not (temperature > 0.0):
        # Non-sampling case: one request per guess, first choice of each.
        texts = []
        for _ in range(num_guesses):
            texts.append(_request(1).choices[0].message.content)
        return texts

    response = _request(num_guesses)
    return [choice.message.content for choice in response.choices]