In [1]:
import os
from dotenv import load_dotenv
from typing import TypedDict
import csv
import random
import art
from art.local import LocalBackend
import weave
from pydantic import BaseModel
import openai
import re
import Levenshtein
import asyncio

  from .autonotebook import tqdm as notebook_tqdm


INFO 07-29 17:10:21 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 07-29 17:10:21 [__init__.py:239] Automatically detected platform cuda.


2025-07-29 17:10:22,276	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
# System message sent as the first turn of every rollout. It spells out the
# Löffelsprache rewrite rule (each vowel / vowel pair x becomes "x" + "lew" + "x")
# and requires the answer to be wrapped in <spoon>...</spoon> tags, which is the
# exact format the reward code in `rollout` searches for.
SYSTEM_PROMPT = """
You are an expert in transforming standard German into German Spoon Language or Löffelsprache.
Given a German sentence, you will transform it into German Spoon Language.
Follow these strict rules:

Let x be any of the following vowels or vowel pairs:
{ei, ie, au, eu, äu, a, e, i, o, u}
For each occurrence of x (here a variable), replace it with xlewx.
Example: a → alewa, ei → eilewei
Always match vowel pairs first, before checking for single vowels.
After a replacement, continue from the end of the replaced text — do not reprocess inside the result.
Preserve casing:
If the original x begins with an uppercase letter, only the first letter of the xlewx replacement is uppercase.
Example: A → Alewa, Ei → Eilewei, Au → Aulewau
Example words:
Hallo -> Halewallolewo
Eier -> Eileweielewer
Do not apply transformations recursively.
Return only the converted sentence, wrapped in <spoon> ... </spoon> tags.
Do not explain your transformation.
"""

# Load variables from a local .env file (if any) before reading the
# Weights & Biases key; the constant is None when the variable is unset.
load_dotenv()
WANDB_API_KEY = os.environ.get("WANDB_API_KEY")

class SentencePair(BaseModel):
    """One dataset example: a German sentence and its spoon-language rendering."""

    # Standard German input; sent to the model as the user message in `rollout`.
    german: str
    # Reference Löffelsprache text the model output is compared against.
    spoon: str
    
def load_data(file_path: str) -> list[SentencePair]:
    """Read German / spoon-language sentence pairs from a CSV file.

    The first row is treated as a header and skipped. Column 0 is the German
    sentence and column 1 its spoon-language form; further columns are ignored.

    Args:
        file_path: Path to the CSV file.

    Returns:
        One ``SentencePair`` per non-blank data row, in file order. An empty
        file (or a file with only a header) yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank row has fewer than two columns.
    """
    # Explicit UTF-8 so umlauts/ß decode the same on every platform;
    # newline="" lets the csv module handle embedded line breaks itself.
    with open(file_path, "r", encoding="utf-8", newline="") as file:
        reader = csv.reader(file)
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(reader, None)
        return [
            SentencePair(german=row[0], spoon=row[1])
            for row in reader
            if row  # csv.reader yields [] for completely blank lines
        ]

def draw_sample(data: list[SentencePair]) -> SentencePair:
    """Pick a single sentence pair at random from ``data``.

    Uses the module-level ``random`` generator, so the result is affected by
    ``random.seed``.
    """
    picked = random.choice(data)
    return picked


In [3]:
def _score_completion(content: str, target: str) -> tuple[float, float]:
    """Score one model completion against the reference spoon-language text.

    Args:
        content: Raw text returned by the model.
        target: Reference spoon-language sentence.

    Returns:
        ``(spoon_reward, format_reward)`` where ``spoon_reward`` is
        ``1 - levenshtein / max_len`` (1.0 for an exact match) and
        ``format_reward`` is 1.0 when the answer was wrapped in
        ``<spoon>...</spoon>`` tags, else 0.0.
    """
    tagged = re.search(r"<spoon>(.*?)</spoon>", content, re.DOTALL)
    if tagged:
        # Only the text inside the tags is compared with the reference.
        prediction = tagged.group(1).strip()
        format_reward = 1.0
    else:
        # No tags: fall back to scoring the raw completion, without format credit.
        prediction = content
        format_reward = 0.0

    dist = Levenshtein.distance(prediction, target)
    # The trailing 1 guards against division by zero when both strings are empty.
    max_len = max(len(prediction), len(target), 1)
    spoon_reward = 1.0 - dist / max_len
    return spoon_reward, format_reward


@weave.op
@art.retry(exceptions=(openai.LengthFinishReasonError,))
async def rollout(model: art.Model, pair: SentencePair) -> art.Trajectory:
    """Run one episode: ask the model to convert ``pair.german`` and score it.

    The conversation is the system prompt followed by the German sentence as
    the user message. The reward is
    ``0.8 * similarity to pair.spoon + 0.2 * format bonus``.

    Args:
        model: Model whose OpenAI-compatible client is used for inference.
        pair: The German sentence and its reference spoon-language form.

    Returns:
        The trajectory (prompt messages plus the sampled choice) with
        ``reward`` set.

    Raises:
        openai.LengthFinishReasonError: Re-raised untouched so the
            ``art.retry`` decorator can handle it.
        Exception: Any other error from the completion call, after the
            trajectory has been stored in the global ``failing_trajectory``.
    """
    # Debug handle: the trajectory of the most recent failed request.
    global failing_trajectory

    trajectory = art.Trajectory(
        messages_and_choices=[
            {
                "role": "system",
                "content": SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": pair.german,
            },
        ],
        metadata={
            "notebook-id": "SpoonRL",
        },
        reward=0,
    )
    messages = trajectory.messages()
    try:
        client = model.openai_client()
        chat_completion = await client.chat.completions.create(
            model=model.get_model_name(),
            messages=messages,
            max_tokens=2048,
        )
    except openai.LengthFinishReasonError:
        # Bare raise keeps the original traceback for the retry decorator.
        raise
    except Exception as e:
        print("Caught exception generating chat comopletion")
        print(e)
        failing_trajectory = trajectory
        raise

    choice = chat_completion.choices[0]
    # Record the sampled choice so the trajectory contains the model's response
    # and not only the prompt.
    trajectory.messages_and_choices.append(choice)
    # `content` can be None (e.g. an empty completion); score it as empty text.
    content = choice.message.content or ""

    spoon_reward, format_reward = _score_completion(content, pair.spoon)

    # Previously a correctly tagged answer returned the extracted string here
    # instead of the trajectory, and the format bonus was never applied.
    trajectory.reward = spoon_reward * 0.8 + format_reward * 0.2
    return trajectory

In [4]:
# Load all German / spoon-language sentence pairs; the path is relative to the
# notebook's working directory.
data = load_data("../data/german_spoon.csv")

In [5]:
# Fix the seed of Python's `random` module so `draw_sample` picks are repeatable.
random.seed(42)
# Backend the trainable model is registered with in the next cell.
backend = LocalBackend()

In [6]:
# Trainable model that starts from the SFT checkpoint named in `base_model`.
model = art.TrainableModel(
    name="001-german-spoon",
    project="SpoonRL",
    base_model="wambosec/Qwen2.5-7B-Instruct-spoon-language-SFT",
)
# Top-level await works only in a notebook / async REPL. Per the log output
# below, registering loads the base model with Unsloth and vLLM (takes minutes).
await model.register(backend)


Please restructure your imports with 'import unsloth' at the top of your file.
  import unsloth  # type: ignore


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!




INFO 07-29 17:11:09 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 07-29 17:11:09 [__init__.py:239] Automatically detected platform cuda.
==((====))==  Unsloth 2025.5.1: Fast Qwen2 patching. Transformers: 4.51.3. vLLM: 0.8.5.post1.
   \\   /|    NVIDIA A100 80GB PCIe. Num GPUs = 1. Max memory: 79.256 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading wambosec/Qwen2.5-7B-Instruct-spoon-language-SFT with actual GPU utilization = 78.55%
Unsloth: Your GPU has CUDA compute capability 8.0 with VRAM = 79.26 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 32768. Num Sequences = 368.
Unsloth: vLLM's KV Cache can use up to 48.03 GB. Also swap space = 6 GB.
INF

Loading safetensors checkpoint shards:   0% Completed | 0/7 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  14% Completed | 1/7 [00:01<00:09,  1.54s/it]
Loading safetensors checkpoint shards:  29% Completed | 2/7 [00:02<00:07,  1.47s/it]
Loading safetensors checkpoint shards:  43% Completed | 3/7 [00:04<00:06,  1.56s/it]
Loading safetensors checkpoint shards:  57% Completed | 4/7 [00:06<00:04,  1.60s/it]
Loading safetensors checkpoint shards:  71% Completed | 5/7 [00:07<00:02,  1.45s/it]
Loading safetensors checkpoint shards:  86% Completed | 6/7 [00:08<00:01,  1.32s/it]
Loading safetensors checkpoint shards: 100% Completed | 7/7 [00:09<00:00,  1.06s/it]
Loading safetensors checkpoint shards: 100% Completed | 7/7 [00:09<00:00,  1.29s/it]



INFO 07-29 17:11:36 [punica_selector.py:18] Using PunicaWrapperGPU.
INFO 07-29 17:11:36 [model_runner.py:1140] Model loading took 5.2451 GiB and 9.983583 seconds
INFO 07-29 17:12:03 [worker.py:287] Memory profiling takes 26.02 seconds
INFO 07-29 17:12:03 [worker.py:287] the current vLLM instance can use total_gpu_memory (79.26GiB) x gpu_memory_utilization (0.79) = 62.25GiB
INFO 07-29 17:12:03 [worker.py:287] model weights take 5.25GiB; non_torch_memory takes 0.09GiB; PyTorch activation peak memory takes 8.29GiB; the rest of the memory reserved for KV Cache is 48.62GiB.
INFO 07-29 17:12:03 [executor_base.py:112] # cuda blocks: 56904, # CPU blocks: 7021
INFO 07-29 17:12:03 [executor_base.py:117] Maximum concurrency for 32768 tokens per request: 27.79x
INFO 07-29 17:12:09 [model_runner.py:1450] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.

Capturing CUDA graph shapes: 100%|██████████| 49/49 [01:17<00:00,  1.59s/it]


INFO 07-29 17:13:27 [model_runner.py:1592] Graph capturing finished in 78 secs, took 1.58 GiB
INFO 07-29 17:13:27 [llm_engine.py:437] init engine (profile, create kv cache, warmup model) took 110.60 seconds
Unsloth: Just some info: will skip parsing ['post_feedforward_layernorm', 'k_norm', 'pre_feedforward_layernorm', 'q_norm']
Unsloth: Just some info: will skip parsing ['post_feedforward_layernorm', 'k_norm', 'pre_feedforward_layernorm', 'q_norm']


Unsloth 2025.5.1 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.
