In [2]:
import tinker
from tinker import types
from transformers import AutoTokenizer
from datasets import load_dataset
from tinker_cookbook import renderers
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
dataset = load_dataset(
    "google-research-datasets/mbpp",
    split="train"
)

In [4]:
service_client = tinker.ServiceClient()
print("Available models:")
for item in service_client.get_server_capabilities().supported_models:
    print("- " + item.model_name)

Available models:
- deepseek-ai/DeepSeek-V3.1
- deepseek-ai/DeepSeek-V3.1-Base
- moonshotai/Kimi-K2-Thinking
- meta-llama/Llama-3.1-70B
- meta-llama/Llama-3.1-8B
- meta-llama/Llama-3.1-8B-Instruct
- meta-llama/Llama-3.2-1B
- meta-llama/Llama-3.2-3B
- meta-llama/Llama-3.3-70B-Instruct
- Qwen/Qwen3-235B-A22B-Instruct-2507
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-Base
- Qwen/Qwen3-30B-A3B-Instruct-2507
- Qwen/Qwen3-32B
- Qwen/Qwen3-4B-Instruct-2507
- Qwen/Qwen3-8B
- Qwen/Qwen3-8B-Base
- Qwen/Qwen3-VL-235B-A22B-Instruct
- Qwen/Qwen3-VL-30B-A3B-Instruct
- openai/gpt-oss-120b
- openai/gpt-oss-20b


In [5]:
training_client = service_client.create_lora_training_client(
    base_model="meta-llama/Llama-3.2-1B",
    rank=32
)

In [6]:
baseline_sampler = service_client.create_sampling_client(
    base_model="meta-llama/Llama-3.2-1B"
)

In [7]:
tokenizer = training_client.get_tokenizer()

In [8]:
renderer = renderers.get_renderer("llama3", tokenizer)

In [9]:
def build_messages(example):
    tests = "\n".join(example["test_list"])

    return [
        {
            "role": "system",
            "content": (
                "You are a senior Python engineer. "
                "Write correct, test-passing code."
            )
        },
        {
            "role": "user",
            "content": f"""
                Problem:
                {example['text']}

                Tests:
                {tests}
            """
        },
        {
            "role": "assistant",
            "content": f"""
                ```python
                {example['code']}
                ```
            """
        }
]

In [10]:
def process_example(example):
    messages = build_messages(example)

    model_input, weights = renderer.build_supervised_example(messages)
    
    tokens = model_input.tolist()
    weights = weights.tolist()

    return types.Datum(
        model_input=types.ModelInput.from_ints(tokens[:-1]),
        loss_fn_inputs={
            "target_tokens": tokens[1:],
            "weights": weights[1:]
        }
    )

In [11]:
training_data = [
    process_example(ex)
    for ex in dataset.select(range(200))
]

In [12]:
from tinker.types import AdamParams
import numpy as np

for step in range(50):
    fwd = training_client.forward_backward(
        training_data,
        loss_fn="cross_entropy"
    )
    opt = training_client.optim_step(
        AdamParams(learning_rate=1e-4)
    )

    fwd_res = fwd.result()
    opt.result()

    logprobs = np.concatenate(
        [o["logprobs"].tolist() for o in fwd_res.loss_fn_outputs]
    )
    weights = np.concatenate(
        [d.loss_fn_inputs["weights"].tolist() for d in training_data]
    )

    loss = -np.dot(logprobs, weights) / weights.sum()
    print(f"Step {step} | Loss {loss:.4f}")

Step 0 | Loss 1.5811
Step 1 | Loss 1.5173
Step 2 | Loss 1.3705
Step 3 | Loss 1.2140
Step 4 | Loss 1.0810
Step 5 | Loss 0.9881
Step 6 | Loss 0.9261
Step 7 | Loss 0.8793
Step 8 | Loss 0.8450
Step 9 | Loss 0.8150
Step 10 | Loss 0.7824
Step 11 | Loss 0.7545
Step 12 | Loss 0.7345
Step 13 | Loss 0.7151
Step 14 | Loss 0.6941
Step 15 | Loss 0.6735
Step 16 | Loss 0.6548
Step 17 | Loss 0.6383
Step 18 | Loss 0.6223
Step 19 | Loss 0.6039
Step 20 | Loss 0.5847
Step 21 | Loss 0.5656
Step 22 | Loss 0.5473
Step 23 | Loss 0.5294
Step 24 | Loss 0.5108
Step 25 | Loss 0.4916
Step 26 | Loss 0.4707
Step 27 | Loss 0.4496
Step 28 | Loss 0.4292
Step 29 | Loss 0.4089
Step 30 | Loss 0.3885
Step 31 | Loss 0.3673
Step 32 | Loss 0.3455
Step 33 | Loss 0.3230
Step 34 | Loss 0.3001
Step 35 | Loss 0.2789
Step 36 | Loss 0.2560
Step 37 | Loss 0.2341
Step 38 | Loss 0.2134
Step 39 | Loss 0.1927
Step 40 | Loss 0.1729
Step 41 | Loss 0.1535
Step 42 | Loss 0.1354
Step 43 | Loss 0.1182
Step 44 | Loss 0.1023
Step 45 | Loss 0.088

In [13]:
sampler = training_client.save_weights_and_get_sampling_client("mbpp-sft")


In [14]:
example = dataset[250]

prompt = renderer.build_generation_prompt([
    {
        "role": "system",
        "content": "You are a senior Python engineer."
    },
    {
        "role": "user",
        "content": f"""
            Problem:
            {example['text']}

            Tests:
            {chr(10).join(example['test_list'])}
        """
    }
])

stop_sequences = renderer.get_stop_sequences()

result = sampler.sample(
    prompt=prompt,
    num_samples=1,
    sampling_params=types.SamplingParams(
        max_tokens=400,
        temperature=0.2,
        stop=stop_sequences
    )
).result()

tokens = result.sequences[0].tokens
response, _ = renderer.parse_response(tokens)

print(response["content"])


            ```python
            def Sum_of_Inverse_Divisors(N, M):
    sum = 0
    for i in range(1,N + 1):
        j = i 
        inverse = 0
        while (j % M == 0):
            j = int(j / M)
            inverse = inverse + 1
        sum = sum + (inverse) 
    return sum
            ```
        


In [15]:
# Comparison between trained model and baseline
import numpy as np


test_example = dataset[250]

# Build the prompt
prompt_messages = [
    {
        "role": "system",
        "content": "You are a senior Python engineer. Write correct, test-passing code."
    },
    {
        "role": "user",
        "content": f"""
            Problem:
            {test_example['text']}

            Tests:
            {chr(10).join(test_example['test_list'])}
        """
    }
]

prompt = renderer.build_generation_prompt(prompt_messages)
stop_sequences = renderer.get_stop_sequences()

sampling_params = types.SamplingParams(
    max_tokens=400,
    temperature=0.2,
    stop=stop_sequences
)

print("=" * 80)
print("PROBLEM:")
print(test_example['text'])
print("\nTESTS:")
for test in test_example['test_list']:
    print(f"  {test}")
print("=" * 80)

# Sample from baseline model
print("\nðŸ”µ BASELINE MODEL OUTPUT:")
print("-" * 80)
baseline_result = baseline_sampler.sample(
    prompt=prompt,
    num_samples=1,
    sampling_params=sampling_params
).result()

baseline_tokens = baseline_result.sequences[0].tokens
baseline_response, _ = renderer.parse_response(baseline_tokens)
print(baseline_response["content"])

# Sample from trained model
print("\nðŸŸ¢ TRAINED MODEL OUTPUT:")
print("-" * 80)
trained_result = sampler.sample(
    prompt=prompt,
    num_samples=1,
    sampling_params=sampling_params
).result()

trained_tokens = trained_result.sequences[0].tokens
trained_response, _ = renderer.parse_response(trained_tokens)
print(trained_response["content"])

# Show expected solution
print("\nâœ… EXPECTED SOLUTION:")
print("-" * 80)
print(f"```python\n{test_example['code']}\n```")
print("=" * 80)


PROBLEM:
Write a python function to find sum of inverse of divisors.

TESTS:
  assert Sum_of_Inverse_Divisors(6,12) == 2
  assert Sum_of_Inverse_Divisors(9,13) == 1.44
  assert Sum_of_Inverse_Divisors(1,4) == 4

ðŸ”µ BASELINE MODEL OUTPUT:
--------------------------------------------------------------------------------
            Solution:
            def Sum_of_Inverse_Divisors(divisor, divisor_count):
                if divisor_count == 0:
                    return 0
                else:
                    return (Sum_of_Inverse_Divisors(divisor, divisor_count - 1) + divisor_count)
            print(Sum_of_Inverse_Divisors(6,12))
            print(Sum_of_Inverse_Divisors(9,13))
            print(Sum_of_Inverse_Divisors(1,4))
            print(Sum_of_Inverse_Divisors(2,3))
            print(Sum_of_Inverse_Divisors(3,2))
            print(Sum_of_Inverse_Divisors(4,1))
            print(Sum_of_Inverse_Divisors(5,1))
            print(Sum_of_Inverse_Divisors(6,1))
            print(S