In [1]:
from datetime import datetime

# Base model for all experiments
BASE_MODEL = "Qwen/Qwen3-4B-Instruct-2507"  # change for experiments

# Which dataset slice to use
DATASET = "sample"  # "sample" or "full"

# Simple preset configs so you can quickly compare runs in W&B
EXPERIMENT_PRESETS = {
    "baseline": {
        # This matches the original single-config settings
        "max_train_rows": 300,
        "lora_rank": 16,
        "lora_alpha": 32,
        "learning_rate": 1e-4,
        "num_epochs": 3,
        "batch_size": 8,
    },
    "light": {
        # quicker / cheaper run
        "max_train_rows": 150,
        "lora_rank": 8,
        "lora_alpha": 16,
        "learning_rate": 2e-4,
        "num_epochs": 2,
        "batch_size": 8,
    },
    "heavy": {
        # slower / more thorough run
        "max_train_rows": 500,
        "lora_rank": 32,
        "lora_alpha": 64,
        "learning_rate": 5e-5,
        "num_epochs": 4,
        "batch_size": 8,
    },
}

# Pick which preset to run
EXPERIMENT_NAME = "light"  # change to "light" or "heavy" for other runs

cfg = EXPERIMENT_PRESETS[EXPERIMENT_NAME]

MAX_TRAIN_ROWS = cfg["max_train_rows"]
LORA_RANK      = cfg["lora_rank"]
LORA_ALPHA     = cfg["lora_alpha"]
LEARNING_RATE  = cfg["learning_rate"]
NUM_EPOCHS     = cfg["num_epochs"]
BATCH_SIZE     = cfg["batch_size"]

# Tag used in logging / W&B run name
RUN_TAG = f"piglatin-{EXPERIMENT_NAME}"

# Short model name + timestamp are used for W&B run names
MODEL_SHORT_NAME = BASE_MODEL.split("/")[-1]
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

print("Experiment:", EXPERIMENT_NAME)
print("Max train rows:", MAX_TRAIN_ROWS)
print("LoRA rank:", LORA_RANK)
print("Learning rate:", LEARNING_RATE)
print("Num epochs:", NUM_EPOCHS)

Experiment: baseline
Max train rows: 300
LoRA rank: 16
Learning rate: 0.0001
Num epochs: 3


In [2]:
# --- Weights & Biases (W&B) experiment config ---

import wandb

# Turn W&B tracking on/off for this run
USE_WANDB = True  # set to False if you want to disable logging temporarily

# These identify where the run shows up in your W&B account
WANDB_PROJECT = "tinker-hello-world"  # you can rename later if you want
WANDB_ENTITY = None  # or "your-wandb-username" if you're using a team/org

# How W&B should behave
WANDB_MODE = "online"  # change to "offline" if you don't have internet

# Build a readable, unique run name like:
# piglatin-baseline-Qwen3-4B-Instruct-2507-20251201_153000
# (MODEL_SHORT_NAME and TIMESTAMP come from the first config cell)
RUN_NAME = f"{RUN_TAG}-{MODEL_SHORT_NAME}-{TIMESTAMP}"

print("Experiment:", EXPERIMENT_NAME)
print("W&B run name:", RUN_NAME)

Experiment: baseline
W&B run name: piglatin-baseline-Qwen3-4B-Instruct-2507-20251201_212320


In [3]:
from pathlib import Path

def find_repo_root(start=None):
    p = Path(start or Path.cwd()).resolve()
    for parent in [p, *p.parents]:
        if (parent / "pyproject.toml").exists() or (parent / ".git").exists():
            return parent
    return p

REPO_ROOT = find_repo_root()

data_rel = "data/piglatin/sample.jsonl" if DATASET == "sample" else "data/piglatin/full/piglatin.jsonl"
DATA_PATH = REPO_ROOT / data_rel

assert DATA_PATH.exists(), f"Missing dataset at {DATA_PATH}. Run: python scripts/build_piglatin_dataset.py"
print("CWD:", Path.cwd())
print("DATA_PATH:", DATA_PATH)

CWD: C:\Users\user\Desktop\tinker-hello-world\notebooks
DATA_PATH: C:\Users\user\Desktop\tinker-hello-world\data\piglatin\sample.jsonl



# Notebook 03 — Hello, Tinker LoRA

**Objective:** Fine-tune a base model using Tinker's managed LoRA training API.

**Prerequisites:** Completed `00_check_env.ipynb`, active `TINKER_API_KEY`, and access to the Tinker service.

**Estimated run time:** ~5 minutes to configure plus however long your training job runs server-side.

### Before you start
1. Confirm `python test_env.py` prints `True` for the API key.
2. Run the environment check notebook whenever you set up a new machine or virtual environment.
3. In this notebook you'll load your credentials, inspect available models, pick a base model, and initialize a LoRA training client.


In [4]:
# Load credentials and confirm package versions before proceeding.
from dotenv import load_dotenv
import os
import tinker
import transformers

load_dotenv()

print("TINKER_API_KEY loaded:", bool(os.getenv("TINKER_API_KEY")))
print("Tinker SDK version:", tinker.__version__)
print("Transformers version:", transformers.__version__)

TINKER_API_KEY loaded: True
Tinker SDK version: 0.3.0
Transformers version: 4.57.1


In [5]:
# Inspect which base models are available to your account.
# Tinker reads TINKER_API_KEY from the environment
service_client = tinker.ServiceClient()

caps = service_client.get_server_capabilities()
print("Supported models:")
for m in caps.supported_models:
    print("-", m.model_name)

Supported models:
- deepseek-ai/DeepSeek-V3.1
- deepseek-ai/DeepSeek-V3.1-Base
- meta-llama/Llama-3.1-70B
- meta-llama/Llama-3.1-8B
- meta-llama/Llama-3.1-8B-Instruct
- meta-llama/Llama-3.2-1B
- meta-llama/Llama-3.2-3B
- meta-llama/Llama-3.3-70B-Instruct
- Qwen/Qwen3-235B-A22B-Instruct-2507
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-Base
- Qwen/Qwen3-30B-A3B-Instruct-2507
- Qwen/Qwen3-32B
- Qwen/Qwen3-4B-Instruct-2507
- Qwen/Qwen3-8B
- Qwen/Qwen3-8B-Base
- openai/gpt-oss-120b
- openai/gpt-oss-20b


In [6]:
from tinker_hw.utils.io import read_jsonl

examples = read_jsonl(DATA_PATH)[:MAX_TRAIN_ROWS]

# sanity checks (fail fast)
assert len(examples) > 0, "No examples loaded — check DATA_PATH"
assert all(("input" in e and "output" in e) for e in examples), "Expected keys: input/output"

print("Loaded examples:", len(examples))
print("Example input:\n", examples[0]["input"][:200], "...")
print("Example output:\n", examples[0]["output"][:200], "...")

Loaded examples: 300
Example input:
 Translate this to Pig Latin:
Having assembled the senate, he reminded them of the injustice of his ...
Example output:
 aving-Hay assembled-ay e-thay enate-say, e-hay eminded-ray em-thay of-ay e-thay injustice-ay of-ay is-hay ...


In [7]:
# Default to a lightweight 3B model for fast, low-cost demos
print("Using base model:", BASE_MODEL)

Using base model: Qwen/Qwen3-4B-Instruct-2507


In [8]:
# --- Initialize W&B run (optional) ---

wandb_run = None

if USE_WANDB:
    wandb_run = wandb.init(
        project=WANDB_PROJECT,
        entity=WANDB_ENTITY,
        name=RUN_NAME,      # piglatin-baseline / -light / -heavy + model + timestamp
        mode=WANDB_MODE,
        config={
            # High-level experiment info
            "experiment_name": EXPERIMENT_NAME,
            "base_model": BASE_MODEL,
            "dataset": DATASET,
            "data_path": DATA_PATH,
            # Training data / size
            "max_train_rows": MAX_TRAIN_ROWS,
            # LoRA hyperparameters
            "lora_rank": LORA_RANK,
            "lora_alpha": LORA_ALPHA,
            # Optimization hyperparameters
            "learning_rate": LEARNING_RATE,
            "num_epochs": NUM_EPOCHS,
            "batch_size": BATCH_SIZE,
        },
    )

    # Log one example pair so you can always see what the dataset looked like
    if examples:
        wandb.log({
            "example_input": examples[0]["input"],
            "example_output": examples[0]["output"],
        })

    print("W&B run initialized.")
else:
    print("W&B disabled for this run.")

wandb: Currently logged in as: nick99 (itprodirect) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


W&B run initialized.


In [9]:
# Spin up a managed LoRA training client. Adjust rank/params as you explore.

training_client = service_client.create_lora_training_client(
    base_model=BASE_MODEL,
    rank=LORA_RANK,
)

tokenizer = training_client.get_tokenizer()
print("Tokenizer vocab size:", tokenizer.vocab_size)

Tokenizer vocab size: 151643


In [10]:
import random
import numpy as np
from tinker import types

# 1) Convert raw examples -> Tinker Datum objects (once per run)

def process_example(example: dict, tokenizer) -> types.Datum:
    # Format input/output the same way as the Tinker docs
    prompt = f"English: {example['input']}\nPig Latin:"
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True)
    prompt_weights = [0] * len(prompt_tokens)

    # Space before output, end with double newline
    completion_tokens = tokenizer.encode(
        f" {example['output']}\n\n", add_special_tokens=False
    )
    completion_weights = [1] * len(completion_tokens)

    tokens = prompt_tokens + completion_tokens
    weights = prompt_weights + completion_weights

    # Shift for next-token prediction
    input_tokens = tokens[:-1]
    target_tokens = tokens[1:]
    weights = weights[1:]

    return types.Datum(
        model_input=types.ModelInput.from_ints(tokens=input_tokens),
        loss_fn_inputs=dict(
            weights=weights,
            target_tokens=target_tokens,
        ),
    )

processed_examples = [process_example(ex, tokenizer) for ex in examples]
print(f"Prepared {len(processed_examples)} training examples.")

# 2) Training loop: multiple epochs over the small dataset

num_examples = len(processed_examples)
steps_per_epoch = int(np.ceil(num_examples / BATCH_SIZE))
total_steps = NUM_EPOCHS * steps_per_epoch

print(
    f"Training for {NUM_EPOCHS} epochs "
    f"({steps_per_epoch} steps/epoch, ~{total_steps} steps total)."
)

global_step = 0

for epoch in range(NUM_EPOCHS):
    # Shuffle each epoch to avoid learning order-specific quirks
    random.shuffle(processed_examples)

    for start in range(0, num_examples, BATCH_SIZE):
        batch = processed_examples[start:start + BATCH_SIZE]

        # Queue forward+backward and optimizer step on Tinker
        fwdbwd_future = training_client.forward_backward(
            batch,
            loss_fn="cross_entropy",
        )
        optim_future = training_client.optim_step(
            types.AdamParams(learning_rate=LEARNING_RATE)
        )

        # Wait for results so we can compute a scalar loss
        fwdbwd_result = fwdbwd_future.result()
        optim_future.result()

        # 3) Compute weighted token-level loss, same as Tinker docs
        logprobs = np.concatenate(
            [out["logprobs"].tolist() for out in fwdbwd_result.loss_fn_outputs]
        )
        weights = np.concatenate(
            [ex.loss_fn_inputs["weights"].tolist() for ex in batch]
        )

        loss = -float(np.dot(logprobs, weights) / weights.sum())
        global_step += 1

        print(
            f"Epoch {epoch + 1}/{NUM_EPOCHS} "
            f"Step {global_step}/{total_steps} "
            f"Loss {loss:.4f}"
        )

        # 4) Log to W&B, if enabled
        if USE_WANDB and wandb_run is not None:
            wandb.log(
                {
                    "train/loss": loss,
                    "train/epoch": epoch + 1,
                    "train/step": global_step,
                    "train/batch_size": len(batch),
                    "train/num_examples": num_examples,
                },
                step=global_step,
            )

print("Training loop finished.")

# If W&B is enabled, mark the run as finished
if USE_WANDB and wandb_run is not None:
    try:
        wandb.finish()
        print("W&B run finished.")
    except Exception as e:
        print("W&B finish() failed:", e)

Prepared 300 training examples.
Training for 3 epochs (38 steps/epoch, ~114 steps total).
Epoch 1/3 Step 1/114 Loss 2.6197
Epoch 1/3 Step 2/114 Loss 2.1540
Epoch 1/3 Step 3/114 Loss 1.9037
Epoch 1/3 Step 4/114 Loss 1.7491
Epoch 1/3 Step 5/114 Loss 1.2676
Epoch 1/3 Step 6/114 Loss 1.2247
Epoch 1/3 Step 7/114 Loss 1.3471
Epoch 1/3 Step 8/114 Loss 0.9969
Epoch 1/3 Step 9/114 Loss 0.9919
Epoch 1/3 Step 10/114 Loss 0.8339
Epoch 1/3 Step 11/114 Loss 0.6095
Epoch 1/3 Step 12/114 Loss 0.4277
Epoch 1/3 Step 13/114 Loss 0.4885
Epoch 1/3 Step 14/114 Loss 0.4580
Epoch 1/3 Step 15/114 Loss 0.5287
Epoch 1/3 Step 16/114 Loss 0.6191
Epoch 1/3 Step 17/114 Loss 0.4734
Epoch 1/3 Step 18/114 Loss 0.5830
Epoch 1/3 Step 19/114 Loss 0.5787
Epoch 1/3 Step 20/114 Loss 0.2702
Epoch 1/3 Step 21/114 Loss 0.2716
Epoch 1/3 Step 22/114 Loss 0.3086
Epoch 1/3 Step 23/114 Loss 0.2873
Epoch 1/3 Step 24/114 Loss 0.2887
Epoch 1/3 Step 25/114 Loss 0.1840
Epoch 1/3 Step 26/114 Loss 0.2183
Epoch 1/3 Step 27/114 Loss 0.2019
E

0,1
train/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████████████
train/loss,█▆▅▄▃▂▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/num_examples,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇███

0,1
example_input,Translate this to Pi...
example_output,aving-Hay assembled-...
train/batch_size,4
train/epoch,3
train/loss,0.00686
train/num_examples,300
train/step,114


W&B run finished.


In [11]:
from tinker import types

# 1) Snapshot weights and get a sampling client
sampling_client = training_client.save_weights_and_get_sampling_client(
    name=RUN_NAME  # ties weights to this specific experiment/run
)

def piglatin_demo(text: str):
    prompt = f"English: {text}\nPig Latin:"
    # Be explicit and mirror training-time tokenization
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True)
    model_input = types.ModelInput.from_ints(prompt_tokens)

    params = types.SamplingParams(
        max_tokens=40,
        temperature=0.1,
        stop=["\n"],
    )

    future = sampling_client.sample(
        prompt=model_input,
        sampling_params=params,
        num_samples=1,
    )
    result = future.result()
    decoded = tokenizer.decode(result.sequences[0].tokens)

    print("INPUT :", text)
    print("OUTPUT:", repr(decoded))

# Try a few
piglatin_demo("coffee break")
piglatin_demo("I am learning to fine tune LLMs")
piglatin_demo("Basketball is my favorite sport")

INPUT : coffee break
OUTPUT: ' offee-cay eak-bray\n\n'
INPUT : I am learning to fine tune LLMs
OUTPUT: ' am-I aring-to-ay LLMs-lay\n\n'
INPUT : Basketball is my favorite sport
OUTPUT: ' asketball-ay is-may is-favorite-say ortball-bay\n\n'
