In [1]:
from datetime import datetime

# --- Run identity -----------------------------------------------------------
BASE_MODEL = "Qwen/Qwen3-4B-Instruct-2507"
RUN_TAG = "piglatin-hello-world"

# --- Data selection ---------------------------------------------------------
DATASET = "sample"  # "sample" or "full"
MAX_TRAIN_ROWS = 300  # upper bound on the number of rows loaded for training

# --- LoRA / optimisation settings -------------------------------------------
LORA_RANK = 16
LORA_ALPHA = 32
LEARNING_RATE = 1e-4
NUM_EPOCHS = 3
BATCH_SIZE = 8

# --- Derived values ---------------------------------------------------------
# Everything after the last "/" of the model id (the whole id if there is no "/").
MODEL_SHORT_NAME = BASE_MODEL.rpartition("/")[2]
# Local wall-clock time at the moment this cell runs, e.g. 20250131_142501.
TIMESTAMP = format(datetime.now(), "%Y%m%d_%H%M%S")

In [2]:
from pathlib import Path

def find_repo_root(start=None):
    """Locate the repository root by searching upward for a project marker.

    Args:
        start: Path (or path string) to begin from. Falsy values fall back to
            the current working directory.

    Returns:
        The nearest directory, starting at the resolved ``start`` and moving
        up through its parents, that contains ``pyproject.toml`` or ``.git``.
        If no such directory exists, the resolved ``start`` itself.
    """
    origin = Path(start or Path.cwd()).resolve()
    markers = ("pyproject.toml", ".git")
    # Closest directory first, so a nested project wins over an outer one.
    candidates = (origin, *origin.parents)
    return next(
        (
            directory
            for directory in candidates
            if any((directory / marker).exists() for marker in markers)
        ),
        origin,
    )

REPO_ROOT = find_repo_root()

# Repo-relative JSONL file for each supported DATASET value. An explicit lookup
# makes a typo such as "Sample" fail loudly instead of silently selecting the
# full dataset.
DATASET_FILES = {
    "sample": "data/piglatin/sample.jsonl",
    "full": "data/piglatin/full/piglatin.jsonl",
}
if DATASET not in DATASET_FILES:
    raise ValueError(
        f"Unknown DATASET {DATASET!r}; expected one of {sorted(DATASET_FILES)}"
    )

data_rel = DATASET_FILES[DATASET]
DATA_PATH = REPO_ROOT / data_rel

# Fail fast with the command that builds the dataset when the file is missing.
assert DATA_PATH.exists(), f"Missing dataset at {DATA_PATH}. Run: python scripts/build_piglatin_dataset.py"
print("CWD:", Path.cwd())
print("DATA_PATH:", DATA_PATH)

CWD: C:\Users\user\Desktop\tinker-hello-world\notebooks
DATA_PATH: C:\Users\user\Desktop\tinker-hello-world\data\piglatin\sample.jsonl



# Notebook 03 — Hello, Tinker LoRA

**Objective:** Fine-tune a base model using Tinker's managed LoRA training API.

**Prerequisites:** Completed `00_check_env.ipynb`, active `TINKER_API_KEY`, and access to the Tinker service.

**Estimated run time:** ~5 minutes to configure plus however long your training job runs server-side.

### Before you start
1. Confirm `python test_env.py` prints `True` for the API key.
2. Run the environment check notebook whenever you set up a new machine or virtual environment.
3. In this notebook you'll load your credentials, inspect the available models, load the Pig Latin training examples, pick a base model, and initialize a LoRA training client.


In [3]:
# Load credentials and confirm package versions before proceeding.
from dotenv import load_dotenv
import os
import tinker
import transformers

load_dotenv()

# Report only whether the key is set; the key itself is never printed.
api_key_loaded = bool(os.environ.get("TINKER_API_KEY"))
print("TINKER_API_KEY loaded:", api_key_loaded)

# Record package versions alongside the run.
print("Tinker SDK version:", tinker.__version__)
print("Transformers version:", transformers.__version__)

TINKER_API_KEY loaded: True
Tinker SDK version: 0.3.0
Transformers version: 4.57.1


In [4]:
# Ask the service which base models are available to your account.
# Tinker reads TINKER_API_KEY from the environment
service_client = tinker.ServiceClient()
caps = service_client.get_server_capabilities()

# Collect the names first, then print them one per line as a bulleted list.
supported_model_names = [model.model_name for model in caps.supported_models]

print("Supported models:")
for model_name in supported_model_names:
    print("-", model_name)

Supported models:
- deepseek-ai/DeepSeek-V3.1
- deepseek-ai/DeepSeek-V3.1-Base
- meta-llama/Llama-3.1-70B
- meta-llama/Llama-3.1-8B
- meta-llama/Llama-3.1-8B-Instruct
- meta-llama/Llama-3.2-1B
- meta-llama/Llama-3.2-3B
- meta-llama/Llama-3.3-70B-Instruct
- Qwen/Qwen3-235B-A22B-Instruct-2507
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-Base
- Qwen/Qwen3-30B-A3B-Instruct-2507
- Qwen/Qwen3-32B
- Qwen/Qwen3-4B-Instruct-2507
- Qwen/Qwen3-8B
- Qwen/Qwen3-8B-Base
- openai/gpt-oss-120b
- openai/gpt-oss-20b


In [5]:
from tinker_hw.utils.io import read_jsonl

# Keep at most MAX_TRAIN_ROWS rows from the dataset file.
examples = read_jsonl(DATA_PATH)[:MAX_TRAIN_ROWS]

# Sanity checks (fail fast): there must be data, and every row needs both keys.
assert len(examples) > 0, "No examples loaded — check DATA_PATH"
assert not any(
    ("input" not in row or "output" not in row) for row in examples
), "Expected keys: input/output"

# Preview the first row, truncated to 200 characters per field.
first_example = examples[0]
print("Loaded examples:", len(examples))
print("Example input:\n", first_example["input"][:200], "...")
print("Example output:\n", first_example["output"][:200], "...")

Loaded examples: 300
Example input:
 Translate this to Pig Latin:
Having assembled the senate, he reminded them of the injustice of his ...
Example output:
 aving-Hay assembled-ay e-thay enate-say, e-hay eminded-ray em-thay of-ay e-thay injustice-ay of-ay is-hay ...


In [8]:
# Report the base model selected by BASE_MODEL in the config cell; the same
# value is passed to the LoRA training client in the next cell.
# NOTE(review): if the printed name differs from the config cell, the kernel
# holds stale state — restart the kernel and run all cells.
print("Using base model:", BASE_MODEL)

Using base model: meta-llama/Llama-3.2-3B


In [9]:
# Spin up a managed LoRA training client for BASE_MODEL with adapter rank
# LORA_RANK (both are set in the config cell). Adjust rank/params as you explore.
# NOTE(review): LORA_ALPHA is defined in the config cell but is not passed
# here — confirm whether the client accepts an alpha setting.

training_client = service_client.create_lora_training_client(
    base_model=BASE_MODEL,
    rank=LORA_RANK,
)

# Fetch the tokenizer from the training client and print its vocab size as a
# quick check that the client was created for the expected model.
tokenizer = training_client.get_tokenizer()
print("Tokenizer vocab size:", tokenizer.vocab_size)

Tokenizer vocab size: 128000
