In [1]:
import torch

# Report whether a CUDA device is visible and, if so, describe device 0.
cuda_ready = torch.cuda.is_available()
print("CUDA available:", cuda_ready)
if cuda_ready:
    device_index = 0
    print("Device:", torch.cuda.get_device_name(device_index))
    print("Capability:", torch.cuda.get_device_capability(device_index))

CUDA available: False


In [None]:
from unsloth import FastLanguageModel
from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments

In [None]:
# Loading configuration. `max_seq_length` is reused by the trainer further down.
load_in_4bit   = True   # VERY important for VRAM
dtype          = None   # let Unsloth auto-choose (fp16/bf16)
max_seq_length = 512

# Collect the loader arguments first so the call itself stays a one-liner.
base_model_kwargs = dict(
    model_name     = "unsloth/tinyllama-bnb-4bit",  # 1.1B, 4-bit quantized
    max_seq_length = max_seq_length,
    dtype          = dtype,
    load_in_4bit   = load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# --- 3. Add LoRA on top (PEFT) ---
# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Rebinds `model` to the PEFT-wrapped version returned by Unsloth.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules             = lora_target_modules,
    r                          = 16,         # low rank to save VRAM
    lora_alpha                 = 16,
    lora_dropout               = 0,
    bias                       = "none",
    use_rslora                 = False,
    loftq_config               = None,
    random_state               = 3407,
    use_gradient_checkpointing = "unsloth",  # helps on small GPUs
)

# --- 4. Tiny toy dataset (Alpaca-style) ---
# Each pair is (instruction, expected output); every record has an empty "input".
_toy_pairs = (
    (
        "Say hello to the user named Areg.",
        "Hello Areg! Nice to meet you.",
    ),
    (
        "Explain in one sentence what Django is.",
        "Django is a high-level Python web framework that helps you build secure, scalable web apps quickly.",
    ),
    (
        "Explain in one sentence what WhatsApp Web is.",
        "WhatsApp Web is the browser-based interface for using your WhatsApp account on a computer.",
    ),
)

# Expand the pairs into the instruction/input/output records used below.
train_examples = [
    {"instruction": instruction, "input": "", "output": output}
    for instruction, output in _toy_pairs
]

# Wrap the list of record dicts in a `datasets.Dataset` so it can be mapped
# and handed to the trainer below.
dataset = Dataset.from_list(train_examples)

# Simple Alpaca-style formatting: Instruction + (optional) input + output
def format_example(example, eos_token=None):
    """Render one Alpaca-style record into a single training string.

    The result is stored under ``example["text"]`` and always ends with the
    end-of-sequence token, so the model sees an explicit "stop here" marker
    after every response. Without it the fine-tuned model gets no signal for
    where a response ends and tends to keep generating.

    Args:
        example: Mapping with ``"instruction"``, ``"input"`` and ``"output"``
            string fields. An empty ``"input"`` omits the ``### Input`` section.
        eos_token: Text appended after the response. Defaults to the
            ``eos_token`` of the module-level ``tokenizer``; pass ``""`` to
            get the bare prompt text without a terminator.

    Returns:
        The same ``example`` mapping, with the ``"text"`` field added.
    """
    if eos_token is None:
        # Looked up lazily so the function only needs the tokenizer when the
        # caller does not supply a terminator explicitly.
        eos_token = tokenizer.eos_token or ""

    sections = [f"### Instruction:\n{example['instruction']}"]
    if example["input"]:
        sections.append(f"### Input:\n{example['input']}")
    sections.append(f"### Response:\n{example['output']}")

    example["text"] = "\n\n".join(sections) + eos_token
    return example

# Apply format_example to the dataset; this is what adds the "text" field
# that the trainer reads. Note that `dataset` is rebound to the mapped result.
dataset = dataset.map(format_example)

# --- 5. Trainer setup ---
# On Windows Unsloth recommends dataset_num_proc=1 to avoid crashes.
# We also keep everything tiny: batch size 1, ~30 steps.
#
# The model is loaded with dtype=None, i.e. Unsloth picks fp16 or bf16 itself.
# Hard-coding fp16=True can therefore conflict with a bf16 model on GPUs that
# support bf16, so choose the mixed-precision flag from the hardware instead.
# The is_available() guard keeps the bf16 probe from being called on a
# machine without CUDA.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir                      = "tinyllama-unsloth-demo",
    per_device_train_batch_size     = 1,
    gradient_accumulation_steps     = 1,
    learning_rate                   = 2e-4,
    max_steps                       = 30,   # tiny run - just to test
    warmup_steps                    = 5,
    logging_steps                   = 5,
    save_strategy                   = "no",  # no checkpoints for this throwaway run
    fp16                            = not use_bf16,
    bf16                            = use_bf16,
)

# Build the supervised fine-tuning trainer over the formatted "text" field.
#
# Packing is turned off on purpose: the three toy examples together are far
# shorter than max_seq_length, so there is not enough text to fill even one
# packed sequence. TRL versions that accept these keyword arguments raise an
# error while packing in that situation. Re-enable packing only with a
# dataset large enough to yield full-length sequences.
trainer = SFTTrainer(
    model              = model,
    tokenizer          = tokenizer,
    train_dataset      = dataset,
    dataset_text_field = "text",
    max_seq_length     = max_seq_length,
    packing            = False,  # dataset is too small to pack (see above)
    dataset_num_proc   = 1,      # important for Windows
    args               = training_args,
)

# --- 6. Run a tiny training loop ---
# Length of the run is governed by `max_steps` in `training_args`; checkpoint
# saving is disabled there (save_strategy="no").
trainer.train()

# --- 7. Switch to inference mode + quick test ---
FastLanguageModel.for_inference(model)  # disables grad, applies speedups

# Same "### Instruction / ### Response" layout as the training text, with the
# response left open for the model to complete.
prompt = "### Instruction:\nSay hello to Areg in a friendly way.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling settings, kept together so they are easy to tweak.
sampling_kwargs = dict(
    max_new_tokens = 64,
    do_sample      = True,
    top_p          = 0.9,
    temperature    = 0.7,
)

with torch.no_grad():
    generated = model.generate(**inputs, **sampling_kwargs)

# Decode the first returned sequence and show it.
completion = tokenizer.decode(generated[0], skip_special_tokens=True)
print(completion)