In [None]:
# =========================
# TinyLLaMA QLoRA Fine-Tune (STABLE VERSION)
# Works on Colab GPU
# =========================

!pip install -U transformers datasets peft bitsandbytes accelerate huggingface_hub

# -------------------------
# Login
# -------------------------
import os

from huggingface_hub import login

# Authenticate against the Hugging Face Hub without embedding a secret in the
# notebook: a token pasted into source is easily committed or shared by
# accident. The token is read from the HF_TOKEN environment variable; when
# that is unset, `token=None` makes `login` ask for it interactively.
login(token=os.environ.get("HF_TOKEN"))

# -------------------------
# Create dataset
# -------------------------
import json

# Two toy instruction/response samples, each stored as a single "text" field
# using the "### User: ... ### Assistant: ..." prompt layout.
data = [
    {
        "text": "### User: What is overfitting?\n### Assistant: Overfitting is when a model memorizes training data and fails to generalize."
    },
    {
        "text": "### User: Explain gradient descent\n### Assistant: Gradient descent updates model weights to minimize loss."
    },
]

# Persist the samples in JSON Lines form: one serialized object per line,
# each line terminated by a newline.
with open("train.json", "w") as out_file:
    out_file.writelines(json.dumps(sample) + "\n" for sample in data)

# -------------------------
# Load model (QLoRA)
# -------------------------
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so that padded batches can be built.
tokenizer.pad_token = tokenizer.eos_token

# Passing `load_in_4bit=True` straight to `from_pretrained` is deprecated and
# slated for removal; the quantization settings belong in a
# `BitsAndBytesConfig` handed over through `quantization_config`.
# The 4-bit matmuls are computed in fp16 to match the `fp16=True` mixed
# precision setting used for training further down in this script.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base model with 4-bit weights; `device_map="auto"` lets the
# library decide where to place the weights.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto"
)

# -------------------------
# Attach LoRA
# -------------------------
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Preprocess the quantized base model for training before adapters are added.
# This is the step PEFT documents for k-bit (4/8-bit) fine-tuning; wrapping
# the raw quantized model directly skips it.
model = prepare_model_for_kbit_training(model)

# LoRA hyper-parameters: rank-16 adapters (scaling alpha=32, dropout 0.05)
# injected into the attention query and value projections only.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

# Wrap the base model so that only the adapter weights are trainable, then
# report how many parameters that is.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# -------------------------
# Load dataset
# -------------------------
from datasets import load_dataset

# Read the JSON Lines file written earlier; the loader returns a mapping of
# splits, of which only "train" is used.
dataset_splits = load_dataset("json", data_files="train.json")
dataset = dataset_splits["train"]

# Tokenize dataset
def tokenize(example, max_length=512, text_tokenizer=None):
    """Tokenize one sample and attach causal-LM labels.

    Args:
        example: Mapping with a "text" entry holding the raw training string.
        max_length: Length every sample is truncated/padded to.
        text_tokenizer: Optional tokenizer callable to use instead of the
            module-level ``tokenizer``.

    Returns:
        The tokenizer output with an added "labels" list. Labels equal
        ``input_ids`` on real tokens and -100 on padding positions.
    """
    # -100 is the index ignored by the cross-entropy loss. Without it, every
    # padding position (pad == EOS here) would be trained on, and with short
    # samples padded to `max_length` the padding would dominate the loss.
    ignore_index = -100

    encode = tokenizer if text_tokenizer is None else text_tokenizer
    tokenized_example = encode(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

    # Mask by attention mask rather than by token id so that a genuine EOS
    # token inside the text would still be learned.
    tokenized_example["labels"] = [
        token_id if is_real_token else ignore_index
        for token_id, is_real_token in zip(
            tokenized_example["input_ids"], tokenized_example["attention_mask"]
        )
    ]
    return tokenized_example

# Tokenize every row and drop the raw string column, which is no longer
# needed once the token ids and labels exist.
raw_columns = ["text"]
dataset = dataset.map(tokenize, remove_columns=raw_columns)

# -------------------------
# Trainer (stable)
# -------------------------
from transformers import Trainer, TrainingArguments

# Training configuration for the LoRA fine-tune.
training_args = TrainingArguments(
    output_dir="./tinyllama-ft",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,          # Colab supports fp16
    bf16=False,        # disable bf16
    # The two-sample dataset produces fewer than five optimizer steps over
    # three epochs, so a logging interval of 5 never reports a loss; log
    # every step instead.
    logging_steps=1,
    save_strategy="epoch",   # write a checkpoint at the end of each epoch
    report_to="none"         # no external experiment tracker
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

# Run the fine-tuning loop.
trainer.train()

# -------------------------
# Save adapter
# -------------------------
# Write the trained adapter and the tokenizer files side by side into the
# same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("tinyllama_adapter")

# -------------------------
# Inference
# -------------------------
from peft import PeftModel
from transformers import BitsAndBytesConfig

# Reload a fresh 4-bit base model. The quantization settings go through
# `BitsAndBytesConfig`, because passing `load_in_4bit=True` directly to
# `from_pretrained` is deprecated.
base = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    ),
    device_map="auto"
)

# Attach the saved LoRA adapter to the base model.
model = PeftModel.from_pretrained(base, "tinyllama_adapter")
# Make sure dropout is disabled for generation.
model.eval()

prompt = "### User: What is overfitting?\n### Assistant:"
# Move the inputs to wherever the model was placed rather than assuming
# a fixed "cuda" device.
inputs = tokenizer(prompt, return_tensors="pt").to(base.device)

out = model.generate(
    **inputs,
    max_new_tokens=100,
    # Passed explicitly so generation does not have to guess a padding id.
    pad_token_id=tokenizer.pad_token_id,
)
print(tokenizer.decode(out[0], skip_special_tokens=True))

Collecting huggingface_hub
  Using cached huggingface_hub-1.3.3-py3-none-any.whl.metadata (13 kB)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Step,Training Loss


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


### User: What is overfitting?
### Assistant: Overfitting is when a model is trained on a dataset that is too small or too large for the data. This can lead to a model that performs poorly on new data, as the model may not have learned enough information from the training data. Overfitting can occur when the model is trained on a dataset that is too similar to the training data, or when the model is trained on a dataset that is too large. This can lead to a model that performs poorly on new data
