In [None]:
!pip -q install --upgrade \
  unsloth datasets accelerate bitsandbytes wandb huggingface_hub \
  "transformers==4.57.1" \
  "trl>=0.10.0"

# ---- Imports & versions
import os, sys, random, numpy as np, torch

# Unsloth asks to be imported before transformers / trl (it warns
# "Please restructure your imports with 'import unsloth' at the top" otherwise).
# Only import it when a CUDA device is present so this cell keeps working as a
# plain environment check on CPU-only runtimes.
if torch.cuda.is_available():
    import unsloth  # noqa: F401  (imported for its patching side effects)

import transformers
import trl

print("Python        :", sys.version.split()[0])
print("NumPy         :", np.__version__)
print("Transformers  :", transformers.__version__)
print("TRL           :", trl.__version__)

# ---- CUDA / GPU info
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU           :", torch.cuda.get_device_name(0))
    print("CUDA version  :", torch.version.cuda)
    print("CC            :", torch.cuda.get_device_capability(0))

# ---- Reproducibility seeds (Python, NumPy, and Torch CPU/GPU generators)
seed = 42
random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# ---- Default: disable W&B unless you enable it in Step 2
# setdefault() leaves any value already exported in the environment untouched.
os.environ.setdefault("WANDB_DISABLED", "true")
print("\n✅ Step 1 complete: installs + env OK. Ready for Step 2 (HF/W&B auth).")

In [None]:
## Load SmolLM2  + attach LoRA
from unsloth import FastLanguageModel
import torch

MODEL_NAME = "unsloth/smollm2-135m"
MAX_LEN    = 1024

# Pick dtype (bf16 if supported, else fp16).
# Compute capability major >= 8 is used here as the bf16 criterion.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8
DTYPE    = torch.bfloat16 if use_bf16 else torch.float16

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name     = MODEL_NAME,
    max_seq_length = MAX_LEN,
    dtype          = DTYPE,
    load_in_4bit   = True,
    # Optional auth: uses `hf_token` only if an earlier cell defined a non-empty one.
    token          = hf_token if "hf_token" in globals() and hf_token else None,
)

# Tokenizer hygiene: make sure a pad token exists and pad on the right
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Attach LoRA (pass kwargs; do NOT pass a LoraConfig object)
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
    use_rslora=False,
    loftq_config=None,
)
model.print_trainable_parameters()

# Quick generate smoke test.
# Run it in Unsloth's inference mode so generation does not execute with
# training-time settings (e.g. LoRA dropout, gradient checkpointing).
FastLanguageModel.for_inference(model)
_test = "Classify the sentiment: I absolutely love this new feature!"
ids   = tokenizer(_test, return_tensors="pt").to(model.device)
with torch.inference_mode():
    out = model.generate(**ids, max_new_tokens=12)
print("🟢 Generate OK:", tokenizer.decode(out[0], skip_special_tokens=True)[:120])

# Hand the model back in training mode, which is what the trainer cells expect.
FastLanguageModel.for_training(model)

print("\n✅ Step 2 complete: model loaded in 4-bit and LoRA attached. Ready for Step 3 (dataset).")



Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.11.2: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/742 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

trainable params: 4,884,480 || all params: 139,400,064 || trainable%: 3.5039
🟢 Generate OK: Classify the sentiment: I absolutely love this new feature!

The new feature is called "Sentiment Analysis".

✅ Step 2 complete: model loaded in 4-bit and LoRA attached. Ready for Step 3 (dataset).


In [None]:
## Load AG News and format into Supervised SFT text
from datasets import load_dataset

# Load AG News (all splits)
raw = load_dataset("ag_news")

# The dataset's "test" split is used as the validation set in this notebook.
train, valid = raw["train"], raw["test"]

# Optional: limit size during testing (keeps the first N rows of each split)
train = train.select(range(min(len(train), 12000)))
valid = valid.select(range(min(len(valid), 2000)))

# Label mapping: integer class id -> category name used in the prompt
LABELS = dict(enumerate(("World", "Sports", "Business", "Sci/Tech")))

# End-of-sequence string; to_sft() appends it to every formatted example.
EOS = tokenizer.eos_token

# SFT Prompt Template
# Two positional slots: the first receives the article text, the second the
# category (left empty when the template is used for generation).
PROMPT = "\n".join((
    "You are a news classifier. Classify the news article into one of the following categories:",
    "World, Sports, Business, Sci/Tech.",
    "",
    "### News Article:",
    "{}",
    "",
    "### Instructions:",
    "Respond with exactly one category from the list.",
    "",
    "### Category:",
    "{}",
))

# Convert dataset to text format for SFTTrainer
def to_sft(batch):
    """Render a batch of AG News rows as full prompt+answer training strings.

    Args:
        batch: mapping with parallel "text" (article) and "label" (class id)
            sequences, as passed by `Dataset.map(..., batched=True)`.

    Returns:
        dict with a single "text" key holding one formatted string per row:
        PROMPT filled with the article and its category name, followed by EOS.
    """
    return {
        "text": [
            PROMPT.format(article, LABELS[int(label_id)]) + EOS
            for article, label_id in zip(batch["text"], batch["label"])
        ]
    }

# Apply the formatter to both splits, dropping the original columns so only
# the rendered "text" field remains.
train_sft, valid_sft = (
    split.map(to_sft, batched=True, remove_columns=split.column_names)
    for split in (train, valid)
)

# Preview the first 500 characters of the first formatted training example
first_example = train_sft["text"][0]
print("✅ Example sample:\n")
print(first_example[:500])
print("\n✅ Step 3 complete — dataset loaded & formatted.")


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Map:   0%|          | 0/12000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

✅ Example sample:

You are a news classifier. Classify the news article into one of the following categories:
World, Sports, Business, Sci/Tech.

### News Article:
Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\band of ultra-cynics, are seeing green again.

### Instructions:
Respond with exactly one category from the list.

### Category:
Business<|endoftext|>

✅ Step 3 complete — dataset loaded & formatted.


In [None]:
## Build TrainingArguments + SFTTrainer
import dataclasses

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Mixed-precision flags: exactly one of fp16 / bf16 is enabled.
bf16_ok = is_bfloat16_supported()

# Settings shared by every transformers version.
training_kwargs = dict(
    output_dir = "outputs_lora_agnews",
    per_device_train_batch_size = 16,
    gradient_accumulation_steps = 1,
    num_train_epochs = 1,
    learning_rate = 2e-4,
    warmup_ratio = 0.03,
    lr_scheduler_type = "cosine",
    weight_decay = 0.05,
    logging_steps = 10,

    save_strategy = "epoch",
    fp16 = not bf16_ok,
    bf16 = bf16_ok,
    report_to = "none",
)

# The evaluation-schedule option is named `eval_strategy` in newer releases and
# `evaluation_strategy` in older ones. Pick the name from the dataclass fields
# instead of try/except, so genuine configuration errors are not swallowed and
# retried under a different keyword.
_ta_fields = {f.name for f in dataclasses.fields(TrainingArguments)}
eval_key = "eval_strategy" if "eval_strategy" in _ta_fields else "evaluation_strategy"
training_kwargs[eval_key] = "epoch"

args = TrainingArguments(**training_kwargs)


# ✅ Build SFTTrainer (NO formatting_func needed because "text" field exists)
# NOTE(review): `max_seq_length` should stay in sync with the value used when
# the model was loaded (MAX_LEN in the model-loading cell).
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_sft,
    eval_dataset = valid_sft,
    dataset_text_field = "text",
    max_seq_length = 1024,
    packing = False,              # ✅ safe, no dataset packing
    args = args,
)

print("✅ Step 4 complete — Trainer built successfully (training not started).")
print("Train batches:", len(trainer.get_train_dataloader()))
print("Eval batches :", len(trainer.get_eval_dataloader()))


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/12000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/2000 [00:00<?, ? examples/s]

✅ Step 4 complete — Trainer built successfully (training not started).
Train batches: 750
Eval batches : 250


In [None]:
## Training
from math import ceil
import torch

# Nice-to-have: show an estimate of total steps.
# Build the dataloader once and reuse its length instead of constructing it
# again for every printed value.
train_batches = len(trainer.get_train_dataloader())
grad_accum = trainer.args.gradient_accumulation_steps
num_update_steps_per_epoch = ceil(train_batches / grad_accum)
print(f"Epochs: {trainer.args.num_train_epochs} | "
      f"Batches/epoch: {train_batches} | "
      f"GradAccum: {grad_accum} | "
      f"Updates/epoch: {num_update_steps_per_epoch}")

train_result = trainer.train()
# Persists the trainer state (step counters / log history). Optimizer and
# scheduler state are not written by this call.
trainer.save_state()

print("\n✅ Training finished.")
# Only numeric metrics are shown to keep the summary compact.
print("Train result metrics:", {k: v for k, v in train_result.metrics.items() if isinstance(v, (int, float))})

# Quick GPU memory note (bytes reserved by the CUDA allocator, in decimal GB)
if torch.cuda.is_available():
    peak_gb = torch.cuda.max_memory_reserved() / 1e9
    print(f"💾 Peak reserved GPU memory: {peak_gb:.3f} GB")


Epochs: 1 | Batches/epoch: 750 | GradAccum: 1 | Updates/epoch: 750


Epoch,Training Loss,Validation Loss
1,1.6625,1.598731


Unsloth: Will smartly offload gradients to save VRAM!

✅ Training finished.
Train result metrics: {'train_runtime': 537.2789, 'train_samples_per_second': 22.335, 'train_steps_per_second': 1.396, 'total_flos': 1337310344300544.0, 'train_loss': 1.7350418319702148, 'epoch': 1.0}
💾 Peak reserved GPU memory: 7.669 GB


In [None]:
from transformers import pipeline
from unsloth import FastLanguageModel

# Put the fine-tuned model into Unsloth's inference mode before generating predictions.
print("Switching model to inference mode...")
FastLanguageModel.for_inference(model)

def build_infer_prompt(article: str) -> str:
    """Return the classification prompt for `article` with the answer left open.

    The layout matches the training template, except that nothing follows the
    final "### Category:" line, so the model is expected to generate the label.

    Args:
        article: news text to classify.

    Returns:
        The prompt string, ending with "### Category:" and a newline.
    """
    template = "\n".join((
        "You are a news classifier. Classify the news article into one of the following categories:",
        "World, Sports, Business, Sci/Tech.",
        "",
        "### News Article:",
        "{article}",
        "",
        "### Instructions:",
        "Respond with exactly one category from the list.",
        "",
        "### Category:",
        "",  # trailing empty item -> prompt ends with a newline
    ))
    return template.format(article=article)

# Create an inference pipeline around the already-loaded model and tokenizer
pipeline_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=pipeline_dtype,
    device_map="auto",
)

# Test samples (hand-written headlines for a quick sanity check)
samples = [
    "The stock market rallied today as major tech companies posted strong earnings.",
    "NASA confirmed a new exoplanet that may contain signs of life.",
    "Cristiano Ronaldo scored a hat-trick as his team dominated in a 5-1 win.",
    "The Prime Minister met with European leaders to discuss climate policies.",
]

print("\n✅ Running quick evaluation:")
for article in samples:
    # Greedy decoding, capped at 6 new tokens
    results = gen(build_infer_prompt(article), max_new_tokens=6, do_sample=False)
    generated = results[0]["generated_text"]
    # The returned text includes the prompt; keep what follows the last marker.
    _, _, answer = generated.rpartition("### Category:")
    prediction = answer.strip()
    print("\nArticle:", article)
    print("Predicted:", prediction)


Switching model to inference mode...

✅ Running quick evaluation:

Article: The stock market rallied today as major tech companies posted strong earnings.
Predicted: Business

Article: NASA confirmed a new exoplanet that may contain signs of life.
Predicted: Sci/Tech

Article: Cristiano Ronaldo scored a hat-trick as his team dominated in a 5-1 win.
Predicted: Sports

Article: The Prime Minister met with European leaders to discuss climate policies.
Predicted: World


In [None]:
# ✅ Save LoRA adapter + tokenizer locally in Colab

save_dir = "/content/SmolLM2-135M-AGNews-LoRA"

# Model first, then tokenizer — both are written into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

print("✅ Model + tokenizer saved locally at:", save_dir)


✅ Model + tokenizer saved locally at: /content/SmolLM2-135M-AGNews-LoRA
