Install required libraries:

In [None]:
# Mount Google Drive at /content/drive; force_remount=True requests a fresh mount.
# NOTE(review): no cell visible in this notebook reads from or writes to
# /content/drive — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
!pip install transformers datasets peft bitsandbytes accelerate

In [None]:
!pip install transformers --upgrade

Note: later in this notebook, `google.colab.files.download` is used to download files directly to your local machine, including the PEFT adapters once they have been saved. The cells immediately below only finish installing dependencies.

In [None]:
!pip install --force-reinstall transformers

In [None]:
!pip install fsspec datasets

Log in to Hugging Face (optional but recommended):

In [None]:
# Prompt for Hugging Face Hub credentials inside the notebook.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Identifier of the base checkpoint; the tokenizer and model cells both load from it.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

## Step 3: Load and Prepare Dataset
Why this matters: Clean data is essential for effective training.

Example: SMS Spam Collection dataset

In [None]:
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer

# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Batches need a padding token; when the checkpoint does not define one,
# reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize the ``"sms"`` field of a batch of examples.

    Every message is truncated or padded to exactly 128 tokens, and the
    special-tokens mask is requested alongside the regular tokenizer output.

    Args:
        examples: Mapping with an ``"sms"`` entry holding the raw text(s).

    Returns:
        Whatever the module-level ``tokenizer`` returns for these inputs.
    """
    tokenizer_options = dict(
        truncation=True,
        padding="max_length",  # pad every example to the full max_length
        max_length=128,
        return_special_tokens_mask=True,
    )
    messages = examples["sms"]
    return tokenizer(messages, **tokenizer_options)

# Fetch the SMS spam dataset and show one raw record as a sanity check.
dataset = load_dataset("sms_spam")
print(dataset["train"][0])

# Hold out 20% of the training split for testing; the fixed seed keeps the
# split reproducible between runs.
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)

# Tokenize both halves of the split in batches.
tokenized_datasets = split_dataset.map(tokenize_function, batched=True)

# Rebuild an explicit DatasetDict holding just the two splits used below.
tokenized_datasets = DatasetDict(
    {split_name: tokenized_datasets[split_name] for split_name in ("train", "test")}
)

In [None]:
# Inspect the column names and feature types of the raw training split.
print(dataset['train'].features)

In [None]:
from transformers import AutoTokenizer

# NOTE(review): this repeats the tokenizer setup already performed in the
# data-loading cell above; one of the two copies can likely be removed.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fall back to the end-of-sequence token when no padding token is defined,
# so that batches can be padded.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# NOTE(review): same definition as the tokenize_function in the data-loading
# cell above; this later copy silently shadows the earlier one.
def tokenize_function(examples):
    """Run the module-level tokenizer over ``examples["sms"]``.

    Inputs are truncated and padded to a fixed length of 128 tokens, and the
    special-tokens mask is requested in the output.
    """
    return tokenizer(
        examples["sms"],
        **{
            "truncation": True,
            "padding": "max_length",
            "max_length": 128,
            "return_special_tokens_mask": True,
        },
    )

In [None]:
from transformers import DataCollatorWithPadding

# Batch collator built around the tokenizer; it is handed to the Trainer below.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
from transformers import TrainingArguments, Trainer

# NOTE(review): `training_args` is assigned again in the "Training Configuration"
# cell further down, so this definition is superseded before training starts.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="tinyllama-sms-spam",
    # Optimisation schedule.
    num_train_epochs=3,
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,
    # Logging, evaluation and checkpoint cadence.
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_steps=100,  # NOTE(review): presumably unused with an epoch save strategy — confirm
    # Disable external experiment trackers.
    report_to="none",
)

## Step 4: Prepare Model with QLoRA
Why this matters: QLoRA reduces memory usage by quantizing weights.

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, BitsAndBytesConfig
import bitsandbytes as bnb

# Configure 4-bit quantization
bitsandbytes_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # Enable 4-bit quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # Run computations in bfloat16
    bnb_4bit_quant_type="nf4",              # Use NormalFloat4 quantization type
    bnb_4bit_use_double_quant=True,         # Use double quantization for better efficiency
)

# Load the base checkpoint in 4-bit quantized mode with a 2-label classification head
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=bitsandbytes_config,
    num_labels=2,
    device_map="auto",  # Automatically map to GPU
)

# Keep the model's pad id identical to the id the tokenizer actually pads with,
# so the model and the tokenized batches agree on which positions are padding.
# Fall back to the EOS id only when the tokenizer has no pad id.
model.config.pad_token_id = (
    tokenizer.pad_token_id
    if tokenizer.pad_token_id is not None
    else model.config.eos_token_id
)

Make the model ready for training:

In [None]:
from peft import prepare_model_for_kbit_training

# Prepare the quantized model for k-bit training and rebind `model` to the result.
# NOTE(review): `model = f(model)` rebinds its own input, so re-running this cell
# applies the preparation again — prefer a fresh kernel run.
model = prepare_model_for_kbit_training(model)

## Step 5: Configure LoRA Adapters
Why this matters: LoRA trains only a small subset of parameters.

In [None]:
from peft import LoraConfig, get_peft_model

# Adapter settings for the sequence-classification task.
lora_config = LoraConfig(
    task_type="SEQ_CLS",
    target_modules=["q_proj", "v_proj"],     # layers that receive adapters
    r=16,                                    # adapter rank
    lora_alpha=32,                           # scaling factor
    lora_dropout=0.05,
    bias="none",
    modules_to_save=["classifier", "score"],
)

# Wrap the model with the adapters and report how much of it is trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Should see ~0.1-1% of parameters trainable

## Step 6: Training Configuration
Why this matters: Proper settings ensure fast training.

In [None]:
from transformers import TrainingArguments, Trainer

# Final training configuration; this is the `training_args` the Trainer receives.
training_args = TrainingArguments(
    output_dir="tinyllama-sms-spam",
    per_device_train_batch_size=16,  # Higher batch size possible with small model
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    learning_rate=2e-4,
    save_steps=100,  # NOTE(review): presumably unused with an epoch save strategy — confirm
    logging_steps=10,
    # Evaluate once per epoch. Without this the eval_dataset handed to the
    # Trainer is never used during training.
    eval_strategy="epoch",
    save_strategy="epoch",
    report_to="none",
    push_to_hub=False,
)

## Step 7: Train the Model
Why this matters: This is where the model learns!

In [None]:
from transformers import Trainer
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig

# `model` was already prepared for k-bit training and wrapped with the LoRA
# adapters in the earlier cells. Calling prepare_model_for_kbit_training and
# get_peft_model a second time here would wrap the PEFT model again, so this
# cell only reports the trainable parameters and builds the trainer.
model.print_trainable_parameters()

# Create the trainer from the model, training arguments, tokenized splits and collator.
# NOTE(review): recent transformers releases deprecate `tokenizer=` in favour of
# `processing_class=` — confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train
trainer.train()

## Save PEFT Adapters Separately

To ensure proper loading of the classification head weights later, we need to save the PEFT adapters separately before merging them into the base model. This preserves all weights including the classification head for accurate inference.

In [None]:
import os
import shutil

# Output locations: the primary adapter directory and a copy for the backend.
peft_output_dir = "./peft_tinyllama_sms_spam_model"
local_peft_dir = "../local_tinyllama_sms_spam_model"

# Write the PEFT model and the tokenizer into the primary directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(peft_output_dir)

# Replace any previous backend copy with a fresh copy of what was just saved.
if os.path.exists(local_peft_dir):
    shutil.rmtree(local_peft_dir)
shutil.copytree(peft_output_dir, local_peft_dir)

print(f"PEFT adapters with classification head saved to {peft_output_dir} and {local_peft_dir}")

## Merge LoRA Adapters with the Base Model

To save the entire fine-tuned model, we first need to merge the trained LoRA adapters back into the base model. This creates a single, consolidated model that can be loaded and used without needing the original base model and separate adapters.

In [None]:
# Merge the adapters and rebind `model` to the returned model.
# NOTE(review): after this cell `model` is no longer the PEFT-wrapped object, so
# re-running the cell on the merged model is presumably invalid — confirm.
model = model.merge_and_unload()
print("LoRA adapters merged successfully into the base model.")

In [None]:
from google.colab import files
import os

# Directory that holds the saved PEFT adapters.
peft_output_dir = "./peft_tinyllama_sms_spam_model"

print(f"Files in '{peft_output_dir}':")

# Collect every file below the adapter directory, then download them one by one.
adapter_paths = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(peft_output_dir)
    for name in names
]
for adapter_path in adapter_paths:
    print(f"Downloading {adapter_path}...")
    files.download(adapter_path)

print("Download process initiated for PEFT adapters. Check your browser's download folder.")

## Save the Full Fine-Tuned Model Locally

Now that the LoRA adapters are merged, we can save the complete fine-tuned model and its tokenizer to a local directory. This model can then be reloaded directly for inference without needing to apply PEFT.

In [None]:
from google.colab import files
import os

# Path to the directory containing the full fine-tuned model
full_model_output_dir = "./full_fine_tuned_tinyllama_sms_spam_model"

# The merged model has to be on disk before it can be downloaded. On a fresh
# top-to-bottom run this cell executes before the directory has been written,
# which previously meant nothing was downloaded while a success message was
# still printed. Save the model and tokenizer first when the directory is missing.
if not os.path.isdir(full_model_output_dir):
    model.save_pretrained(full_model_output_dir)
    tokenizer.save_pretrained(full_model_output_dir)
    print(f"Full fine-tuned model and tokenizer saved locally to {full_model_output_dir}")

# Walk the directory and download each file individually
print(f"Files in '{full_model_output_dir}':")
for root, dirs, files_in_dir in os.walk(full_model_output_dir):
    for file_name in files_in_dir:
        file_path = os.path.join(root, file_name)
        print(f"Downloading {file_path}...")
        files.download(file_path)

print("Download process initiated for the full fine-tuned model. Check your browser's download folder.")

In [None]:
# Target directory for the model and its tokenizer.
full_model_output_dir = "./full_fine_tuned_tinyllama_sms_spam_model"

# Model first, tokenizer second — both are written into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(full_model_output_dir)

print(f"Full fine-tuned model and tokenizer saved locally to {full_model_output_dir}")

## Step 9: Evaluate and Test
Why this matters: Check if the model works well.



In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# Score the held-out split with the trainer and keep the raw prediction output.
test_split = tokenized_datasets["test"]
preds = trainer.predict(test_split)

# The predicted class is the index of the largest score along axis 1.
y_pred = np.argmax(preds.predictions, axis=1)

# Compare against the reference labels stored in the test split.
accuracy = accuracy_score(test_split["label"], y_pred)
print(f"Accuracy: {accuracy:.2f}")

In [None]:
# Test with new SMS:
def predict_spam(text, max_length=128):
    """Classify a single SMS message as spam or not spam.

    Args:
        text: The message to classify.
        max_length: Maximum number of tokens kept from ``text``. The default
            of 128 matches the length used when tokenizing the training data.

    Returns:
        ``"Spam"`` when the highest-scoring label is 1, otherwise ``"Not Spam"``.
    """
    # Inference should not run with training-only behaviour such as dropout.
    model.eval()

    # Truncate like the training data, and move the tensors to wherever the
    # model lives instead of assuming a CUDA device is present.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    ).to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Take the argmax over the label dimension rather than the flattened tensor.
    predicted_label = torch.argmax(logits, dim=-1).item()
    return "Spam" if predicted_label == 1 else "Not Spam"

# Try the classifier on one sample message; the cell displays the returned label.
predict_spam("You've been selected as the winner of a free iPhone!!!")