Install required libraries:

In [4]:
!pip install transformers datasets peft bitsandbytes accelerate



In [5]:
!pip install transformers --upgrade



Login to HuggingFace (optional but recommended):

In [6]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [7]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

In [8]:
!pip install --upgrade fsspec datasets

Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)


Step 3: Load and Prepare Dataset
Why this matters : Clean data is essential for effective training.

Example: SMS Spam Collection dataset

In [24]:
from datasets import load_dataset, DatasetDict

dataset = load_dataset("sms_spam")
print(dataset['train'][0])  # View first sample

# Split into 80% train / 20% test
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)

# Apply tokenization to the split datasets
tokenized_datasets = split_dataset.map(tokenize_function, batched=True)

# Reorganize into a DatasetDict (optional, but keeps the structure)
tokenized_datasets = DatasetDict({
    "train": tokenized_datasets["train"],
    "test": tokenized_datasets["test"]
})

{'sms': 'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...\n', 'label': 0}


Map:   0%|          | 0/4459 [00:00<?, ? examples/s]

Map:   0%|          | 0/1115 [00:00<?, ? examples/s]

In [10]:
print(dataset['train'].features)

{'sms': Value(dtype='string', id=None), 'label': ClassLabel(names=['ham', 'spam'], id=None)}


In [11]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set padding token (critical for batch processing)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Use EOS token as pad token

In [12]:
def tokenize_function(examples):
    return tokenizer(
        examples["sms"],
        truncation=True,
        padding="max_length",  # Explicit padding
        max_length=128,
        return_special_tokens_mask=True  # Helps with padding
    )

In [13]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [16]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="tinyllama-sms-spam",
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    learning_rate=2e-4,
    save_steps=100,
    logging_steps=10,
    eval_strategy="epoch",  # New parameter name
    save_strategy="epoch",  # Also updated
    report_to="none"
)

Step 4: Prepare Model with QLoRA
Why this matters : QLoRA reduces memory usage by quantizing weights.

In [26]:
import torch
from transformers import AutoModelForSequenceClassification, BitsAndBytesConfig
import bitsandbytes as bnb

# Configure 4-bit quantization
bitsandbytes_config = BitsAndBytesConfig(
    load_in_4bit=True,              # Enable 4-bit quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # Use float16 for computations
    bnb_4bit_quant_type="nf4",      # Use NormalFloat4 quantization type
    bnb_4bit_use_double_quant=True  # Use double quantization for better efficiency
)

# Load model in 4-bit quantized mode
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=bitsandbytes_config,  # Now this works!
    num_labels=2,
    device_map="auto"  # Automatically map to GPU
)

# Explicitly set pad_token_id in the model's configuration
model.config.pad_token_id = model.config.eos_token_id

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v1.0 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Make the model ready for training:

In [20]:
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

##Step 5: Configure LoRA Adapters
Why this matters : LoRA trains only a small subset of parameters

In [21]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,  # Rank of the adapter
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Which layers to modify
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_CLS"  # Sequence classification task
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Should see ~0.1-1% of parameters trainable

trainable params: 2,256,896 || all params: 1,036,773,376 || trainable%: 0.2177


##Step 6: Training Configuration
Why this matters : Proper settings ensure fast training

In [22]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="tinyllama-sms-spam",
    per_device_train_batch_size=16,  # Higher batch size possible with small model
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    learning_rate=2e-4,
    save_steps=100,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
    push_to_hub=True
)

##Step 7: Train the Model
Why this matters : This is where the model learns!

In [29]:
from transformers import Trainer
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig

# Prepare the model for k-bit training
model = prepare_model_for_kbit_training(model)

# Configure and apply LoRA adapters
lora_config = LoraConfig(
    r=16,  # Rank of the adapter
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Which layers to modify
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_CLS"  # Sequence classification task
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# 4. Create trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 5. Train
trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 2,256,896 || all params: 1,036,773,376 || trainable%: 0.2177


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Step,Training Loss
10,0.8952
20,0.1407
30,0.0384
40,0.0206
50,0.0305
60,0.035
70,0.0434
80,0.0049
90,0.0526
100,0.0097


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*a

TrainOutput(global_step=420, training_loss=0.03691278446875956, metrics={'train_runtime': 3037.0504, 'train_samples_per_second': 4.405, 'train_steps_per_second': 0.138, 'total_flos': 9978042146881536.0, 'train_loss': 0.03691278446875956, 'epoch': 3.0})

##Step 8: Save and Push Model
Why this matters : To use/share your trained model

In [30]:
model.push_to_hub("tinyllama-sms-spam")
tokenizer.push_to_hub("tinyllama-sms-spam")

README.md: 0.00B [00:00, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/deathVader-afk/tinyllama-sms-spam/commit/ab44f2afe9a589f87145e1bc70413fb380bd65fd', commit_message='Upload tokenizer', commit_description='', oid='ab44f2afe9a589f87145e1bc70413fb380bd65fd', pr_url=None, repo_url=RepoUrl('https://huggingface.co/deathVader-afk/tinyllama-sms-spam', endpoint='https://huggingface.co', repo_type='model', repo_id='deathVader-afk/tinyllama-sms-spam'), pr_revision=None, pr_num=None)

##Step 9: Evaluate and Test
Why this matters : Check if the model works well



In [31]:
import numpy as np
from sklearn.metrics import accuracy_score

preds = trainer.predict(tokenized_datasets["test"])
y_pred = np.argmax(preds.predictions, axis=1)

accuracy = accuracy_score(tokenized_datasets["test"]["label"], y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 1.00


In [32]:
# Test with new SMS:
def predict_spam(text):
    inputs = tokenizer(text, return_tensors="pt").to("cuda")
    with torch.no_grad():
        logits = model(**inputs).logits
    return "Spam" if torch.argmax(logits) == 1 else "Not Spam"

predict_spam("WINNER!! You've been selected for a free iPhone!!!")

'Spam'