# LoRA Finetuning with SmolLM2-135M for Tweet Sentiment Evaluation

In [1]:
%%capture
!pip install unsloth datasets transformers accelerate bitsandbytes wandb huggingface_hub

In [2]:
from getpass import getpass
import os, torch, random, numpy as np
import wandb
from huggingface_hub import login

# Unsloth has to be imported before transformers / trl. Importing those two
# first (as the version report used to do) makes Unsloth emit its
# "restructure your imports with 'import unsloth' at the top" warning when the
# model cell runs.
import unsloth  # noqa: F401  (imported for its patching side effects)
import transformers
import trl

# Single seed shared by every random number generator seeded below.
SEED = 42

# Environment report: library versions and the visible CUDA device, if any.
print("NumPy:", np.__version__)
print("Transformers:", transformers.__version__)
print("TRL:", trl.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

# reproducibility: seed Python, NumPy and PyTorch (CPU, then all CUDA devices)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

NumPy: 2.0.2
Transformers: 4.57.1
TRL: 0.23.0
CUDA available: True
GPU: Tesla T4


# Authenticate to Hugging Face and Weights & Biases (W&B)

In [3]:
# Ask for both credentials without echoing them; an empty answer skips that service.
hf_token = getpass("üîë Enter your Hugging Face token (press Enter to skip): ").strip()
wb_token = getpass("üîë Enter your Weights & Biases token (or leave blank to skip): ").strip()

if hf_token:
    login(hf_token)
else:
    print("HF login skipped.")

if wb_token:
    # Re-running this cell with a token after having skipped W&B once would
    # otherwise leave WANDB_DISABLED="true" behind in this process, and the
    # trainer cell reads that variable to decide whether to report to W&B.
    os.environ.pop("WANDB_DISABLED", None)
    wandb.login(key=wb_token)
    run = wandb.init(project="LoRA-SmolLM2-TweetEval", job_type="training", anonymous="allow")
else:
    # Later cells check this variable to switch W&B reporting off.
    os.environ["WANDB_DISABLED"] = "true"
    print("W&B logging disabled.")

üîë Enter your Hugging Face token (press Enter to skip): ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑
üîë Enter your Weights & Biases token (or leave blank to skip): ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33maditya_rajpurohit[0m ([33maditya_rajpurohit-san-jose-state-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Load SmolLM2-135M (4-bit) and attach LoRA adapters

In [4]:
from unsloth import FastLanguageModel

# --- Base checkpoint -------------------------------------------------------
model_name = "unsloth/smollm2-135m"  # unsloth-optimized SmolLM2 checkpoint
max_seq_length = 1024
dtype = None  # handed to the loader unchanged
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=hf_token or None,  # an empty string (login skipped) becomes None
)

# --- LoRA (parameter-efficient) adapters -----------------------------------
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

lora_settings = dict(
    r=16,
    target_modules=attention_projections + mlp_projections,
    lora_alpha=16,
    # The recorded output of this cell shows Unsloth warning that only
    # dropout = 0 is supported for its fast patching of the LoRA matrices.
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
    use_rslora=False,
    loftq_config=None,
)
model = FastLanguageModel.get_peft_model(model, **lora_settings)

print("‚úÖ Loaded 4-bit model and attached LoRA adapters.")


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.11.2: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.11.2 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


‚úÖ Loaded 4-bit model and attached LoRA adapters.


# Load TweetEval/Sentiment and build instruction-style prompts

In [5]:
from datasets import load_dataset

# TweetEval sentiment task (labels: 0=Negative, 1=Neutral, 2=Positive).
raw = load_dataset("tweet_eval", "sentiment")

# Keep at most the first 15000 training rows and the first 2000 validation rows.
train, valid = (
    raw[split].select(range(min(limit, len(raw[split]))))
    for split, limit in (("train", 15000), ("validation", 2000))
)

# Integer class id -> label word used as the target text.
LABELS = dict(enumerate(("Negative", "Neutral", "Positive")))
# End-of-sequence marker appended to every training example.
EOS = tokenizer.eos_token

# Two positional slots: the tweet text, then the sentiment word.
PROMPT = (
    "You are an assistant that classifies the overall sentiment of a tweet as one of: Negative, Neutral, Positive.\n"
    "\n"
    "### Tweet:\n"
    "{}\n"
    "\n"
    "### Instructions:\n"
    "Respond with exactly one word: Negative, Neutral, or Positive.\n"
    "\n"
    "### Sentiment:\n"
    "{}"
)

def to_supervised_text(batch):
    """Render a batch of labelled tweets as complete training strings.

    Args:
        batch: Mapping with parallel "text" and "label" sequences.

    Returns:
        A dict ``{"text": [...]}`` holding one string per row: ``PROMPT``
        filled with the tweet and the label word looked up in ``LABELS``,
        followed by ``EOS``.
    """
    return {
        "text": [
            PROMPT.format(tweet, LABELS[int(label)]) + EOS
            for tweet, label in zip(batch["text"], batch["label"])
        ]
    }

def _format_split(split):
    """Swap every original column of `split` for the single formatted "text" column."""
    return split.map(to_supervised_text, batched=True, remove_columns=split.column_names)


train = _format_split(train)
valid = _format_split(valid)

# Preview (at most 500 characters of) the first formatted training example.
print("‚úÖ Example formatted sample:\n", train["text"][0][:500])

‚úÖ Example formatted sample:
 You are an assistant that classifies the overall sentiment of a tweet as one of: Negative, Neutral, Positive.

### Tweet:
"QT @user In the original draft of the 7th book, Remus Lupin survived the Battle of Hogwarts. #HappyBirthdayRemusLupin"

### Instructions:
Respond with exactly one word: Negative, Neutral, or Positive.

### Sentiment:
Positive<|endoftext|>


# Configure SFTTrainer for LoRA training

In [6]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Precision: bf16 when Unsloth reports support for it, fp16 otherwise.
use_bf16 = is_bfloat16_supported()
# The authentication cell sets WANDB_DISABLED="true" when W&B was skipped.
wandb_enabled = os.environ.get("WANDB_DISABLED", "false") != "true"

# Every argument except the evaluation schedule, whose keyword name differs
# between the two attempts below. Keeping one copy prevents them from drifting.
training_kwargs = dict(
    output_dir="outputs_lora_smollm2_tweeteval",
    per_device_train_batch_size=16,
    gradient_accumulation_steps=1,
    num_train_epochs=2,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    weight_decay=0.05,
    logging_steps=10,
    save_strategy="epoch",
    fp16=not use_bf16,
    bf16=use_bf16,
    report_to="wandb" if wandb_enabled else "none",
)

try:
    args = TrainingArguments(eval_strategy="epoch", **training_kwargs)
except TypeError:
    # The installed transformers rejected `eval_strategy`; retry with the
    # alternative keyword spelling `evaluation_strategy`.
    args = TrainingArguments(evaluation_strategy="epoch", **training_kwargs)

# Supervised fine-tuning on the formatted "text" column of both splits.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train,
    eval_dataset=valid,
    dataset_text_field="text",
    # Reuse the limit the model was loaded with (1024) rather than repeating
    # the literal, so the two settings cannot silently diverge.
    max_seq_length=max_seq_length,
    packing=False,
    args=args,
)

print("‚úÖ Trainer ready!")

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/15000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/2000 [00:00<?, ? examples/s]

‚úÖ Trainer ready!


# Train the model

In [7]:
# Report the first CUDA device and its total memory (bytes / 1e9) before training.
if torch.cuda.is_available():
    gpu = torch.cuda.get_device_properties(0)
    vram_gb = round(gpu.total_memory / 1e9, 2)
    print("GPU: {} | VRAM: {} GB".format(gpu.name, vram_gb))

# Keep the result object; the runtime-statistics cell reads its metrics.
train_result = trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.


GPU: Tesla T4 | VRAM: 15.83 GB


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 15,000 | Num Epochs = 2 | Total steps = 1,876
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 1 x 1) = 16
 "-____-"     Trainable parameters = 4,884,480 of 139,400,064 (3.50% trained)


Epoch,Training Loss,Validation Loss
1,1.3658,1.381627
2,1.3831,1.371087


Unsloth: Will smartly offload gradients to save VRAM!


# Runtime statistics

In [8]:
has_cuda = torch.cuda.is_available()

# Peak reserved CUDA memory (bytes / 1e9), or the sentinel "CPU" without a GPU.
peak_mem = round(torch.cuda.max_memory_reserved() / 1e9, 3) if has_cuda else "CPU"
# "train_runtime" divided by 60 to report minutes; 0 when the metric is missing.
mins = round(train_result.metrics.get("train_runtime", 0) / 60, 2)

print(f"‚è± Runtime: {mins} minutes")
if has_cuda:
    print(f"üíæ Peak reserved GPU memory: {peak_mem} GB")
else:
    # Avoid the nonsensical "CPU GB" the single f-string printed without CUDA.
    print("üíæ Peak reserved GPU memory: n/a (running on CPU)")

# Evaluate on the validation split attached to the trainer.
metrics = trainer.evaluate()
print("Eval metrics :", metrics)

‚è± Runtime: 22.71 minutes
üíæ Peak reserved GPU memory: 4.161 GB


Eval metrics : {'eval_loss': 1.3710867166519165, 'eval_runtime': 44.7064, 'eval_samples_per_second': 44.736, 'eval_steps_per_second': 5.592, 'epoch': 2.0}


# Inference

In [9]:
from transformers import pipeline
from unsloth import FastLanguageModel
# Switch the fine-tuned model to Unsloth's inference mode before it is handed
# to the generation pipeline.
# NOTE(review): Unsloth documents this call as enabling its faster generation
# path -- confirm against the installed Unsloth version.
FastLanguageModel.for_inference(model)

def build_infer_prompt(tweet: str) -> str:
    """Build the generation prompt for a single tweet.

    Fills the shared ``PROMPT`` training template with the tweet and an empty
    answer slot, so the prompt ends right after the "### Sentiment:" line.
    Reusing the template (instead of re-typing it here) keeps the inference
    prompt from drifting away from the format the model was trained on.

    Args:
        tweet: Raw tweet text to classify.

    Returns:
        The prompt string to feed to the model.
    """
    return PROMPT.format(tweet, "")

# Half-precision dtype: bf16 when supported, fp16 otherwise.
# Note: the recorded output of this cell shows transformers warning that
# `torch_dtype` is deprecated in favour of `dtype`.
half_precision = torch.bfloat16 if is_bfloat16_supported() else torch.float16

gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=half_precision,
    device_map="auto",
)

samples = [
    "Absolutely loved the new update‚Äîeverything feels snappier!",
    "Meh, it works I guess. Nothing special.",
    "Worst experience ever. App keeps crashing and support is useless.",
]

# The generated text includes the prompt, so the answer is whatever follows
# the last occurrence of this marker.
answer_marker = "### Sentiment:"
separator = "-" * 60

for tweet in samples:
    result = gen(build_infer_prompt(tweet), max_new_tokens=6, do_sample=False)
    answer = result[0]["generated_text"].rsplit(answer_marker, 1)[-1].strip()
    print("Tweet:", tweet)
    print("Model:", answer)
    print(separator)

`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cuda:0


Tweet: Absolutely loved the new update‚Äîeverything feels snappier!
Model: Positive
------------------------------------------------------------
Tweet: Meh, it works I guess. Nothing special.
Model: Neutral
------------------------------------------------------------
Tweet: Worst experience ever. App keeps crashing and support is useless.
Model: Negative
------------------------------------------------------------


# Save LoRA model

In [None]:
from huggingface_hub import get_token

save_dir = "SmolLM2-135M-TweetEval-LoRA"
# NOTE(review): "username" looks like a placeholder (the recorded output shows
# an upload to a different namespace) -- replace it with your own Hub
# namespace before uploading.
repo_id = "username/SmolLM2-135M-TweetEval-LoRA"

# save PEFT/LoRA model + tokenizer locally
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print(f"‚úÖ Saved to: {save_dir}")

# The authentication cell allows skipping the Hugging Face login, so only
# attempt the upload when a token was entered there or one is otherwise
# available to huggingface_hub; pushing without credentials would raise.
if hf_token or get_token():
    model.push_to_hub(repo_id)
    tokenizer.push_to_hub(repo_id)
    print("‚úÖ Uploaded to Hub:", repo_id)
else:
    print("Hub upload skipped: no Hugging Face token available.")

‚úÖ Saved to: SmolLM2-135M-TweetEval-LoRA


README.md:   0%|          | 0.00/570 [00:00<?, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:   5%|5         |  527kB / 9.82MB            

Saved model to https://huggingface.co/aditya-rajpurohit/SmolLM2-135M-TweetEval-LoRA
‚úÖ Uploaded to Hub: aditya-rajpurohit/SmolLM2-135M-TweetEval-LoRA
