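# Fine-Tuning Qwen2-1.5B with LoRA for Sentiment Analysis

This notebook fine-tunes `Qwen/Qwen2-1.5B-Instruct` with LoRA adapters on a 1,000-example subset of the SST-2 training split and compares accuracy on 200 validation sentences before (zero-shot) and after fine-tuning. In the recorded run, accuracy rose from 0.84 to 0.945.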

In [None]:
!pip install -q torch transformers datasets accelerate peft trl scikit-learn

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m67.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import torch
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import warnings

# Keep cell outputs readable: hide Python warnings and transformers log messages below ERROR.
# NOTE: the blanket filter also hides deprecation warnings; disable it when debugging.
warnings.filterwarnings("ignore")
transformers.logging.set_verbosity_error()

# Seed the Python, NumPy and PyTorch RNGs so stochastic steps repeat on a fresh kernel.
SEED = 42
transformers.set_seed(SEED)

# Record the environment next to the results.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Transformers version: {transformers.__version__}")

PyTorch version: 2.6.0+cu124
CUDA available: True
Transformers version: 4.53.2


## Load Model

In [None]:
model_id = "Qwen/Qwen2-1.5B-Instruct"

# Load the weights in bfloat16 and let `device_map="auto"` decide where they are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Qwen2 tokenizers ship with transformers, so no remote code has to be trusted
# (the model above is already loaded without that flag).
tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token is None:
    # Fall back to EOS for padding and keep the model config in sync with the id
    # the tokenizer will actually use, rather than whatever the config's EOS id is.
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

## Tokenizer

In [None]:
# Encode one short sentence to inspect the ids the tokenizer assigns to it.
sample_text = "This is a sample sentence for tokenization."
tokenized_output = tokenizer(sample_text)
input_ids = tokenized_output["input_ids"]

# Emit both lines with a single call; the printed text is unchanged.
print(
    f"Original Text: '{sample_text}'",
    f"Token IDs: {input_ids}",
    sep="\n",
)

Original Text: 'This is a sample sentence for tokenization.'
Token IDs: [1986, 374, 264, 6077, 11652, 369, 3950, 2022, 13]


In [None]:
# Decode the ids back to text to check that the round trip reproduces the input sentence.
decoded_text = tokenizer.decode(input_ids)
print(f"Decoded Text: '{decoded_text}'")

Decoded Text: 'This is a sample sentence for tokenization.'


## Embedding

In [None]:
# The input embedding table maps each vocabulary id to a dense vector.
embedding_layer = model.get_input_embeddings()
vocab_size, embedding_dim = (
    embedding_layer.num_embeddings,
    embedding_layer.embedding_dim,
)

print(f"Vocabulary Size: {vocab_size}")
print(f"Embedding Dimension: {embedding_dim}")

Vocabulary Size: 151936
Embedding Dimension: 1536


In [None]:
# Look up the embedding vector of the first token of the sample sentence.
# The id tensor is created directly on the model's device instead of being moved afterwards.
first_token_id = torch.tensor(
    [input_ids[0]],
    dtype=torch.long,
    device=model.device,
)
first_token_embedding = embedding_layer(first_token_id)

print(f"Shape: {first_token_embedding.shape}")
print(first_token_embedding)

Shape: torch.Size([1, 1536])
tensor([[ 0.0160,  0.0116, -0.0067,  ..., -0.0078, -0.0069,  0.0004]],
       device='cuda:0', dtype=torch.bfloat16, grad_fn=<EmbeddingBackward0>)


## Dataset

In [None]:
# Load the `stanfordnlp/sst2` sentiment dataset (train / validation / test splits).
dataset = load_dataset("stanfordnlp/sst2")

In [None]:
# Show the available splits with their columns and row counts.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 872
    })
    test: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 1821
    })
})


In [None]:
# Work on small, seeded random subsets so the notebook runs quickly:
# 1000 training examples and 200 validation examples.
train_dataset, validation_dataset = (
    dataset[split_name].shuffle(seed=42).select(range(subset_size))
    for split_name, subset_size in (("train", 1000), ("validation", 200))
)

print("\nSample from training set:")
print(train_dataset[5])


Sample from training set:
{'idx': 66852, 'sentence': 'wise and powerful ', 'label': 1}


In [None]:
# Integer SST-2 labels -> the label words used in training targets and when scoring predictions.
label_map = {0: "negative", 1: "positive"}

## Zero-Shot Evaluation (Baseline)

In [None]:
def create_zero_shot_prompt(sentence):
    """Render the sentiment-classification chat prompt for a single sentence.

    A system turn and a user turn are formatted with the tokenizer's chat
    template. The result is returned as a string (``tokenize=False``) that ends
    with the generation prompt (``add_generation_prompt=True``), i.e. it is
    ready for the model to write the assistant reply.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant that classifies the sentiment of a sentence.",
    }
    user_turn = {
        "role": "user",
        "content": f"Classify the sentiment of the following sentence as 'positive' or 'negative': '{sentence}'",
    }
    return tokenizer.apply_chat_template(
        [system_turn, user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )

In [None]:
def evaluate_model(model_to_eval, eval_dataset):
    """Measure sentiment-classification accuracy by generating a label for each sentence.

    Every example's ``sentence`` is wrapped with ``create_zero_shot_prompt``, a
    short completion is generated, and the decoded completion is searched for
    the words "positive" / "negative". A completion containing neither is
    recorded as "unknown", which never equals a reference label and therefore
    counts as a wrong prediction.

    Args:
        model_to_eval: The model to score. It must provide ``generate``,
            ``eval``/``train``, ``training`` and ``device``.
        eval_dataset: Iterable of examples with a ``sentence`` string and an
            integer ``label`` that is a key of ``label_map``.

    Returns:
        dict: ``{"accuracy": <float>}`` computed over all examples.
    """
    predictions = []
    references = []

    # Dropout (e.g. LoRA dropout right after training) must be off while scoring.
    # The previous mode is restored afterwards so the caller's model is left as it was.
    was_training = model_to_eval.training
    model_to_eval.eval()
    try:
        for example in eval_dataset:
            sentence = example['sentence']
            true_label_text = label_map[example['label']]

            prompt = create_zero_shot_prompt(sentence)
            # Move inputs to the device of the model being evaluated, not of a global model.
            inputs = tokenizer(prompt, return_tensors="pt").to(model_to_eval.device)

            with torch.no_grad():
                outputs = model_to_eval.generate(
                    **inputs,
                    max_new_tokens=10,
                    # Greedy decoding: the checkpoint's generation config may enable
                    # sampling, which would make the reported accuracy vary between runs.
                    do_sample=False,
                    pad_token_id=tokenizer.eos_token_id,
                )

            # Keep only the newly generated tokens (drop the echoed prompt).
            prompt_length = inputs.input_ids.shape[1]
            response = tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).lower().strip()

            if "positive" in response:
                predicted_label_text = "positive"
            elif "negative" in response:
                predicted_label_text = "negative"
            else:  # fallback: scored as an error
                predicted_label_text = "unknown"

            predictions.append(predicted_label_text)
            references.append(true_label_text)
    finally:
        if was_training:
            model_to_eval.train()

    accuracy = accuracy_score(references, predictions)
    return {"accuracy": accuracy}

In [None]:
# Score the checkpoint before any fine-tuning so the later result has a reference point.
baseline_metrics = evaluate_model(model_to_eval=model, eval_dataset=validation_dataset)

print("\nBaseline (Zero-Shot) Performance:")
for metric in baseline_metrics:
    value = baseline_metrics[metric]
    print(f"{metric.capitalize()}: {value:.4f}")


Baseline (Zero-Shot) Performance:
Accuracy: 0.8400


## Prepare Dataset for Training

In [None]:
def format_dataset_for_finetuning(example):
    """Turn one labelled example into a chat-formatted training string.

    The conversation uses the same system and user turns as the zero-shot
    prompt and adds an assistant turn whose content is the label word looked up
    in ``label_map``. The rendered conversation is returned under the ``"text"``
    key as a plain string (``tokenize=False``).
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant that classifies the sentiment of a sentence.",
        },
        {
            "role": "user",
            "content": f"Classify the sentiment of the following sentence as 'positive' or 'negative': '{example['sentence']}'",
        },
        {
            "role": "assistant",
            "content": label_map[example['label']],
        },
    ]
    rendered = tokenizer.apply_chat_template(conversation, tokenize=False)
    return {"text": rendered}

In [None]:
# Apply the formatter to every training example; each result carries the chat-formatted string under "text".
formatted_train_dataset = train_dataset.map(format_dataset_for_finetuning)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

## LoRA Fine-Tuning

In [None]:
# LoRA adapters on the attention projections only.
lora_config = LoraConfig(
    r=16,              # adapter rank
    lora_alpha=32,     # adapter scaling factor
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

training_args = SFTConfig(
    output_dir="./qwen2-sst2-finetuned",
    per_device_train_batch_size=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",  # write a checkpoint at the end of every epoch
    bf16=True,
    report_to="none",       # no external experiment tracker
)

# NOTE(review): the adapters are presumably injected into `model` in place, so `model`
# would no longer be the untouched baseline after this cell — confirm, and reload the
# checkpoint before re-running the baseline evaluation.
trainer = SFTTrainer(
    model=model,
    train_dataset=formatted_train_dataset,
    peft_config=lora_config,
    args=training_args,
    # Reuse the tokenizer configured above instead of letting the trainer load its own copy.
    processing_class=tokenizer,
)

print("\nStarting training...")
trainer.train();

Adding EOS to train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

{'loss': 2.0169, 'grad_norm': 1.5316780805587769, 'learning_rate': 0.00019047619047619048, 'num_tokens': 9661.0, 'mean_token_accuracy': 0.6833175480365753, 'epoch': 0.16}
{'loss': 0.8264, 'grad_norm': 1.5572799444198608, 'learning_rate': 0.0001798941798941799, 'num_tokens': 19206.0, 'mean_token_accuracy': 0.8557792231440544, 'epoch': 0.32}
{'loss': 0.783, 'grad_norm': 0.5066453218460083, 'learning_rate': 0.00016931216931216931, 'num_tokens': 28805.0, 'mean_token_accuracy': 0.864188614487648, 'epoch': 0.48}
{'loss': 0.7707, 'grad_norm': 0.4520634114742279, 'learning_rate': 0.00015873015873015873, 'num_tokens': 38444.0, 'mean_token_accuracy': 0.8647681847214699, 'epoch': 0.64}
{'loss': 0.7885, 'grad_norm': 0.4665123224258423, 'learning_rate': 0.00014814814814814815, 'num_tokens': 48106.0, 'mean_token_accuracy': 0.8624139562249183, 'epoch': 0.8}
{'loss': 0.7556, 'grad_norm': 0.37084922194480896, 'learning_rate': 0.00013756613756613756, 'num_tokens': 57652.0, 'mean_token_accuracy': 0.86893

TrainOutput(global_step=189, training_loss=0.8141778537205288, metrics={'train_runtime': 699.7265, 'train_samples_per_second': 4.287, 'train_steps_per_second': 0.27, 'total_flos': 1659707257294848.0, 'train_loss': 0.8141778537205288})

In [None]:
# Re-run the same evaluation on the LoRA-adapted model for a like-for-like comparison with the baseline.
finetuned_metrics = evaluate_model(
    model_to_eval=trainer.model,
    eval_dataset=validation_dataset,
)

print("\nFine-Tuned Model Performance:")
for metric in finetuned_metrics:
    value = finetuned_metrics[metric]
    print(f"{metric.capitalize()}: {value:.4f}")


Fine-Tuned Model Performance:
Accuracy: 0.9450
