# LoRA Fine-tuning: DistilBERT on Financial PhraseBank

This notebook implements parameter-efficient fine-tuning using LoRA (Low-Rank Adaptation) for sentiment classification on financial news.

In [1]:
import torch
from datasets import load_from_disk
from transformers import (
    AutoModelForSequenceClassification,
    TrainingArguments, 
    Trainer
)
from peft import LoraConfig, get_peft_model, TaskType
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load preprocessed datasets
train_dataset = load_from_disk('processed_data/train')
val_dataset = load_from_disk('processed_data/validation')
test_dataset = load_from_disk('processed_data/test')

# Load label mappings
with open('processed_data/label_mappings.json', 'r') as f:
    label_mappings = json.load(f)
    id2label = {int(k): v for k, v in label_mappings['id2label'].items()}
    label2id = label_mappings['label2id']

In [3]:
# Load base model
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=3,
    id2label=id2label,
    label2id=label2id
)

# Check device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using device: cpu


In [4]:
# Check parameters BEFORE LoRA
total_params_before = sum(p.numel() for p in model.parameters())
trainable_params_before = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Before LoRA:")
print(f"  Total params: {total_params_before:,}")
print(f"  Trainable params: {trainable_params_before:,}")
print(f"  Trainable %: {100 * trainable_params_before / total_params_before:.2f}%")

Before LoRA:
  Total params: 66,955,779
  Trainable params: 66,955,779
  Trainable %: 100.00%


In [5]:
# Configure LoRA
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,                              # Rank of low-rank matrices
    lora_alpha=16,                    # Scaling factor
    lora_dropout=0.1,                 # Dropout probability
    bias="none",
    target_modules=["q_lin", "v_lin"], # Target attention layers in DistilBERT
    inference_mode=False
)

print(f"\nLoRA Configuration:")
print(f"  Rank (r): {lora_config.r}")
print(f"  Alpha: {lora_config.lora_alpha}")
print(f"  Target modules: {lora_config.target_modules}")


LoRA Configuration:
  Rank (r): 8
  Alpha: 16
  Target modules: {'q_lin', 'v_lin'}


In [6]:
# Apply LoRA to model (freezes base model, adds trainable adapters)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 740,355 || all params: 67,696,134 || trainable%: 1.0936


In [7]:
# Verify parameters AFTER LoRA (should be <1% trainable)
total_params_after = sum(p.numel() for p in model.parameters())
trainable_params_after = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"\nAfter LoRA:")
print(f"  Total params: {total_params_after:,}")
print(f"  Trainable params: {trainable_params_after:,}")
print(f"  Trainable %: {100 * trainable_params_after / total_params_after:.4f}%")
print(f"\n✓ Only {trainable_params_after:,} parameters updated (LoRA adapters)")


After LoRA:
  Total params: 67,696,134
  Trainable params: 740,355
  Trainable %: 1.0936%

✓ Only 740,355 parameters updated (LoRA adapters)


In [8]:
# Define metrics computation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_score(labels, predictions)
    f1_macro = f1_score(labels, predictions, average='macro')
    f1_weighted = f1_score(labels, predictions, average='weighted')
    precision = precision_score(labels, predictions, average='macro')
    recall = recall_score(labels, predictions, average='macro')
    
    return {
        'accuracy': accuracy,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'precision': precision,
        'recall': recall
    }

In [9]:
# Training arguments
training_args = TrainingArguments(
    output_dir="./lora_results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,               # Higher LR for LoRA adapters
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    save_total_limit=2,
    seed=42
)

In [10]:
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

In [11]:
# Train with LoRA (only adapter parameters updated)
train_result = trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,F1 Weighted,Precision,Recall
1,0.471,0.445777,0.814433,0.806267,0.818212,0.79211,0.833785
2,0.3459,0.362322,0.85567,0.846684,0.856957,0.833524,0.862961
3,0.3342,0.355123,0.85567,0.845132,0.856344,0.8289,0.865107




In [12]:
# Evaluate on validation set
val_results = trainer.evaluate()
print("Validation Results:")
for key, value in val_results.items():
    print(f"  {key}: {value:.4f}")



Validation Results:
  eval_loss: 0.3623
  eval_accuracy: 0.8557
  eval_f1_macro: 0.8467
  eval_f1_weighted: 0.8570
  eval_precision: 0.8335
  eval_recall: 0.8630
  eval_runtime: 10.2404
  eval_samples_per_second: 47.3610
  eval_steps_per_second: 3.0270
  epoch: 3.0000


In [13]:
# Evaluate on test set
test_results = trainer.evaluate(test_dataset)
print("\nTest Results:")
for key, value in test_results.items():
    print(f"  {key}: {value:.4f}")




Test Results:
  eval_loss: 0.3535
  eval_accuracy: 0.8474
  eval_f1_macro: 0.8258
  eval_f1_weighted: 0.8486
  eval_precision: 0.8118
  eval_recall: 0.8429
  eval_runtime: 10.5021
  eval_samples_per_second: 46.1810
  eval_steps_per_second: 2.9520
  epoch: 3.0000


In [14]:
# Get predictions for confusion matrix
predictions = trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
true_labels = predictions.label_ids

# Confusion matrix
from sklearn.metrics import confusion_matrix, classification_report
cm = confusion_matrix(true_labels, pred_labels)
print("\nConfusion Matrix:")
print(cm)

print("\nClassification Report:")
print(classification_report(true_labels, pred_labels, target_names=list(id2label.values())))




Confusion Matrix:
[[ 51   5   4]
 [ 15 248  25]
 [  3  22 112]]

Classification Report:
              precision    recall  f1-score   support

    negative       0.74      0.85      0.79        60
     neutral       0.90      0.86      0.88       288
    positive       0.79      0.82      0.81       137

    accuracy                           0.85       485
   macro avg       0.81      0.84      0.83       485
weighted avg       0.85      0.85      0.85       485



In [16]:
# Save LoRA model and results
model.save_pretrained("./lora_model")

results_summary = {
    'validation': val_results,
    'test': test_results,
    'trainable_params': trainable_params_after,
    'total_params': total_params_after
}

with open('./lora_results/results_summary.json', 'w') as f:
    json.dump(results_summary, f, indent=2)