# Fine-Tuning LLM using LoRA for Sentiment Analysis
### Austin Irwin

In [13]:
from datasets import load_dataset, DatasetDict, Dataset

from transformers import (
    AutoTokenizer,
    AutoConfig, 
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer)

from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np

In [14]:
# load dataset
dataset = load_dataset('shawhin/imdb-truncated')
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
})


## Model

In [15]:
model_id = 'distilbert-base-uncased'

# define label maps
id2label = {0: 'Negative', 1: 'Positive'}
label2id = {'Negative':0, 'Positive':1}

# generate classification model from model_id
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=2, id2label=id2label, label2id=label2id)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Preprocessing

In [16]:
# create tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True)

# add pad token if none exists
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# create tokenize function
def tokenize_function(examples): 
    # extract text
    text = examples['text']

    # tokenize and truncate text
    tokenizer.truncation_side = 'right'
    tokenized_inputs = tokenizer(
        text,
        return_tensors='np',
        truncation=True,
        max_length=512
    )
    return tokenized_inputs

In [17]:
# tokenize training and validation sets
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset

Map: 100%|██████████| 1000/1000 [00:00<00:00, 5611.36 examples/s]


DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
})

In [18]:
# create data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

## Evaluation

In [19]:
# import accuracy evaluation metric
accuracy = evaluate.load('accuracy')

In [20]:
# define an evaluation function to pass into trainer later
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)

    return {'accuracy': accuracy.compute(predictions=predictions, references=labels)}

## Apply Untrained Model to Text

In [21]:
# define list of examples
text_list = ["It was good.", "Not a fan, don't recommed.", "Better than the first one.", "This is not worth watching even once.", "This one is a pass."]

print("Untrained model predictions:")
print("----------------------------")
for text in text_list:
    # tokenize text
    inputs = tokenizer.encode(text, return_tensors="pt")
    # compute logits
    logits = model(inputs).logits
    # convert logits to label
    predictions = torch.argmax(logits)

    print(text + " - " + id2label[predictions.tolist()])

Untrained model predictions:
----------------------------
It was good. - Positive
Not a fan, don't recommed. - Positive
Better than the first one. - Positive
This is not worth watching even once. - Positive
This one is a pass. - Positive


## Train Model

In [22]:
peft_config = LoraConfig(task_type='SEQ_CLS',
                         r=4,
                         lora_alpha=32,
                         lora_dropout=0.01,
                         target_modules=['q_lin', 'v_lin'])

peft_config

LoraConfig(task_type='SEQ_CLS', peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, inference_mode=False, r=4, target_modules={'v_lin', 'q_lin'}, exclude_modules=None, lora_alpha=32, lora_dropout=0.01, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, use_qalora=False, qalora_group_size=16, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False, target_parameters=None)

In [23]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

trainable params: 665,858 || all params: 67,620,868 || trainable%: 0.9847
base_model.model.distilbert.transformer.layer.0.attention.q_lin.lora_A.default.weight
base_model.model.distilbert.transformer.layer.0.attention.q_lin.lora_B.default.weight
base_model.model.distilbert.transformer.layer.0.attention.v_lin.lora_A.default.weight
base_model.model.distilbert.transformer.layer.0.attention.v_lin.lora_B.default.weight
base_model.model.distilbert.transformer.layer.1.attention.q_lin.lora_A.default.weight
base_model.model.distilbert.transformer.layer.1.attention.q_lin.lora_B.default.weight
base_model.model.distilbert.transformer.layer.1.attention.v_lin.lora_A.default.weight
base_model.model.distilbert.transformer.layer.1.attention.v_lin.lora_B.default.weight
base_model.model.distilbert.transformer.layer.2.attention.q_lin.lora_A.default.weight
base_model.model.distilbert.transformer.layer.2.attention.q_lin.lora_B.default.weight
base_model.model.distilbert.transformer.layer.2.attention.v_lin.lo

In [24]:
# define hyperparameters
lr = 5e-5
batch_size = 4
num_epochs = 5

In [25]:
# define training arguments
training_args = TrainingArguments(
    output_dir= model_id + '-lora-text-classification',
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
)

In [26]:
# create trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# train the model
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.325567,{'accuracy': 0.871}
2,0.459900,0.41643,{'accuracy': 0.858}
3,0.459900,0.360069,{'accuracy': 0.879}
4,0.374900,0.324521,{'accuracy': 0.897}
5,0.374900,0.325655,{'accuracy': 0.899}




TrainOutput(global_step=1250, training_loss=0.40467142333984374, metrics={'train_runtime': 1337.7675, 'train_samples_per_second': 3.738, 'train_steps_per_second': 0.934, 'total_flos': 557259681556032.0, 'train_loss': 0.40467142333984374, 'epoch': 5.0})

## Generate Predictions

In [15]:
model.to('mps')

print('Trained model predictions:')
print('--------------------------')
for text in text_list:
    inputs = tokenizer.encode(text, return_tensors='pt').to('mps')

    logits = model(inputs).logits
    predictions = torch.max(logits,1).indices

    print(text + ' - ' + id2label[predictions.tolist()[0]])

Trained model predictions:
--------------------------
It was good. - Positive
Not a fan, don't recommed. - Negative
Better than the first one. - Positive
This is not worth watching even once. - Negative
This one is a pass. - Negative
