# Fine-Tuning LLM using LoRA for Sentiment Analysis
### Austin Irwin

In [1]:
from datasets import load_dataset, DatasetDict, Dataset

from transformers import (
    AutoTokenizer,
    AutoConfig, 
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer)

from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# load dataset
dataset = load_dataset('shawhin/imdb-truncated')
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
})


## Model

In [3]:
model_id = 'distilbert-base-uncased'

# define label maps
id2label = {0: 'Negative', 1: 'Positive'}
label2id = {'Negative':0, 'Positive':1}

# generate classification model from model_id
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=2, id2label=id2label, label2id=label2id)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Preprocessing

In [4]:
# create tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True)

# add pad token if none exists
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# create tokenize function
def tokenize_function(examples): 
    # extract text
    text = examples['text']

    # tokenize and truncate text
    tokenizer.truncation_side = 'left'
    tokenized_inputs = tokenizer(
        text,
        return_tensors='np',
        truncation=True,
        max_length=512
    )
    return tokenized_inputs

In [5]:
# tokenize training and validation sets
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset

Map: 100%|██████████| 1000/1000 [00:00<00:00, 4946.33 examples/s]


DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
})

In [6]:
# create data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

## Evaluation

In [7]:
# import accuracy evaluation metric
accuracy = evaluate.load('accuracy')

In [8]:
# define an evaluation function to pass into trainer later
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)

    return {'accuracy': accuracy.compute(predictions=predictions, references=labels)}

## Apply Untrained Model to Text

In [9]:
# define list of examples
text_list = ["It was good.", "Not a fan, don't recommed.", "Better than the first one.", "This is not worth watching even once.", "This one is a pass."]

print("Untrained model predictions:")
print("----------------------------")
for text in text_list:
    # tokenize text
    inputs = tokenizer.encode(text, return_tensors="pt")
    # compute logits
    logits = model(inputs).logits
    # convert logits to label
    predictions = torch.argmax(logits)

    print(text + " - " + id2label[predictions.tolist()])

Untrained model predictions:
----------------------------
It was good. - Negative
Not a fan, don't recommed. - Negative
Better than the first one. - Negative
This is not worth watching even once. - Negative
This one is a pass. - Negative


## Train Model

In [10]:
peft_config = LoraConfig(task_type='SEQ_CLS',
                         r=4,
                         lora_alpha=32,
                         lora_dropout=0.01,
                         target_modules=['q_lin'])

peft_config

LoraConfig(task_type='SEQ_CLS', peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, inference_mode=False, r=4, target_modules={'q_lin'}, exclude_modules=None, lora_alpha=32, lora_dropout=0.01, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, use_qalora=False, qalora_group_size=16, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False, target_parameters=None)

In [11]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 628,994 || all params: 67,584,004 || trainable%: 0.9307


In [12]:
# define hyperparameters
lr = 1e-3
batch_size = 4
num_epochs = 10

In [13]:
# define training arguments
training_args = TrainingArguments(
    output_dir= model_id + '-lora-text-classification',
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
)

In [None]:
# create trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# train the model
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.414962,{'accuracy': 0.878}
2,0.431500,0.480642,{'accuracy': 0.859}
3,0.431500,0.65579,{'accuracy': 0.883}
4,0.210500,0.740795,{'accuracy': 0.869}
5,0.210500,0.800289,{'accuracy': 0.873}
6,0.072700,0.876718,{'accuracy': 0.879}
7,0.072700,1.01957,{'accuracy': 0.879}
8,0.032900,1.012518,{'accuracy': 0.884}
9,0.032900,1.048725,{'accuracy': 0.88}
10,0.013000,1.060838,{'accuracy': 0.882}




TrainOutput(global_step=2500, training_loss=0.15212634925842286, metrics={'train_runtime': 2420.8018, 'train_samples_per_second': 4.131, 'train_steps_per_second': 1.033, 'total_flos': 1112883852759936.0, 'train_loss': 0.15212634925842286, 'epoch': 10.0})

## Generate Predictions

In [15]:
model.to('mps')

print('Trained model predictions:')
print('--------------------------')
for text in text_list:
    inputs = tokenizer.encode(text, return_tensors='pt').to('mps')

    logits = model(inputs).logits
    predictions = torch.max(logits,1).indices

    print(text + ' - ' + id2label[predictions.tolist()[0]])

Trained model predictions:
--------------------------
It was good. - Positive
Not a fan, don't recommed. - Negative
Better than the first one. - Positive
This is not worth watching even once. - Negative
This one is a pass. - Negative
