# Lightweight Fine-Tuning Project

## Loading and Evaluation a Foundation Model

In [3]:
#installing necessary packages in Colab environment
!pip install datasets
!pip install transformers[torch]
!pip install accelerate -U
!pip install peft








In [4]:
#necessary imports
from transformers import GPT2ForSequenceClassification, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorWithPadding, AutoModelForCausalLM, AutoModelForSequenceClassification
from datasets import load_dataset, load_metric
import numpy as np
from peft import LoftQConfig, LoraConfig, get_peft_model, TaskType

In [13]:
# Load reduced dataset for faster processing
dataset = load_dataset("amazon_polarity", split='train[:50]').train_test_split(
    test_size=0.5, shuffle=True, seed=23 #selecting higher split ratio for faster processing
    )

splits = ["train", "test"]

model_name = "gpt2" # working with gpt2 as a model
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

In [14]:


# Lambda function to tokenize all the examples
tokenized_dataset = {}
for split in splits:
    tokenized_dataset[split] = dataset[split].map(
        lambda x: tokenizer(x["content"], truncation=True, padding=True), batched=True #truncation and padding True for GPT
    )
tokenized_dataset["train"]

#converting dataset to Torch Tensor and define expected col labels
tokenized_dataset['train'].set_format('torch', columns=['label', 'input_ids', 'attention_mask'])
tokenized_dataset['test'].set_format('torch', columns=['label', 'input_ids', 'attention_mask'])

In [15]:
# Load GPT-2 model - not optimal for sentiment analysis, but used for exercise purposes

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    id2label={0: "bad review", 1: "good review"},
    label2id={"bad review": 0, "good review": 1},
    pad_token_id=tokenizer.eos_token_id,  # Set pad token id

)

# Unfreeze all the model parameters.
for param in model.parameters():
    param.requires_grad = True

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
# Define training arguments for the trainer
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=2,
    weight_decay=0.01,
    per_device_train_batch_size=128,
    logging_dir='./logs',
    remove_unused_columns=False
)

def compute_metrics(eval_pred):
    # Compute accuracy
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": (predictions == labels).mean()}

In [17]:
metric = load_metric("accuracy")

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"]
)


trainer.train()

# Evaluate the model
evaluation_results = trainer.evaluate()

print(evaluation_results)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,2.602645,0.48
2,No log,2.032965,0.48


{'eval_loss': 2.0329651832580566, 'eval_accuracy': 0.48, 'eval_runtime': 4.4778, 'eval_samples_per_second': 5.583, 'eval_steps_per_second': 0.893, 'epoch': 2.0}


## Performing PEFT

In [18]:

# Create a PEFT Config for LoRA
config = LoraConfig(
r=8, # Rank
lora_alpha=32,
target_modules=['c_attn', 'c_proj'],#Assign correct layers (see model)
lora_dropout=0.1,
bias="none",
task_type=TaskType.SEQ_CLS
)

lora_model = get_peft_model(model, config)







In [19]:
# Initialize the Trainer with the PEFT model
trainer = Trainer(
    model=lora_model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics
)



# Training loop with at least one epoch
trainer.train()

# Save the trained PEFT model
lora_model.save_pretrained("gpt-lora")

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.992417,0.48
2,No log,1.971583,0.48


## Performing Inference with a PEFT model

In [20]:
# Evaluate the original pre-trained model
evaluation_results_before_finetuning = trainer.evaluate()

# Load the saved PEFT model weights
peft_model = GPT2ForSequenceClassification.from_pretrained('./gpt-lora')
peft_model.config.pad_token_id = tokenizer.pad_token_id

# Initialize the Trainer with the PEFT model
trainer.model = peft_model

# Evaluate the PEFT model after fine-tuning
evaluation_results_after_finetuning = trainer.evaluate()

# Compare the resultsSche
print("Results before fine-tuning: ", evaluation_results_before_finetuning)
print("Results after fine-tuning: ", evaluation_results_after_finetuning)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Results before fine-tuning:  {'eval_loss': 1.9715827703475952, 'eval_accuracy': 0.48, 'eval_runtime': 5.2821, 'eval_samples_per_second': 4.733, 'eval_steps_per_second': 0.757, 'epoch': 2.0}
Results after fine-tuning:  {'eval_loss': 3.5907483100891113, 'eval_accuracy': 0.48, 'eval_runtime': 4.7544, 'eval_samples_per_second': 5.258, 'eval_steps_per_second': 0.841, 'epoch': 2.0}


The PEFT fine tuned model actually performs worse in terms of evaluation loss. This is suprising as the saved pre-trained model (GPT Lora) has shown slightly better performance. Possibly, the difference in performance is based on random variation given the small size of the dataset.