In [None]:
pip install datasets evaluate torch peft

In [None]:
pip install transformers

In [None]:
# Attach Google Drive to the Colab filesystem; output_dir points inside this mount.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that receives the metrics JSON files and the saved PEFT model.
DRIVE_ROOT = "/content/drive/MyDrive"
output_dir = f"{DRIVE_ROOT}/Cognizant/results"

# Lightweight Fine-Tuning Project

TODO: In this cell, describe your choices for each of the following

* PEFT technique: LoRA
* Model: GPT-2
* Evaluation approach: `Trainer.evaluate()` from Hugging Face Transformers on the validation split, reporting accuracy, precision, recall, specificity, and F1
* Fine-tuning dataset: climatebert/environmental_claims

## Loading and Evaluating a Foundation Model


TODO: In the cells below, load your chosen pre-trained Hugging Face model and evaluate its performance prior to fine-tuning. This step includes loading an appropriate tokenizer and dataset.

In [None]:
from transformers import GPT2Config, GPT2Tokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset
import numpy as np
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


from peft import LoraConfig, get_peft_model, TaskType, AutoPeftModelForSequenceClassification
# Seed every RNG that can influence the run, not only Python's: the classification
# head placed on top of GPT-2 is newly (randomly) initialised, so seeding `random`
# alone leaves the baseline metrics non-reproducible.
import torch

SEED = 10
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
import json
import os

In [None]:
# Tokenizer for GPT-2; the end-of-sequence token is reused as the padding token.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# GPT-2 with a two-class sequence-classification head ("no" = 0, "yes" = 1).
id_to_label = {0: "no", 1: "yes"}
label_to_id = {"no": 0, "yes": 1}
model = AutoModelForSequenceClassification.from_pretrained(
    'gpt2',
    num_labels=2,
    id2label=id_to_label,
    label2id=label_to_id,
)
# Tell the model which token id is padding (same id as end-of-sequence).
model.config.pad_token_id = model.config.eos_token_id

# Load the train and validation splits, keyed by split name.
splits = ["train", "validation"]
dataset = {
    split_name: load_dataset("climatebert/environmental_claims", split=split_name)
    for split_name in splits
}


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading readme:   0%|          | 0.00/4.25k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/215k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.9k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.5k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2117 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/265 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/265 [00:00<?, ? examples/s]

In [None]:
# Show which columns each loaded split exposes.
for split in splits:
    column_names = dataset[split].column_names
    print(f"Columns in {split} dataset:", column_names)

Columns in train dataset: ['text', 'label']
Columns in validation dataset: ['text', 'label']


In [None]:
def preprocess_function(examples):
    """Tokenize a batch of examples to fixed-length GPT-2 inputs.

    Each entry of ``examples['text']`` is truncated or padded to 128 tokens.
    For the first five items of the batch, the character length of the text and
    the length of the resulting ``input_ids`` are printed.

    Args:
        examples: Batch mapping that provides the texts under the ``'text'`` key.

    Returns:
        The tokenizer output for the batch.
    """
    texts = examples['text']
    print("Original text length:", [len(text) for text in texts[:5]])

    encoded = tokenizer(texts, truncation=True, padding='max_length', max_length=128)

    id_lengths = list(map(len, encoded['input_ids'][:5]))
    print("Tokenized input_ids length:", id_lengths)
    return encoded

# Tokenize every split in batched mode; the result is keyed by split name, like `dataset`.
encoded_dataset = {}
for split_name in splits:
    encoded_dataset[split_name] = dataset[split_name].map(preprocess_function, batched=True)


Map:   0%|          | 0/2117 [00:00<?, ? examples/s]

Original text length: [142, 94, 147, 162, 130]
Tokenized input_ids length: [128, 128, 128, 128, 128]
Original text length: [166, 212, 150, 166, 81]
Tokenized input_ids length: [128, 128, 128, 128, 128]
Original text length: [78, 205, 223, 168, 92]
Tokenized input_ids length: [128, 128, 128, 128, 128]


Map:   0%|          | 0/265 [00:00<?, ? examples/s]

Original text length: [96, 158, 165, 262, 228]
Tokenized input_ids length: [128, 128, 128, 128, 128]


In [None]:
# Print the input_ids length of the first five tokenized examples in each split.
for split in splits:
    print(f"Lengths of tokenized sequences in {split} dataset:")
    tokenized_split = encoded_dataset[split]
    for row_idx in range(5):
        row = tokenized_split[row_idx]
        print(len(row['input_ids']))


def compute_metrics(pred):
    """Compute binary-classification metrics for a ``Trainer`` evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and ``predictions``
            (per-class scores, or a tuple whose first element holds them).

    Returns:
        dict with ``accuracy``, ``precision``, ``recall`` (sensitivity),
        ``specificity`` and ``f1``. Class 1 is the positive class. A ratio whose
        denominator is empty is reported as 0.0.
    """
    labels = pred.label_ids
    scores = pred.predictions
    # Some models return several outputs; the class scores come first.
    if isinstance(scores, tuple):
        scores = scores[0]
    preds = scores.argmax(-1)

    accuracy = accuracy_score(labels, preds)
    # zero_division=0 keeps the value sklearn would return anyway (0.0) but
    # without emitting a warning when no positive is predicted or present.
    precision = precision_score(labels, preds, average='binary', pos_label=1, zero_division=0)
    recall = recall_score(labels, preds, average='binary', pos_label=1, zero_division=0)
    f1 = f1_score(labels, preds, average='binary', pos_label=1, zero_division=0)

    # labels=[0, 1] forces a 2x2 matrix even when only one class shows up in
    # labels/preds; otherwise the four-way unpacking below would fail.
    tn, fp, fn, tp = confusion_matrix(labels, preds, labels=[0, 1]).ravel()
    negatives = tn + fp
    specificity = float(tn / negatives) if negatives else 0.0

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,  # Sensitivity
        "specificity": specificity,
        "f1": f1
    }

Lengths of tokenized sequences in train dataset:
128
128
128
128
128
Lengths of tokenized sequences in validation dataset:
128
128
128
128
128


In [None]:

# Training/evaluation configuration shared by every Trainer in this notebook.
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Trainer around the not-yet-fine-tuned model; in this cell it is only used to evaluate.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    compute_metrics=compute_metrics,
)

# Baseline: validation metrics before any fine-tuning.
eval_result = trainer.evaluate()
print(f"Evaluation result: {eval_result}")

# Create the results folder explicitly so the write below does not depend on
# the Trainer (or an earlier run) having already created it.
os.makedirs(output_dir, exist_ok=True)
eval_results_file = f"{output_dir}/evaluation_initial_results.json"
with open(eval_results_file, 'w') as f:
    json.dump(eval_result, f)
print(f"Evaluation results saved to: {eval_results_file}")




Evaluation result: {'eval_loss': 2.088829517364502, 'eval_accuracy': 0.2490566037735849, 'eval_precision': 0.2490566037735849, 'eval_recall': 1.0, 'eval_specificity': 0.0, 'eval_f1': 0.3987915407854985, 'eval_runtime': 3.8431, 'eval_samples_per_second': 68.955, 'eval_steps_per_second': 8.847}
Evaluation results saved to: /content/drive/MyDrive/Cognizant/results/evaluation_initial_results.json


## Performing Parameter-Efficient Fine-Tuning


TODO: In the cells below, create a PEFT model from your loaded model, run a training loop, and save the PEFT model weights.





In [None]:
# Configure LoRA
config = LoraConfig(
    r=10,  # Rank of the low-rank update matrices
    lora_alpha=32,
    target_modules=['c_attn', 'c_proj'],
    # GPT-2 implements these projections as Conv1D layers, which store their
    # weights as (fan_in, fan_out); state that explicitly instead of relying on
    # PEFT to detect and correct it.
    fan_in_fan_out=True,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_CLS
)

# Wrap the base model with the LoRA adapter and report how many parameters will train.
peft_model = get_peft_model(model, config)
peft_model.print_trainable_parameters()

# Trainer for the PEFT model; reuses the arguments, datasets and metrics of the baseline run.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    compute_metrics=compute_metrics,
)

trainable params: 1,015,296 || all params: 125,456,640 || trainable%: 0.8093




In [None]:
# Fine-tune the PEFT model with the Trainer configured for it.
trainer.train()

# Validation metrics after fine-tuning.
eval_result_finetuned = trainer.evaluate()
print(f"Evaluation result: {eval_result_finetuned}")

# Write the fine-tuned metrics to a JSON file in the results folder.
eval_results_file_finetuned = f"{output_dir}/evaluation_finetuned_results.json"
with open(eval_results_file_finetuned, 'w') as results_fh:
    json.dump(eval_result_finetuned, results_fh)
print(f"Evaluation results saved to: {eval_results_file_finetuned}")

# Save the PEFT model weights to the results folder on Google Drive.
peft_model_dir = f'{output_dir}/peft_model'
peft_model.save_pretrained(peft_model_dir)
print(f"peft_model saved to: {peft_model_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,Specificity,F1
1,No log,0.244882,0.898113,0.791045,0.80303,0.929648,0.796992
2,0.264600,0.245697,0.89434,0.779412,0.80303,0.924623,0.791045
3,0.264600,0.245429,0.89434,0.779412,0.80303,0.924623,0.791045


Evaluation result: {'eval_loss': 0.24542857706546783, 'eval_accuracy': 0.8943396226415095, 'eval_precision': 0.7794117647058824, 'eval_recall': 0.803030303030303, 'eval_specificity': 0.9246231155778895, 'eval_f1': 0.7910447761194029, 'eval_runtime': 2.3657, 'eval_samples_per_second': 112.016, 'eval_steps_per_second': 14.372, 'epoch': 3.0}
Evaluation results saved to: /content/drive/MyDrive/Cognizant/results/evaluation_finetuned_results.json
peft_model saved to: /content/drive/MyDrive/Cognizant/results/peft_model


Across the three fine-tuning epochs the validation metrics are essentially flat rather than steadily improving. Accuracy is 0.898 after epoch 1 and 0.894 after epochs 2 and 3; precision moves from 0.791 to 0.779, specificity from 0.930 to 0.925, and F1 from 0.797 to 0.791, while recall stays at 0.803 and validation loss stays at about 0.245. Nearly all of the gain over the untuned model is therefore reached within the first epoch; the additional epochs do not further improve classification or reduce false positives on this validation set.

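Moreover, after fine-tuning, the model exhibits much higher accuracy and much lower evaluation loss than before fine-tuning: validation accuracy rises from 0.249 to 0.894 and validation loss falls from 2.089 to 0.245. Before fine-tuning the model assigned every validation example to the positive class (recall 1.0, specificity 0.0); after fine-tuning it separates the two classes (recall 0.803, specificity 0.925). Because these figures are measured on the validation split, the increase in accuracy reflects the model's improved ability to generalize to data it was not trained on. The logged training loss (0.265) is close to the validation loss, which indicates that the model's predictions fit the training labels without obvious overfitting.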

## Performing Inference with a PEFT Model

TODO: In the cells below, load the saved PEFT model weights and evaluate the performance of the trained PEFT model. Be sure to compare the results to the results from prior to fine-tuning.

In [None]:
# Recreate the tokenizer, again reusing the end-of-sequence token for padding.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Reload the PEFT model from the folder it was saved to.
peft_model_dir = f'{output_dir}/peft_model'
peft_model = AutoPeftModelForSequenceClassification.from_pretrained(
    peft_model_dir,
    num_labels=2,
    id2label={0: "no", 1: "yes"},
    label2id={"no": 0, "yes": 1},
)
peft_model.config.pad_token_id = peft_model.config.eos_token_id

# Fresh Trainer around the reloaded model, with the same arguments, data and metrics as before.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    compute_metrics=compute_metrics,
)

# Validation metrics of the reloaded PEFT model.
peft_eval_result = trainer.evaluate()
print(f"PEFT model evaluation result: {peft_eval_result}")

# Put the three accuracies side by side: baseline, right after training, and after reloading.
accuracy_key = 'eval_accuracy'
initial_eval_accuracy = eval_result[accuracy_key]
finetuned_eval_accuracy = eval_result_finetuned[accuracy_key]
peft_eval_accuracy = peft_eval_result[accuracy_key]

print(f"Initial model evaluation accuracy: {initial_eval_accuracy}")
print(f"Model evaluation after fine-tuning accuracy: {finetuned_eval_accuracy}")
print(f"PEFT model evaluation accuracy: {peft_eval_accuracy}")

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PEFT model evaluation result: {'eval_loss': 0.24542857706546783, 'eval_accuracy': 0.8943396226415095, 'eval_precision': 0.7794117647058824, 'eval_recall': 0.803030303030303, 'eval_specificity': 0.9246231155778895, 'eval_f1': 0.7910447761194029, 'eval_runtime': 2.6367, 'eval_samples_per_second': 100.504, 'eval_steps_per_second': 12.895}
Initial model evaluation accuracy: 0.2490566037735849
Model evaluation after fine-tuning accuracy: 0.8943396226415095
PEFT model evaluation accuracy: 0.8943396226415095


After fine-tuning, the model achieves much higher accuracy than before fine-tuning: validation accuracy rises from 0.249 to 0.894. The PEFT model reloaded from disk reproduces the fine-tuned metrics exactly (accuracy 0.894, F1 0.791), which confirms that the adapter weights were saved and restored correctly. This improvement demonstrates that the fine-tuning process effectively adapts the pre-trained model to our specific task, enhancing its ability to make correct predictions on the given dataset. The increased accuracy indicates that the model has learned the patterns that distinguish the two classes in this data, resulting in more reliable classification outcomes.