<a href="https://colab.research.google.com/github/danjshaw/ece57000-finalProject/blob/main/lora-bert/source/lora-bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initial Setup

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import os
if not os.path.exists('/content/drive/MyDrive/lora-bert/'):
    os.makedirs('/content/drive/MyDrive/lora-bert/')
output_dir = '/content/drive/MyDrive/lora-bert/'

In [5]:
!pip install datasets
!pip install evaluate
!pip install codecarbon
!pip freeze > /content/drive/MyDrive/lora-bert-tiny/requirements.txt



In [6]:
import torch
import torch.nn as nn
import numpy as np
from codecarbon import EmissionsTracker

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [8]:
from transformers import set_seed
set_seed(0)

In [9]:
def get_trainable_parameters(model):
  trainable_parameters = 0
  parameters = 0
  for param in model.parameters():
    parameters += param.numel()
    if param.requires_grad:
      trainable_parameters += param.numel()
  return {'total_parameters': parameters, 'trainable_parameters': trainable_parameters}

In [10]:
import csv
def write_results_to_csv(file_name, results):
  with open(file_name, 'w', newline='') as csvfile:
      writer = csv.DictWriter(csvfile, fieldnames=list(results[0].keys()))
      writer.writeheader()
      for result in results:
        writer.writerow(result)

In [11]:
def get_model_name(name_and_path):
  return name_and_path.split('/')[1]

# Full Fine-Tuning



Followed examples from this [Hugging Face NLP course](https://huggingface.co/learn/nlp-course/chapter3/3?fw=pt#fine-tuning-a-model-with-the-trainer-api) on how to use the trainer API for fine-tuning.

In [12]:
from transformers import AutoTokenizer, DataCollatorWithPadding
from datasets import load_dataset
import evaluate

def fine_tune(checkpoint, epochs, batch_size, learning_rate):
  model_name = checkpoint
  if '/' in checkpoint:
    model_name = get_model_name(checkpoint)

  # Initialize result
  result = {"batch_size": batch_size, "learning_rate": learning_rate}

  # Track emissions
  tracker = EmissionsTracker('fine-tuning-' + model_name, save_to_file=False)
  try:
    tracker.start()

    # Setup model and dataset
    raw_datasets = load_dataset("glue", "mrpc")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    def tokenize_function(example):
        return tokenizer(example["sentence1"], example["sentence2"])

    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    def compute_metrics(eval_preds):
        metric = evaluate.load("glue", "mrpc")
        logits, labels = eval_preds
        predictions = np.argmax(logits, axis=-1)
        return metric.compute(predictions=predictions, references=labels)

    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2).to(device)

    # Setup the trainer
    training_args = TrainingArguments(
        "fine-tuning-trainer",
        eval_strategy="epoch",
        per_device_eval_batch_size=batch_size,
        per_device_train_batch_size=batch_size,
        num_train_epochs=epochs,
        learning_rate=batch_size,
        disable_tqdm=True,
        report_to="none"
    )
    trainer = Trainer(
        model,
        training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        data_collator=data_collator,
        processing_class=tokenizer,
        compute_metrics=compute_metrics
    )

    # Train the model
    result |= trainer.train().metrics

    # Stop tracking emissions
    tracker.stop()

    # Store metrics and delete the tracker
    result |= trainer.evaluate() | get_trainable_parameters(model) | tracker.final_emissions_data.values
  finally:
    del tracker

  return result

In [13]:
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, set_seed
import time

def hyperparameter_tune(checkpoint, batch_sizes, learning_rates):
  model_name = checkpoint
  if '/' in checkpoint:
    model_name = get_model_name(checkpoint)

  ft_results = []
  progress = 0
  iterations = len(batch_sizes)*len(learning_rates)

  # Hyperparameter tuning
  start_time = time.time()
  for size in batch_sizes:
    for rate in learning_rates:
      # Track progress
      progress += 1
      print(f"Progress: {progress}/{iterations}")

      # Fine-tune the model and store the results
      ft_result = fine_tune(checkpoint, epochs, size, rate)
      ft_results.append(ft_result)

  write_results_to_csv(output_dir + model_name + '-ft-results.csv', ft_results)

  end_time = time.time()

  runtime_seconds = end_time - start_time
  runtime_minutes = runtime_seconds / 60

  # Output the best result
  max_ft_result = ft_results[0]
  for _, result in enumerate(ft_results):
    if result['eval_f1'] > max_ft_result['eval_f1']:
      max_ft_result = result
  print(f'\n================ \
          \nTotal Runtime: {runtime_minutes} minutes \
          \nBest Result: \
          \n\tF1={max_ft_result["eval_f1"]} \
          \n\tBatch Size={max_ft_result["batch_size"]} \
          \n\tLearning Rate={max_ft_result["learning_rate"]}'
  )

  return ft_results

# Low-Rank Adaptation (LoRA)

In [14]:
class LoraModule(nn.Module):
  def __init__(self, in_features, out_features, rank, alpha):
    super().__init__()
    self.scale = alpha / rank
    self.A = nn.Parameter(torch.randn(in_features, rank))
    self.B = nn.Parameter(torch.zeros(rank, out_features))

  def forward(self, x):
    return (self.scale * (x @ self.A @ self.B))

class LoraLinear(nn.Module):
  def __init__(self, linear, rank, alpha):
    super().__init__()
    if (isinstance(linear, LoraLinear)):
      self.linear = linear.linear
      self.lora = LoraModule(self.linear.in_features, self.linear.out_features, rank, alpha)

    else:
      self.linear = linear
      self.lora = LoraModule(self.linear.in_features, self.linear.out_features, rank, alpha)

  def forward(self, x):
    return self.linear(x) + self.lora(x)

In [15]:
def configure_lora_model(model, rank, alpha):
  # Replace the query and value linear layers with LoRA layers
  for _, layer in enumerate(model.bert.encoder.layer):
    s = layer.attention.self
    s.query = LoraLinear(s.query, rank, alpha)
    s.value = LoraLinear(s.value, rank, alpha)

  # Freeze the pre-trained weights
  for name, param in model.named_parameters():
    if 'A' in name or 'B' in name:
      param.requires_grad = True
    else:
      param.requires_grad = False

In [16]:
from transformers import Trainer, TrainingArguments, set_seed
import time

def lora_hyperparameter_tune(checkpoint, ranks, alphas, epochs, batch_sizes, learning_rates):
  model_name = checkpoint
  if '/' in checkpoint:
    model_name = get_model_name(checkpoint)

  # Setup model and dataset
  raw_datasets = load_dataset("glue", "mrpc")
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)

  def tokenize_function(example):
      return tokenizer(example["sentence1"], example["sentence2"])

  tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

  def compute_metrics(eval_preds):
      metric = evaluate.load("glue", "mrpc")
      logits, labels = eval_preds
      predictions = np.argmax(logits, axis=-1)
      return metric.compute(predictions=predictions, references=labels)

  model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2).to(device)

  lora_results = []
  progress = 0
  iterations = len(batch_sizes)*len(learning_rates)*len(ranks)*len(alphas)

  # Hyperparameter tuning
  start_time = time.time()
  for rank in ranks:
    for alpha in alphas:
      for size in batch_sizes:
        for rate in learning_rates:
          # Track progress
          progress += 1
          print(f"Progress: {progress}/{iterations}")

          # Initialize result
          result = {"rank": rank, "alpha": alpha, "batch_size": size, "learning_rate": rate}

          # Track emissions
          tracker = EmissionsTracker('lora-bert-tiny', save_to_file=False)
          try:
            tracker.start()

            # Configure an existing model with new LoRA layers
            configure_lora_model(model, rank, alpha)

            # Setup the trainer
            training_args = TrainingArguments(
                "lora-trainer",
                eval_strategy="epoch",
                per_device_eval_batch_size=size,
                per_device_train_batch_size=size,
                num_train_epochs=epochs,
                learning_rate=rate,
                disable_tqdm=True,
                report_to="none"
            )
            trainer = Trainer(
                model,
                training_args,
                train_dataset=tokenized_datasets["train"],
                eval_dataset=tokenized_datasets["validation"],
                data_collator=data_collator,
                processing_class=tokenizer,
                compute_metrics=compute_metrics
            )

            # Train the model
            result |= trainer.train().metrics

            # Stop tracking emissions
            tracker.stop()

            # Store metrics and delete the tracker
            result |= trainer.evaluate() | get_trainable_parameters(model) | tracker.final_emissions_data.values
          finally:
            del tracker

          lora_results.append(result)

  write_results_to_csv(output_dir + model_name +'-lora-results.csv', lora_results)

  end_time = time.time()

  runtime_seconds = end_time - start_time
  runtime_hours = runtime_seconds / 3600

  # Output the best result
  max_lora_result = lora_results[0]
  for _, result in enumerate(lora_results):
    if result['eval_f1'] > max_lora_result['eval_f1']:
      max_lora_result = result
  print(f'\n================ \
          \nTotal Runtime: {runtime_hours} hours \
          \nBest Result: \
          \n\tF1={max_lora_result["eval_f1"]} \
          \n\tRank={max_lora_result["rank"]} \
          \n\tAlpha={max_lora_result["alpha"]} \
          \n\tBatch Size={max_lora_result["batch_size"]} \
          \n\tLearning Rate={max_lora_result["learning_rate"]}'
  )

  return lora_results

# Run Program

In [17]:
models = ["prajjwal1/bert-tiny", "prajjwal1/bert-mini", "prajjwal1/bert-small"]

Hyperparameters from [google-research/bert](https://github.com/google-research/bert):



> For each task, we selected the best fine-tuning hyperparameters from the lists below, and trained for 4 epochs:
> * batch sizes: 8, 16, 32, 64, 128
> * learning rates: 3e-4, 1e-4, 5e-5, 3e-5



In [18]:
epochs = 4
batch_sizes = [8]#, 16, 32, 64, 128]
learning_rates = [3e-4]#, 1e-4, 5e-5, 3e-5]

In [19]:
ranks = [1]#, 2, 4, 8, 16]
alphas = [1]#, 2, 4, 8, 16]

In [None]:
for model_name in models:
  hyperparameter_tune(model_name, batch_sizes, learning_rates)
  lora_hyperparameter_tune(model_name, ranks, alphas, epochs, batch_sizes, learning_rates)

[codecarbon INFO @ 03:40:36] [setup] RAM Tracking...
[codecarbon INFO @ 03:40:36] [setup] GPU Tracking...
[codecarbon INFO @ 03:40:36] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 03:40:36] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU



Progress: 1/1


[codecarbon INFO @ 03:40:37] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 03:40:37] >>> Tracker's metadata:
[codecarbon INFO @ 03:40:37]   Platform system: Linux-6.1.85+-x86_64-with-glibc2.35
[codecarbon INFO @ 03:40:37]   Python version: 3.10.12
[codecarbon INFO @ 03:40:37]   CodeCarbon version: 2.7.4
[codecarbon INFO @ 03:40:37]   Available RAM : 50.994 GB
[codecarbon INFO @ 03:40:37]   CPU count: 8
[codecarbon INFO @ 03:40:37]   CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 03:40:37]   GPU count: 1
[codecarbon INFO @ 03:40:37]   GPU model: 1 x Tesla T4
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to 

{'eval_loss': nan, 'eval_accuracy': 0.3161764705882353, 'eval_f1': 0.0, 'eval_runtime': 1.2633, 'eval_samples_per_second': 322.963, 'eval_steps_per_second': 40.37, 'epoch': 1.0}
{'loss': 117.5354, 'grad_norm': nan, 'learning_rate': 5.821350762527233, 'epoch': 1.0893246187363834}
