# LoRA Implementation



# Import Libraries
First, we need to import the necessary libraries for our implementation. These include PyTorch for building and training the model, the Transformers library for using pre-trained BERT, and the Datasets library for loading the SST-2 dataset.

In [None]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from datasets import load_dataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler


## Define LoRA Layer and Model
Here, we define the LoRALayer class, which introduces low-rank adaptation by adding a trainable low-rank update to its input (`x + x @ A @ B`, where `A` and `B` are two small trainable matrices). We also define the LoRABertModel class, which applies the LoRALayer to the [CLS] token representation produced by a pre-trained BERT model and adds a classification head on top.


In [None]:
class LoRALayer(nn.Module):
    """Low-rank residual adapter computing ``x + x @ lora_a @ lora_b``.

    ``lora_a`` has shape ``(in_features, rank)`` and ``lora_b`` has shape
    ``(rank, out_features)``. ``lora_b`` starts at zero, so a freshly
    constructed layer returns its input unchanged and the low-rank update
    is learned from there, instead of perturbing the incoming features
    with random noise at the first step.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the low-rank update.
            Because the update is added to the input, this normally has to
            equal ``in_features``.
        rank: Inner dimension of the low-rank factorisation.
    """

    # Standard deviation of the Gaussian used to initialise ``lora_a``.
    _INIT_STD = 0.02

    def __init__(self, in_features, out_features, rank):
        super().__init__()
        self.rank = rank
        # Down-projection: small random values so gradients can flow into
        # ``lora_b`` as soon as training starts.
        self.lora_a = nn.Parameter(torch.randn(in_features, rank) * self._INIT_STD)
        # Up-projection: zeros, so the initial update ``x @ A @ B`` is exactly 0.
        self.lora_b = nn.Parameter(torch.zeros(rank, out_features))

    def forward(self, x):
        """Return ``x`` plus its low-rank update ``x @ lora_a @ lora_b``."""
        low_rank_update = x @ self.lora_a @ self.lora_b
        return x + low_rank_update

class LoRABertModel(nn.Module):
    """Pre-trained BERT encoder followed by a LoRALayer and a linear classifier.

    Args:
        model_name: Checkpoint name or path passed to
            ``BertModel.from_pretrained``.
        rank: Rank handed to the ``LoRALayer`` applied to the [CLS] features.
        num_labels: Number of output classes of the classification head.
            Defaults to 2, the value that was previously hard-coded.
        freeze_base: When True, every BERT parameter gets
            ``requires_grad = False`` so only the LoRA layer and the
            classifier receive gradients. Defaults to False, which keeps
            the whole encoder trainable.
    """

    def __init__(self, model_name, rank, num_labels=2, freeze_base=False):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name)
        if freeze_base:
            for param in self.bert.parameters():
                param.requires_grad = False

        hidden_size = self.bert.config.hidden_size
        self.lora = LoRALayer(hidden_size, hidden_size, rank)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return classification logits for a batch of tokenised inputs.

        Both arguments are forwarded to BERT unchanged
        (presumably ``(batch, seq_len)`` integer tensors -- confirm against
        the data pipeline).
        """
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        # Position 0 along the sequence axis is used as the [CLS] token
        # representation of each example.
        cls_state = outputs.last_hidden_state[:, 0, :]
        adapted = self.lora(cls_state)
        return self.classifier(adapted)


## Initialize Tokenizer and Model
We initialize the BERT tokenizer and the LoRABertModel. The tokenizer will preprocess the text data, and the model will be used for training and evaluation.


In [None]:
# The tokenizer and the model are built from the same "bert-base-uncased"
# checkpoint so the vocabulary matches the encoder weights.
tokenizer = BertTokenizer.from_pretrained(
    "bert-base-uncased",
)
model = LoRABertModel(
    model_name="bert-base-uncased",
    rank=4,
)


## Load and Prepare Dataset
We load the SST-2 dataset from the GLUE benchmark using the Datasets library. The dataset is then tokenized and formatted for use in PyTorch DataLoader.


In [None]:
# Tokenisation / batching settings for this notebook.
MAX_LENGTH = 128
BATCH_SIZE = 32

# SST-2 is the "sst2" configuration of the GLUE benchmark.
dataset = load_dataset("glue", "sst2")


def tokenize_batch(examples):
    """Tokenise a batch of SST-2 sentences to fixed-length BERT inputs."""
    return tokenizer(
        examples["sentence"],
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH,
    )


encoded_dataset = dataset.map(tokenize_batch, batched=True)
# Expose only the fields the training and validation loops read, as tensors.
encoded_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)

train_dataset = encoded_dataset["train"]
val_dataset = encoded_dataset["validation"]

# Shuffle the training data; keep the validation data in order.
train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(train_dataset),
    batch_size=BATCH_SIZE,
)
val_dataloader = DataLoader(
    val_dataset,
    sampler=SequentialSampler(val_dataset),
    batch_size=BATCH_SIZE,
)


## Training Loop
We define the training loop for the model. This includes setting up the optimizer, learning rate scheduler, and training the model for a specified number of epochs. The training loss is printed at the end of each epoch.


In [None]:
# Single source of truth for the schedule length and the epoch loop, so the
# two cannot drift apart.
NUM_EPOCHS = 3
LEARNING_RATE = 2e-5

# Move the model to the target device before building the optimizer.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
total_steps = len(train_dataloader) * NUM_EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

# The loss module is stateless here; build it once instead of once per batch.
criterion = nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        # The scheduler is stepped once per batch, matching total_steps above.
        scheduler.step()

    # Mean per-batch loss over the epoch.
    avg_train_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1}, Loss: {avg_train_loss}")


## Validation Loop
We evaluate the model on the validation set to measure its accuracy. The number of correct predictions is compared to the total number of samples to calculate the accuracy.


In [None]:
# Switch to evaluation mode and count correct predictions over the
# validation set; no gradients are needed for this pass.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for batch in val_dataloader:
        labels = batch['label'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        input_ids = batch['input_ids'].to(device)

        outputs = model(input_ids, attention_mask)
        # The predicted class is the index of the largest logit per example.
        predictions = outputs.argmax(dim=1)
        matches = predictions == labels
        correct += matches.sum().item()
        total += labels.shape[0]

# Fraction of validation examples classified correctly.
accuracy = correct / total
print(f"Validation Accuracy: {accuracy}")
