In [1]:
import os
# Expose only GPU 0 to this process; set here, before torch is imported in the next cell.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
from evaluate import load
from peft import PeftModel
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

In [3]:
# ---------- configuration ----------
# Fixed seed: the classification head created in the next cell is randomly
# initialised, so without a seed the reported accuracy changes from run to run.
SEED = 42
torch.manual_seed(SEED)

torch.set_printoptions(threshold=torch.inf)  # never summarise tensors when printing
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.device_count()  # last expression, so it is shown as the cell output

1

In [4]:
BASE_ID = "roberta-base"
ADAPTER_DIR = "uilinlora_adapter"  # directory the adapter and its tokenizer are loaded from

# Load the backbone with a 2-label classification head.  Device placement is
# done exactly once, by the explicit .to(device) below, instead of combining
# device_map="auto" with a second manual move.
base = AutoModelForSequenceClassification.from_pretrained(BASE_ID, num_labels=2)

# NOTE(review): the load warning lists the classifier.* weights as newly
# initialised -- confirm that the adapter checkpoint restores the trained
# head, otherwise the evaluation below runs with a random classifier.
model = PeftModel.from_pretrained(base, ADAPTER_DIR).to(device)
tok = AutoTokenizer.from_pretrained(ADAPTER_DIR, use_fast=True)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Make the model config use the same padding id as the tokenizer.
model.config.pad_token_id = tok.pad_token_id

# ---------- data: GLUE / SST-2 ----------
raw_ds = load_dataset(path="glue", name="sst2")

def tokenize(batch):
    """Encode a batch of sentences to fixed length and record their true lengths.

    Args:
        batch: mapping with a "sentence" entry holding the texts of the batch.

    Returns:
        The tokenizer output for the batch, truncated/padded to 128 tokens,
        with an extra "real_length" entry: the token count of each sentence
        when encoded without padding or truncation (special tokens included).
    """
    sentences = batch["sentence"]

    # First pass: plain encoding, only used to measure each sentence.
    unpadded_ids = tok(sentences, add_special_tokens=True)["input_ids"]
    lengths = list(map(len, unpadded_ids))

    # Second pass: the fixed-size encoding that is actually returned.
    encoded = tok(sentences, truncation=True, padding="max_length", max_length=128)
    encoded["real_length"] = lengths
    return encoded

# Tokenise every split, drop the raw columns and expose "label" as "labels".
tokenized_ds = raw_ds.map(
    tokenize, batched=True, remove_columns=["sentence", "idx"]
).rename_column("label", "labels")

# Return torch tensors, restricted to the listed columns.
torch_columns = ["input_ids", "attention_mask", "labels", "real_length"]
tokenized_ds.set_format(type="torch", columns=torch_columns)

# ---------- stats ----------
# Longest unpadded sentence of the training split, in tokens.
train_lengths = tokenized_ds["train"]["real_length"]
max_len = max(train_lengths)
print(f"Longest raw sentence: {max_len} tokens")


# ---------- data ----------
# NOTE(review): this second pipeline pads to max_length=100 while the
# length-statistics pipeline earlier in this cell uses 128 -- confirm which
# value matches the one used for training.
def tokenize_function(example):
    """Encode the "sentence" entry of a batch, truncated/padded to 100 tokens."""
    return tok(
        example["sentence"],
        truncation=True,
        padding="max_length",
        max_length=100,
    )

raw_datasets = load_dataset("glue", "sst2")

# Tokenize the entire dataset and expose "label" under the name "labels".
tokenized_datasets = raw_datasets.map(
    tokenize_function, batched=True
).rename_column("label", "labels")
del raw_datasets  # the untokenized copy is no longer needed

# Return torch tensors for the listed columns only.
tokenized_datasets.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Longest raw sentence: 67 tokens


Map:   0%|          | 0/872 [00:00<?, ? examples/s]

In [6]:
# Put the model in evaluation mode and iterate over the validation split.
model.eval()
dataloader = DataLoader(tokenized_datasets["validation"], batch_size=32)

all_preds, all_labels = [], []

with torch.no_grad():  # gradients are not needed for evaluation
    for batch in tqdm(dataloader):
        # Move every tensor of the batch to the device the model is on.
        batch = {name: tensor.to(model.device) for name, tensor in batch.items()}
        logits = model(**batch).logits
        preds = logits.argmax(dim=-1)  # predicted class id per example
        all_preds.append(preds.cpu())
        all_labels.append(batch["labels"].cpu())

# Concatenate the per-batch results into one NumPy array each.
preds = torch.cat(all_preds).numpy()
labels = torch.cat(all_labels).numpy()

accuracy = load("accuracy")
print(accuracy.compute(predictions=preds, references=labels))

100%|██████████| 28/28 [00:04<00:00,  6.30it/s]


{'accuracy': 0.4908256880733945}


In [7]:
# Pick the dense layer of the classification head for inspection.
classifier_head = model.base_model.model.classifier
adapter_layer = classifier_head.dense
print(adapter_layer)

UILinLoRALayer(Linear(in_features=768, out_features=768, bias=True))


In [8]:
# Name of the first adapter listed as active on this layer.
adapter_name = adapter_layer.active_adapters[0]
print(adapter_name)

default


In [9]:

# Pull the per-adapter tensors off the wrapped layer; U and V are stored as
# attributes prefixed with the adapter name, the rest in per-adapter containers.
U = getattr(adapter_layer, f"{adapter_name}_U")    # (out, r)
V = getattr(adapter_layer, f"{adapter_name}_V")    # (r, in)
Σ = adapter_layer.uilinlora_sigma[adapter_name]    # (r,)
D = adapter_layer.uilinlora_D[adapter_name]        # (in,)
E = adapter_layer.uilinlora_E[adapter_name]        # (out,)

# Metadata recorded for this adapter, followed by the shape of each tensor.
print(adapter_layer._meta[adapter_name])

for factor in (U, V, Σ, D, E):
    print(factor.shape)


{'sf': 1.0, 'pos': True}
torch.Size([768, 128])
torch.Size([128, 768])
torch.Size([128])
torch.Size([768])
torch.Size([768])


In [16]:
# Dump E in full (tensor summarisation was disabled via set_printoptions in cell 3).
print(E)

Parameter containing:
tensor([ 4.5508e-01,  4.2494e+00,  5.4673e+00, -6.4329e-01, -2.0603e+00,
         4.1422e+00,  3.9753e+00,  1.7745e+00,  2.0048e+00,  6.0946e-04,
         4.3415e+00,  1.3171e+00, -1.0574e+00,  5.4145e+00,  3.7849e+00,
         2.2630e-01,  2.1057e+00,  6.0981e+00, -2.3913e-01,  2.0191e+00,
         8.0657e-01,  1.5636e+00,  4.5298e-01,  1.4221e+00, -1.5554e-01,
         2.9866e+00,  3.2797e+00, -2.0310e+00, -1.9117e+00, -2.6473e-01,
        -2.4256e+00, -4.4462e+00, -1.3043e+00,  7.9812e-01, -4.3749e+00,
         2.8115e+00,  1.3007e+00, -1.0592e+00, -1.7940e+00,  1.1622e+00,
         3.4326e+00, -1.4188e+00, -3.5767e-01,  4.3794e+00, -3.3461e+00,
         3.1927e+00,  3.8577e+00,  4.2472e+00,  2.5564e+00,  4.6914e+00,
         3.4049e+00,  1.0233e-01,  5.5391e+00,  1.0219e+00,  3.4998e+00,
         2.0713e+00,  3.0071e+00, -4.3255e-01, -1.7998e+00,  1.5135e+00,
         1.3618e+00,  2.2651e+00,  4.2473e+00,  1.3553e+00,  3.2405e+00,
         1.8943e-01,  2.3252e

In [11]:
# Show the full module tree of the PEFT-wrapped model.
print(model)

PeftModel(
  (base_model): UILinLoRAModel(
    (model): RobertaForSequenceClassification(
      (roberta): RobertaModel(
        (embeddings): RobertaEmbeddings(
          (word_embeddings): Embedding(50265, 768, padding_idx=1)
          (position_embeddings): Embedding(514, 768, padding_idx=1)
          (token_type_embeddings): Embedding(1, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): RobertaEncoder(
          (layer): ModuleList(
            (0-11): 12 x RobertaLayer(
              (attention): RobertaAttention(
                (self): RobertaSelfAttention(
                  (query): Linear(in_features=768, out_features=768, bias=True)
                  (key): Linear(in_features=768, out_features=768, bias=True)
                  (value): Linear(in_features=768, out_features=768, bias=True)
                  (dropout): Dropout(p=0.1, inplace=False)
                )
  

In [12]:
# Non-trained model:           accuracy 0.4919
# r=128, one epoch,  lr 3e-3:  accuracy 0.932
# r=128, two epochs, lr 3e-3:  accuracy 0.939