In [1]:
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EvalPrediction
from datasets import load_dataset
from peft import DiagConfig, get_peft_model
import numpy as np
import torch
import evaluate

In [3]:
# torch.set_printoptions(threshold=torch.inf)  # Display all elements
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# torch.cuda.device_count()

In [5]:
BASE_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tok  = AutoTokenizer.from_pretrained(BASE_ID, use_fast=True)

# model = AutoModelForSequenceClassification.from_pretrained(
#         BASE_ID,
#         num_labels=2,
#         load_in_4bit=True,
#         device_map={"": 0})

# diag_cfg = DiagConfig(
#         target_modules=["q_proj","v_proj"],
#         diag_alpha=1.0,
#         diag_dropout=0.0,
#         fan_in_fan_out=False,
#         bias="none",
#         init_diag_weights=True)

In [6]:
model = AutoModelForSequenceClassification.from_pretrained(
    BASE_ID,
    num_labels=2,
).to("cpu")

diag_cfg = DiagConfig(
    target_modules=["q_proj", "v_proj"],
    diag_alpha=1.0,
    diag_dropout=0.0,
    fan_in_fan_out=False,
    bias="none",
    init_diag_weights=True
)

model = get_peft_model(model, diag_cfg)

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v1.0 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[DiagConfig] Initializing configuration
[DiagConfig] Target modules: ['q_proj', 'v_proj']
[DiagConfig] Alpha: 1.0, Dropout: 0.0
[DiagConfig] Fan in/out: False, Bias: none
[DiagConfig] Init weights: True
[DiagModel] Creating/replacing layer q_proj with base layer type: Linear
[DiagModel] Base layer weight shape: torch.Size([2048, 2048])
[DiagModel] Creating new DiagLayer for q_proj
[DiagLayer] Initializing with base layer: Linear
[DiagLayer] Feature dims  – in: 2048, out: 2048
[DiagLayer] Updating layer for adapter: default
[DiagLayer] Base weight shape: torch.Size([2048, 2048]), device: cpu
[DiagLayer] Created row weight with shape: torch.Size([2048, 2048]), dtype: torch.float32
[DiagLayer] Added adapter default to active adapters
[DiagModel] Creating/replacing layer v_proj with base layer type: Linear
[DiagModel] Base layer weight shape: torch.Size([256, 2048])
[DiagModel] Creating new DiagLayer for v_proj
[DiagLayer] Initializing with base layer: Linear
[DiagLayer] Feature dims  – in

In [7]:
print(model)

PeftModel(
  (base_model): DiagModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): Linear(
                in_features=2048, out_features=2048, bias=False
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (row_weight): ParameterDict(  (default): Parameter containing: [torch.FloatTensor of size 2048x2048])
                (row_bias): ParameterDict()
                (diag_dropout): ModuleDict(
                  (default): Identity()
                )
              )
              (k_proj): Linear(in_features=2048, out_features=256, bias=False)
              (v_proj): Linear(
                in_features=2048, out_features=256, bias=False
                (base_layer): Linear(in_features=2048, out_features=256, bias=False)
     

In [8]:
core_model   = model.get_base_model()        # → LlamaForSequenceClassification
llama_blocks = core_model.model.layers       # → ModuleList of decoder layers
qproj_0      = llama_blocks[0].self_attn.q_proj

print(type(qproj_0))          # should be your Linear4bit / Linear8bitLt
print(qproj_0.weight.shape)   # should be (out, in)  e.g.  (4096, 2048)


<class 'peft.tuners.diag.layer.Linear'>
torch.Size([2048, 2048])


In [9]:
model.config.pad_token_id = tok.pad_token_id

In [10]:
# ---------- data ----------
raw_ds = load_dataset("glue", "sst2")

def tokenize(batch):
    natural   = tok(batch["sentence"], add_special_tokens=True)
    true_lens = [len(ids) for ids in natural["input_ids"]]

    padded = tok(
        batch["sentence"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

    padded["real_length"] = true_lens
    return padded

tokenized_ds = raw_ds.map(
    tokenize,
    batched=True,
    remove_columns=["sentence", "idx"]
)

# rename + set Torch format
tokenized_ds = tokenized_ds.rename_column("label", "labels")
tokenized_ds.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels", "real_length"],
)

# ---------- stats ----------
max_len = max(tokenized_ds["train"]["real_length"])
print(f"Longest raw sentence: {max_len} tokens")

Longest raw sentence: 74 tokens


In [11]:
# ---------- data ----------
raw_datasets = load_dataset("glue", "sst2")
def tokenize_function(example):
    return tok(example["sentence"], truncation=True, padding="max_length", max_length=100)

# Tokenize the entire dataset
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
del raw_datasets
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

In [None]:
# # ---------- trainer ----------
# args = TrainingArguments(
#         output_dir="vera-tiny-sst2",
#         per_device_train_batch_size=32,
#         num_train_epochs=2,
#         learning_rate=3e-3,
#         fp16=True,
#         evaluation_strategy="epoch",
#         save_strategy="no",
#         logging_steps=50)


# trainer = Trainer(model=model,
#                   args=args,
#                   train_dataset=tokenized_datasets["train"],
#                   eval_dataset=tokenized_datasets["validation"])

In [16]:
# ---------- trainer ----------
args = TrainingArguments(
        output_dir="vera-tiny-sst2",
        per_device_train_batch_size=32,
        num_train_epochs=2,
        learning_rate=3e-3,
        fp16=False,
        no_cuda=True,
        use_cpu=True,
        evaluation_strategy="epoch",
        save_strategy="no",
        logging_steps=50)


trainer = Trainer(model=model,
                  args=args,
                  train_dataset=tokenized_datasets["train"],
                  eval_dataset=tokenized_datasets["validation"])



In [18]:
trainer.train()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [7]:
predictions = trainer.predict(tokenized_datasets["validation"])
logits = predictions.predictions[1]
preds = np.argmax(logits, axis=-1)
metric = evaluate.load("accuracy")

tokenized_datasets["validation"]["labels"]
metric.compute(predictions=preds, references=tokenized_datasets["validation"]["labels"])

{'accuracy': 0.9392201834862385}

In [None]:
# For non trained model accuracy 0.4919
# For r=128 one epoch lr 3e-3 accuracy 0.932
# For r=128 two epochs lr 3e-3 accuracy 0.939
/home/storage/guy_bilitski/Advanced experiments/vera-tiny-sst2/LLM adaptors.ipynb

In [9]:
# after trainer.train()
adapter_dir = "vera_adapter"
model.save_pretrained(adapter_dir, safe_serialization=True)  # adapter only
tok.save_pretrained(adapter_dir)                             # optional, for easy reload

('vera_adapter/tokenizer_config.json',
 'vera_adapter/special_tokens_map.json',
 'vera_adapter/tokenizer.model',
 'vera_adapter/added_tokens.json',
 'vera_adapter/tokenizer.json')

In [10]:
base = AutoModelForSequenceClassification.from_pretrained(
           BASE_ID, load_in_4bit=True, device_map="auto")
from peft import PeftModel
model = PeftModel.from_pretrained(base, "vera_adapter").to(device)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v1.0 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
