In [11]:
import torch
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    TrainingArguments, Trainer, DataCollatorWithPadding
)
from datasets import load_dataset
import evaluate
from peft import LoraConfig, get_peft_model

#### 1. Load dataset ####

In [14]:
dataset = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, padding=False)

tokenized_ds = dataset.map(tokenize, batched=True)

Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25000/25000 [00:04<00:00, 5210.07 examples/s]
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25000/25000 [00:04<00:00, 5341.38 examples/s]
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:09<00:00, 5000.15 examples/s]


In [16]:
train_ds = tokenized_ds["train"].shuffle(seed=42).select(range(5000))
test_ds = tokenized_ds["test"].shuffle(seed=42).select(range(2000))

#### 2. Load Base model ####

In [17]:
base_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
print(tokenized_datasets["train"]["input_ids"][0])  # Check the first example
print(len(tokenized_datasets["train"]["input_ids"][0]))  # Check the length of the first example

[101, 2572, 3217, 5831, 5496, 2010, 2567, 1010, 3183, 2002, 2170, 1000, 1996, 7409, 1000, 1010, 1997, 9969, 4487, 23809, 3436, 2010, 3350, 1012, 102, 7727, 2000, 2032, 2004, 2069, 1000, 1996, 7409, 1000, 1010, 2572, 3217, 5831, 5496, 2010, 2567, 1997, 9969, 4487, 23809, 3436, 2010, 3350, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
128


#### 3. Evaluate the Pre-trained Model ####

In [5]:
# from transformers import Trainer, TrainingArguments

# training_args = TrainingArguments(
#     output_dir="./../../../../data/GenAI/02_genai_fundamentals/project2/results",
#     eval_strategy="epoch",
#     learning_rate=2e-5,
#     per_device_train_batch_size=16,
#     num_train_epochs=3,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=tokenized_datasets["train"],
#     eval_dataset=tokenized_datasets["validation"],
# )

# trainer.evaluate()


{'eval_loss': 0.6831324100494385,
 'eval_model_preparation_time': 0.0011,
 'eval_runtime': 3.605,
 'eval_samples_per_second': 113.176,
 'eval_steps_per_second': 14.147}

In [None]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = torch.argmax(torch.tensor(logits), dim=-1)
    return metric.compute(predictions=preds, references=labels)

training_args = TrainingArguments(
    output_dir="./../../../../data/GenAI/02_genai_fundamentals/project2/results/base_eval",
    per_device_eval_batch_size=8,
)

trainer = Trainer(
    model=base_model,
    args=training_args,
    eval_dataset=test_ds,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer),
    compute_metrics=compute_metrics,
)

base_eval = trainer.evaluate()
print("📈 Base model accuracy:", base_eval)

Downloading builder script: 4.20kB [00:00, 2.58MB/s]
  trainer = Trainer(


In [8]:
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


#### 4. Perform Parameter-Efficient Fine-Tuning ####

In [None]:
# from peft import LoraConfig, get_peft_model

# peft_config = LoraConfig(
#     r=8,
#     lora_alpha=32,
#     lora_dropout=0.1,
#     target_modules=["attention"],
# )
# peft_model = get_peft_model(model, peft_config)


In [10]:
from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model

# Load a compatible model
model_name = "bert-base-uncased"  # Change to a model that has query, key, value layers
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Configure PEFT
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["attention"],  # Adjusted for the model
)

# Get PEFT model
peft_model = get_peft_model(model, peft_config)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: Target module BertAttention(
  (self): BertSdpaSelfAttention(
    (query): Linear(in_features=768, out_features=768, bias=True)
    (key): Linear(in_features=768, out_features=768, bias=True)
    (value): Linear(in_features=768, out_features=768, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (output): BertSelfOutput(
    (dense): Linear(in_features=768, out_features=768, bias=True)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
) is not supported. Currently, only the following modules are supported: `torch.nn.Linear`, `torch.nn.Embedding`, `torch.nn.Conv1d`, `torch.nn.Conv2d`, `torch.nn.Conv3d`, `transformers.pytorch_utils.Conv1D`, `torch.nn.MultiheadAttention.`.

In [None]:
# trainer = Trainer(
#     model=peft_model,
#     args=training_args,
#     train_dataset=tokenized_datasets["train"],
#     eval_dataset=tokenized_datasets["validation"],
# )

# trainer.train()


#### 5. Save the Fine-Tuned Model ####

In [None]:
# peft_model.save_pretrained("./fine_tuned_model")

#### 6. Perform Inference Using the Fine-Tuned Model ####

In [None]:
# from transformers import AutoPeftModelForSequenceClassification

# fine_tuned_model = AutoPeftModelForSequenceClassification.from_pretrained("./fine_tuned_model")
# trainer = Trainer(
#     model=fine_tuned_model,
#     args=training_args,
#     eval_dataset=tokenized_datasets["validation"],
# )

# trainer.evaluate()
