In [None]:
import torch
import copy
from trl import SFTTrainer
from transformers import TrainerCallback, Trainer
from peft import get_peft_model_state_dict, set_peft_model_state_dict, LoraConfig, get_peft_model


In [12]:
from datasets import load_dataset
import os

# Load the IMDb dataset
dataset = load_dataset('imdb', split="train[:1%]")
small_dataset = dataset.train_test_split(test_size=0.2)
# Create a local directory to save the dataset
os.makedirs('dataset', exist_ok=True)

# Save the dataset locally in Arrow format
# dataset['train'].save_to_disk('dataset/imdb_train')
# dataset['test'].save_to_disk('dataset/imdb_test')
# dataset['unsupervised'].save_to_disk('dataset/imdb_unsupervised')

print("Dataset saved locally in the 'dataset' folder in Arrow format.")

Dataset saved locally in the 'dataset' folder in Arrow format.


In [22]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_from_disk, load_metric
from accelerate import Accelerator
from peft import get_peft_model, LoraConfig
import evaluate
import numpy as np

# Load the IMDb dataset from local storage
# train_dataset = load_from_disk('dataset/imdb_train')
# test_dataset = load_from_disk('dataset/imdb_test')
# Initialize the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2, id2label=id2label, label2id=label2id)

# Preprocess the data
def preprocess_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

tokenized_datasets = small_dataset.map(preprocess_function, batched=True)

# train_dataset = train_dataset.map(preprocess_function, batched=True)
# test_dataset = test_dataset.map(preprocess_function, batched=True)

# Set format for PyTorch
# train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
# test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Apply LoRA using PEFT
lora_config = LoraConfig(
    r=8,  # rank
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

model = get_peft_model(model, lora_config)

# Initialize the accelerator
accelerator = Accelerator()

# Setup evaluation 
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    
    return metric.compute(predictions=predictions, references=labels)

# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    metric_for_best_model='accuracy'
    # use_mps_device=True,
)

# Define the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

# Train the model
trainer.train()

# Evaluate the model
results = trainer.evaluate()
print(f"Evaluation results: {results}")

# Save the model
model.save_pretrained('lora_finetuned_bert')
tokenizer.save_pretrained('lora_finetuned_bert')

print("Model fine-tuning complete and saved.")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 13%|█▎        | 10/75 [00:08<00:56,  1.16it/s]

{'loss': 0.5357, 'grad_norm': 0.5591304898262024, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.4}


 27%|██▋       | 20/75 [00:17<00:47,  1.16it/s]

{'loss': 0.5443, 'grad_norm': 0.597666323184967, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.8}


 33%|███▎      | 25/75 [00:21<00:43,  1.16it/s]
 33%|███▎      | 25/75 [00:24<00:43,  1.16it/s]

{'eval_runtime': 2.0747, 'eval_samples_per_second': 24.1, 'eval_steps_per_second': 3.374, 'epoch': 1.0}


 40%|████      | 30/75 [00:28<00:46,  1.03s/it]

{'loss': 0.5177, 'grad_norm': 0.4279663860797882, 'learning_rate': 3e-06, 'epoch': 1.2}


 53%|█████▎    | 40/75 [00:37<00:30,  1.15it/s]

{'loss': 0.5201, 'grad_norm': 0.5947186350822449, 'learning_rate': 4.000000000000001e-06, 'epoch': 1.6}


 67%|██████▋   | 50/75 [00:46<00:22,  1.12it/s]

{'loss': 0.5175, 'grad_norm': 0.5387445092201233, 'learning_rate': 5e-06, 'epoch': 2.0}



 67%|██████▋   | 50/75 [00:48<00:22,  1.12it/s]

{'eval_runtime': 2.1165, 'eval_samples_per_second': 23.624, 'eval_steps_per_second': 3.307, 'epoch': 2.0}


 80%|████████  | 60/75 [00:56<00:13,  1.12it/s]

{'loss': 0.5154, 'grad_norm': 0.4416813850402832, 'learning_rate': 6e-06, 'epoch': 2.4}


 93%|█████████▎| 70/75 [01:05<00:04,  1.16it/s]

{'loss': 0.5112, 'grad_norm': 0.4578378200531006, 'learning_rate': 7.000000000000001e-06, 'epoch': 2.8}


100%|██████████| 75/75 [01:09<00:00,  1.16it/s]
100%|██████████| 75/75 [01:11<00:00,  1.04it/s]


{'eval_runtime': 2.0511, 'eval_samples_per_second': 24.377, 'eval_steps_per_second': 3.413, 'epoch': 3.0}
{'train_runtime': 71.9248, 'train_samples_per_second': 8.342, 'train_steps_per_second': 1.043, 'train_loss': 0.5239956919352213, 'epoch': 3.0}


100%|██████████| 7/7 [00:01<00:00,  4.13it/s]


Evaluation results: {'eval_runtime': 2.0234, 'eval_samples_per_second': 24.71, 'eval_steps_per_second': 3.459, 'epoch': 3.0}




Model fine-tuning complete and saved.


In [23]:
lora_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
lora_params

294912

In [24]:
model.print_trainable_parameters()

trainable params: 294,912 || all params: 109,778,690 || trainable%: 0.2686
