In [1]:
!pip install transformers datasets peft accelerate bitsandbytes --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
# Install dependencies
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType

# Load model and tokenizer
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Setup LoRA
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_lin", "k_lin", "v_lin"]  # <-- specify target modules
)
model = get_peft_model(model, lora_config, )

# Load and tokenize dataset
dataset = load_dataset("glue", "sst2")
def tokenize(batch):
    return tokenizer(batch["sentence"], truncation=True, padding="max_length", max_length=128)

encoded = dataset.map(tokenize, batched=True)
encoded = encoded.rename_column("label", "labels")
encoded.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    num_train_epochs=3,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    fp16=True
)

# Trainer setup with small subset for speed
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded["train"].shuffle(seed=42).select(range(1000)),
    eval_dataset=encoded["validation"].select(range(500))
)

# Train
trainer.train()


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33manthonylitwin[0m ([33manthonylitwin-old-dominion-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,0.685
20,0.672
30,0.6822
40,0.6781
50,0.6581
60,0.672
70,0.6409
80,0.6512
90,0.6499
100,0.6225


TrainOutput(global_step=189, training_loss=0.6248976692320809, metrics={'train_runtime': 108.7805, 'train_samples_per_second': 27.578, 'train_steps_per_second': 1.737, 'total_flos': 101224424448000.0, 'train_loss': 0.6248976692320809, 'epoch': 3.0})

In [6]:
trainer.save_model("./lora-distilbert-sst2")
tokenizer.save_pretrained("./lora-distilbert-sst2")

('./lora-distilbert-sst2/tokenizer_config.json',
 './lora-distilbert-sst2/special_tokens_map.json',
 './lora-distilbert-sst2/vocab.txt',
 './lora-distilbert-sst2/added_tokens.json',
 './lora-distilbert-sst2/tokenizer.json')

In [7]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Load the model and tokenizer
model_path = "./lora-distilbert-sst2"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Prepare input
sentence = "I really enjoyed this movie, it was fantastic!"
inputs = tokenizer(sentence, return_tensors="pt")

# Run inference
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=-1).item()

print(f"Sentence: {sentence}")
print(f"Predicted class: {predicted_class}")  # 1 = positive, 0 = negative


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Sentence: I really enjoyed this movie, it was fantastic!
Predicted class: 1
