In [8]:
!pip install transformers datasets peft accelerate evaluate torch scikit-learn




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: C:\Users\william\AppData\Local\Programs\Python\Python313\python.exe -m pip install --upgrade pip


In [10]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification, 
    DataCollatorWithPadding, 
    TrainingArguments, 
    Trainer
)
from peft import get_peft_model, LoraConfig, TaskType
import evaluate

# 1. Base checkpoint and compute device (GPU when available, otherwise CPU)
model_checkpoint = "bert-base-uncased"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# 2. Load the IMDb dataset
# IMDb is a binary classification task (0: negative review, 1: positive review)
dataset = load_dataset("imdb")

# Optional quick demo: uncomment the two lines below to train and evaluate on a
# small shuffled subset; leave them commented out to use the full dataset.
# dataset["train"] = dataset["train"].shuffle(seed=42).select(range(2000))
# dataset["test"] = dataset["test"].shuffle(seed=42).select(range(500))

# 3. Preprocessing (tokenization)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def preprocess_function(examples, max_length=256):
    """Tokenize a batch of reviews, truncating each one to ``max_length`` tokens.

    Args:
        examples: A batch from the dataset; its ``"text"`` entry is passed to
            the tokenizer.
        max_length: Upper bound on tokens kept per review (default 256).

    Returns:
        The tokenizer output for the batch. Sequences are deliberately left
        unpadded here: padding is applied per batch by ``data_collator`` below,
        so short reviews are not padded out to ``max_length``.
    """
    return tokenizer(examples["text"], truncation=True, max_length=max_length)

tokenized_datasets = dataset.map(preprocess_function, batched=True)
# Dynamic padding: each batch is padded when it is collated instead of padding
# every example up front.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 4. Load the base BERT model with a classification head
# Label names are declared once; the reverse mapping is derived from them.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

# 5. Configure LoRA (the key step)
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,         # task type: sequence classification
    target_modules=["query", "value"],  # apply LoRA to the attention query and value layers
    r=8,                                # LoRA rank; smaller means fewer parameters (typically 4, 8, 16)
    lora_alpha=16,                      # LoRA scaling factor
    lora_dropout=0.1,                   # dropout probability
    bias="none",                        # whether to train biases; usually "none"
)

# Wrap the base model with the LoRA adapter
model = get_peft_model(model, peft_config)

# Show how many parameters are trainable compared with the total
print("\n=== LoRA Parameter Check ===")
model.print_trainable_parameters()
print("============================\n")

# 6. Evaluation metric
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: A pair ``(predictions, labels)``; the predicted class is
            taken as the argmax of ``predictions`` along axis 1.

    Returns:
        Whatever ``accuracy.compute`` returns for the predicted class ids
        against the reference labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

# 7. Training hyperparameters
training_args = TrainingArguments(
    output_dir="bert-lora-imdb-checkpoint",
    # --- optimisation ---
    learning_rate=2e-4,              # LoRA usually needs a somewhat higher learning rate than full fine-tuning
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=16,  # few trainable parameters, so a larger batch size can be tried
    per_device_eval_batch_size=16,
    # --- evaluation / checkpointing: once per epoch, best checkpoint reloaded at the end ---
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # --- logging ---
    logging_dir="./logs",
    logging_steps=100,
)

# 8. Train
# NOTE(review): the IMDb "test" split is used as the evaluation set, and
# load_best_model_at_end=True picks the checkpoint from those same evaluations,
# so the reported accuracy is not a fully held-out estimate. Consider carving a
# validation split out of "train" if an unbiased test score is needed.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    processing_class=tokenizer,  # replaces the deprecated `tokenizer=` argument of Trainer
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

print("Starting training...")
trainer.train()

# 9. Save the model
# `model` is the PEFT-wrapped model, so this is expected to write the adapter
# weights and config rather than a full copy of BERT.
model.save_pretrained("bert-lora-imdb-final")
print("Model saved to bert-lora-imdb-final")

Using device: cpu


Map: 100%|██████████| 25000/25000 [00:03<00:00, 6941.86 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



=== LoRA Parameter Check ===
trainable params: 296,450 || all params: 109,780,228 || trainable%: 0.2700



  trainer = Trainer(


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2556,0.241393,0.89992
2,0.2195,0.229798,0.90788
3,0.2315,0.223584,0.91504




Model saved to bert-lora-imdb-final


In [None]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# 1. Load the original BERT
# The "newly initialized classifier" warning from this call is expected: the
# base checkpoint has no classification head. The trained head is presumably
# restored from the adapter directory in step 2 -- verify if predictions look random.
base_model_name = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# 2. Attach the trained LoRA adapter (it stays a separate adapter; nothing is merged here)
# Make sure this path matches the directory the adapter was saved to.
lora_model_path = "bert-lora-imdb-final"
model = PeftModel.from_pretrained(model, lora_model_path)
# Switch to inference mode so dropout (including the LoRA dropout layers) is
# disabled and predictions are deterministic.
model.eval()

# 3. Test
#text = "This movie was absolutely fantastic! The acting was superb."
text = "This movie was absolutely normal! The acting was fine."
# Truncate so that an overly long review cannot exceed the model's maximum input length.
inputs = tokenizer(text, return_tensors="pt", truncation=True)

with torch.no_grad():
    logits = model(**inputs).logits

# Single input, so the argmax over the class dimension yields one class id.
predicted_class_id = logits.argmax(dim=-1).item()
labels = ["NEGATIVE", "POSITIVE"]

print(f"Review: {text}")
print(f"Sentiment: {labels[predicted_class_id]}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Review: This movie was absolutely normal! The acting was fine.
Sentiment: POSITIVE
