# Llama-2-7B + PEFT LoRA 微调 PokerBench 分类任务

此 Notebook 演示如何在 Kaggle 环境下，使用 Llama-2-7B 模型，结合 PEFT 技术进行扑克动作分类的微调训练。

In [None]:
# 0. Install environment dependencies
# (bitsandbytes backs the 8-bit model loading used later in this notebook; peft provides LoRA.)
!pip install transformers datasets accelerate peft bitsandbytes scikit-learn

In [None]:
import json
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import LlamaTokenizer, LlamaForSequenceClassification, get_scheduler, AdamW
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report


In [None]:
# 2. Load the PokerBench dataset
# Read explicitly as UTF-8 so parsing does not depend on the platform's default encoding.
with open('/kaggle/input/pokerbench/pokerbench_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Each record supplies the prompt under 'instruction' and the target under 'output'.
texts = [item['instruction'] for item in data]
labels_raw = [item['output'] for item in data]

# Map the raw 'output' values to contiguous integer class ids.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels_raw)
num_labels = len(label_encoder.classes_)


In [None]:
# 3. Initialize the tokenizer and the 8-bit classification model
model_name = "meta-llama/Llama-2-7b-hf"
tokenizer = LlamaTokenizer.from_pretrained(model_name)
# The Llama-2 tokenizer defines no padding token, yet this notebook tokenizes with
# padding="max_length"; reuse EOS as the pad token so padding does not fail.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = LlamaForSequenceClassification.from_pretrained(
    model_name,
    load_in_8bit=True,
    device_map="auto",
    num_labels=num_labels,
)
# The sequence-classification head uses config.pad_token_id to find the last
# non-padding token of each row; without it, batches larger than one are rejected.
model.config.pad_token_id = tokenizer.pad_token_id

model = prepare_model_for_int8_training(model)


In [None]:
# 4. Configure LoRA
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    # The base model is LlamaForSequenceClassification, so the adapter must be
    # declared as a sequence-classification task. With "CAUSAL_LM" the freshly
    # initialized classification head would stay frozen and would not be saved
    # together with the adapter.
    task_type="SEQ_CLS",
)

model = get_peft_model(model, lora_config)


In [None]:
# 5. Build the Dataset
class PokerBenchDataset(Dataset):
    """Map-style dataset that tokenizes one prompt per item on access.

    Args:
        texts: Sequence of prompt strings.
        labels: Sequence of integer class ids, aligned index-by-index with ``texts``.
        text_tokenizer: Optional callable used to encode a prompt. When omitted,
            the module-level ``tokenizer`` is looked up at item-access time.
        max_length: Truncation/padding length forwarded to the tokenizer.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, text_tokenizer=None, max_length=256):
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, got {len(texts)} and {len(labels)}"
            )
        self.texts = texts
        self.labels = labels
        self.text_tokenizer = text_tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return a dict of per-example tensors plus a ``labels`` entry."""
        # Fall back to the notebook-level tokenizer when none was injected.
        encode = self.text_tokenizer if self.text_tokenizer is not None else tokenizer
        enc = encode(
            self.texts[idx],
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt",
        )
        # return_tensors="pt" adds a leading batch axis of size 1; drop it so the
        # DataLoader can stack individual examples into a batch.
        enc = {k: v.squeeze(0) for k, v in enc.items()}
        enc["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return enc

# Wrap the full corpus; each example is tokenized when it is indexed.
dataset = PokerBenchDataset(texts, labels)


In [None]:
# 6. Split into training / validation sets (80% / 20%)
n_examples = len(dataset)
train_size = int(n_examples * 0.8)
val_size = n_examples - train_size  # the remainder goes to validation
train_dataset, val_dataset = random_split(dataset, lengths=[train_size, val_size])

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=4)
val_loader = DataLoader(val_dataset, batch_size=8)


In [None]:
# 7. Optimizer and learning-rate schedule
# Only parameters that require gradients are handed to the optimizer; frozen
# weights never receive gradients, so tracking them is pure overhead.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = AdamW(trainable_params, lr=2e-4)
num_epochs = 3
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Device that input batches are moved to. The model itself was already placed
# by device_map="auto" when it was loaded, so it is not moved again here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# 8. Training and evaluation functions
def train_epoch():
    """Run one pass over ``train_loader`` and return the mean per-batch loss.

    Uses the notebook-level ``model``, ``optimizer``, ``lr_scheduler`` and
    ``device``; the scheduler is stepped once per batch.
    """
    model.train()
    batch_losses = []
    for raw_batch in train_loader:
        inputs = {name: tensor.to(device) for name, tensor in raw_batch.items()}
        step_loss = model(**inputs).loss
        step_loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        batch_losses.append(step_loss.item())
    return sum(batch_losses) / len(train_loader)

def evaluate():
    """Predict every batch of ``val_loader`` without gradient tracking.

    Returns:
        A ``(preds, trues)`` pair of flat lists holding the arg-max class id
        and the reference label id for each validation example.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for raw_batch in val_loader:
            inputs = {name: tensor.to(device) for name, tensor in raw_batch.items()}
            scores = model(**inputs).logits
            predicted.extend(scores.argmax(dim=-1).cpu().numpy())
            expected.extend(inputs["labels"].cpu().numpy())
    return predicted, expected


In [None]:
# 9. Main training loop
# Pass the full list of class ids to the report. Without it, a validation split
# that lacks some class no longer lines up with target_names and the report fails.
all_label_ids = list(range(len(label_encoder.classes_)))
for epoch in range(num_epochs):
    loss = train_epoch()
    preds, trues = evaluate()
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {loss:.4f}")
    print(classification_report(trues, preds, labels=all_label_ids, target_names=label_encoder.classes_))


In [None]:
# 10. Save the model and the tokenizer side by side in one directory
save_dir = './llama2-7b-pokerbench-peft'
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)


In [None]:
# 11. Simple inference example
def predict(text):
    """Classify a single prompt and return its decoded label.

    Args:
        text: Prompt string to classify.

    Returns:
        The original label value that ``label_encoder`` maps the arg-max class
        id back to.
    """
    model.eval()
    inputs = tokenizer(text, truncation=True, max_length=256, padding="max_length", return_tensors="pt").to(device)
    # Inference only: skip autograd bookkeeping so no graph is built or kept alive.
    with torch.no_grad():
        logits = model(**inputs).logits
    pred_idx = torch.argmax(logits, dim=-1).item()
    return label_encoder.inverse_transform([pred_idx])[0]

# Smoke-test the classifier on a single hand description.
print(predict("You have Ah Kh, the board is 9h Th Jc. What is the best move?"))
