In [11]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/rolechat/zzx.jsonl
/kaggle/input/rolechat/lz.jsonl
/kaggle/input/rolechat/lbxx.jsonl


In [12]:
# Environment dependencies: uninstall the existing copies first, then install pinned versions
!pip uninstall -y transformers accelerate peft bitsandbytes datasets
!pip install -U torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 \
  transformers==4.40.2 accelerate==0.28.0 bitsandbytes==0.43.1 \
  peft==0.9.0 datasets==2.19.0 sentencepiece==0.2.0 protobuf==3.20.*

Found existing installation: transformers 4.40.2
Uninstalling transformers-4.40.2:
  Successfully uninstalled transformers-4.40.2
Found existing installation: accelerate 0.28.0
Uninstalling accelerate-0.28.0:
  Successfully uninstalled accelerate-0.28.0
Found existing installation: peft 0.9.0
Uninstalling peft-0.9.0:
  Successfully uninstalled peft-0.9.0
Found existing installation: bitsandbytes 0.43.1
Uninstalling bitsandbytes-0.43.1:
  Successfully uninstalled bitsandbytes-0.43.1
Found existing installation: datasets 2.19.0
Uninstalling datasets-2.19.0:
  Successfully uninstalled datasets-2.19.0
Collecting transformers==4.40.2
  Using cached transformers-4.40.2-py3-none-any.whl.metadata (137 kB)
Collecting accelerate==0.28.0
  Using cached accelerate-0.28.0-py3-none-any.whl.metadata (18 kB)
Collecting bitsandbytes==0.43.1
  Using cached bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl.metadata (2.2 kB)
Collecting peft==0.9.0
  Using cached peft-0.9.0-py3-none-any.whl.metadata (

In [13]:
# Report the runtime configuration (CUDA availability, GPU name, bitsandbytes version)
import torch, bitsandbytes as bnb
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    # get_device_name(0) raises when no CUDA device is present; keep this
    # diagnostic cell informative instead of ending in a traceback.
    print("GPU: none detected")
print("BitsAndBytes version:", bnb.__version__)

CUDA available: True
GPU: Tesla T4
BitsAndBytes version: 0.43.1


In [14]:
# 库函数引用
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, LoraConfig, get_peft_model

In [15]:
import json

# Dataset sanity check
def preview_head_tail(data_path: str, head: int = 5, tail: int = 5):
    """
    Print the first ``head`` and the last ``tail`` conversations of a jsonl file.

    Every non-blank line must be a JSON object with a "conversations" list of
    ``{"role": ..., "content": ...}`` messages. The contents of the system,
    user and assistant messages are joined with " / " into one preview line.
    Lines that cannot be parsed, or whose messages lack "role"/"content", are
    reported and skipped instead of aborting the preview.

    Args:
        data_path (str): path of the jsonl file
        head (int): number of conversations printed from the start
        tail (int): number of conversations printed from the end

    Returns:
        None. Everything is written to stdout.
    """
    conversations = []

    with open(data_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                conv = json.loads(line)["conversations"]
                # Keep only the system/user/assistant message contents.
                parts = [
                    msg["content"]
                    for msg in conv
                    if msg["role"] in ("system", "user", "assistant")
                ]
                preview = " / ".join(parts)
            except (ValueError, KeyError, TypeError) as e:
                # ValueError covers json.JSONDecodeError; KeyError/TypeError
                # cover records that do not have the expected structure.
                print("跳过出错行:", line[:100], e)
                continue
            conversations.append(preview)

    total = len(conversations)
    print(f"{data_path}： {total} 条数据")

    # First `head` conversations
    print("\n=== 开头 ===")
    for c in conversations[:head]:
        print(c)

    # Last `tail` conversations. conversations[-0:] is the whole list, so a
    # tail of 0 has to be turned into an explicit empty selection.
    print("\n=== 结尾 ===")
    tail_items = conversations[-tail:] if tail > 0 else []
    for c in tail_items:
        print(c)

    return

In [16]:
import json

# Load the dataset
def load_conversations(data_path: str):
    """
    Read a jsonl dialogue file and convert each record into a (prompt, completion) pair.

    Expected line format:
        {"conversations": [{"role": "system", "content": "persona"},
                           {"role": "user", "content": "sentence 1"},
                           {"role": "assistant", "content": "sentence 2"}]}

    The system message becomes the persona line of the prompt, the user message
    is the example input and the assistant message is the reference output.
    If a role occurs several times in one record, the last occurrence wins.

    Lines that are not valid JSON, lack the "conversations" key or contain
    malformed messages are reported and skipped. Records without an assistant
    reply are skipped as well: an empty completion gives nothing to learn from.

    Args:
        data_path (str): path of the jsonl file
    Returns:
        list: [(prompt, completion), ...]
    """
    pairs = []
    with open(data_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                conv = json.loads(line)["conversations"]
                persona = user_msg = assistant_msg = ""
                for msg in conv:
                    if msg["role"] == "system":
                        persona = msg["content"]
                    elif msg["role"] == "user":
                        user_msg = msg["content"]
                    elif msg["role"] == "assistant":
                        assistant_msg = msg["content"]
            except (ValueError, KeyError, TypeError) as e:
                # ValueError covers json.JSONDecodeError; KeyError/TypeError
                # cover records that do not have the expected structure.
                print("跳过出错行:", line[:100], e)
                continue

            if not assistant_msg:
                print("跳过无助手回复的对话:", line[:100])
                continue

            prompt = f"{persona}\n用户: {user_msg}\n助手: "
            completion = assistant_msg
            pairs.append((prompt, completion))

    print(f"{data_path}： {len(pairs)} 条数据可用")
    return pairs

In [17]:
# Quantize the weights to 4-bit NF4 (double quantization, fp16 compute) to speed up fine-tuning
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the pretrained base model with the quantization config above, mapped to device 0
base_model = AutoModelForCausalLM.from_pretrained(
    "THUDM/chatglm3-6b",
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map={"": 0},
    trust_remote_code=True,
)

# Load the matching tokenizer
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [28]:
# LoRA hyper-parameters shared by every role adapter
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Cache of adapters that were already loaded for inference, keyed by role name
loaded_loras = {}

# ====== Adapter directory of each role ======
# NOTE(review): only lbxx, lz and zzx are trained in the visible cells;
# confirm that the c4-c6 directories exist before using those roles.
role2lora = {
    "蜡笔小新": "/kaggle/working/lora/lora_lbxx",
    "老子": "/kaggle/working/lora/lora_lz",
    "蜘蛛侠": "/kaggle/working/lora/lora_zzx",
    "c4": "/kaggle/working/lora/lora_c4",
    "c5": "/kaggle/working/lora/lora_c5",
    "c6": "/kaggle/working/lora/lora_c6",
}

In [19]:
# Fine-tuning
def train_lora(
    data_path,
    model_name="THUDM/chatglm3-6b",
    save_dir="/kaggle/working/lora",
    lr=1e-4,
    max_steps=100,
    lora=lora_config,
    max_length=512,
):
    """
    Fine-tune one LoRA adapter on a role's dialogue file and save it.

    Training is a single pass with batch size 1 over the first ``max_steps``
    (prompt, completion) pairs returned by ``load_conversations``. Prompt
    tokens are masked with -100, so only completion tokens contribute to the loss.

    Args:
        data_path: jsonl file handed to ``load_conversations``.
        model_name: unused, kept so existing calls keep working. The adapter is
            always attached to the module-level ``base_model`` and text is
            encoded with the module-level ``tokenizer``.
        save_dir: directory that receives the adapter and tokenizer files.
        lr: AdamW learning rate.
        max_steps: maximum number of samples (one optimizer step each).
        lora: ``LoraConfig`` describing the adapter.
        max_length: maximum token length of one sample; longer samples are
            truncated. Without it ``truncation=True`` has no effect.

    Returns:
        None
    """

    # ========= Load data =========
    pairs = load_conversations(data_path)

    # ========= Attach LoRA =========
    # get_peft_model injects the LoRA layers into the shared base_model in place.
    model = get_peft_model(base_model, lora)

    try:
        model.train()

        # Only the adapter weights are trainable; do not hand frozen
        # (quantized) base weights to the optimizer.
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.AdamW(trainable_params, lr=lr)

        # ========= Training loop =========
        for step, (prompt, completion) in enumerate(pairs[:max_steps]):
            full_text = prompt + completion
            inputs = tokenizer(
                full_text,
                return_tensors="pt",
                truncation=True,
                max_length=max_length,
                padding=True,
            ).to(model.device)

            labels = inputs.input_ids.clone()

            prompt_len = tokenizer(prompt, return_tensors="pt")["input_ids"].shape[-1]
            labels[:, :prompt_len] = -100  # exclude the prompt from the loss

            # With an empty or fully truncated completion every label is -100;
            # the loss would be undefined, so skip the sample.
            if not bool((labels != -100).any()):
                print(f"Step {step}, 跳过：没有可训练的回复 token")
                continue

            outputs = model(**inputs, labels=labels)
            loss = outputs.loss

            if step % 5 == 0:
                print(f"Step {step}, Loss: {loss.item():.4f}")

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # ========= Save =========
        os.makedirs(save_dir, exist_ok=True)
        model.save_pretrained(save_dir)
        tokenizer.save_pretrained(save_dir)
        print(f"LoRA 模型保存到 {save_dir}")
    finally:
        # Remove the injected LoRA layers so the shared base_model does not
        # carry this role's adapter into later training or inference.
        model.unload()

    return

In [20]:
# LoRA adapter for 蜡笔小新 (Crayon Shin-chan)
data_path = "/kaggle/input/rolechat/lbxx.jsonl"
train_lora(
    data_path,
    model_name="THUDM/chatglm3-6b",
    save_dir="/kaggle/working/lora/lora_lbxx",
    lr=1e-4,
    max_steps=100,
    lora=lora_config,
)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


/kaggle/input/rolechat/lbxx.jsonl： 110 条数据可用


2025-09-27 20:13:30.528269: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759004010.711870     299 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759004010.772256     299 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Step 0, Loss: 7.0273
Step 5, Loss: 5.8203
Step 10, Loss: 3.8320
Step 15, Loss: 3.1836
Step 20, Loss: 3.8477
Step 25, Loss: 1.7988
Step 30, Loss: 2.2363
Step 35, Loss: 3.8711
Step 40, Loss: 4.5781
Step 45, Loss: 3.5156
Step 50, Loss: 4.8359
Step 55, Loss: 3.9023
Step 60, Loss: 4.6367
Step 65, Loss: 4.1602
Step 70, Loss: 4.2266
Step 75, Loss: 3.6758
Step 80, Loss: 3.5605
Step 85, Loss: 3.5723
Step 90, Loss: 2.4785
Step 95, Loss: 3.0137




LoRA 模型保存到 /kaggle/working/lora/lora_lbxx


In [21]:
# LoRA adapter for 老子 (Laozi)
data_path = "/kaggle/input/rolechat/lz.jsonl"
train_lora(
    data_path,
    model_name="THUDM/chatglm3-6b",
    save_dir="/kaggle/working/lora/lora_lz",
    lr=1e-4,
    max_steps=100,
    lora=lora_config,
)

/kaggle/input/rolechat/lz.jsonl： 110 条数据可用
Step 0, Loss: 6.2539
Step 5, Loss: 4.1602
Step 10, Loss: 1.9600
Step 15, Loss: 3.2480
Step 20, Loss: 0.2035
Step 25, Loss: 0.1285
Step 30, Loss: 1.0645
Step 35, Loss: 4.2422
Step 40, Loss: 4.2109
Step 45, Loss: 2.6348
Step 50, Loss: 2.2578
Step 55, Loss: 1.1748
Step 60, Loss: 3.2559
Step 65, Loss: 0.1835
Step 70, Loss: 1.3584
Step 75, Loss: 4.4727
Step 80, Loss: 4.0273
Step 85, Loss: 3.6230
Step 90, Loss: 4.6484
Step 95, Loss: 4.5430
LoRA 模型保存到 /kaggle/working/lora/lora_lz


In [22]:
# LoRA adapter for 蜘蛛侠 (Spider-Man)
data_path = "/kaggle/input/rolechat/zzx.jsonl"
train_lora(
    data_path,
    model_name="THUDM/chatglm3-6b",
    save_dir="/kaggle/working/lora/lora_zzx",
    lr=1e-4,
    max_steps=100,
    lora=lora_config,
)

跳过出错行: {"conversations":[{"role":"system","content":"你是蜘蛛侠（守护纽约）"},{"role":"user","content":"你觉得城市需要你吗？"},{ Expecting ',' delimiter: line 1 column 119 (char 118)
/kaggle/input/rolechat/zzx.jsonl： 109 条数据可用
Step 0, Loss: 2.4355
Step 5, Loss: 2.6289
Step 10, Loss: 4.5547
Step 15, Loss: 5.8984
Step 20, Loss: 5.0742
Step 25, Loss: 5.9883
Step 30, Loss: 5.4688
Step 35, Loss: 4.1641
Step 40, Loss: 3.5234
Step 45, Loss: 2.2012
Step 50, Loss: 4.1211
Step 55, Loss: 4.8711
Step 60, Loss: 2.8711
Step 65, Loss: 2.9238
Step 70, Loss: 5.6133
Step 75, Loss: 3.5430
Step 80, Loss: 3.2852
Step 85, Loss: 3.5234
Step 90, Loss: 3.6387
Step 95, Loss: 4.1914
LoRA 模型保存到 /kaggle/working/lora/lora_zzx


In [23]:
def load_lora(base_model, lora_path):
    """
    Attach the LoRA adapter stored at ``lora_path`` to ``base_model``.

    Returns the resulting PeftModel after switching it to eval mode.
    """
    adapted = PeftModel.from_pretrained(base_model, lora_path)
    return adapted.eval()

In [24]:
# Reply generation
def generate_reply(role: str, user_input: str) -> str:
    """
    Generate a reply to ``user_input`` in the voice of ``role``.

    The role's adapter is loaded on first use and cached in ``loaded_loras``.
    All roles share one PeftModel wrapper around ``base_model``; each adapter is
    registered under its own name and activated before every generation, so a
    previously loaded role keeps using its own weights after another role loads.

    Decoding is greedy with at most 128 new tokens. Only the newly generated
    tokens are decoded, and the text is cut at the first "用户:" the model may
    go on to produce.

    Args:
        role: key of ``role2lora``.
        user_input: the user's message.

    Returns:
        The cleaned reply text (it is also printed).

    Raises:
        ValueError: if ``role`` is not a key of ``role2lora``.
    """
    import re

    if role not in role2lora:
        raise ValueError(f"未知角色 {role}，可选: {list(role2lora.keys())}")

    # One adapter name per role; "." is not allowed in torch module names.
    adapter_name = role.replace(".", "_")

    # Load the role's adapter on first use.
    if role not in loaded_loras:
        lora_path = role2lora[role]
        print(f"[INFO] 正在加载 LoRA: {role} ({lora_path})")
        if loaded_loras:
            # Reuse the existing wrapper and register one more named adapter.
            shared = next(iter(loaded_loras.values()))
            shared.load_adapter(lora_path, adapter_name=adapter_name)
        else:
            shared = PeftModel.from_pretrained(
                base_model, lora_path, adapter_name=adapter_name
            )
        loaded_loras[role] = shared.eval()

    model = loaded_loras[role]
    # Every cached entry wraps the same base model, so the active adapter must
    # be selected explicitly on each call.
    model.set_adapter(adapter_name)

    # Build the prompt.
    # NOTE(review): load_conversations builds training prompts from the dataset's
    # system message and ends them with "助手: "; this prompt uses a different
    # persona line and "{role}: " - confirm the mismatch is intended.
    persona = f"{role}的身份"
    prompt = f"{persona}\n用户: {user_input.strip()}\n{role}: "
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference
    outputs = model.generate(**inputs, max_new_tokens=128, do_sample=False)

    # Decode only what was generated after the prompt, so text echoed from the
    # prompt (including the user's own input) cannot be mistaken for the reply.
    prompt_len = inputs["input_ids"].shape[-1]
    raw = tokenizer.decode(
        outputs[0][prompt_len:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    ).strip()

    # Basic cleanup: collapse runs of line breaks.
    answer = re.sub(r"[\r\n]+", "\n", raw)

    # Cut at the first "用户:" in case the model continues the dialogue.
    answer_clean = answer.split("用户:")[0].strip()

    print(f"用户: {user_input}")
    print(f"{role}: {answer_clean}")
    print("-" * 60)

    return answer_clean

In [25]:
# Smoke-test the 蜡笔小新 adapter; the value displayed for the cell is the last call's return value.
generate_reply("蜡笔小新", "你是谁？")
generate_reply("蜡笔小新", "你喜欢动感超人吗？")

[INFO] 正在加载 LoRA: 蜡笔小新 (/kaggle/working/lora/lora_lbxx)
用户: 你是谁？
蜡笔小新: 我是野原新之助，不过大家都叫我是小新。
------------------------------------------------------------
用户: 你喜欢动感超人吗？
蜡笔小新: 喜欢！但我觉得他的红斗篷太重了。
------------------------------------------------------------


'喜欢！但我觉得他的红斗篷太重了。'

In [26]:
# Smoke-test the 老子 adapter.
generate_reply("老子", "早睡早起怎么样？")

[INFO] 正在加载 LoRA: 老子 (/kaggle/working/lora/lora_lz)
用户: 早睡早起怎么样？
老子: 少私寡欲，自然规律。
------------------------------------------------------------


'少私寡欲，自然规律。'

In [29]:
# Smoke-test the 蜘蛛侠 adapter; the value displayed for the cell is the last call's return value.
generate_reply("蜘蛛侠", "你是谁？")
generate_reply("蜘蛛侠", "你住在哪？")

[INFO] 正在加载 LoRA: 蜘蛛侠 (/kaggle/working/lora/lora_zzx)
用户: 你是谁？
蜘蛛侠: 我是托尼，一个普通的高中生，被选中拥有特殊能力，为了保护城市，我会利用这些力量。
------------------------------------------------------------
用户: 你住在哪？
蜘蛛侠: 纽约市。
------------------------------------------------------------


'纽约市。'