# AI Agent Core - 模型训练 Notebook

本Notebook用于训练AI Agent所需的模型组件。

## 训练目标
- 推理模型 (Reasoning Model)
- 嵌入模型 (Embedding Model)
- 工具选择模型 (Tool Selection Model)

## 1. 环境设置

In [None]:
# Check the GPU.
# IPython shell escape: runs the `nvidia-smi` command-line tool.
!nvidia-smi

import torch
# Report what PyTorch itself sees: its version, CUDA availability/version and device count.
print(f"PyTorch版本: {torch.__version__}")
print(f"CUDA可用: {torch.cuda.is_available()}")
print(f"CUDA版本: {torch.version.cuda}")
print(f"GPU数量: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    # Only query the name of device 0 when CUDA is available.
    print(f"GPU名称: {torch.cuda.get_device_name(0)}")

In [None]:
# Install dependencies (IPython shell escapes; `-q` asks pip for quiet output).
# PyTorch packages are fetched from the cu118 wheel index.
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Fine-tuning stack; note that no versions are pinned here.
!pip install -q transformers datasets accelerate peft bitsandbytes trl
# Experiment tracking / logging.
!pip install -q wandb tensorboard
# NOTE(review): these client libraries are not referenced by the visible cells — confirm they are needed.
!pip install -q qdrant-client pgvector psycopg2-binary

In [None]:
# 导入库
import os
import json
import torch
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig,
)
from datasets import Dataset, load_dataset
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from trl import SFTTrainer

import warnings
warnings.filterwarnings('ignore')

# Seed PyTorch's and NumPy's global random generators with one fixed value.
SEED = 42
for _seed_fn in (torch.manual_seed, np.random.seed):
    _seed_fn(SEED)

print("环境设置完成!")

## 2. 配置参数

In [None]:
# Training configuration: the single source of hyperparameters for the cells below.
CONFIG = {
    # Model settings
    "model_name": "Qwen/Qwen2.5-7B-Instruct",  # can be swapped for another model
    "max_length": 2048,
    "vocab_size": None,  # placeholder; never filled in by this cell

    # LoRA settings
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],

    # Training settings
    "output_dir": "./outputs",
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "gradient_accumulation_steps": 4,
    "learning_rate": 2e-4,
    "weight_decay": 0.01,
    "warmup_ratio": 0.1,
    "lr_scheduler_type": "cosine",
    "logging_steps": 10,
    "save_steps": 500,
    "eval_steps": 500,
    "save_total_limit": 3,

    # Quantization settings
    "use_4bit": True,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
}

# Create the output directory (no error if it already exists).
os.makedirs(CONFIG["output_dir"], exist_ok=True)

# Persist the configuration next to the training outputs.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 instead of relying on the platform's default codec.
with open(os.path.join(CONFIG["output_dir"], "config.json"), "w", encoding="utf-8") as f:
    json.dump(CONFIG, f, indent=2, ensure_ascii=False)

print("配置参数:")
for k, v in CONFIG.items():
    print(f"  {k}: {v}")

## 3. 数据准备

In [None]:
# Reference examples of the agent training record format: each record has an
# instruction, an input, and an output that is a JSON string with the keys
# "thought", "action" and "action_input".
TRAIN_DATA_EXAMPLE = [
    {
        "instruction": "分析用户请求并决定下一步行动",
        "input": "用户想查询北京明天的天气",
        "output": '{"thought": "用户需要天气信息，我需要调用天气查询工具", "action": "weather_query", "action_input": {"city": "北京", "date": "明天"}}',
    },
    {
        "instruction": "根据工具执行结果生成最终回答",
        "input": "工具返回: 北京明天晴天，温度15-25°C",
        "output": '{"thought": "已获取天气信息，可以给出最终答案", "action": "final_answer", "action_input": {"answer": "北京明天天气晴朗，温度在15到25摄氏度之间，适合外出活动。"}}',
    },
]

# Build the synthetic sample dataset
def create_sample_dataset(size: int = 100) -> List[Dict]:
    """Create a deterministic synthetic tool-selection dataset.

    Index ``i`` cycles through four tools (weather_query, calculator, search,
    datetime), so the result always contains exactly ``size`` examples.

    Args:
        size: Number of examples to generate; zero or a negative value
            yields an empty list.

    Returns:
        A list of dicts with the keys "instruction", "input" and "output",
        where "output" is a JSON string holding "thought", "action" and
        "action_input".
    """
    instruction = "分析用户请求并决定下一步行动"
    tools = ["weather_query", "calculator", "search", "datetime"]
    cities = ["北京", "上海", "广州", "深圳", "杭州"]
    topics = ["人工智能", "机器学习", "量子计算", "新能源汽车", "气候变化"]
    time_formats = ["%Y-%m-%d", "%H:%M:%S", "%Y-%m-%d %H:%M:%S"]

    data = []
    for i in range(size):
        tool = tools[i % len(tools)]

        if tool == "weather_query":
            city = cities[i % len(cities)]
            user_input = f"用户想查询{city}的天气情况"
            thought = f"用户需要{city}的天气信息，调用天气查询工具"
            action_input = {"city": city}
        elif tool == "calculator":
            expr = f"{i * 2} + {i * 3}"
            user_input = f"计算 {expr}"
            thought = "用户需要进行数学计算，调用计算器工具"
            action_input = {"expression": expr}
        elif tool == "search":
            # Previously this tool had no branch, so its indices were dropped.
            topic = topics[i % len(topics)]
            user_input = f"搜索关于{topic}的信息"
            thought = f"用户需要查找{topic}的相关信息，调用搜索工具"
            action_input = {"query": topic}
        else:
            # "datetime": also previously dropped.
            fmt = time_formats[i % len(time_formats)]
            user_input = f"按 {fmt} 格式获取当前时间"
            thought = "用户需要获取当前时间，调用时间工具"
            action_input = {"format": fmt}

        data.append({
            "instruction": instruction,
            "input": user_input,
            # Key order (thought, action, action_input) matches the record format.
            "output": json.dumps(
                {"thought": thought, "action": tool, "action_input": action_input},
                ensure_ascii=False,
            ),
        })

    return data

# Generate the training and validation examples.
# NOTE(review): both calls run the same deterministic generator from index 0,
# so the validation examples repeat the leading training examples — confirm
# that this overlap is acceptable for evaluation.
train_data = create_sample_dataset(1000)
eval_data = create_sample_dataset(200)

for _label, _split in (("训练数据", train_data), ("验证数据", eval_data)):
    print(f"{_label}: {len(_split)} 条")
print("\n示例数据:")
print(json.dumps(train_data[0], indent=2, ensure_ascii=False))

In [None]:
# Turn one raw record into the text the model is trained on
def format_data(example: Dict) -> str:
    """Render a record as system/user/assistant turns.

    Each turn is wrapped in ``<|im_start|>`` / ``<|im_end|>`` markers. The
    system turn is fixed; the user turn holds the instruction and input
    separated by a blank line; the assistant turn holds the output.

    Args:
        example: Mapping with the keys "instruction", "input" and "output".

    Returns:
        The three turns joined by newlines, as a single string.
    """
    system_turn = (
        "<|im_start|>system\n"
        "你是一个智能AI助手，能够分析用户请求并选择合适的工具执行任务。\n"
        '你的输出必须是JSON格式：{"thought": "思考过程", "action": "工具名或final_answer", "action_input": {参数}}\n'
        "<|im_end|>"
    )
    user_turn = f"<|im_start|>user\n{example['instruction']}\n\n{example['input']}<|im_end|>"
    assistant_turn = f"<|im_start|>assistant\n{example['output']}<|im_end|>"
    return "\n".join((system_turn, user_turn, assistant_turn))

# Wrap the formatted examples in Dataset objects with a single "text" column.
train_dataset = Dataset.from_list(
    [{"text": text} for text in map(format_data, train_data)]
)
eval_dataset = Dataset.from_list(
    [{"text": text} for text in map(format_data, eval_data)]
)

print("数据集创建完成!")
print("\n格式化示例:")
# Preview at most the first 500 characters of the first formatted example.
print(train_dataset[0]['text'][:500])

## 4. 模型加载

In [None]:
# Load the tokenizer for the configured model, padding on the right.
tokenizer = AutoTokenizer.from_pretrained(
    CONFIG["model_name"], trust_remote_code=True, padding_side='right'
)

# Fall back to the EOS token when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Tokenizer加载完成")
for _label, _value in (
    ("词表大小", len(tokenizer)),
    ("PAD Token", tokenizer.pad_token),
    ("EOS Token", tokenizer.eos_token),
):
    print(f"{_label}: {_value}")

In [None]:
# Quantization settings: build a 4-bit config only when CONFIG enables it,
# otherwise leave it as None.
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type=CONFIG["bnb_4bit_quant_type"],
        # CONFIG stores the dtype by name; resolve it to the torch dtype object.
        bnb_4bit_compute_dtype=getattr(torch, CONFIG["bnb_4bit_compute_dtype"]),
        bnb_4bit_use_double_quant=CONFIG["bnb_4bit_use_double_quant"],
    )
    if CONFIG["use_4bit"]
    else None
)

# Load the base causal-LM.
model = AutoModelForCausalLM.from_pretrained(
    CONFIG["model_name"],
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

print(f"模型加载完成: {CONFIG['model_name']}")
# Report the parameter count in billions.
print(f"模型参数量: {model.num_parameters() / 1e9:.2f}B")

In [None]:
# LoRA setup
# When 4-bit loading is enabled, run peft's k-bit preparation step on the model
# before the adapters are attached.
if CONFIG["use_4bit"]:
    model = prepare_model_for_kbit_training(model)

# Adapter hyperparameters (rank, alpha, dropout, target modules) come from CONFIG.
lora_config = LoraConfig(
    r=CONFIG["lora_r"],
    lora_alpha=CONFIG["lora_alpha"],
    lora_dropout=CONFIG["lora_dropout"],
    target_modules=CONFIG["target_modules"],
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

# Wrap the model with the LoRA adapters and print the trainable-parameter summary.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

print("\nLoRA配置完成!")

## 5. 训练

In [None]:
# Training arguments
import inspect

# transformers renamed the ``evaluation_strategy`` argument to ``eval_strategy``.
# The install cell does not pin a version, so use whichever name the installed
# TrainingArguments accepts rather than failing with a TypeError.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir=CONFIG["output_dir"],
    num_train_epochs=CONFIG["num_train_epochs"],
    per_device_train_batch_size=CONFIG["per_device_train_batch_size"],
    per_device_eval_batch_size=CONFIG["per_device_eval_batch_size"],
    gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
    learning_rate=CONFIG["learning_rate"],
    weight_decay=CONFIG["weight_decay"],
    warmup_ratio=CONFIG["warmup_ratio"],
    lr_scheduler_type=CONFIG["lr_scheduler_type"],
    logging_steps=CONFIG["logging_steps"],
    save_steps=CONFIG["save_steps"],
    eval_steps=CONFIG["eval_steps"],
    save_total_limit=CONFIG["save_total_limit"],
    # Mixed precision: fp16 on, bf16 off.
    fp16=True,
    bf16=False,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    # Evaluate every `eval_steps`; the best checkpoint is the one with the lowest eval_loss.
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    report_to="tensorboard",
    # Timestamped run name so repeated runs can be told apart.
    run_name=f"agent-training-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
    **{_eval_strategy_kwarg: "steps"},
)

print("训练参数配置完成!")

In [None]:
# Create the trainer
# Trains on the "text" column of the datasets, with sequences limited to
# CONFIG["max_length"] and packing disabled.
# NOTE(review): passing tokenizer / dataset_text_field / max_seq_length /
# packing directly to SFTTrainer presumably matches older trl releases only;
# the install cell is unpinned, so confirm these keywords against the
# installed trl version.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=CONFIG["max_length"],
    packing=False,
)

print("Trainer创建完成!")

In [None]:
# Run training and report the final loss and step count.
_separator = "=" * 50

print(_separator)
print("开始训练...")
print(_separator)

train_result = trainer.train()

print("\n" + _separator)
print("训练完成!")
print(_separator)
print(f"\n训练损失: {train_result.training_loss:.4f}")
print(f"训练步数: {train_result.global_step}")

## 6. 保存模型

In [None]:
# Save the LoRA weights, with the tokenizer alongside them in the same directory.
lora_output_dir = os.path.join(CONFIG["output_dir"], "lora_weights")
for _artifact in (trainer.model, tokenizer):
    _artifact.save_pretrained(lora_output_dir)

print(f"LoRA权重已保存到: {lora_output_dir}")

In [None]:
# Merge the LoRA weights into the base model and save the full model
from peft import PeftModel

# Reload the base model in float16; unlike the training-time load, no
# quantization_config is passed here.
# NOTE(review): the training model is still referenced at this point, so both
# copies presumably have to fit in memory — confirm on the target hardware.
base_model = AutoModelForCausalLM.from_pretrained(
    CONFIG["model_name"],
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Load the saved LoRA adapter on top of the base model, then merge and unload it.
merged_model = PeftModel.from_pretrained(base_model, lora_output_dir)
merged_model = merged_model.merge_and_unload()

# Save the merged model together with the tokenizer.
merged_output_dir = os.path.join(CONFIG["output_dir"], "merged_model")
merged_model.save_pretrained(merged_output_dir)
tokenizer.save_pretrained(merged_output_dir)

print(f"合并模型已保存到: {merged_output_dir}")

## 7. 模型测试

In [None]:
# Inference helper
def generate_response(model, tokenizer, prompt: str, max_new_tokens: int = 256) -> str:
    """Generate the model's reply to ``prompt``.

    Args:
        model: Model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the reply.
        prompt: Full prompt text, already formatted with the chat markers.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed. The prompt is not included.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence begins with the prompt tokens; drop them so the
    # caller receives the response rather than the prompt echoed back.
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)

# Test cases: prompts that end with an open assistant turn for the model to complete.
test_prompts = [
    """<|im_start|>system
你是一个智能AI助手，能够分析用户请求并选择合适的工具执行任务。
你的输出必须是JSON格式：{"thought": "思考过程", "action": "工具名或final_answer", "action_input": {参数}}
<|im_end|>
<|im_start|>user
分析用户请求并决定下一步行动

用户想查询上海今天的天气<|im_end|>
<|im_start|>assistant
"""
]

print("="*50)
print("模型测试")
print("="*50)

# Kept outside the f-string below: a backslash inside an f-string expression
# is a SyntaxError on Python versions before 3.12.
_user_marker = "<|im_start|>user\n"

for prompt in test_prompts:
    # Show at most the first 100 characters of the user turn.
    user_text = prompt.split(_user_marker)[1].split('<|im_end|>')[0][:100]
    print(f"\n输入: {user_text}...")
    response = generate_response(merged_model, tokenizer, prompt)
    print(f"\n输出: {response}")
    print("-"*50)

## 8. 上传到Hugging Face (可选)

In [None]:
# Upload to Hugging Face
from huggingface_hub import HfApi, login

# Log in (set the HF_TOKEN environment variable first, or use huggingface-cli login in Colab)
# login(token="your_hf_token")

# Target repository for the upload
HF_REPO_ID = "your-username/your-model-name"  # change this to your own repo ID

def upload_to_hf(model_path: str, repo_id: str):
    """Upload a local model folder to a Hugging Face model repository.

    The repository is created first (an existing one is accepted). Any
    exception raised during creation or upload is caught and reported with a
    printed message; nothing is re-raised.

    Args:
        model_path: Local folder whose contents are uploaded.
        repo_id: Destination repository ID.
    """
    hub = HfApi()

    try:
        hub.create_repo(repo_id=repo_id, exist_ok=True)
        hub.upload_folder(folder_path=model_path, repo_id=repo_id, repo_type="model")
        print(f"模型已上传到: https://huggingface.co/{repo_id}")
    except Exception as err:
        print(f"上传失败: {err}")

# 取消注释以执行上传
# upload_to_hf(merged_output_dir, HF_REPO_ID)

## 9. 下载模型到本地

In [None]:
# Package the merged model directory as a zip archive for download.
import shutil

# Base name without extension; make_archive is asked for the 'zip' format.
zip_path = f"{CONFIG['output_dir']}/model_archive"
shutil.make_archive(zip_path, 'zip', merged_output_dir)

_archive_file = zip_path + '.zip'
print(f"模型已打包: {zip_path}.zip")
# Report the archive size in MB (bytes / 1024 / 1024).
print(f"文件大小: {os.path.getsize(_archive_file) / 1024 / 1024:.2f} MB")

In [None]:
# Download from Colab
# NOTE(review): presumably `google.colab` is only importable inside a Colab
# runtime — skip this cell when running elsewhere.
from google.colab import files

# Download the zip archive created in the previous cell.
files.download(f"{zip_path}.zip")

## 10. 训练总结

In [None]:
# Training summary: key settings plus the final loss and step count.
summary = {
    "model_name": CONFIG["model_name"],
    "training_epochs": CONFIG["num_train_epochs"],
    "train_loss": train_result.training_loss,
    "global_step": train_result.global_step,
    "lora_r": CONFIG["lora_r"],
    "learning_rate": CONFIG["learning_rate"],
    "output_dir": CONFIG["output_dir"],
    "timestamp": datetime.now().isoformat(),
}

# Write the summary to the output directory.
_summary_path = os.path.join(CONFIG["output_dir"], "training_summary.json")
with open(_summary_path, "w") as f:
    json.dump(summary, f, indent=2)

_separator = "=" * 50
print(_separator)
print("训练总结")
print(_separator)
for _key, _value in summary.items():
    print(f"{_key}: {_value}")