# AI Agent Core - 模型训练 Notebook

本Notebook用于训练AI Agent所需的模型组件。

**GitHub仓库**: 请修改下方的 `GITHUB_REPO` 变量为你的仓库地址

## 1. 配置参数

In [None]:
# ==================== Configuration ====================
# Edit the parameters below before running the rest of the notebook.

# GitHub repository URL (change this to your own repository)
GITHUB_REPO = "https://github.com/your-username/ai-agent-core.git"

# Hugging Face settings
# NOTE(review): avoid committing a real token when sharing this notebook.
HF_TOKEN = ""  # your HF token (optional); the upload step is skipped when this is empty
HF_REPO_ID = "your-username/ai-agent-model"  # target repository for the upload (optional)

# Model settings
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"  # base model to fine-tune

# Training settings
NUM_EPOCHS = 3
BATCH_SIZE = 4  # used as the per-device batch size for both training and evaluation
LEARNING_RATE = 2e-4
MAX_LENGTH = 2048  # passed to the trainer as the maximum sequence length

# LoRA settings
LORA_R = 16
LORA_ALPHA = 32

# ==================== End of configuration ====================

print("配置参数已设置!")
print(f"GitHub仓库: {GITHUB_REPO}")
print(f"基础模型: {BASE_MODEL}")

## 2. 环境设置

In [None]:
# Check which GPU the runtime provides
!nvidia-smi

import torch
print(f"\nPyTorch版本: {torch.__version__}")
print(f"CUDA可用: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Report the name and total memory (in GiB) of device 0
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU内存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

In [None]:
# Install dependencies
# NOTE(review): none of these packages is version-pinned, so the notebook runs
# against whatever releases pip resolves today — consider pinning known-good versions.
# NOTE(review): torch is already imported by the GPU-check cell; if this step
# replaces the installed torch, a runtime restart is presumably needed — confirm.
print("安装依赖中...")

!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers datasets accelerate peft bitsandbytes trl
!pip install -q wandb tensorboard
!pip install -q qdrant-client pgvector psycopg2-binary
!pip install -q fastapi uvicorn pydantic pydantic-settings httpx sqlalchemy asyncpg redis

print("依赖安装完成!")

In [None]:
# Clone the project (or update an existing checkout)
import os

PROJECT_DIR = "/content/ai-agent-core"

if not os.path.exists(PROJECT_DIR):
    print(f"克隆项目: {GITHUB_REPO}")
    !git clone {GITHUB_REPO} {PROJECT_DIR}
else:
    # The directory already exists: pull the latest changes instead of cloning
    print(f"项目已存在: {PROJECT_DIR}")
    !cd {PROJECT_DIR} && git pull

# Put the project root first on sys.path so its modules can be imported
import sys
sys.path.insert(0, PROJECT_DIR)

print(f"\n项目结构:")
!ls -la {PROJECT_DIR}

In [None]:
# 导入库
import json
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig,
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from trl import SFTTrainer

import warnings
# Suppress all Python warnings to keep the notebook output readable
warnings.filterwarnings('ignore')

# Seed the torch and NumPy random number generators for reproducibility
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

print("环境设置完成!")

## 3. 数据准备

In [None]:
# Layout of the agent training data.
# Each record maps instruction + input -> output, where output is a JSON-style
# dict holding the reasoning ("thought"), the chosen tool or final answer
# ("action") and its arguments ("action_input").


def _make_example(instruction, user_input, thought, action, action_input):
    """Assemble one record in the instruction / input / output layout."""
    return {
        "instruction": instruction,
        "input": user_input,
        "output": {
            "thought": thought,
            "action": action,
            "action_input": action_input,
        },
    }


_DECIDE_NEXT_STEP = "分析用户请求并决定下一步行动"
_WRITE_FINAL_ANSWER = "根据工具执行结果生成最终回答"

TRAIN_DATA_EXAMPLES = [
    _make_example(
        _DECIDE_NEXT_STEP,
        "用户想查询北京明天的天气",
        "用户需要天气信息，我需要调用天气查询工具获取北京的天气数据",
        "weather_query",
        {"city": "北京", "date": "明天"},
    ),
    _make_example(
        _WRITE_FINAL_ANSWER,
        "工具返回: 北京明天晴天，温度15-25°C，空气质量良好",
        "已获取完整的天气信息，可以给出最终答案",
        "final_answer",
        {"answer": "北京明天天气晴朗，温度在15到25摄氏度之间，空气质量良好，适合外出活动。"},
    ),
    _make_example(
        _DECIDE_NEXT_STEP,
        "计算 (123 + 456) * 2 的结果",
        "用户需要进行数学计算，调用计算器工具",
        "calculator",
        {"expression": "(123 + 456) * 2"},
    ),
    _make_example(
        _DECIDE_NEXT_STEP,
        "现在几点了？",
        "用户想知道当前时间，调用时间工具",
        "datetime",
        {"format": "time"},
    ),
    _make_example(
        _DECIDE_NEXT_STEP,
        "帮我搜索一下Python异步编程的最佳实践",
        "用户需要搜索信息，调用搜索工具",
        "search",
        {"query": "Python异步编程最佳实践"},
    ),
]

# Show the first record as pretty-printed JSON
print("训练数据示例:")
print(json.dumps(TRAIN_DATA_EXAMPLES[0], indent=2, ensure_ascii=False))

In [None]:
# Build the synthetic training set
def generate_training_data(num_samples: int = 1000, seed: Optional[int] = None) -> List[Dict]:
    """Generate a synthetic agent training set.

    Creates ``num_samples`` tool-selection records, cycling through the five
    tools in a fixed order (weather_query, calculator, datetime, search,
    text_process), then adds ``num_samples // 5`` "final_answer" records and
    shuffles the combined list.

    Args:
        num_samples: Number of tool-selection records to create.
        seed: Seed for the private random generator. When ``None`` the
            module-level ``SEED`` is used.

    Returns:
        A shuffled list of dicts with the keys ``instruction``, ``input`` and
        ``output``; ``output`` holds ``thought``, ``action`` and
        ``action_input``.
    """
    import random

    # A private generator keeps the data reproducible without reseeding the
    # global ``random`` module as a side effect.
    rng = random.Random(SEED if seed is None else seed)

    decide_instruction = "分析用户请求并决定下一步行动"

    cities = ["北京", "上海", "广州", "深圳", "杭州", "成都", "武汉", "西安", "南京", "苏州"]
    dates = ["今天", "明天", "后天", "本周", "下周"]
    expression_templates = [
        "{a} + {b}",
        "{a} - {b}",
        "{a} * {b}",
        "{a} / {b}",
        "({a} + {b}) * {c}",
        "{a} ** 2 + {b}",
    ]
    # Each question is paired with the format it asks for, so the label is
    # consistent with the request instead of being drawn independently.
    # NOTE(review): "给我当前时间" -> "iso" is an assumed pairing; confirm it
    # against the datetime tool's format semantics.
    datetime_requests = [
        ("现在几点了？", "time"),
        ("今天日期是什么？", "date"),
        ("给我当前时间", "iso"),
        ("显示当前时间戳", "timestamp"),
    ]
    search_queries = [
        "Python教程",
        "机器学习入门",
        "深度学习框架",
        "自然语言处理",
        "数据可视化",
        "API开发",
        "数据库优化",
        "前端框架",
        "云服务部署",
        "代码测试",
    ]
    text_operations = ["lowercase", "uppercase", "reverse", "word_count", "char_count"]
    sample_texts = ["Hello World", "Python Programming", "AI Agent Core", "Machine Learning"]

    def record(instruction, user_input, thought, action, action_input):
        """Assemble one record in the instruction / input / output layout."""
        return {
            "instruction": instruction,
            "input": user_input,
            "output": {
                "thought": thought,
                "action": action,
                "action_input": action_input,
            },
        }

    def weather_sample():
        city = rng.choice(cities)
        date = rng.choice(dates)
        return record(
            decide_instruction,
            f"用户想查询{city}{date}的天气",
            f"用户需要{city}{date}的天气信息，调用天气查询工具",
            "weather_query",
            {"city": city, "date": date},
        )

    def calculator_sample():
        # b is drawn from 1..100, so the division template never divides by zero.
        a, b, c = rng.randint(1, 100), rng.randint(1, 100), rng.randint(1, 10)
        expr = rng.choice(expression_templates).format(a=a, b=b, c=c)
        return record(
            decide_instruction,
            f"计算 {expr}",
            "用户需要进行数学计算，调用计算器工具",
            "calculator",
            {"expression": expr},
        )

    def datetime_sample():
        question, fmt = rng.choice(datetime_requests)
        return record(
            decide_instruction,
            question,
            "用户想知道当前时间信息，调用时间工具",
            "datetime",
            {"format": fmt},
        )

    def search_sample():
        query = rng.choice(search_queries)
        return record(
            decide_instruction,
            f"帮我搜索{query}相关内容",
            f"用户需要搜索{query}相关信息，调用搜索工具",
            "search",
            {"query": query},
        )

    def text_process_sample():
        op = rng.choice(text_operations)
        text = rng.choice(sample_texts)
        return record(
            decide_instruction,
            f"对文本 '{text}' 进行{op}处理",
            f"用户需要对文本进行{op}处理，调用文本处理工具",
            "text_process",
            {"text": text, "operation": op},
        )

    # Round-robin over the tools so each gets an equal share of the samples.
    builders = (
        weather_sample,
        calculator_sample,
        datetime_sample,
        search_sample,
        text_process_sample,
    )
    data = [builders[i % len(builders)]() for i in range(num_samples)]

    # Final-answer records: the answer must quote the same number the tool
    # returned, so the value is drawn once and reused in both strings.
    for _ in range(num_samples // 5):
        result = rng.randint(1, 1000)
        data.append(record(
            "根据工具执行结果生成最终回答",
            f"工具返回: 操作成功完成，结果为 {result}",
            "已获取工具执行结果，可以给出最终答案",
            "final_answer",
            {"answer": f"操作已成功完成。根据查询结果，答案是 {result}。"},
        ))

    rng.shuffle(data)
    return data

# Build the full data set
all_data = generate_training_data(2000)

# Keep the first 90% for training and hold out the rest for evaluation
split_idx = int(len(all_data) * 0.9)
train_data, eval_data = all_data[:split_idx], all_data[split_idx:]

for label, subset in (("训练数据", train_data), ("验证数据", eval_data)):
    print(f"{label}: {len(subset)} 条")

In [None]:
# Render records in the text format used for training
def format_example(example: Dict) -> str:
    """Render one training record as a single ChatML-style string.

    The text is made of three turns joined by newlines: a fixed system prompt
    (output contract plus the list of available tools), a user turn holding
    the instruction and the input separated by a blank line, and an assistant
    turn holding the JSON-serialised ``output``.

    Args:
        example: Record with the keys ``instruction``, ``input`` and
            ``output``.

    Returns:
        The formatted training text.
    """
    answer_json = json.dumps(example['output'], ensure_ascii=False)

    system_lines = (
        "<|im_start|>system",
        "你是一个智能AI助手，能够分析用户请求并选择合适的工具执行任务。",
        '你的输出必须是JSON格式：{"thought": "思考过程", "action": "工具名或final_answer", "action_input": {参数}}',
        "",
        "可用工具:",
        "- weather_query: 查询天气 (参数: city, date)",
        "- calculator: 数学计算 (参数: expression)",
        "- datetime: 获取时间 (参数: format)",
        "- search: 搜索信息 (参数: query)",
        "- text_process: 文本处理 (参数: text, operation)",
        "<|im_end|>",
    )
    user_turn = f"<|im_start|>user\n{example['instruction']}\n\n{example['input']}<|im_end|>"
    assistant_turn = f"<|im_start|>assistant\n{answer_json}<|im_end|>"

    return "\n".join((*system_lines, user_turn, assistant_turn))

# Wrap the formatted texts in datasets with a single "text" column
train_dataset = Dataset.from_list(
    [{"text": text} for text in map(format_example, train_data)]
)
eval_dataset = Dataset.from_list(
    [{"text": text} for text in map(format_example, eval_data)]
)

print("数据集创建完成!")
print("\n格式化示例:")
# Preview the first 800 characters of the first training text
first_text = train_dataset[0]['text']
print(first_text[:800])

## 4. 模型加载

In [None]:
# Load the tokenizer of the base model
print(f"加载Tokenizer: {BASE_MODEL}")

tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL,
    # NOTE(review): trust_remote_code allows code shipped with the model repo
    # to run locally — confirm it is actually required for this model.
    trust_remote_code=True,
    padding_side='right'
)

# Fall back to the EOS token when the tokenizer defines no padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"词表大小: {len(tokenizer)}")
print(f"PAD Token: {tokenizer.pad_token}")

In [None]:
# Quantization settings: 4-bit NF4 weights with double quantization,
# using float16 as the compute dtype
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load the base model with the quantization config applied
print(f"加载模型: {BASE_MODEL}")

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

# Report the parameter count in billions
print(f"模型参数量: {model.num_parameters() / 1e9:.2f}B")

In [None]:
# LoRA setup
# Prepare the quantized model for k-bit training before attaching adapters
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=0.05,
    # Adapters are attached to the modules with these names
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

# Wrap the model with the LoRA adapters and print how many parameters are trainable
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

print("\nLoRA配置完成!")

## 5. 训练

In [None]:
# Output directory for checkpoints and all later artifacts
OUTPUT_DIR = "/content/outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Training arguments
# NOTE(review): newer transformers releases rename evaluation_strategy to
# eval_strategy — confirm against the installed version.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=4,  # gradients are accumulated over 4 batches of BATCH_SIZE per device
    learning_rate=LEARNING_RATE,
    weight_decay=0.01,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_steps=200,
    eval_steps=200,  # same interval as save_steps
    save_total_limit=3,
    fp16=True,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    evaluation_strategy="steps",
    # Keep the checkpoint with the lowest eval_loss as the final model
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    report_to="tensorboard",
    # Timestamped run name so repeated runs can be told apart
    run_name=f"agent-core-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
)

print("训练参数配置完成!")

In [None]:
# Create the trainer
# NOTE(review): newer trl releases changed where tokenizer, dataset_text_field
# and max_seq_length are accepted — confirm these keyword arguments against the
# installed trl version.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    dataset_text_field="text",  # column of the datasets that holds the formatted text
    max_seq_length=MAX_LENGTH,
    packing=False,
)

print("Trainer创建完成!")

In [None]:
# Run training
banner = "=" * 60

print(banner)
print("开始训练...")
print(banner)

train_result = trainer.train()

# Report the final loss and step count returned by the trainer
print("\n" + banner)
print("训练完成!")
print(banner)
final_loss = train_result.training_loss
print(f"\n最终训练损失: {final_loss:.4f}")
print(f"训练步数: {train_result.global_step}")

## 6. 保存模型

In [None]:
# Save the LoRA weights together with the tokenizer
lora_output_dir = os.path.join(OUTPUT_DIR, "lora_weights")
trainer.model.save_pretrained(lora_output_dir)
tokenizer.save_pretrained(lora_output_dir)

print(f"LoRA权重已保存: {lora_output_dir}")

In [None]:
# Merge the LoRA weights into the base model
from peft import PeftModel

print("合并LoRA权重...")

# Reload the base model in float16 (without the 4-bit quantization config)
# NOTE(review): the quantized training model is still referenced at this point,
# so both copies may compete for GPU memory — confirm this fits on the target GPU.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Load the saved adapter on top of the base model and fold it into the weights
merged_model = PeftModel.from_pretrained(base_model, lora_output_dir)
merged_model = merged_model.merge_and_unload()

# Save the merged model together with the tokenizer
merged_output_dir = os.path.join(OUTPUT_DIR, "merged_model")
merged_model.save_pretrained(merged_output_dir)
tokenizer.save_pretrained(merged_output_dir)

print(f"合并模型已保存: {merged_output_dir}")

## 7. 模型测试

In [None]:
# Inference helper
def generate_response(model, tokenizer, prompt: str, max_new_tokens: int = 256) -> str:
    """Generate a completion for ``prompt`` and return only the new text.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        prompt: Full prompt text, including any chat markers.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded continuation, without the prompt and with special tokens
        removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Decoding all of it
    # with skip_special_tokens=True would drop the chat markers and leave the
    # prompt text fused with the answer, so only the generated tail is decoded.
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)

# Test cases
test_cases = [
    "用户想查询上海今天的天气",
    "计算 123 * 456",
    "现在几点了？",
    "帮我搜索Python教程",
]

print("="*60)
print("模型测试")
print("="*60)

for test_input in test_cases:
    # The system prompt, including the tool list, is kept identical to the one
    # format_example() builds for training, so the model is queried with the
    # same context it was fine-tuned on. The prompt ends right after the
    # assistant marker so that the model writes the JSON answer.
    prompt = f"""<|im_start|>system
你是一个智能AI助手，能够分析用户请求并选择合适的工具执行任务。
你的输出必须是JSON格式：{{"thought": "思考过程", "action": "工具名或final_answer", "action_input": {{参数}}}}

可用工具:
- weather_query: 查询天气 (参数: city, date)
- calculator: 数学计算 (参数: expression)
- datetime: 获取时间 (参数: format)
- search: 搜索信息 (参数: query)
- text_process: 文本处理 (参数: text, operation)
<|im_end|>
<|im_start|>user
分析用户请求并决定下一步行动

{test_input}<|im_end|>
<|im_start|>assistant
"""

    print(f"\n输入: {test_input}")
    response = generate_response(merged_model, tokenizer, prompt)

    # Keep only the assistant reply if chat markers are present in the text
    if "<|im_start|>assistant" in response:
        response = response.split("<|im_start|>assistant")[-1].strip()
    if "<|im_end|>" in response:
        response = response.split("<|im_end|>")[0].strip()

    print(f"输出: {response}")
    print("-"*60)

## 8. 上传到Hugging Face

In [None]:
# Upload to Hugging Face (optional; runs only when HF_TOKEN is non-empty)
if HF_TOKEN:
    from huggingface_hub import HfApi, login
    
    print(f"上传模型到: {HF_REPO_ID}")
    
    # Authenticate with the configured token
    login(token=HF_TOKEN)
    
    api = HfApi()
    # Create the target repo if it does not exist yet, then upload the merged model folder
    api.create_repo(repo_id=HF_REPO_ID, exist_ok=True)
    api.upload_folder(
        folder_path=merged_output_dir,
        repo_id=HF_REPO_ID,
        repo_type="model",
    )
    
    print(f"模型已上传: https://huggingface.co/{HF_REPO_ID}")
else:
    print("跳过上传 (未设置HF_TOKEN)")

## 9. 下载模型

In [None]:
# Pack the merged model directory into a zip archive
import shutil

zip_path = f"{OUTPUT_DIR}/model_weights"
# make_archive appends the ".zip" extension to the base name itself
shutil.make_archive(zip_path, 'zip', merged_output_dir)

print(f"模型已打包: {zip_path}.zip")
archive_bytes = os.path.getsize(zip_path + '.zip')
print(f"文件大小: {archive_bytes / 1024 / 1024:.2f} MB")

In [None]:
# Download the zipped model through the Colab file helper
from google.colab import files

files.download(f"{zip_path}.zip")
print("模型下载已开始...")

## 10. 训练总结

In [None]:
# Training summary: the key settings and results of this run
summary = {
    "base_model": BASE_MODEL,
    "github_repo": GITHUB_REPO,
    "training_epochs": NUM_EPOCHS,
    "train_loss": train_result.training_loss,
    "global_step": train_result.global_step,
    "lora_r": LORA_R,
    "lora_alpha": LORA_ALPHA,
    "learning_rate": LEARNING_RATE,
    "train_samples": len(train_data),
    "eval_samples": len(eval_data),
    "output_dir": OUTPUT_DIR,
    "timestamp": datetime.now().isoformat(),
}

# Persist the summary next to the model outputs. UTF-8 is set explicitly
# because ensure_ascii=False lets json.dump write non-ASCII characters as-is,
# which would otherwise depend on the platform's default encoding.
summary_path = os.path.join(OUTPUT_DIR, "training_summary.json")
with open(summary_path, "w", encoding="utf-8") as f:
    json.dump(summary, f, indent=2, ensure_ascii=False)

print("="*60)
print("训练总结")
print("="*60)
for k, v in summary.items():
    print(f"{k}: {v}")

print("\n" + "="*60)
print("训练流程全部完成!")
print("="*60)