# Flan-T5 意图判别器微调

**目标**: 微调 Flan-T5-base 用于意图判别 (JudgeRLVR)

**环境**: Google Colab (免费 GPU)

**输出**: ONNX 模型，可在 CPU 服务器部署

## 1. 环境准备

In [None]:
# Install dependencies: the training stack, the ONNX export/runtime tooling,
# and the data/evaluation utilities used later in this notebook.
# NOTE(review): bitsandbytes is installed here but is not referenced by any
# code visible in this notebook — confirm whether it is still needed.
!pip install -q transformers datasets accelerate peft bitsandbytes
!pip install -q optimum[onnxruntime] onnx onnxruntime
!pip install -q scikit-learn pandas

In [None]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    T5Tokenizer, 
    T5ForConditionalGeneration,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq
)
from peft import LoraConfig, get_peft_model, TaskType
import os

# Report whether a CUDA device is visible and, if so, its name and total memory.
cuda_ready = torch.cuda.is_available()
print(f"GPU 可用: {cuda_ready}")
if cuda_ready:
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU 型号: {torch.cuda.get_device_name(0)}")
    print(f"显存: {device_props.total_memory / 1024**3:.1f} GB")

## 2. 上传训练数据

请上传以下文件到 Colab (每个 CSV 需包含 `user_input`、`intent_code`、`intent_description`、`label` 四列):
- `intent_judge_train.csv`
- `intent_judge_valid.csv`

In [None]:
from google.colab import files

# Ask the user to pick the training/validation CSVs in the Colab upload
# widget; the return value of files.upload() is kept in `uploaded`.
print("请上传 intent_judge_train.csv 和 intent_judge_valid.csv")
uploaded = files.upload()

In [None]:
# Load the uploaded CSV files into DataFrames.
# NOTE(review): comment='#' tells pandas to ignore the remainder of a line
# after '#'. Presumably this is for comment header lines in the CSVs; confirm
# that no unquoted user_input value contains '#', or it would be truncated.
train_df = pd.read_csv('intent_judge_train.csv', comment='#')
valid_df = pd.read_csv('intent_judge_valid.csv', comment='#')

# Print the split sizes, then show the first rows of the training set
# (the trailing expression is rendered by the notebook).
print(f"训练集: {len(train_df)} 条")
print(f"验证集: {len(valid_df)} 条")
print(f"\n数据样例:")
train_df.head()

## 3. 数据预处理

In [None]:
def prepare_data(df):
    """Build a text-to-text dataset for T5 from an intent-judgement DataFrame.

    Args:
        df: DataFrame whose rows provide ``user_input``, ``intent_code``,
            ``intent_description`` and ``label``.

    Returns:
        A ``Dataset`` with two string columns, one entry per row of ``df``:
        ``input_text`` (the judging prompt) and ``target_text`` ("是" when
        ``label == 1``, otherwise "否").
    """
    # NOTE(review): the stock Flan-T5 SentencePiece vocabulary is
    # English-centric. Verify that "是"/"否" (and the Chinese prompt) are not
    # all encoded as <unk>; if they are, the two targets are indistinguishable.
    records = df.to_dict('records')

    # Prompt layout: instruction, then the user input, then "code - description".
    prompts = [
        f"判断用户输入是否匹配意图。用户输入: {rec['user_input']} 意图: {rec['intent_code']} - {rec['intent_description']}"
        for rec in records
    ]
    # Any label other than 1 is treated as a negative example.
    answers = ["是" if rec['label'] == 1 else "否" for rec in records]

    return Dataset.from_dict({'input_text': prompts, 'target_text': answers})

# Convert both splits to prompt/target pairs and print the first training
# example as a quick sanity check of the prompt format.
train_dataset = prepare_data(train_df)
valid_dataset = prepare_data(valid_df)

print(f"训练样例: {train_dataset[0]}")

## 4. 加载模型和 Tokenizer

In [None]:
# Base checkpoint: flan-t5-base (mid-sized, suitable for fine-tuning).
MODEL_NAME = "google/flan-t5-base"

tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
# Load the weights in float32. The training arguments in this notebook enable
# fp16 mixed precision (fp16=True); the AMP gradient scaler cannot unscale
# gradients of parameters that are themselves stored in float16, so trainable
# weights must stay in full precision and autocast handles the half-precision
# compute. T5-family checkpoints are also reported to overflow when the
# weights themselves are kept in float16.
model = T5ForConditionalGeneration.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="auto"
)

print(f"模型参数量: {model.num_parameters() / 1e6:.1f}M")

## 5. 配置 LoRA (低秩适配)

LoRA 可以大幅减少需要训练的参数，节省显存和时间

In [None]:
# LoRA configuration for a sequence-to-sequence language model.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=16,                    # LoRA rank
    lora_alpha=32,           # LoRA alpha
    lora_dropout=0.1,        # dropout used by the LoRA layers
    target_modules=["q", "v"],  # adapt only the attention Q and V projections
)

# Wrap the base model with the adapters and print how many parameters are
# trainable compared with the full model.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

## 6. 数据 Tokenization

In [None]:
MAX_INPUT_LENGTH = 256  # max token length the prompt is truncated/padded to
MAX_TARGET_LENGTH = 8  # max token length for targets and for generated answers

def tokenize_function(examples):
    """Tokenize a batch of prompt/target pairs for seq2seq training.

    Intended for ``Dataset.map(..., batched=True)``.

    Args:
        examples: Batch dict with ``input_text`` and ``target_text`` lists
            of strings.

    Returns:
        The encoder tokenization (``input_ids``, ``attention_mask``, padded to
        ``MAX_INPUT_LENGTH``) plus ``labels``: target token ids padded to
        ``MAX_TARGET_LENGTH``, with every padding position set to -100.
    """
    model_inputs = tokenizer(
        examples['input_text'],
        max_length=MAX_INPUT_LENGTH,
        truncation=True,
        padding='max_length',
    )

    # `text_target` replaces the deprecated `as_target_tokenizer()` context.
    labels = tokenizer(
        text_target=examples['target_text'],
        max_length=MAX_TARGET_LENGTH,
        truncation=True,
        padding='max_length',
    )

    # The labels are already padded to a fixed length here, so the data
    # collator has nothing left to pad and will not insert its -100 fill
    # value. Mask the pad tokens explicitly; otherwise the loss is also
    # computed on the padding positions.
    pad_id = tokenizer.pad_token_id
    model_inputs['labels'] = [
        [token if token != pad_id else -100 for token in sequence]
        for sequence in labels['input_ids']
    ]
    return model_inputs

# Tokenize both splits in batches; the results keep the original text columns
# alongside the new input_ids / attention_mask / labels columns.
train_tokenized = train_dataset.map(tokenize_function, batched=True)
valid_tokenized = valid_dataset.map(tokenize_function, batched=True)

## 7. 训练配置

In [None]:
# Directory where the trainer writes checkpoints.
OUTPUT_DIR = "./flan-t5-intent-judge"

training_args = Seq2SeqTrainingArguments(
    output_dir=OUTPUT_DIR,
    
    # Training hyperparameters
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=3e-4,
    warmup_steps=100,
    weight_decay=0.01,
    
    # Evaluation: evaluate and checkpoint once per epoch, then reload the
    # checkpoint with the best eval_loss when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    
    # Logging
    logging_steps=10,
    report_to="none",
    
    # Optimization: fp16 mixed precision; gradients are accumulated over
    # 2 steps, i.e. 16 examples per device per optimizer update.
    # NOTE(review): T5-family models are reported to overflow under fp16
    # autocast. If the loss becomes NaN, consider bf16 on GPUs that support
    # it, or disable mixed precision — confirm on the target GPU.
    fp16=True,
    gradient_accumulation_steps=2,
    
    # Generation settings used during evaluation.
    # NOTE(review): no compute_metrics is passed to the trainer in this
    # notebook, so the generated predictions do not affect eval_loss or
    # best-model selection — confirm whether generation is needed here.
    predict_with_generate=True,
    generation_max_length=MAX_TARGET_LENGTH,
)

# Collator for seq2seq batches; -100 is the fill value for label padding.
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=-100
)

## 8. 开始训练

In [None]:
# Assemble the trainer from the LoRA-wrapped model, the tokenized splits,
# and the collator defined above in this notebook.
# NOTE(review): recent transformers releases deprecate the `tokenizer=`
# argument in favour of `processing_class=` — confirm against the installed
# version.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=valid_tokenized,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Run fine-tuning.
print("开始训练...")
trainer.train()

## 9. 评估模型

In [None]:
from sklearn.metrics import accuracy_score, classification_report

def evaluate_model(model, tokenizer, dataset, df):
    """Generate a yes/no judgement for every example and report accuracy.

    Args:
        model: Seq2seq model (plain or PEFT-wrapped) exposing ``generate``
            and ``device``.
        tokenizer: Tokenizer used to decode the generated ids.
        dataset: Iterable of tokenized examples with ``input_ids`` and,
            optionally, ``attention_mask``, in the same order as ``df``.
        df: DataFrame with a ``label`` column. A value of 1 is positive and
            anything else negative, matching how ``prepare_data`` builds
            the targets.

    Returns:
        Tuple ``(accuracy, predictions)`` where ``predictions`` is a list of
        0/1 ints, one per example.
    """
    model.eval()
    predictions = []

    for example in dataset:
        # Inputs are passed by keyword: some PEFT versions' seq2seq wrapper
        # defines generate(**kwargs) and rejects positional arguments.
        gen_inputs = {
            'input_ids': torch.tensor([example['input_ids']]).to(model.device),
        }
        # The examples are padded, so hand over the mask explicitly instead
        # of relying on generate() to infer it from the pad token.
        mask = example.get('attention_mask')
        if mask is not None:
            gen_inputs['attention_mask'] = torch.tensor([mask]).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **gen_inputs,
                max_length=MAX_TARGET_LENGTH,
                num_beams=1,
                do_sample=False,
            )

        pred_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predictions.append(1 if "是" in pred_text else 0)

    # Binarise the ground truth with the same rule used to build the targets.
    true_labels = [1 if value == 1 else 0 for value in df['label']]
    accuracy = accuracy_score(true_labels, predictions)

    print(f"准确率: {accuracy:.4f}")
    print("\n分类报告:")
    # labels=[0, 1] keeps the report aligned with target_names even when only
    # one class occurs in the labels and predictions.
    print(classification_report(
        true_labels,
        predictions,
        labels=[0, 1],
        target_names=['否', '是'],
    ))

    return accuracy, predictions

# Evaluate the fine-tuned model on the validation split; valid_tokenized and
# valid_df are passed together so predictions and labels share row order.
print("评估验证集...")
accuracy, preds = evaluate_model(model, tokenizer, valid_tokenized, valid_df)

## 10. 合并 LoRA 权重并保存

In [None]:
# Merge the LoRA weights into the base model.
merged_model = model.merge_and_unload()

# Save the full merged model together with its tokenizer.
MERGED_DIR = "./flan-t5-intent-judge-merged"
merged_model.save_pretrained(MERGED_DIR)
tokenizer.save_pretrained(MERGED_DIR)

print(f"模型已保存到: {MERGED_DIR}")

## 11. 导出 ONNX 模型 (用于 CPU 部署)

In [None]:
from optimum.onnxruntime import ORTModelForSeq2SeqLM

# Output directory for the exported ONNX model.
ONNX_DIR = "./flan-t5-intent-judge-onnx"

# Export to ONNX: loading the merged checkpoint with export=True converts it,
# then the result and the tokenizer are written to ONNX_DIR.
print("导出 ONNX 模型...")
ort_model = ORTModelForSeq2SeqLM.from_pretrained(
    MERGED_DIR,
    export=True
)
ort_model.save_pretrained(ONNX_DIR)
tokenizer.save_pretrained(ONNX_DIR)

print(f"ONNX 模型已保存到: {ONNX_DIR}")

# List every entry of the export directory with its size in MiB.
import os
for entry_name in os.listdir(ONNX_DIR):
    size_mb = os.path.getsize(os.path.join(ONNX_DIR, entry_name)) / (1024 * 1024)
    print(f"  {entry_name}: {size_mb:.1f} MB")

## 12. 测试 ONNX 模型

In [None]:
# Reload the exported ONNX model and its tokenizer from disk for testing.
onnx_model = ORTModelForSeq2SeqLM.from_pretrained(ONNX_DIR)
onnx_tokenizer = T5Tokenizer.from_pretrained(ONNX_DIR)

def test_inference(text, intent_code, intent_desc):
    """Run one intent judgement through the ONNX model and print the result.

    Args:
        text: User utterance to judge.
        intent_code: Candidate intent identifier.
        intent_desc: Human-readable description of the intent.

    Returns:
        The decoded model output string.
    """
    import time

    # The prompt must stay identical to the template used in prepare_data.
    prompt = f"判断用户输入是否匹配意图。用户输入: {text} 意图: {intent_code} - {intent_desc}"
    inputs = onnx_tokenizer(prompt, return_tensors="pt")

    # perf_counter() is monotonic and high-resolution, so the measured latency
    # cannot be skewed by wall-clock adjustments the way time.time() can.
    start = time.perf_counter()
    outputs = onnx_model.generate(**inputs, max_length=8)
    latency = (time.perf_counter() - start) * 1000

    result = onnx_tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"输入: {text}")
    print(f"意图: {intent_code}")
    print(f"判断: {result}")
    print(f"延迟: {latency:.1f}ms")
    print()
    return result

# Smoke tests: one matching pair, one mismatching pair, one more matching pair.
test_inference("这个月销售怎么样", "sales_overview", "销售情况概览查询")  # expected output: 是
test_inference("删除这条记录", "sales_overview", "销售情况概览查询")     # expected output: 否
test_inference("查一下库存", "material_query", "原料库存查询")           # expected output: 是

## 13. 下载模型

In [None]:
# Package the exported ONNX model directory into a zip archive.
!zip -r flan-t5-intent-judge-onnx.zip flan-t5-intent-judge-onnx/

# Trigger a browser download of the archive from Colab.
from google.colab import files
files.download('flan-t5-intent-judge-onnx.zip')

# Print the follow-up upload command for the deployment server.
# NOTE(review): the printed command hard-codes a server address and a root
# login — confirm this is intended to live in a shared notebook.
print("\n请下载 flan-t5-intent-judge-onnx.zip 并上传到服务器:")
print("scp flan-t5-intent-judge-onnx.zip root@139.196.165.140:/www/wwwroot/cretas/models/")

## 部署说明

### 1. 上传到服务器并解压
```bash
scp flan-t5-intent-judge-onnx.zip root@139.196.165.140:/www/wwwroot/cretas/models/
ssh root@139.196.165.140
cd /www/wwwroot/cretas/models/
unzip flan-t5-intent-judge-onnx.zip
mv flan-t5-intent-judge-onnx flan-t5-base
```

### 2. 修改 Java 配置
```properties
cretas.ai.flan-t5.enabled=true
cretas.ai.flan-t5.engine=ONNX
cretas.ai.flan-t5.model-path=/www/wwwroot/cretas/models/flan-t5-base
```

### 3. 重启服务
```bash
cd /www/wwwroot/cretas && bash restart.sh
```