In [None]:
# 安装指定 commit 的 unsloth（10 月 23 日的版本）
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@bbdab300de3eb76a435999e92815de452560e51d"

# 安装你之前用过的兼容依赖版本
!pip install --no-deps "xformers<0.0.26" "trl<0.9.0" "peft<0.12.0" "accelerate<0.32.0" "bitsandbytes<0.44.0" "transformers<4.43.0"


Collecting unsloth@ git+https://github.com/unslothai/unsloth.git@bbdab300de3eb76a435999e92815de452560e51d (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git@bbdab300de3eb76a435999e92815de452560e51d)
  Cloning https://github.com/unslothai/unsloth.git (to revision bbdab300de3eb76a435999e92815de452560e51d) to /tmp/pip-install-x3071af8/unsloth_c3cc16bd96a54ca59400e742f4f97657
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-x3071af8/unsloth_c3cc16bd96a54ca59400e742f4f97657
  Running command git rev-parse -q --verify 'sha^bbdab300de3eb76a435999e92815de452560e51d'
  Running command git fetch -q https://github.com/unslothai/unsloth.git bbdab300de3eb76a435999e92815de452560e51d
  Running command git checkout -q bbdab300de3eb76a435999e92815de452560e51d
  Resolved https://github.com/unslothai/unsloth.git to commit bbdab300de3eb76a435999e92815de452560e51d
  Installing build dependencies ... [?25l[?25hdone
  Getti

In [None]:


from google.colab import drive
import os, gc, torch, json, shutil
from datetime import datetime
import pandas as pd
from tqdm import tqdm
from datasets import load_dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

drive.mount('/content/drive')


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.




🦥 Unsloth Zoo will now patch everything to make training faster!
Mounted at /content/drive


In [None]:


# =====================================================
# 🎛️ 配置中心
# =====================================================
class Config:
    """Class-level settings shared by every experiment in this notebook.

    The class is never instantiated; all values are read as ``Config.NAME``.
    The first group is overwritten per experiment by ``update_from_dict``;
    the second group is not touched by ``update_from_dict``.
    """

    # ---- Per-experiment settings (overwritten by update_from_dict) ----
    CURRENT_EXPERIMENT = "default"
    PROMPT_VERSION = "v2"
    LEARNING_RATE = 3e-4
    LORA_R = 16
    LORA_ALPHA = 32
    SAMPLE_SIZE = 3000
    MAX_STEPS = 300
    NUM_EPOCHS = 2

    # ---- Fixed settings: model and storage ----
    MODEL_NAME = "unsloth/Meta-Llama-3.1-8B"
    MAX_SEQ_LENGTH = 2048
    LOAD_IN_4BIT = True
    BASE_DIR = "/content/drive/MyDrive/colab_math_sft"

    # ---- Fixed settings: optimisation ----
    VAL_SPLIT = 0.1
    BATCH_SIZE = 16
    GRADIENT_ACCUMULATION = 2
    LORA_DROPOUT = 0.1
    WEIGHT_DECAY = 0.01
    WARMUP_STEPS = 50

    # ---- Fixed settings: generation during evaluation ----
    MAX_NEW_TOKENS = 5
    TEMPERATURE = 0.1
    DO_SAMPLE = False

    # ---- Fixed settings: trainer cadence (in steps) ----
    LOGGING_STEPS = 20
    SAVE_STEPS = 100
    EVAL_STEPS = 100

    @classmethod
    def update_from_dict(cls, config_dict):
        """Overwrite the per-experiment settings from one experiment dict.

        ``config_dict`` must provide the keys ``name``, ``prompt``, ``lr``,
        ``r``, ``alpha``, ``samples`` and ``steps``. A missing key raises
        ``KeyError``; attributes assigned before the missing key keep their
        new values. ``NUM_EPOCHS`` becomes 2 when ``samples`` is at most
        3000 and 1 otherwise.
        """
        key_to_attribute = (
            ("name", "CURRENT_EXPERIMENT"),
            ("prompt", "PROMPT_VERSION"),
            ("lr", "LEARNING_RATE"),
            ("r", "LORA_R"),
            ("alpha", "LORA_ALPHA"),
            ("samples", "SAMPLE_SIZE"),
            ("steps", "MAX_STEPS"),
        )
        for key, attribute in key_to_attribute:
            setattr(cls, attribute, config_dict[key])

        if config_dict["samples"] <= 3000:
            cls.NUM_EPOCHS = 2
        else:
            cls.NUM_EPOCHS = 1

os.makedirs(Config.BASE_DIR, exist_ok=True)

# =====================================================
# 实验配置库
# =====================================================
def _experiment(name, prompt, lr, r, alpha, samples, steps):
    """Build one experiment dict with the keys Config.update_from_dict reads."""
    return {
        "name": name,
        "prompt": prompt,
        "lr": lr,
        "r": r,
        "alpha": alpha,
        "samples": samples,
        "steps": steps,
    }


def _stage2(name, lr, r, alpha):
    """Build a stage-2 experiment: prompt v4, 50,000 samples, 2,000 steps."""
    return _experiment(name, "v4", lr, r, alpha, samples=50000, steps=2000)


# Stage 1: prompt selection. Identical hyper-parameters on a small sample,
# only the prompt version differs.
STAGE1_EXPERIMENTS = [
    _experiment(f"stage1_prompt_{version}", version,
                lr=3e-4, r=16, alpha=32, samples=3000, steps=300)
    for version in ("v1", "v2", "v3", "v4", "v5", "v6")
]

# Stage 2, own runs. Only one experiment is currently active.
# Disabled learning-rate sweep at r=16, alpha=32, 10,000 samples, 2,000 steps:
#   stage2_baseline (lr=3e-4), stage2_lr1e-4 (lr=1e-4), stage2_lr5e-4 (lr=5e-4)
STAGE2_EXPERIMENTS_YOU = [
    _stage2("stage2_lr7e-4_r32_50k_2000", lr=7e-4, r=32, alpha=64),
]

# Stage 2, friend A: LoRA rank sweep at a fixed learning rate of 3e-4.
STAGE2_EXPERIMENTS_FRIEND_A = [
    _stage2("stage2_r8", lr=3e-4, r=8, alpha=16),
    _stage2("stage2_r32", lr=3e-4, r=32, alpha=64),
    _stage2("stage2_r64", lr=3e-4, r=64, alpha=128),
]

# Stage 2, friend B: combinations probing learning-rate / rank interaction.
STAGE2_EXPERIMENTS_FRIEND_B = [
    _stage2("stage2_lr5e-4_r32", lr=5e-4, r=32, alpha=64),  # aggressive lr, medium rank
    _stage2("stage2_lr1e-4_r8", lr=1e-4, r=8, alpha=16),    # conservative lr, small rank
    _stage2("stage2_lr2e-4_r16", lr=2e-4, r=16, alpha=32),  # middle-of-the-road setting
]

# Every active stage-2 experiment (1 + 3 + 3 = 7 entries).
STAGE2_EXPERIMENTS_ALL = STAGE2_EXPERIMENTS_YOU + STAGE2_EXPERIMENTS_FRIEND_A + STAGE2_EXPERIMENTS_FRIEND_B

# =====================================================
# Prompt模板库
# =====================================================
def _llama3_infer_prompt(user_block, system_msg=None):
    """Assemble a Llama-3 chat prompt that ends at the open assistant turn.

    ``user_block`` is the user message (with its ``{}`` placeholders);
    ``system_msg`` is an optional system message placed before it.
    """
    pieces = ["<|begin_of_text|>"]
    if system_msg is not None:
        pieces.append("<|start_header_id|>system<|end_header_id|>\n\n" + system_msg + "<|eot_id|>")
    pieces.append("<|start_header_id|>user<|end_header_id|>\n\n" + user_block + "<|eot_id|>")
    pieces.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
    return "".join(pieces)


def _prompt_entry(user_block, system_msg=None, true_word="True", false_word="False"):
    """Build one TEMPLATES entry; the train template is the infer template plus an answer slot."""
    infer_prompt = _llama3_infer_prompt(user_block, system_msg)
    return {
        "train": infer_prompt + "{}",
        "infer": infer_prompt,
        "answer": {"true": true_word, "false": false_word},
    }


class PromptLibrary:
    """Versioned prompt templates (v1-v6) for the solution-verification task.

    Each entry of ``TEMPLATES`` has three keys:
      * ``train``  - template with three ``{}`` slots: question, solution, answer
      * ``infer``  - the same text without the answer slot (two ``{}`` slots)
      * ``answer`` - the words used for a correct / incorrect label
    """

    TEMPLATES = {
        "v1": _prompt_entry(
            "Question: {}\nSolution: {}\nIs this solution correct?",
            system_msg="You are a mathematics verification expert.",
        ),
        "v2": _prompt_entry(
            "Question: {}\nSolution: {}\nAnswer (True/False):",
            system_msg='You are an expert at verifying mathematical solutions. '
                       'Respond with ONLY "True" if correct or "False" if incorrect.',
        ),
        "v3": _prompt_entry(
            "Question: {}\nSolution: {}\nVerification:",
            system_msg='You are a mathematics expert. Analyze step by step and '
                       'conclude with "CORRECT" or "INCORRECT".',
            true_word="CORRECT",
            false_word="INCORRECT",
        ),
        # v4 is the only version without a system message.
        "v4": _prompt_entry(
            "Math Problem: {}\nProposed Solution: {}\nIs this solution correct? Answer True or False:",
        ),
        "v5": _prompt_entry(
            "Problem: {}\nSolution: {}\nAfter thorough verification, this solution is:",
            system_msg="You are a rigorous math validator. Check each calculation step "
                       "carefully before giving your final judgment.",
        ),
        "v6": _prompt_entry(
            "Question: {}\nGiven Solution: {}\nValid? (True/False)",
            system_msg="You are a math assessment AI. Provide only the final answer.",
        ),
    }

    @classmethod
    def get(cls, version, mode="train"):
        """Return the ``mode`` entry of prompt ``version``; unknown names raise ``KeyError``."""
        entry = cls.TEMPLATES[version]
        return entry[mode]

    @classmethod
    def format_answer(cls, is_correct, version):
        """Return the answer word prompt ``version`` uses for a truthy / falsy label."""
        label_key = "true" if is_correct else "false"
        return cls.TEMPLATES[version]["answer"][label_key]

# =====================================================
# 输出解析器
# =====================================================
def parse_output(response_text, prompt_version):
    """Turn raw generated text into a boolean verdict.

    Args:
        response_text: Decoded model output. If it still contains the
            assistant header, only the text after the last header is used.
        prompt_version: Prompt version key; ``"v3"`` uses CORRECT / INCORRECT
            as its answer words and is checked for those first.

    Returns:
        ``True`` when the text indicates a correct solution, ``False``
        otherwise. Text with no recognisable verdict, or with equally many
        positive and negative cues, yields ``False``.
    """
    assistant_header = "<|start_header_id|>assistant<|end_header_id|>"
    if assistant_header in response_text:
        response_text = response_text.split(assistant_header)[-1]

    response_lower = response_text.lower().strip()
    response_lower = response_lower.replace("<|eot_id|>", "").replace("<|end_of_text|>", "").strip()

    # "incorrect" contains "correct", so a raw count of "correct" also counts
    # every "incorrect". Subtract them to get the stand-alone positive hits.
    incorrect_hits = response_lower.count("incorrect")
    correct_hits = response_lower.count("correct") - incorrect_hits

    if prompt_version == "v3":
        # Any "incorrect" decides the verdict; otherwise a "correct" does.
        if incorrect_hits:
            return False
        if correct_hits:
            return True

    if response_lower.startswith("true"):
        return True
    if response_lower.startswith("false"):
        return False

    # Fallback: majority vote over the cue words anywhere in the text.
    true_count = response_lower.count("true") + correct_hits
    false_count = response_lower.count("false") + incorrect_hits

    return true_count > false_count

# =====================================================
# ✨ 新增：实验状态管理
# =====================================================
def is_experiment_completed(experiment_name):
    """Report whether the shared results file already holds this experiment.

    Args:
        experiment_name: Name to look for in each entry's
            ``config['experiment_name']``.

    Returns:
        ``True`` if any stored entry carries that name; ``False`` if the
        results file does not exist or no entry matches.
    """
    result_file = f"{Config.BASE_DIR}/experiment_results.json"
    if not os.path.exists(result_file):
        return False

    with open(result_file, 'r') as f:
        results = json.load(f)

    for result in results.values():
        config = result['config']
        # Other readers in this file treat 'experiment_name' as optional, so
        # an entry without it is skipped here rather than raising KeyError.
        if 'experiment_name' in config and config['experiment_name'] == experiment_name:
            return True
    return False

def cleanup_experiment_files(experiment_name):
    """Delete an experiment's output directory (model files and checkpoints).

    Args:
        experiment_name: Sub-directory of ``Config.BASE_DIR`` to remove.

    Returns:
        None. Nothing is deleted when the name is empty or the directory
        does not exist; a failed deletion is reported and not raised.
    """
    # An empty name would make the path below resolve to BASE_DIR itself, and
    # removing that would also remove the stored experiment results.
    if experiment_name is None or not str(experiment_name).strip():
        print("⚠️ 实验名称为空，跳过清理")
        return

    output_dir = f"{Config.BASE_DIR}/{experiment_name}"

    if os.path.exists(output_dir):
        print(f"🧹 清理实验文件: {experiment_name}")
        try:
            # Removes the whole experiment directory, checkpoints included.
            shutil.rmtree(output_dir)
            print(f"✅ 已删除: {output_dir}")
        except OSError as e:
            # Best effort: a failed cleanup must not abort the experiment run.
            print(f"⚠️ 清理失败: {str(e)}")

# =====================================================
# 结果显示函数
# =====================================================
def show_experiment_results():
    """Print every stored experiment as a table, best accuracy first.

    Reads ``experiment_results.json`` under ``Config.BASE_DIR``. When the
    file does not exist a hint is printed instead. Returns None.
    """
    results_path = f"{Config.BASE_DIR}/experiment_results.json"
    if not os.path.exists(results_path):
        print("📊 还没有实验结果，请先运行一些实验!")
        return

    with open(results_path, 'r') as handle:
        stored = json.load(handle)

    heavy_rule = "=" * 80
    print(heavy_rule)
    print("📊 所有实验结果")
    print(heavy_rule)
    print(f"{'实验ID':<30} {'准确率':<10} {'Prompt':<8} {'LR':<12} {'LoRA r':<8}")
    print("-" * 80)

    # Highest accuracy first.
    ranked = sorted(stored.items(), key=lambda item: item[1]['accuracy'], reverse=True)
    for exp_id, entry in ranked:
        cfg = entry['config']
        row = (f"{exp_id:<30} {entry['accuracy']:.4f}    {cfg['prompt_version']:<8} "
               f"{cfg['learning_rate']:.1e}    {cfg['lora_r']:<8}")
        print(row)

    print(heavy_rule)

def analyze_stage1_results():
    """Rank the stage-1 experiments and recommend the best prompt version.

    Stage-1 entries are those whose ``config['experiment_name']`` contains
    ``'stage1'``. Returns the prompt version of the most accurate entry, or
    ``None`` when the results file is missing or holds no stage-1 entry.
    """
    results_path = f"{Config.BASE_DIR}/experiment_results.json"
    if not os.path.exists(results_path):
        print("⚠️ 还没有实验结果！")
        return None

    with open(results_path, 'r') as handle:
        all_results = json.load(handle)

    # Keep only entries that belong to stage 1.
    stage1_results = {}
    for exp_id, entry in all_results.items():
        if 'stage1' in entry['config'].get('experiment_name', ''):
            stage1_results[exp_id] = entry

    if not stage1_results:
        print("⚠️ 没有找到阶段一的实验结果！")
        return None

    def accuracy_of(item):
        return item[1]['accuracy']

    rule = "=" * 70
    print(rule)
    print("📊 阶段一：Prompt 选择结果")
    print(rule)

    for _, entry in sorted(stage1_results.items(), key=accuracy_of, reverse=True):
        acc = entry['accuracy']
        print(f"Prompt {entry['config']['prompt_version']}: {acc:.4f} ({acc*100:.2f}%)")

    _, winner = max(stage1_results.items(), key=accuracy_of)
    best_prompt = winner['config']['prompt_version']
    best_acc = winner['accuracy']

    print(rule)
    print(f"🏆 推荐使用: Prompt {best_prompt} (准确率: {best_acc:.4f})")
    print(f"💡 请修改 STAGE2_EXPERIMENTS 中所有 'prompt' 值为 '{best_prompt}'")
    print(rule)

    return best_prompt


# =====================================================
# 🌟 新增：使用阶段二已有的最佳模型
# =====================================================
def use_best_stage2_model():
    """Locate the saved model of the most accurate stage-2 experiment.

    Stage-2 entries are those whose ``config['experiment_name']`` contains
    ``'stage2'``. A summary of the best one is printed. Returns its
    ``model_path`` when that path exists on disk; returns ``None`` when the
    results file is missing, no stage-2 entry exists, or the path is absent.
    """
    results_path = f"{Config.BASE_DIR}/experiment_results.json"
    if not os.path.exists(results_path):
        print("❌ 找不到实验结果文件！")
        return None

    with open(results_path, 'r') as handle:
        all_results = json.load(handle)

    # Keep only stage-2 entries.
    stage2_results = {}
    for exp_id, entry in all_results.items():
        if 'stage2' in entry['config'].get('experiment_name', ''):
            stage2_results[exp_id] = entry

    if not stage2_results:
        print("❌ 找不到阶段二的结果！")
        return None

    # Pick the entry with the highest accuracy.
    _, winner = max(stage2_results.items(), key=lambda item: item[1]['accuracy'])
    best_config = winner['config']
    best_acc = winner['accuracy']
    model_path = best_config.get('model_path')

    rule = "=" * 70
    print(rule)
    print("🏆 阶段二最佳模型")
    print(rule)
    print(f"实验名称: {best_config['experiment_name']}")
    print(f"准确率: {best_acc:.4f} ({best_acc*100:.2f}%)")
    print("配置:")
    print(f"  - Prompt: {best_config['prompt_version']}")
    print(f"  - 学习率: {best_config['learning_rate']:.1e}")
    print(f"  - LoRA r: {best_config['lora_r']}")
    print(f"  - LoRA α: {best_config['lora_alpha']}")
    print(f"模型路径: {model_path}")
    print(rule)

    if not (model_path and os.path.exists(model_path)):
        print("\n⚠️ 模型文件不存在，可能被删除了")
        print("💡 建议: 使用 train_final_model() 重新训练")
        return None

    print("\n✅ 模型已存在，可以直接使用！")
    print(f"📂 模型位置: {model_path}")
    print("\n💡 提交时使用这个模型即可，无需重新训练！")
    print("⏰ 节省时间: 约 1 小时")
    return model_path
# =====================================================
# 核心训练函数（✨ 修改版：阶段二保留模型）
# =====================================================
def run_single_experiment() -> float:
    """Train, evaluate and record the experiment currently described by Config.

    Steps, in order: load the base model and tokenizer, sample and split the
    dataset, format it with the selected prompt, attach LoRA adapters, train
    (resuming from the newest checkpoint in the output directory if one
    exists), measure accuracy on the validation split by generation, save
    the model for stage-2 experiments, append the outcome to the shared
    results files, free memory, and clean up files on disk.

    All settings are read from ``Config``; nothing is passed in.

    Returns:
        Validation accuracy as a fraction (correct predictions divided by
        the number of validation examples).
    """
    experiment_name = Config.CURRENT_EXPERIMENT
    output_dir = f"{Config.BASE_DIR}/{experiment_name}"
    os.makedirs(output_dir, exist_ok=True)

    print("="*70)
    print(f"🚀 开始实验: {experiment_name}")
    print("="*70)
    print(f"Prompt版本: {Config.PROMPT_VERSION}")
    print(f"学习率: {Config.LEARNING_RATE:.1e}")
    print(f"LoRA配置: r={Config.LORA_R}, alpha={Config.LORA_ALPHA}")
    print(f"数据量: {Config.SAMPLE_SIZE:,}")
    print(f"训练步数: {Config.MAX_STEPS}")
    print(f"输出目录: {output_dir}")
    print("="*70 + "\n")

    # 1. Load the model and tokenizer
    print("📦 加载模型...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=Config.MODEL_NAME,
        max_seq_length=Config.MAX_SEQ_LENGTH,
        dtype=None,
        load_in_4bit=Config.LOAD_IN_4BIT,
    )

    # Fall back to the EOS token for padding when the tokenizer defines none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id

    # 2. Load the data: fixed-seed shuffle, take the first SAMPLE_SIZE rows,
    #    then split off VAL_SPLIT of them for validation.
    print("📊 加载数据...")
    full_dataset = load_dataset("ad6398/nyu-dl-teach-maths-comp", split="train")
    full_dataset = full_dataset.shuffle(seed=42).select(range(Config.SAMPLE_SIZE))
    dataset_dict = full_dataset.train_test_split(test_size=Config.VAL_SPLIT, seed=42)
    train_dataset = dataset_dict['train']
    validation_dataset = dataset_dict['test']

    print(f"  训练集: {len(train_dataset):,} | 验证集: {len(validation_dataset):,}")

    # 3. Format the data with the selected prompt
    print(f"🔧 格式化数据...")

    # Batched map function: fills the train template with question, solution
    # (cut to its first 2000 characters) and answer word, then appends
    # <|eot_id|> to close the assistant turn.
    def format_dataset(examples):
        train_template = PromptLibrary.get(Config.PROMPT_VERSION, "train")
        texts = []
        for q, s, o in zip(examples["question"], examples["solution"], examples["is_correct"]):
            solution_str = str(s)[:2000]
            answer = PromptLibrary.format_answer(o, Config.PROMPT_VERSION)
            text = train_template.format(q, solution_str, answer) + "<|eot_id|>"
            texts.append(text)
        return {"text": texts}

    formatted_train = train_dataset.map(format_dataset, batched=True, remove_columns=train_dataset.column_names)
    formatted_val = validation_dataset.map(format_dataset, batched=True, remove_columns=validation_dataset.column_names)

    # 4. Attach LoRA adapters
    print("⚙️  配置LoRA...")
    model = FastLanguageModel.get_peft_model(
        model,
        r=Config.LORA_R,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha=Config.LORA_ALPHA,
        lora_dropout=Config.LORA_DROPOUT,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=42,
    )

    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"  可训练参数: {trainable:,} ({trainable/total*100:.2f}%)")

    # 5. Look for a checkpoint to resume from: the "checkpoint-<N>" directory
    #    with the largest N inside the output directory.
    resume_from_checkpoint = None
    if os.path.exists(output_dir):
        checkpoints = [d for d in os.listdir(output_dir) if d.startswith("checkpoint-")]
        if checkpoints:
            latest_checkpoint = max(checkpoints, key=lambda x: int(x.split("-")[1]))
            resume_from_checkpoint = os.path.join(output_dir, latest_checkpoint)
            print(f"🔄 发现断点: {resume_from_checkpoint}，将从此恢复训练")

    # 6. Train
    print("🎯 开始训练...\n")

    # NOTE(review): both num_train_epochs and max_steps are passed. Per the
    # transformers TrainingArguments docs a positive max_steps takes
    # precedence, so NUM_EPOCHS is presumably inert here - confirm.
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=formatted_train,
        eval_dataset=formatted_val,
        dataset_text_field="text",
        max_seq_length=Config.MAX_SEQ_LENGTH,
        args=TrainingArguments(
            output_dir=output_dir,
            per_device_train_batch_size=Config.BATCH_SIZE,
            gradient_accumulation_steps=Config.GRADIENT_ACCUMULATION,
            learning_rate=Config.LEARNING_RATE,
            num_train_epochs=Config.NUM_EPOCHS,
            max_steps=Config.MAX_STEPS,
            warmup_steps=Config.WARMUP_STEPS,
            fp16=not torch.cuda.is_bf16_supported(),
            bf16=torch.cuda.is_bf16_supported(),
            logging_steps=Config.LOGGING_STEPS,
            save_steps=Config.SAVE_STEPS,
            eval_steps=Config.EVAL_STEPS,
            save_strategy="steps",
            eval_strategy="steps",
            optim="adamw_8bit",
            weight_decay=Config.WEIGHT_DECAY,
            save_total_limit=2,
            load_best_model_at_end=True,
            metric_for_best_model="eval_loss",
            seed=42,
            report_to="none",
        ),
        packing=False,
    )

    torch.cuda.empty_cache()
    gc.collect()

    train_result = trainer.train(resume_from_checkpoint=resume_from_checkpoint)
    final_loss = train_result.training_loss

    print(f"\n✅ 训练完成! 最终损失: {final_loss:.4f}")

    # 7. Evaluate: generate an answer for every raw validation example (one
    #    prompt at a time) and compare the parsed verdict with the label.
    print("📊 评估验证集...")
    FastLanguageModel.for_inference(model)

    infer_template = PromptLibrary.get(Config.PROMPT_VERSION, "infer")
    correct = 0

    for example in tqdm(validation_dataset, desc="评估", leave=False):
        q = example["question"]
        s = str(example["solution"])[:2000]  # same 2000-character cut as in training
        label = example["is_correct"]

        prompt = infer_template.format(q, s)
        # NOTE(review): the template already begins with <|begin_of_text|>;
        # if the tokenizer also prepends a BOS token the prompt would carry
        # two - confirm against the tokenizer's settings.
        inputs = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=Config.MAX_SEQ_LENGTH).to("cuda")

        with torch.no_grad():
            # NOTE(review): temperature is passed together with
            # do_sample=Config.DO_SAMPLE (False by default); it presumably
            # has no effect without sampling - confirm.
            outputs = model.generate(
                **inputs,
                max_new_tokens=Config.MAX_NEW_TOKENS,
                temperature=Config.TEMPERATURE,
                do_sample=Config.DO_SAMPLE,
                pad_token_id=tokenizer.pad_token_id
            )

        # Special tokens are kept so parse_output can split on the assistant header.
        response = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
        pred = parse_output(response, Config.PROMPT_VERSION)

        if pred == label:
            correct += 1

    accuracy = correct / len(validation_dataset)

    print(f"✅ 验证集准确率: {accuracy:.4f} ({accuracy*100:.2f}%)")

    # 8. Save the model - only for experiments whose name contains "stage2"
    model_path = None
    if "stage2" in experiment_name:
        # Stage 2: keep the final model
        model_path = f"{output_dir}/final_model"
        model.save_pretrained(model_path)
        tokenizer.save_pretrained(model_path)
        print(f"💾 阶段二模型已保存: {model_path}")
    else:
        # Other experiments (stage 1): nothing is saved, to save disk space
        print(f"💾 阶段一模型不保存（节省空间）")

    # 9. Record the experiment outcome
    experiment_info = {
        "experiment_name": experiment_name,
        "prompt_version": Config.PROMPT_VERSION,
        "learning_rate": Config.LEARNING_RATE,
        "lora_r": Config.LORA_R,
        "lora_alpha": Config.LORA_ALPHA,
        "sample_size": Config.SAMPLE_SIZE,
        "max_steps": Config.MAX_STEPS,
        "num_epochs": Config.NUM_EPOCHS,
        "final_loss": final_loss,
        "validation_accuracy": accuracy,
        "timestamp": datetime.now().isoformat(),
        "model_path": model_path  # None unless the model was saved in step 8
    }

    # Merge into the shared results file (read, update one key, rewrite).
    result_file = f"{Config.BASE_DIR}/experiment_results.json"
    if os.path.exists(result_file):
        with open(result_file, 'r') as f:
            all_results = json.load(f)
    else:
        all_results = {}

    # The key combines prompt, learning rate, rank and name; re-running the
    # same configuration overwrites its earlier entry.
    exp_id = f"{Config.PROMPT_VERSION}_lr{Config.LEARNING_RATE:.1e}_r{Config.LORA_R}_{experiment_name}"
    all_results[exp_id] = {
        "accuracy": accuracy,
        "config": experiment_info
    }

    with open(result_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Append-only, human-readable log: one line per finished run.
    with open(f"{Config.BASE_DIR}/results_readable.txt", 'a') as f:
        f.write(f"{exp_id}: {accuracy:.4f} ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n")

    print(f"💾 结果已保存到: {result_file}")

    # 10. Free memory before the next experiment
    print("🧹 清理内存...")
    del model, tokenizer, trainer
    torch.cuda.empty_cache()
    gc.collect()

    # 11. Selective file cleanup
    if "stage2" in experiment_name:
        # Stage 2: delete the checkpoint-* directories but keep final_model
        print("🗑️  清理训练 checkpoint（保留最终模型）...")
        if os.path.exists(output_dir):
            checkpoints = [d for d in os.listdir(output_dir) if d.startswith("checkpoint-")]
            for ckpt in checkpoints:
                ckpt_path = os.path.join(output_dir, ckpt)
                try:
                    shutil.rmtree(ckpt_path)
                    print(f"   ✅ 已删除: {ckpt}")
                except Exception as e:
                    # Best effort: a checkpoint that cannot be removed is only reported.
                    print(f"   ⚠️ 删除失败: {ckpt} - {str(e)}")
        print(f"✅ 保留最终模型: {model_path}")
    else:
        # Other experiments (stage 1): delete the whole experiment directory
        cleanup_experiment_files(experiment_name)

    print("\n" + "🎉"*35)
    print(f"🎯 实验完成: {experiment_name}")
    print(f"📊 验证集准确率: {accuracy:.4f} ({accuracy*100:.2f}%)")
    if model_path:
        print(f"💾 模型已保存: {model_path}")
    print("🎉"*35)

    return accuracy
# =====================================================
# 批量实验运行器（支持断点续跑）
# =====================================================
def run_experiments(experiment_list, stage_name):
    """Run a list of experiments in order, skipping those already recorded.

    Args:
        experiment_list: Experiment dicts in the shape
            ``Config.update_from_dict`` expects; ``"name"`` identifies each.
        stage_name: Label printed in the progress and summary banners.

    Returns:
        None. Progress and a summary of this round are printed.

    An experiment that raises stops the batch: the error and its traceback
    are printed, the experiment is listed as failed in the summary, and the
    remaining experiments are not started.
    """
    # Function-scope import so the notebook's import cell needs no change.
    import traceback

    print("="*70)
    print(f"🚀 {stage_name}")
    print("="*70)
    print(f"📋 共 {len(experiment_list)} 个实验\n")

    # Split the list into already-recorded names and configs still to run.
    completed = []
    pending = []
    for exp_config in experiment_list:
        if is_experiment_completed(exp_config["name"]):
            completed.append(exp_config["name"])
        else:
            pending.append(exp_config)

    if completed:
        print(f"✅ 已完成 {len(completed)} 个实验，将跳过:")
        for name in completed:
            print(f"   - {name}")
        print()

    if not pending:
        print("🎉 所有实验都已完成！")
        return

    print(f"📝 待运行 {len(pending)} 个实验:")
    for exp_config in pending:
        print(f"   - {exp_config['name']}")
    print("\n" + "="*70 + "\n")

    # Run what is left; results maps name -> accuracy, or None on failure.
    results = {}
    for position, exp_config in enumerate(pending, start=1):
        print(f"\n{'='*70}")
        print(f"🚀 运行实验 {position}/{len(pending)}: {exp_config['name']}")
        print(f"{'='*70}\n")

        Config.update_from_dict(exp_config)

        try:
            accuracy = run_single_experiment()
        except Exception as e:
            # Batch boundary: report the failure with its full traceback so
            # the cause is not lost, then stop instead of starting more runs.
            print(f"\n❌ 实验 {exp_config['name']} 失败: {str(e)}")
            traceback.print_exc()
            print(f"💡 提示: 重新运行此 Cell 将从下一个实验继续\n")
            results[exp_config['name']] = None
            break
        else:
            results[exp_config['name']] = accuracy
            print(f"\n✅ 实验 {exp_config['name']} 完成: {accuracy:.4f}")
            print(f"📊 进度: {position}/{len(pending)} 完成\n")

    # Summary of this round only.
    if results:
        print("\n" + "="*70)
        print(f"📊 {stage_name} - 本轮完成的实验")
        print("="*70)
        for name, acc in results.items():
            if acc is not None:
                print(f"{name:<30} {acc:.4f} ({acc*100:.2f}%)")
            else:
                print(f"{name:<30} 失败")
        print("="*70 + "\n")

In [None]:

# =====================================================
# %% Cell 2: 运行阶段一 - Prompt 选择
# =====================================================
def run_stage1():
    """Run the stage-1 prompt-selection experiments, then show and analyse results."""
    stage_title = "阶段一：Prompt 选择"
    run_experiments(STAGE1_EXPERIMENTS, stage_title)

    # Print a divider, then the table of all recorded results.
    divider = "=" * 70
    print(f"\n{divider}")
    show_experiment_results()

    # Spacer, then the stage-1 analysis helper.
    print("\n")
    analyze_stage1_results()

In [None]:
# Launch stage 1 (prompt selection); run_experiments skips experiments
# that is_experiment_completed() already reports as done.
run_stage1()

🚀 阶段一：Prompt 选择
📋 共 6 个实验

📝 待运行 6 个实验:
   - stage1_prompt_v1
   - stage1_prompt_v2
   - stage1_prompt_v3
   - stage1_prompt_v4
   - stage1_prompt_v5
   - stage1_prompt_v6



🚀 运行实验 1/6: stage1_prompt_v1

🚀 开始实验: stage1_prompt_v1
Prompt版本: v1
学习率: 3.0e-04
LoRA配置: r=16, alpha=32
数据量: 3,000
训练步数: 300
输出目录: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v1

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

📊 加载数据...


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/3.65M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

  训练集: 2,700 | 验证集: 300
🔧 格式化数据...


Map:   0%|          | 0/2700 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


⚙️  配置LoRA...


Unsloth 2025.10.8 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


  可训练参数: 41,943,040 (0.90%)
🔄 发现断点: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v1/checkpoint-100，将从此恢复训练
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/2700 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/300 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,700 | Num Epochs = 4 | Total steps = 300
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
150,0.8567,0.995114
200,0.7556,1.008277
250,0.7635,0.999056
300,0.6061,1.044212


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient



✅ 训练完成! 最终损失: 0.5083
📊 评估验证集...




✅ 验证集准确率: 0.7500 (75.00%)
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🧹 清理实验文件: stage1_prompt_v1
✅ 已删除: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v1

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage1_prompt_v1
📊 验证集准确率: 0.7500 (75.00%)
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage1_prompt_v1 完成: 0.7500
📊 进度: 1/6 完成


🚀 运行实验 2/6: stage1_prompt_v2

🚀 开始实验: stage1_prompt_v2
Prompt版本: v2
学习率: 3.0e-04
LoRA配置: r=16, alpha=32
数据量: 3,000
训练步数: 300
输出目录: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v2

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading 

Map:   0%|          | 0/2700 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

⚙️  配置LoRA...
  可训练参数: 41,943,040 (0.90%)
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/2700 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/300 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,700 | Num Epochs = 4 | Total steps = 300
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss,Validation Loss
50,1.0478,0.965285
100,0.8651,0.931297
150,0.8166,0.915524
200,0.7055,0.94715
250,0.7065,0.92977
300,0.5757,0.984704



✅ 训练完成! 最终损失: 0.8543
📊 评估验证集...




✅ 验证集准确率: 0.7400 (74.00%)
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🧹 清理实验文件: stage1_prompt_v2
✅ 已删除: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v2

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage1_prompt_v2
📊 验证集准确率: 0.7400 (74.00%)
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage1_prompt_v2 完成: 0.7400
📊 进度: 2/6 完成


🚀 运行实验 3/6: stage1_prompt_v3

🚀 开始实验: stage1_prompt_v3
Prompt版本: v3
学习率: 3.0e-04
LoRA配置: r=16, alpha=32
数据量: 3,000
训练步数: 300
输出目录: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v3

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading 

Map:   0%|          | 0/2700 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

⚙️  配置LoRA...
  可训练参数: 41,943,040 (0.90%)
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/2700 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/300 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,700 | Num Epochs = 4 | Total steps = 300
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss,Validation Loss
50,1.0605,0.975456
100,0.874,0.940343
150,0.8267,0.92323
200,0.7192,0.952904
250,0.7198,0.938597
300,0.5894,0.989764



✅ 训练完成! 最终损失: 0.8650
📊 评估验证集...




✅ 验证集准确率: 0.6333 (63.33%)
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🧹 清理实验文件: stage1_prompt_v3
✅ 已删除: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v3

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage1_prompt_v3
📊 验证集准确率: 0.6333 (63.33%)
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage1_prompt_v3 完成: 0.6333
📊 进度: 3/6 完成


🚀 运行实验 4/6: stage1_prompt_v4

🚀 开始实验: stage1_prompt_v4
Prompt版本: v4
学习率: 3.0e-04
LoRA配置: r=16, alpha=32
数据量: 3,000
训练步数: 300
输出目录: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v4

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading 

Map:   0%|          | 0/2700 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

⚙️  配置LoRA...
  可训练参数: 41,943,040 (0.90%)
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/2700 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/300 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,700 | Num Epochs = 4 | Total steps = 300
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss,Validation Loss
50,1.0592,0.978083
100,0.8695,0.941795
150,0.816,0.925501
200,0.6959,0.95965
250,0.6976,0.940775
300,0.5539,1.003205



✅ 训练完成! 最终损失: 0.8406
📊 评估验证集...




✅ 验证集准确率: 0.7833 (78.33%)
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🧹 清理实验文件: stage1_prompt_v4
✅ 已删除: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v4

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage1_prompt_v4
📊 验证集准确率: 0.7833 (78.33%)
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage1_prompt_v4 完成: 0.7833
📊 进度: 4/6 完成


🚀 运行实验 5/6: stage1_prompt_v5

🚀 开始实验: stage1_prompt_v5
Prompt版本: v5
学习率: 3.0e-04
LoRA配置: r=16, alpha=32
数据量: 3,000
训练步数: 300
输出目录: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v5

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading 

Map:   0%|          | 0/2700 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

⚙️  配置LoRA...
  可训练参数: 41,943,040 (0.90%)
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/2700 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/300 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,700 | Num Epochs = 4 | Total steps = 300
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss,Validation Loss
50,1.0624,0.981981
100,0.879,0.944742
150,0.8304,0.929988
200,0.7244,0.957995
250,0.7257,0.943877
300,0.5943,0.998513



✅ 训练完成! 最终损失: 0.8745
📊 评估验证集...




✅ 验证集准确率: 0.7767 (77.67%)
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🧹 清理实验文件: stage1_prompt_v5
✅ 已删除: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v5

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage1_prompt_v5
📊 验证集准确率: 0.7767 (77.67%)
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage1_prompt_v5 完成: 0.7767
📊 进度: 5/6 完成


🚀 运行实验 6/6: stage1_prompt_v6

🚀 开始实验: stage1_prompt_v6
Prompt版本: v6
学习率: 3.0e-04
LoRA配置: r=16, alpha=32
数据量: 3,000
训练步数: 300
输出目录: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v6

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading 

Map:   0%|          | 0/2700 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

⚙️  配置LoRA...
  可训练参数: 41,943,040 (0.90%)
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/2700 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/300 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,700 | Num Epochs = 4 | Total steps = 300
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss,Validation Loss
50,1.0807,0.999838
100,0.8945,0.962364
150,0.8429,0.946313
200,0.7319,0.977935
250,0.7327,0.961941
300,0.5981,1.017176



✅ 训练完成! 最终损失: 0.8832
📊 评估验证集...




✅ 验证集准确率: 0.7600 (76.00%)
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🧹 清理实验文件: stage1_prompt_v6
✅ 已删除: /content/drive/MyDrive/colab_math_sft/stage1_prompt_v6

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage1_prompt_v6
📊 验证集准确率: 0.7600 (76.00%)
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage1_prompt_v6 完成: 0.7600
📊 进度: 6/6 完成


📊 阶段一：Prompt 选择 - 本轮完成的实验
stage1_prompt_v1               0.7500 (75.00%)
stage1_prompt_v2               0.7400 (74.00%)
stage1_prompt_v3               0.6333 (63.33%)
stage1_prompt_v4               0.7833 (78.33%)
stage1_prompt_v5               0.7767 (77.67%)
stage1_prompt_v6               0.7600 (76.00%)


📊 所有实验结果
实验ID                           准确率        Prompt   LR           LoRA r  
--------------------------------------------------------------------------------
v4_lr3.0e-04_r16_stage1_prompt_v4 0.7833    v4       3.0e-04    16      
v5_lr3.0e-04_r16_stage1_prompt_v5 0.7767    v5       3.0e-04    16      
v6_lr3.0e-0

In [None]:
# =====================================================
# 运行阶段二 - 超参数调优
# =====================================================
def _run_stage2_part(experiments, *, header, stage_name, done_message,
                     results_filename, closing_hint):
    """Shared driver for one participant's share of the stage-2 experiments.

    Prints the participant banner and the list of experiments, runs them via
    ``run_experiments``, shows the results table, and finally prints
    instructions for exporting the results file.

    Args:
        experiments: List of experiment config dicts; each must provide the
            keys ``name``, ``lr``, ``r`` and ``alpha``.
        header: Banner line identifying the participant.
        stage_name: Stage title forwarded to ``run_experiments``.
        done_message: Line printed once the batch has returned.
        results_filename: File name the participant should rename the
            downloaded results file to.
        closing_hint: Final instruction line (printed verbatim).
    """
    divider = "=" * 70

    print(divider)
    print(header)
    print(divider)
    print("📋 实验列表:")
    for i, exp in enumerate(experiments, 1):
        print(f"   {i}. {exp['name']}")
        print(f"      lr={exp['lr']:.1e}, r={exp['r']}, alpha={exp['alpha']}")
    print(divider + "\n")

    run_experiments(experiments, stage_name)

    print("\n" + divider)
    print(done_message)
    print(divider)
    show_experiment_results()

    print("\n💾 请下载你的结果文件：")
    print(f"   {Config.BASE_DIR}/experiment_results.json")
    print(f"   重命名为: {results_filename}")
    print(closing_hint)


def run_stage2_you():
    """Run your own share of the stage-2 experiments (STAGE2_EXPERIMENTS_YOU)."""
    _run_stage2_part(
        STAGE2_EXPERIMENTS_YOU,
        header="👤 你：负责阶段二前 3 组实验（核心参数）",
        stage_name="阶段二（你的部分）",
        done_message="✅ 你的实验全部完成！",
        results_filename="your_results.json",
        closing_hint="   等朋友们完成后一起合并！",
    )


def run_stage2_friend_a():
    """Run friend A's share of the stage-2 experiments (STAGE2_EXPERIMENTS_FRIEND_A)."""
    _run_stage2_part(
        STAGE2_EXPERIMENTS_FRIEND_A,
        header="👤 朋友 A：负责阶段二中间 3 组实验（LoRA 探索）",
        stage_name="阶段二（朋友 A）",
        done_message="✅ 朋友 A 的实验全部完成！",
        results_filename="friend_a_results.json",
        closing_hint="   并分享给其他人合并！",
    )


def run_stage2_friend_b():
    """Run friend B's share of the stage-2 experiments (STAGE2_EXPERIMENTS_FRIEND_B)."""
    _run_stage2_part(
        STAGE2_EXPERIMENTS_FRIEND_B,
        header="👤 朋友 B：负责阶段二后 3 组实验（黄金组合）",
        stage_name="阶段二（朋友 B）",
        done_message="✅ 朋友 B 的实验全部完成！",
        results_filename="friend_b_results.json",
        closing_hint="   并分享给其他人合并！",
    )

In [None]:
# Run this user's share of the stage-2 experiments (STAGE2_EXPERIMENTS_YOU).
run_stage2_you()

👤 你：负责阶段二前 3 组实验（核心参数）
📋 实验列表:
   1. stage2_lr7e-4_r32_50k_2000
      lr=7.0e-04, r=32, alpha=64

🚀 阶段二（你的部分）
📋 共 1 个实验

📝 待运行 1 个实验:
   - stage2_lr7e-4_r32_50k_2000



🚀 运行实验 1/1: stage2_lr7e-4_r32_50k_2000

🚀 开始实验: stage2_lr7e-4_r32_50k_2000
Prompt版本: v4
学习率: 7.0e-04
LoRA配置: r=32, alpha=64
数据量: 50,000
训练步数: 2000
输出目录: /content/drive/MyDrive/colab_math_sft/stage2_lr7e-4_r32_50k_2000

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

📊 加载数据...


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/3.65M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

  训练集: 45,000 | 验证集: 5,000
🔧 格式化数据...


Map:   0%|          | 0/45000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


⚙️  配置LoRA...


Unsloth 2025.10.8 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


  可训练参数: 83,886,080 (1.78%)
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/45000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/5000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 45,000 | Num Epochs = 2 | Total steps = 2,000
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 2 x 1) = 32
 "-____-"     Trainable parameters = 83,886,080 of 8,114,147,328 (1.03% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
100,0.9678,0.965477
200,0.9861,0.959033
300,0.9384,0.929255
400,0.9166,0.919966
500,0.9122,0.908653
600,0.8923,0.884957
700,0.8654,0.864881
800,0.8612,0.845015
900,0.8238,0.827427
1000,0.8186,0.810626


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient



✅ 训练完成! 最终损失: 0.7947
📊 评估验证集...




✅ 验证集准确率: 0.8550 (85.50%)
💾 阶段二模型已保存: /content/drive/MyDrive/colab_math_sft/stage2_lr7e-4_r32_50k_2000/final_model
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🗑️  清理训练 checkpoint（保留最终模型）...
   ✅ 已删除: checkpoint-1900
   ✅ 已删除: checkpoint-2000
✅ 保留最终模型: /content/drive/MyDrive/colab_math_sft/stage2_lr7e-4_r32_50k_2000/final_model

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage2_lr7e-4_r32_50k_2000
📊 验证集准确率: 0.8550 (85.50%)
💾 模型已保存: /content/drive/MyDrive/colab_math_sft/stage2_lr7e-4_r32_50k_2000/final_model
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage2_lr7e-4_r32_50k_2000 完成: 0.8550
📊 进度: 1/1 完成


📊 阶段二（你的部分） - 本轮完成的实验
stage2_lr7e-4_r32_50k_2000     0.8550 (85.50%)


✅ 你的实验全部完成！
📊 所有实验结果
实验ID                           准确率        Prompt   LR           LoRA r  
--------------------------------------------------------------------------------
v4_lr7.0e-04_r32_stage2_lr7e-4_r32_50k_2000 0.8550    v4       7.0e-04    32      
v4_lr7.0e-04_r32_stage2_l

In [None]:
# Re-run this user's stage-2 share; experiments already reported as
# completed are skipped by run_experiments.
run_stage2_you()

👤 你：负责阶段二前 3 组实验（核心参数）
📋 实验列表:
   1. stage2_baseline
      lr=3.0e-04, r=16, alpha=32
   2. stage2_lr1e-4
      lr=1.0e-04, r=16, alpha=32

🚀 阶段二（你的部分）
📋 共 2 个实验

✅ 已完成 1 个实验，将跳过:
   - stage2_baseline

📝 待运行 1 个实验:
   - stage2_lr1e-4



🚀 运行实验 1/1: stage2_lr1e-4

🚀 开始实验: stage2_lr1e-4
Prompt版本: v4
学习率: 1.0e-04
LoRA配置: r=16, alpha=32
数据量: 10,000
训练步数: 2000
输出目录: /content/drive/MyDrive/colab_math_sft/stage2_lr1e-4

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
📊 加载数据...
  训练集: 9,000 | 验证集: 1,000
🔧 格式化数据...
⚙️  配置LoRA...
  可训练参数: 41,943,040 (0.90%)
🔄 发现断点: /content/drive/

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 9,000 | Num Epochs = 8 | Total steps = 2,000
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 2 x 1) = 32
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss,Validation Loss
400,0.8308,0.879507
500,0.7982,0.866159
600,0.7397,0.872403
700,0.7484,0.859914
800,0.7456,0.843944
900,0.6411,0.867879
1000,0.6327,0.862442
1100,0.6434,0.852012
1200,0.5619,0.884715
1300,0.5543,0.883647



✅ 训练完成! 最终损失: 0.5203
📊 评估验证集...




✅ 验证集准确率: 0.7970 (79.70%)
💾 阶段二模型已保存: /content/drive/MyDrive/colab_math_sft/stage2_lr1e-4/final_model
💾 结果已保存到: /content/drive/MyDrive/colab_math_sft/experiment_results.json
🧹 清理内存...
🗑️  清理训练 checkpoint（保留最终模型）...
   ✅ 已删除: checkpoint-800
   ✅ 已删除: checkpoint-2000
✅ 保留最终模型: /content/drive/MyDrive/colab_math_sft/stage2_lr1e-4/final_model

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
🎯 实验完成: stage2_lr1e-4
📊 验证集准确率: 0.7970 (79.70%)
💾 模型已保存: /content/drive/MyDrive/colab_math_sft/stage2_lr1e-4/final_model
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

✅ 实验 stage2_lr1e-4 完成: 0.7970
📊 进度: 1/1 完成


📊 阶段二（你的部分） - 本轮完成的实验
stage2_lr1e-4                  0.7970 (79.70%)


✅ 你的实验全部完成！
📊 所有实验结果
实验ID                           准确率        Prompt   LR           LoRA r  
--------------------------------------------------------------------------------
v4_lr3.0e-04_r16_stage2_baseline 0.8060    v4       3.0e-04    16      
v4_lr1.0e-04_r16_stage2_lr1e-4 0.7970    v4       1.0e-04    16      

💾 请下载你的结果文件：
   /content/drive/

In [None]:
# =====================================================
# 合并结果
# =====================================================
def merge_results(file_paths):
    """Merge several experiment-result JSON files and report the best run.

    Every existing file in ``file_paths`` is loaded and folded into one dict
    with ``dict.update``, so when the same experiment id appears in more than
    one file the later file wins. The merged dict is written to
    ``{Config.BASE_DIR}/experiment_results_merged.json``, a top-10 leaderboard
    sorted by accuracy is printed, and the best experiment's config is
    returned.

    Each result entry must provide ``accuracy`` and a ``config`` dict with the
    keys ``experiment_name``, ``learning_rate``, ``lora_r``, ``lora_alpha``
    and ``prompt_version``; ``model_path`` is optional.

    Args:
        file_paths: Iterable of paths to result JSON files. Missing files are
            reported and skipped.

    Returns:
        The ``config`` dict of the highest-accuracy experiment, or ``None``
        when no results could be loaded (nothing is written in that case).
    """
    divider = "=" * 70
    merged_results = {}

    print(divider)
    print("🔄 合并实验结果")
    print(divider)

    for file_path in file_paths:
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as f:
                results = json.load(f)
            merged_results.update(results)
            print(f"✅ 已加载: {os.path.basename(file_path)} ({len(results)} 个实验)")
        else:
            print(f"⚠️ 文件不存在: {file_path}")

    # Nothing loaded: bail out before writing, so an earlier merged file is
    # not overwritten with "{}" and the leaderboard code below does not hit
    # an IndexError on an empty ranking.
    if not merged_results:
        print("\n⚠️ 没有可合并的实验结果")
        print(divider)
        return None

    merged_file = f"{Config.BASE_DIR}/experiment_results_merged.json"
    with open(merged_file, 'w', encoding='utf-8') as f:
        json.dump(merged_results, f, indent=2)

    print(f"\n💾 合并结果已保存: {merged_file}")
    print(f"📊 总计 {len(merged_results)} 个实验")

    print("\n" + divider)
    print("📊 完整排行榜（Top 10）")
    print(divider)
    print(f"{'排名':<6} {'实验名称':<30} {'准确率':<10} {'配置':<30}")
    print("-"*70)

    # Highest accuracy first.
    sorted_results = sorted(merged_results.items(), key=lambda x: x[1]['accuracy'], reverse=True)

    for i, (exp_id, result) in enumerate(sorted_results[:10], 1):
        config = result['config']
        config_str = f"lr={config['learning_rate']:.1e} r={config['lora_r']}"
        print(f"{i:<6} {config['experiment_name']:<30} {result['accuracy']:.4f}    {config_str:<30}")

    print(divider)

    best_result = sorted_results[0][1]
    best_config = best_result['config']
    best_accuracy = best_result['accuracy']

    print("\n🏆 最佳配置:")
    print(f"   实验名称: {best_config['experiment_name']}")
    print(f"   准确率: {best_accuracy:.4f} ({best_accuracy*100:.2f}%)")
    print(f"   Prompt: {best_config['prompt_version']}")
    print(f"   学习率: {best_config['learning_rate']:.1e}")
    print(f"   LoRA r: {best_config['lora_r']}")
    print(f"   LoRA α: {best_config['lora_alpha']}")

    # Report the saved model only if its recorded path exists on this machine.
    model_path = best_config.get('model_path')
    if model_path and os.path.exists(model_path):
        print(f"   模型路径: {model_path}")
        print("\n✅ 模型已存在，可以直接使用！")
        print("💡 无需重新训练，节省 1 小时！")
    else:
        print("\n⚠️ 模型未找到，可能需要重新训练")

    print(divider)

    return best_config

# =====================================================
# 快速查看结果
# =====================================================
def quick_view():
    """Call show_experiment_results(), then print pointers to follow-up helpers."""
    show_experiment_results()

    divider = "=" * 70
    hint_lines = (
        "\n" + divider,
        "💡 查看详细分析:",
        "   - 阶段一分析: analyze_stage1_results()",
        "   - 使用最佳模型: use_best_stage2_model()",
        divider,
    )
    for hint in hint_lines:
        print(hint)

In [None]:
# Run friend B's share of the stage-2 experiments (STAGE2_EXPERIMENTS_FRIEND_B).
run_stage2_friend_b()

👤 朋友 B：负责阶段二后 3 组实验（边界探索）
📋 实验列表:
   1. stage2_lr5e-4_r32
      lr=5.0e-04, r=32, alpha=64
   2. stage2_lr1e-4_r8
      lr=1.0e-04, r=8, alpha=16
   3. stage2_lr2e-4_r16
      lr=2.0e-04, r=16, alpha=32

🚀 阶段二（朋友 B）
📋 共 3 个实验

📝 待运行 3 个实验:
   - stage2_lr5e-4_r32
   - stage2_lr1e-4_r8
   - stage2_lr2e-4_r16



🚀 运行实验 1/3: stage2_lr5e-4_r32

🚀 开始实验: stage2_lr5e-4_r32
Prompt版本: v4
学习率: 5.0e-04
LoRA配置: r=32, alpha=64
数据量: 50,000
训练步数: 2000
输出目录: /content/drive/MyDrive/colab_math_sft/stage2_lr5e-4_r32

📦 加载模型...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

📊 加载数据...


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/3.65M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

  训练集: 45,000 | 验证集: 5,000
🔧 格式化数据...


Map:   0%|          | 0/45000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


⚙️  配置LoRA...


Unsloth 2025.10.8 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


  可训练参数: 83,886,080 (1.78%)
🎯 开始训练...



Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/45000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/5000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 45,000 | Num Epochs = 2 | Total steps = 2,000
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 83,886,080 of 8,114,147,328 (1.03% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


KeyboardInterrupt: 

In [None]:



# =====================================================
# %% Cell 4: 使用最佳配置训练最终模型（可选）
# =====================================================
def train_final_model(best_config):
    """
    Train the final submission model with the chosen best configuration.

    Args:
        best_config: Dict providing 'prompt_version', 'learning_rate',
            'lora_r' and 'lora_alpha' (for example the dict returned by
            merge_results()).

    Returns:
        The value returned by run_single_experiment().
    """
    divider = "=" * 70

    print(divider)
    print("🏆 训练最终提交模型")
    print(divider)

    # Point the global Config at the final run; assignment order is kept.
    Config.CURRENT_EXPERIMENT = "final_model"
    tuned_fields = (
        ("PROMPT_VERSION", 'prompt_version'),
        ("LEARNING_RATE", 'learning_rate'),
        ("LORA_R", 'lora_r'),
        ("LORA_ALPHA", 'lora_alpha'),
    )
    for attr_name, config_key in tuned_fields:
        setattr(Config, attr_name, best_config[config_key])

    # Fixed settings for the final run: 50,000 samples, 2,000 steps, 1 epoch.
    fixed_fields = (
        ("SAMPLE_SIZE", 50000),
        ("MAX_STEPS", 2000),
        ("NUM_EPOCHS", 1),
    )
    for attr_name, fixed_value in fixed_fields:
        setattr(Config, attr_name, fixed_value)

    print("📝 使用配置:")
    print(f"   Prompt: {Config.PROMPT_VERSION}")
    print(f"   学习率: {Config.LEARNING_RATE:.1e}")
    print(f"   LoRA r: {Config.LORA_R}")
    print(f"   LoRA α: {Config.LORA_ALPHA}")
    print(f"   数据量: {Config.SAMPLE_SIZE:,}")
    print(divider + "\n")

    # Train and evaluate using the Config values set above.
    final_accuracy = run_single_experiment()

    # Tell the user where the final model is expected to live.
    print("\n💾 最终模型已保存，不会被自动删除")
    print(f"📂 模型路径: {Config.BASE_DIR}/final_model/")

    return final_accuracy

# =====================================================
# %% Cell 5: 快速查看结果
# =====================================================
def quick_view():
    """Call show_experiment_results(), then print pointers to the analysis helpers."""
    show_experiment_results()

    divider = "=" * 70
    hint_lines = (
        "\n" + divider,
        "💡 查看详细分析:",
        "   - 阶段一分析: analyze_stage1_results()",
        "   - 阶段二分析: analyze_stage2_results()",
        divider,
    )
    for hint in hint_lines:
        print(hint)

# =====================================================
# 使用说明
# =====================================================
"""
📖 使用流程:

1️⃣ 运行阶段一（Prompt 选择）:
   在 Cell 2 中运行:
   >>> run_stage1()

   - 如果 GPU 断掉，重新运行即可自动跳过已完成的实验
   - 完成后会显示推荐的最佳 Prompt

2️⃣ 更新阶段二配置:
   修改 STAGE2_EXPERIMENTS 中所有 'prompt' 字段为阶段一推荐的版本

3️⃣ 运行阶段二（超参数调优）:
   在 Cell 3 中运行:
   >>> run_stage2()

   - 同样支持断点续跑
   - 完成后会显示最佳配置

4️⃣ 训练最终模型（可选）:
   在 Cell 4 中运行:
   >>> best = analyze_stage2_results()
   >>> train_final_model(best)

5️⃣ 随时查看结果:
   >>> quick_view()
   >>> show_experiment_results()
   >>> analyze_stage1_results()
   >>> analyze_stage2_results()

⚠️ 重要提示:
- 每个小实验完成后会自动删除模型文件，只保留准确率
- 如果要保留某个模型，请在训练完成后立即复制到其他位置
- 最终模型（final_model）不会被自动删除
- 所有结果都保存在 Google Drive 中，不会因断网丢失
"""

# =====================================================
# 运行示例（根据需要取消注释）
# =====================================================

# 示例 1: 运行阶段一
# run_stage1()

# 示例 2: 运行阶段二（记得先更新 STAGE2_EXPERIMENTS 的 prompt）
# run_stage2()

# 示例 3: 查看结果
# quick_view()

# 示例 4: 训练最终模型
# best_config = analyze_stage2_results()
# if best_config:
#     train_final_model(best_config)

# Startup banner: confirm the definitions loaded and list the usual workflow.
# A single print with sep="\n" emits the same lines as one print per line.
print(
    "✅ 所有函数已加载完成！",
    "\n📖 使用指南:",
    "=" * 70,
    "1. 运行阶段一: run_stage1()",
    "2. 分析结果:   analyze_stage1_results()",
    "3. 更新配置:   修改 STAGE2_EXPERIMENTS 中的 prompt 字段",
    "4. 运行阶段二: run_stage2()",
    "5. 查看结果:   quick_view()",
    "=" * 70,
    "\n💡 提示: 所有实验支持断点续跑，GPU 断了重新运行即可！",
    sep="\n",
)