In [None]:
# 安装指定 commit 的 unsloth（10 月 23 日的版本）
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@bbdab300de3eb76a435999e92815de452560e51d"

# 安装你之前用过的兼容依赖版本
!pip install --no-deps "xformers<0.0.26" "trl<0.9.0" "peft<0.12.0" "accelerate<0.32.0" "bitsandbytes<0.44.0" "transformers<4.43.0"


Collecting unsloth@ git+https://github.com/unslothai/unsloth.git@bbdab300de3eb76a435999e92815de452560e51d (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git@bbdab300de3eb76a435999e92815de452560e51d)
  Cloning https://github.com/unslothai/unsloth.git (to revision bbdab300de3eb76a435999e92815de452560e51d) to /tmp/pip-install-5s44y2u5/unsloth_310bf5bfc1434b2c9783c36c458d45ce
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-5s44y2u5/unsloth_310bf5bfc1434b2c9783c36c458d45ce
  Running command git rev-parse -q --verify 'sha^bbdab300de3eb76a435999e92815de452560e51d'
  Running command git fetch -q https://github.com/unslothai/unsloth.git bbdab300de3eb76a435999e92815de452560e51d
  Running command git checkout -q bbdab300de3eb76a435999e92815de452560e51d
  Resolved https://github.com/unslothai/unsloth.git to commit bbdab300de3eb76a435999e92815de452560e51d
  Installing build dependencies ... [?25l[?25hdone
  Getti

In [None]:

from google.colab import drive
import os, gc, torch, json, shutil
from datetime import datetime
import pandas as pd
from tqdm import tqdm
from datasets import load_dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

# Mount Google Drive at /content/drive; the model directory used later in this
# notebook lives under /content/drive/MyDrive.
drive.mount('/content/drive')

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.




🦥 Unsloth Zoo will now patch everything to make training faster!
Mounted at /content/drive


In [None]:

# =====================================================
# 🚀 Load the model
# =====================================================
# Directory (on the mounted Drive) holding the fine-tuned model to evaluate.
model_path = "/content/drive/MyDrive/colab_math_sft/stage2_lr7e-4_r32_50k_2000/final_model"  # 🔧 change this to your model directory

print("加载模型中...")
# Load model and tokenizer together, with 4-bit loading enabled.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_path,
    load_in_4bit=True,
)

# Put the model into Unsloth's inference mode before calling generate().
FastLanguageModel.for_inference(model)

# =====================================================
# 📊 Load the test set
# =====================================================
test_dataset = load_dataset("ad6398/nyu-dl-teach-maths-comp", split="test")
# Collects one boolean prediction per test example, in dataset order.
predictions = []

# =====================================================
# 🎯 Inference settings
# =====================================================
# Prompt template with two positional placeholders: the question first, then the
# proposed solution. It ends with the assistant header so that generation
# continues with the assistant's answer.
inference_prompt = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Math Problem: {}
Proposed Solution: {}
Is this solution correct? Answer True or False:<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

def parse_output(response_text):
    """Extract a True/False verdict from decoded model output.

    Args:
        response_text: Decoded text from the model. It may contain the whole
            prompt; in that case only the text after the last assistant
            header is examined.

    Returns:
        bool: True or False when the answer starts with that word
        (case-insensitive). Otherwise True only if the standalone word
        "true" occurs more often than the standalone word "false"; ties and
        answers containing neither word yield False.
    """
    import re  # local import so the function does not rely on another cell's imports

    assistant_header = "<|start_header_id|>assistant<|end_header_id|>"
    if assistant_header in response_text:
        response_text = response_text.split(assistant_header)[-1]

    response_lower = response_text.lower().strip()
    # Drop end-of-turn / end-of-text markers so they do not trail the answer.
    response_lower = response_lower.replace("<|eot_id|>", "").replace("<|end_of_text|>", "").strip()

    if response_lower.startswith("true"):
        return True
    elif response_lower.startswith("false"):
        return False

    # No explicit leading True/False: fall back to counting occurrences.
    # Whole-word matching keeps words such as "untrue" from being counted as "true".
    true_count = len(re.findall(r"\btrue\b", response_lower))
    false_count = len(re.findall(r"\bfalse\b", response_lower))

    return true_count > false_count

# =====================================================
# 🔮 Generate predictions
# =====================================================
print("开始生成预测...")

for example in tqdm(test_dataset, desc="处理测试样本"):
    question = example["question"]
    solution = example["solution"]

    # Fill the prompt template with the question and the proposed solution.
    prompt = inference_prompt.format(question, str(solution))
    # NOTE(review): the template already starts with <|begin_of_text|>; if the
    # tokenizer also prepends a BOS token the prompt will carry two — confirm
    # this matches the format used during fine-tuning.
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    # Generate the answer. `do_sample=False` forces greedy decoding so the
    # predictions are reproducible regardless of the sampling settings stored
    # in the model's generation config. The prompt asks for a True/False
    # answer, so only a few new tokens are requested.
    outputs = model.generate(**inputs, max_new_tokens=8, do_sample=False, use_cache=True)
    # The decoded text still contains the prompt; parse_output keeps only the
    # part after the assistant header.
    response_text = tokenizer.batch_decode(outputs)[0]

    # Convert the decoded text into a boolean prediction.
    prediction = parse_output(response_text)
    predictions.append(prediction)

# =====================================================
# 💾 Save the submission file
# =====================================================
# IDs are the 0-based positions of the examples in the test split.
submission = pd.DataFrame({
    'ID': range(len(predictions)),
    'is_correct': predictions
})

submission.to_csv('submission.csv', index=False)

num_true = sum(predictions)  # True counts as 1, False as 0
print("\n✅ 提交文件 'submission.csv' 创建成功！")
print(f"📊 预测统计: True={num_true}, False={len(predictions)-num_true}")
print("🎯 现在可以下载此文件并提交到Kaggle竞赛。")

# Release cached GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

加载模型中...
==((====))==  Unsloth 2025.10.8: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

Unsloth 2025.10.8 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/3.65M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

开始生成预测...


处理测试样本: 100%|██████████| 10000/10000 [2:29:13<00:00,  1.12it/s]


✅ 提交文件 'submission.csv' 创建成功！
📊 预测统计: True=3465, False=6535
🎯 现在可以下载此文件并提交到Kaggle竞赛。





In [None]:
import os
# Show the absolute location of the generated submission file.
submission_path = os.path.abspath('submission.csv')
print("文件路径:", submission_path)

文件路径: /content/submission.csv


In [None]:


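# # =====================================================
# # 🚀 Kaggle Submission Generator with Auto-Checkpoint
# # =====================================================
# # NOTE: the code in this cell is commented out, so none of it runs. It is an
# # alternative, batched version of the inference cell that periodically writes
# # its progress to a JSON checkpoint so an interrupted run can be resumed.
# # NOTE(review): the batched tokenizer call below uses padding=True; before
# # enabling this cell, confirm the tokenizer pads on the left, since
# # generation continues from the end of each padded row.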

# # Configuration
# MODEL_PATH = "/content/drive/MyDrive/colab_math_sft/your_model_path/final_model"  # 🔧 Update this
# BASE_DIR = "/content/drive/MyDrive/colab_math_sft"
# CHECKPOINT_FILE = f"{BASE_DIR}/submission_checkpoint.json"

# # Prompt template
# INFERENCE_PROMPT = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>

# Math Problem: {}
# Proposed Solution: {}
# Is this solution correct? Answer True or False:<|eot_id|><|start_header_id|>assistant<|end_header_id|>

# """

# def parse_output(response_text):
#     """Parse model output to extract True/False"""
#     if "<|start_header_id|>assistant<|end_header_id|>" in response_text:
#         response_text = response_text.split("<|start_header_id|>assistant<|end_header_id|>")[-1]

#     response_lower = response_text.lower().strip()
#     response_lower = response_lower.replace("<|eot_id|>", "").replace("<|end_of_text|>", "").strip()

#     if response_lower.startswith("true"):
#         return True
#     elif response_lower.startswith("false"):
#         return False

#     true_count = response_lower.count("true")
#     false_count = response_lower.count("false")

#     return true_count > false_count

# def save_checkpoint(predictions, current_index):
#     """Save checkpoint progress"""
#     checkpoint = {
#         "predictions": predictions,
#         "current_index": current_index,
#         "total_processed": len(predictions)
#     }
#     with open(CHECKPOINT_FILE, 'w') as f:
#         json.dump(checkpoint, f)
#     print(f"💾 Checkpoint saved: Processed {len(predictions)}/{current_index} samples")

# def load_checkpoint():
#     """Load checkpoint if exists"""
#     if os.path.exists(CHECKPOINT_FILE):
#         with open(CHECKPOINT_FILE, 'r') as f:
#             checkpoint = json.load(f)
#         print(f"🔄 Checkpoint found: Resuming from sample {checkpoint['current_index']}")
#         return checkpoint["predictions"], checkpoint["current_index"]
#     return [], 0

# def generate_kaggle_submission_with_checkpoint():
#     """Main submission generator with checkpointing"""
#     print("🚀 Starting Kaggle submission generation (with checkpoint)...")

#     # 1. Load model
#     print("📦 Loading model...")
#     model, tokenizer = FastLanguageModel.from_pretrained(
#         model_name=MODEL_PATH,
#         max_seq_length=2048,
#         dtype=None,
#         load_in_4bit=True,
#     )
#     FastLanguageModel.for_inference(model)

#     # 2. Load test dataset
#     print("📊 Loading test dataset...")
#     test_dataset = load_dataset("ad6398/nyu-dl-teach-maths-comp", split="test")
#     total_samples = len(test_dataset)
#     print(f"Test samples: {total_samples:,}")

#     # 3. Load checkpoint
#     predictions, start_index = load_checkpoint()

#     # 4. Generate predictions from checkpoint
#     print("🎯 Generating predictions...")

#     # Batch processing with periodic saving
#     batch_size = 32  # Increased for better performance
#     save_interval = 5  # Save every 5 batches

#     for i in tqdm(range(start_index, total_samples, batch_size), desc="Processing test set"):
#         batch_end = min(i + batch_size, total_samples)
#         batch_indices = list(range(i, batch_end))

#         # Prepare prompts for current batch
#         prompts = []
#         current_batch = []
#         for idx in batch_indices:
#             example = test_dataset[idx]
#             prompt = INFERENCE_PROMPT.format(example["question"], str(example["solution"]))
#             prompts.append(prompt)
#             current_batch.append(idx)

#         try:
#             # Batch encoding and generation
#             inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=2048).to("cuda")

#             with torch.no_grad():
#                 outputs = model.generate(
#                     **inputs,
#                     max_new_tokens=8,
#                     do_sample=False,
#                     use_cache=True,
#                     pad_token_id=tokenizer.pad_token_id
#                 )

#             # Parse results
#             responses = tokenizer.batch_decode(outputs, skip_special_tokens=False)
#             batch_predictions = [parse_output(response) for response in responses]
#             predictions.extend(batch_predictions)

#             # Periodic checkpoint saving
#             if (i // batch_size) % save_interval == 0:
#                 save_checkpoint(predictions, batch_end)

#         except Exception as e:
#             print(f"❌ Batch {i}-{batch_end} failed: {str(e)}")
#             print("💾 Saving current progress...")
#             save_checkpoint(predictions, i)  # Rollback to previous batch
#             raise e

#     # 5. Final checkpoint save
#     save_checkpoint(predictions, total_samples)

#     # 6. Create submission file
#     print("💾 Creating final submission file...")
#     submission = pd.DataFrame({
#         'ID': range(len(predictions)),
#         'is_correct': predictions
#     })

#     timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
#     output_path = f"{BASE_DIR}/submission_{timestamp}.csv"
#     submission.to_csv(output_path, index=False)

#     # 7. Cleanup checkpoint file
#     if os.path.exists(CHECKPOINT_FILE):
#         os.remove(CHECKPOINT_FILE)
#         print("🧹 Cleaned up checkpoint file")

#     # 8. Display statistics
#     true_count = sum(predictions)
#     false_count = len(predictions) - true_count

#     print("\n" + "="*50)
#     print("📊 Final Prediction Statistics")
#     print("="*50)
#     print(f"Total samples: {len(predictions):,}")
#     print(f"Predicted True: {true_count:,} ({true_count/len(predictions)*100:.2f}%)")
#     print(f"Predicted False: {false_count:,} ({false_count/len(predictions)*100:.2f}%)")
#     print(f"📁 Submission file: {output_path}")
#     print("="*50)

#     # 9. Clean up memory
#     del model, tokenizer
#     torch.cuda.empty_cache()

#     return output_path

# # Run the generator
# if __name__ == "__main__":
#     try:
#         submission_file = generate_kaggle_submission_with_checkpoint()
#         print(f"\n✅ Submission file generated successfully!: {submission_file}")
#     except Exception as e:
#         print(f"\n❌ Generation interrupted: {str(e)}")
#         print("💡 Re-run this code to continue from checkpoint")