In [None]:
# AIMO3 Submission - Qwen2.5-72B-Instruct-AWQ
# Quantized model that fits on H100 80GB
# Uses vLLM with AWQ quantization

import subprocess
import sys
import os
import time
import gc
from pathlib import Path

# Free up memory
# Best-effort: ask pip to uninstall packages listed below. Every failure is
# non-fatal -- the notebook continues either way -- but it is reported instead
# of being silently discarded.
for pkg in ['tensorflow', 'matplotlib', 'keras', 'sklearn', 'scikit-learn']:
    try:
        subprocess.run([sys.executable, '-m', 'pip', 'uninstall', '-y', pkg],
                       capture_output=True, timeout=60)
    except (subprocess.SubprocessError, OSError) as exc:
        # SubprocessError covers the 60 s timeout; OSError covers a failure to
        # launch the interpreter. KeyboardInterrupt/SystemExit are no longer
        # swallowed, so the cell can still be interrupted by hand.
        print(f"  (could not uninstall {pkg}: {exc})")
gc.collect()

# Query nvidia-smi for the GPU name and total memory, then print a short banner
smi_command = ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader']
gpu_info = subprocess.run(smi_command, capture_output=True, text=True)
print(f"GPU: {gpu_info.stdout.strip()}")
print("Budget: 280min")

In [None]:
# Install vLLM wheels - skip packages that conflict with Kaggle
WHEEL_DIR = Path('/kaggle/input/aimo3-vllm-wheels')

# Name fragments of wheels that must not be installed from WHEEL_DIR because
# (per the original author's notes) Kaggle already provides them and mixing
# versions breaks things. The fragments are grouped by reason below and merged
# into the single SKIP_PACKAGES set that should_skip() consults.

# CRITICAL - PyTorch ecosystem must stay intact (cannot mix PyTorch versions)
_PYTORCH_STACK = (
    'torch', 'xformers', 'triton',
    'transformers', 'tokenizers', 'numpy',
)

# RISKY - may cause import errors
_RISKY_DEPENDENCIES = (
    'nvidia', 'accelerate', 'huggingface', 'safetensors', 'pillow', 'protobuf',
    'pyyaml', 'regex', 'packaging', 'tqdm', 'fsspec', 'certifi', 'charset',
    'idna', 'urllib3', 'jinja2', 'markupsafe', 'sympy', 'mpmath', 'networkx',
    'six', 'python-dateutil', 'attrs', 'referencing', 'jsonschema', 'rpds',
)

# FLASHINFER - not in our wheels anyway
_NOT_BUNDLED = ('flashinfer',)

SKIP_PACKAGES = set(_PYTORCH_STACK) | set(_RISKY_DEPENDENCIES) | set(_NOT_BUNDLED)

def should_skip(wheel_name, skip_packages=None):
    """Return True when the wheel file *wheel_name* must not be installed.

    Only the distribution-name part of the filename (everything before the
    version component) is compared, and a skip entry matches when the
    distribution name *starts with* it. Family entries therefore still work
    ('nvidia' matches 'nvidia_cublas_cu12', 'torch' matches 'torchvision'),
    while accidental hits elsewhere in the filename no longer cause a skip --
    e.g. 'attrs' inside 'cattrs', or 'torch' inside a local version tag such
    as '+cu123torch2.4'.

    Args:
        wheel_name: File name of the wheel, e.g. 'foo_bar-1.0-py3-none-any.whl'.
        skip_packages: Optional iterable of name prefixes to skip. Defaults to
            the module-level SKIP_PACKAGES.

    Returns:
        bool: True if the wheel should be skipped.
    """
    prefixes = SKIP_PACKAGES if skip_packages is None else skip_packages

    # The version is the first '-'-separated component that starts with a
    # digit; everything before it is the distribution name. Collecting parts
    # this way also copes with names that were left hyphenated.
    parts = wheel_name.lower().split('-')
    name_parts = []
    for part in parts:
        if part[:1].isdigit():
            break
        name_parts.append(part)
    dist_name = ('_'.join(name_parts) or parts[0]).replace('.', '_')

    return any(
        dist_name.startswith(prefix.lower().replace('-', '_'))
        for prefix in prefixes
    )

# Install every wheel found in WHEEL_DIR except those should_skip() rejects.
# Each wheel is installed on its own with --no-deps so that pip never pulls in
# or replaces a dependency behind our back.
if WHEEL_DIR.exists():
    # Sort once so the preview below and the install loop use the same order
    wheels = sorted(WHEEL_DIR.glob('*.whl'))
    print(f"\n{WHEEL_DIR}:")
    print(f"  Found {len(wheels)} wheels")
    for w in wheels[:10]:
        print(f"  {w.name}")
    if len(wheels) > 10:
        print(f"  ... and {len(wheels)-10} more")

    print("\n--- Installing wheels (skipping Kaggle-provided packages) ---")
    installed, skipped = 0, 0
    failed = []  # (wheel name, last stderr line) for every non-zero pip exit
    for wheel in wheels:
        if should_skip(wheel.name):
            skipped += 1
            continue
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '--no-deps', '--quiet', str(wheel)],
            capture_output=True, text=True
        )
        if result.returncode == 0:
            installed += 1
        else:
            stderr_lines = (result.stderr or '').strip().splitlines()
            failed.append((wheel.name, stderr_lines[-1] if stderr_lines else 'no error output'))
    print(f"Installed {installed} wheels (skipped {skipped} conflicting)")

    # Surface failures here: otherwise the first symptom is an ImportError in a
    # later cell with no hint about which wheel was responsible.
    if failed:
        print(f"WARNING: {len(failed)} wheel(s) failed to install:")
        for wheel_name, reason in failed:
            print(f"  {wheel_name}: {reason}")
else:
    print(f"WARNING: wheel directory not found: {WHEEL_DIR} - nothing was installed")

In [None]:
# Import vLLM and verify
from vllm import LLM, SamplingParams
import torch
# Confirm the imports above succeeded and report the PyTorch build / CUDA status
print("\nAll imports ready")
print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())

In [None]:
# Directory under /kaggle/input holding the Qwen2.5-72B-Instruct-AWQ files
MODEL_PATH = "/kaggle/input/qwen2.5/transformers/72b-instruct-awq/1"

# Fail fast when the directory is missing; otherwise summarise its
# .safetensors shards (count and combined size in GB, 1 GB = 1e9 bytes).
model_path = Path(MODEL_PATH)
if not model_path.exists():
    raise FileNotFoundError(f"Model not found at {MODEL_PATH}")

files = list(model_path.glob('*'))
print(f"Found Qwen2.5-72B-AWQ: {MODEL_PATH}")
safetensors = [entry for entry in files if entry.suffix == '.safetensors']
print(f"  Safetensor shards: {len(safetensors)}")
total_size = sum(shard.stat().st_size for shard in safetensors) / 1e9
print(f"  Total size: {total_size:.1f} GB")

In [None]:
# Build the vLLM engine for the AWQ checkpoint and grab its tokenizer
print("Loading Qwen2.5-72B-Instruct-AWQ...")
print("  Using AWQ INT4 quantization (~36GB VRAM)")

llm = LLM(
    MODEL_PATH,
    # --- weights / numerics ---
    quantization="awq",
    dtype="float16",
    trust_remote_code=True,
    # --- memory and batching limits ---
    gpu_memory_utilization=0.92,
    max_model_len=4096,
    max_num_seqs=16,
    # --- runtime behaviour ---
    enforce_eager=True,  # More stable (original author's note)
    enable_prefix_caching=True,
    seed=42,
)

tokenizer = llm.get_tokenizer()
print("Model loaded successfully!")
print("Max context: 4096 tokens")

In [None]:
# Load test problems
# NOTE(review): this imports the *aimo_2* inference-server module while the
# data paths below belong to progress-prize-3 -- confirm which module name the
# competition's kaggle_evaluation package actually ships.
import kaggle_evaluation.aimo_2_inference_server

# Get sample problems for local testing.
# Both names are pre-initialised so later cells can test `is None` instead of
# hitting a NameError when the CSVs are unavailable.
sample_df = None
test_df = None
try:
    import pandas as pd
    sample_df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-progress-prize-3/sample_submission.csv')
    test_df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv')
    print(f"Test set: {len(test_df)} problems")
except Exception as exc:
    # Still best-effort, but KeyboardInterrupt/SystemExit now propagate and the
    # actual reason is shown rather than hidden.
    print("Running in submission mode - problems provided by server")
    print(f"  (local CSVs not loaded: {exc})")

In [None]:
# Math problem solving configuration
# SYSTEM_PROMPT is the system-role message that format_prompt() places at the
# start of every conversation. It is sent to the model verbatim, so any edit
# to the text below changes model behaviour.
SYSTEM_PROMPT = """You are an expert mathematical problem solver specializing in competition mathematics.

When solving problems:
1. Read the problem carefully and identify what is being asked
2. Break down the problem into manageable steps
3. Show your work clearly with mathematical reasoning
4. Double-check your calculations
5. State your final answer clearly

For numerical answers, provide ONLY the integer value as your final answer.
If the answer should be a remainder or modular result, compute it explicitly."""

def format_prompt(problem: str) -> str:
    """Render *problem* as a prompt string using the tokenizer's chat template.

    The conversation consists of SYSTEM_PROMPT as the system turn followed by
    one user turn that embeds the problem text. The template is applied with
    ``tokenize=False`` (a string is returned, not token ids) and
    ``add_generation_prompt=True``.

    Args:
        problem: Problem statement to embed in the user turn.

    Returns:
        Whatever ``tokenizer.apply_chat_template`` returns for the conversation.
    """
    user_turn = f"Solve this problem step by step:\n\n{problem}\n\nShow your complete solution, then state your final numerical answer."
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_turn},
    ]
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

# Decoding settings passed to llm.generate() by the cells in this notebook:
# up to 2048 new tokens per completion, sampled at temperature 0.6 / top-p 0.9,
# with generation halted at either of the two stop strings.
sampling_params = SamplingParams(
    max_tokens=2048,
    temperature=0.6,
    top_p=0.9,
    stop=["<|endoftext|>", "<|im_end|>"],
)

In [None]:
import re

def extract_answer(response: str) -> int:
    """Extract the final non-negative integer answer from a model response.

    Patterns are tried from most to least reliable, and within a pattern the
    LAST occurrence in the text wins:

    1. ``\\boxed{N}``
    2. "final answer" / "answer is" followed by N, allowing markup such as
       ``:``, ``**``, ``$`` or ``\\(`` in between
    3. "therefore" / "thus" / "hence" immediately followed by N
    4. a line ending in ``= N`` or ``equals N``
    5. a line ending in ``**N**``

    If none match, the last standalone integer anywhere in the text is used.

    Args:
        response: Raw text generated by the model.

    Returns:
        The extracted integer, or 0 when the response is empty or contains no
        digits.
    """
    if not response:
        return 0

    # \boxed{} goes first: words like "therefore 2 ..." occur routinely in the
    # middle of a derivation and must not outrank an explicit boxed result.
    patterns = [
        r'\\boxed\{\s*(\d+)\s*\}',
        r'(?:final answer|answer is)[:\s*$\\(]*(\d+)',
        r'(?:therefore|thus|hence)[:\s]*\$?\s*(\d+)',
        r'(?:=|equals?)\s*(\d+)\s*$',
        r'\*\*(\d+)\*\*\s*$',
    ]

    for pattern in patterns:
        # IGNORECASE makes a separate lower-cased copy of the text unnecessary;
        # MULTILINE lets `$` match at the end of every line.
        matches = re.findall(pattern, response, re.IGNORECASE | re.MULTILINE)
        if matches:
            return int(matches[-1])

    # Fallback: last standalone number in the response
    numbers = re.findall(r'\b(\d+)\b', response)
    if numbers:
        return int(numbers[-1])

    return 0  # Default if no number found

def solve_problem(problem: str, num_samples: int = 3) -> int:
    """Answer *problem* by sampling several completions and taking a majority vote.

    The same formatted prompt is submitted ``num_samples`` times in a single
    ``llm.generate`` call; ``extract_answer`` turns the first completion of
    each result into one integer vote.

    Args:
        problem: Problem statement.
        num_samples: Number of copies of the prompt to generate for.

    Returns:
        The most frequent extracted answer (ties resolve in whatever order
        ``Counter.most_common`` yields), or 0 when no votes were produced.
    """
    from collections import Counter

    prompt_batch = [format_prompt(problem)] * num_samples
    generations = llm.generate(prompt_batch, sampling_params)

    votes = [extract_answer(generation.outputs[0].text) for generation in generations]
    if not votes:
        return 0

    winner, _count = Counter(votes).most_common(1)[0]
    return winner

In [None]:
# Smoke test: run one hand-written problem through prompt formatting,
# a single generation and answer extraction, printing each stage.
test_problem = "Find the sum of all positive integers n such that n^2 + n + 1 divides n^4 + n^2 + 1."

print("Testing with sample problem...")
print(f"Problem: {test_problem[:100]}...")

prompt = format_prompt(test_problem)
output = llm.generate([prompt], sampling_params)[0]
response = output.outputs[0].text  # text of the first completion

# Only the first 500 characters of the response are shown
print(f"\nResponse preview: {response[:500]}...")
print(f"\nExtracted answer: {extract_answer(response)}")

In [None]:
# Entry point handed to the Kaggle evaluation server
def predict(id_: str, question: str) -> int:
    """Produce an answer for one problem without ever raising.

    Delegates to ``solve_problem`` with three samples and logs the outcome.
    Any ``Exception`` is logged and converted into the fallback answer 0.

    Args:
        id_: Problem identifier; used only in log lines.
        question: Problem statement passed to ``solve_problem``.

    Returns:
        The voted answer, or 0 if solving failed.
    """
    try:
        voted = solve_problem(question, num_samples=3)
        print(f"Problem {id_}: answer = {voted}")
    except Exception as err:
        print(f"Error on problem {id_}: {err}")
        return 0
    return voted

print("Prediction function ready!")

In [None]:
# Start inference server for Kaggle submission
# Hands the predict() callback to the kaggle_evaluation inference-server
# module imported earlier in the notebook.
# NOTE(review): the module used here is aimo_2_inference_server although the
# notebook targets progress-prize-3 data -- confirm the module name. Also
# confirm that a module-level serve(predict) exists in the installed
# kaggle_evaluation package and that predict()'s (str, str) -> int signature
# is what the gateway expects; neither can be verified from this file.
print("Starting Kaggle inference server...")
kaggle_evaluation.aimo_2_inference_server.serve(predict)