# DeepSeek-R1-70B Solo
Just DeepSeek. No drama.

In [None]:
import os, sys, subprocess, gc, time, re, math
import glob as globmod
from collections import Counter
from typing import Optional, List, Tuple

# Environment switches, set before torch / vLLM are imported further down:
# the two *_OFFLINE flags are set to "1" and vLLM's log level to WARNING.
os.environ.update(
    {
        "HF_HUB_OFFLINE": "1",
        "TRANSFORMERS_OFFLINE": "1",
        "VLLM_LOGGING_LEVEL": "WARNING",
    }
)

import torch
# Report the torch build and, when CUDA is usable, the name of device 0.
_cuda_ok = torch.cuda.is_available()
print(f"Torch: {torch.__version__}, CUDA: {_cuda_ok}")
if _cuda_ok:
    _gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {_gpu_name}")

# Find wheels: use the first candidate directory that exists and holds *.whl files.
wheel_dirs = ["/kaggle/input/vllm-wheels-py311", "/kaggle/input/deepseek-offline-wheels"]
wheel_dir = next((d for d in wheel_dirs if os.path.exists(d) and globmod.glob(f"{d}/*.whl")), None)


def _offline_pip_install(package, links_dir, timeout, capture=False):
    """Install one package from a local wheel directory without network access.

    Args:
        package: Distribution name handed to ``pip install``.
        links_dir: Directory passed to pip as ``--find-links``.
        timeout: Seconds to wait for pip before giving up.
        capture: When True, pip's output is captured instead of shown.

    Returns:
        True when pip exits with status 0; False when it fails or times out.
        The install is best-effort, so pip problems never raise from here.
    """
    cmd = [
        sys.executable, "-m", "pip", "install",
        "--no-index", f"--find-links={links_dir}", "--no-deps", "-q", package,
    ]
    try:
        proc = subprocess.run(cmd, timeout=timeout, capture_output=capture)
    except subprocess.TimeoutExpired:
        # A slow install must not abort the notebook; the package may
        # already be present in the base image.
        print(f"WARNING: pip install {package} timed out after {timeout}s")
        return False
    return proc.returncode == 0


if wheel_dir:
    print(f"Installing from {wheel_dir}")
    if not _offline_pip_install("vllm", wheel_dir, timeout=120):
        print("WARNING: vllm was not installed from the wheel directory")
    # Supporting packages are installed quietly; failures are summarised in a
    # single line rather than being dropped without a trace.
    _failed = [
        pkg
        for pkg in ["transformers", "accelerate", "safetensors", "tokenizers", "sentencepiece", "huggingface_hub", "pydantic", "msgspec", "cloudpickle", "einops", "filelock", "regex", "tqdm", "packaging", "typing_extensions", "jinja2", "triton", "xgrammar"]
        if not _offline_pip_install(pkg, wheel_dir, timeout=60, capture=True)
    ]
    if _failed:
        print(f"Not installed from wheels: {', '.join(_failed)}")

# vLLM is imported only at this point, after the environment variables have
# been set and the optional offline wheel install has had a chance to run.
from vllm import LLM, SamplingParams
print("vLLM OK")

In [None]:
import polars as pl

# Wall-clock bookkeeping: CUTOFF lies 4 h 50 min (17400 s) after START.
START = time.time()
CUTOFF = START + 60 * (50 + 4 * 60)
MODEL_PATH = "/kaggle/input/deepseek-r1/transformers/deepseek-r1-distill-llama-70b/1"
PROBLEM_COUNT = 0  # incremented by solve() once per problem

print(f"Loading {MODEL_PATH}...")
# Single shared vLLM engine used by every generate() call.
LLM_MODEL = LLM(
    model=MODEL_PATH,
    dtype="bfloat16",
    max_model_len=8192,            # context cap handed to the engine
    gpu_memory_utilization=0.92,
    tensor_parallel_size=1,
    enforce_eager=True,
    trust_remote_code=True,
)
print("Model loaded!")

In [None]:
# One system instruction per solving style. Every instruction is a
# style-specific opening sentence followed by the same closing directions;
# the insertion order is the order in which solve() walks the styles.
PROMPTS = {
    style: opening + " Think in <think> tags. Final integer in \\boxed{}."
    for style, opening in (
        ('algebraic', "Solve using algebra."),
        ('casework', "Solve by case analysis."),
        ('verification', "Solve and verify by substitution."),
        ('computational', "Solve step by step."),
        ('backwards', "Work backwards from constraints."),
    )
}

# Sampling temperatures tried for each prompt style.
TEMPS = [0.7, 0.85, 1.0]

# Strings that end a generation when produced.
STOPS = [
    "<\uff5cend\u2581of\u2581sentence\uff5c>",
    "<|endoftext|>",
    "</s>",
]

# Compiled once at import time. The backslash form is tried first, then the
# bare "boxed{...}" form; whitespace around the digits is tolerated.
_BOXED_PATTERNS = (
    re.compile(r'\\boxed\{\s*(\d+)\s*\}'),
    re.compile(r'boxed\{\s*(\d+)\s*\}'),
)
# Fallback phrasing such as "answer is 12", "Answer: 12" or "answer = 12".
_ANSWER_PATTERN = re.compile(r'answer\s*(?:is|=|:)\s*(\d+)', re.I)


def _in_range_int(digits: str) -> Optional[int]:
    """Return ``int(digits)`` when it lies in [0, 99999], otherwise None."""
    try:
        value = int(digits)
    except ValueError:
        # Python >= 3.11 refuses to convert extremely long digit strings;
        # such a value would be out of range anyway.
        return None
    return value if 0 <= value <= 99999 else None


def extract_boxed(text: str) -> Optional[int]:
    """Pull the final integer answer out of a model completion.

    The last ``\\boxed{N}`` occurrence is preferred. If no boxed value is
    usable, the last "answer is/=/: N" phrase within the final 500 characters
    is used instead.

    Args:
        text: Raw completion text to scan.

    Returns:
        The extracted integer when it lies in [0, 99999], otherwise None.
    """
    for pattern in _BOXED_PATTERNS:
        found = pattern.findall(text)
        if found:
            value = _in_range_int(found[-1])
            if value is not None:
                return value
    found = _ANSWER_PATTERN.findall(text[-500:])
    if found:
        return _in_range_int(found[-1])
    return None

def generate(question: str, system: str, temp: float) -> Optional[int]:
    """Sample one completion for ``question`` and extract its integer answer.

    Args:
        question: Problem statement inserted into the user turn.
        system: Instruction text placed before the question.
        temp: Sampling temperature.

    Returns:
        The extracted answer, or None when the global CUTOFF has passed,
        generation fails, or no answer can be extracted.
    """
    if time.time() >= CUTOFF:
        return None
    # Hand-built chat prompt; it ends with an opened <think> tag so the
    # completion starts directly inside the reasoning section.
    prompt = f"<\uff5cbegin\u2581of\u2581sentence\uff5c><\uff5cUser\uff5c>{system}\n\n{question}<\uff5cAssistant\uff5c><think>\n"
    params = SamplingParams(temperature=temp, top_p=0.95, max_tokens=6144, stop=STOPS)
    try:
        out = LLM_MODEL.generate([prompt], sampling_params=params)
        txt = out[0].outputs[0].text
        # Only the text after the last </think> is searched when the tag is
        # present; otherwise the whole completion is searched.
        return extract_boxed(txt.split("</think>")[-1] if "</think>" in txt else txt)
    except Exception as exc:
        # One failed sample should not end the run, but it should be visible.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        print(f"  generation failed: {type(exc).__name__}: {exc}")
        return None

def solve(question: str) -> int:
    """Answer one problem by sampling several completions and voting.

    Samples are drawn over the TEMPS x PROMPTS grid (temperature-major order),
    capped by a sample count derived from the time still available. Sampling
    stops early on a clear consensus, at the global CUTOFF, or once this
    problem's share of the remaining time has been used up.

    Args:
        question: Problem statement.

    Returns:
        The winning answer of the weighted vote. When no sample yields an
        answer: the first integer in (0, 100000) found in the question, or 42.
    """
    global PROBLEM_COUNT
    PROBLEM_COUNT += 1

    started = time.time()
    time_left = CUTOFF - started
    # Even split of the remaining time over the problems still to come; the
    # literal 50 is presumably the expected number of problems in a run.
    time_per = time_left / max(1, 50 - PROBLEM_COUNT + 1)
    max_samples = 20 if time_per > 300 else 15 if time_per > 180 else 10 if time_per > 60 else 5
    # Enforce the per-problem share as a real deadline; otherwise one slow
    # problem can consume time that later problems need.
    problem_deadline = started + time_per

    answers = []
    # Sliced from the grid, so there are never more than
    # len(TEMPS) * len(PROMPTS) tasks whatever max_samples says.
    tasks = [(p, t) for t in TEMPS for p in PROMPTS][:max_samples]
    for attempt, (ptype, temp) in enumerate(tasks):
        now = time.time()
        if now >= CUTOFF:
            break
        # Always try at least one sample before giving up on the budget.
        if attempt > 0 and now >= problem_deadline:
            break
        ans = generate(question, PROMPTS[ptype], temp)
        if ans is not None:
            answers.append(ans)
        # Early exit: at least 5 answers collected and 4 of them agree.
        if len(answers) >= 5 and Counter(answers).most_common(1)[0][1] >= 4:
            break

    if not answers:
        nums = [int(x) for x in re.findall(r'\b\d+\b', question) if 0 < int(x) < 100000]
        return nums[0] if nums else 42

    # Weighted vote: each answer's count is scaled by log(1.25 + |answer|),
    # so for equal counts the larger value wins.
    counts = Counter(answers)
    weighted = {v: math.log(1.25 + abs(v)) * c for v, c in counts.items()}
    final = max(weighted, key=weighted.get)
    print(f"  -> {final} (from {counts.most_common(3)})")
    return final

print("Solver ready")

In [None]:
def predict(id_: pl.Series, problem: pl.Series) -> pl.DataFrame:
    """Solve the single problem carried by the two input series.

    Args:
        id_: Series whose first element is the problem id.
        problem: Series whose first element is the problem text.

    Returns:
        A DataFrame with the columns "id" and "answer".
    """
    problem_id = id_.item(0)
    statement = problem.item(0)

    # Progress line: 1-based problem index, id, minutes until CUTOFF.
    print(f"\n[{PROBLEM_COUNT+1}] {problem_id} | {(CUTOFF-time.time())/60:.1f}m left")
    print(f"Q: {statement[:80]}...")

    result = solve(statement)
    print(f"A: {result}")

    # Housekeeping between problems.
    gc.collect()
    torch.cuda.empty_cache()

    return pl.DataFrame({"id": id_, "answer": result})

print("="*50)
print("DeepSeek-R1-70B Solo")
print("="*50)


def _run_smoke_test():
    """Run predict() once on a toy problem, print the frame and return it."""
    smoke = predict(pl.Series(["test"]), pl.Series(["Find 2^10 mod 1000."]))
    print(smoke)
    return smoke


# Only the import is guarded: an ImportError raised later, while serving,
# must not be mistaken for "kaggle_evaluation is unavailable".
try:
    import kaggle_evaluation.aimo_3_inference_server
except ImportError:
    result = _run_smoke_test()
else:
    server = kaggle_evaluation.aimo_3_inference_server.AIMO3InferenceServer(predict)
    if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
        # Competition rerun: hand control to the inference server.
        server.serve()
    else:
        test_file = '/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv'
        if os.path.exists(test_file):
            server.run_local_gateway((test_file,))
        else:
            result = _run_smoke_test()

print(f"\nDone in {(time.time()-START)/60:.1f}m")