In [None]:
# import time
# start_time = time.time()
# total_time = (4 * 60 + 45) * 60

In [1]:
import os

# Must be set before `transformers` is imported in a later cell.
# Presumably this turns off Hugging Face tokenizers' internal parallelism
# (and its fork warning) -- confirm against the tokenizers docs.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

In [None]:
from transformers import set_seed

# Fix the seed once, up front, so repeated runs of the notebook are comparable.
# See the transformers `set_seed` docs for exactly which RNGs it covers.
set_seed(seed=42)

In [None]:
def get_environment() -> str:
    """Identify where this notebook is running, based on environment variables.

    Returns:
        ``"kaggle"`` if ``KAGGLE_KERNEL_RUN_TYPE`` is set, otherwise
        ``"colab"`` if ``COLAB_RELEASE_TAG`` is set, otherwise ``"local"``.
        Kaggle wins when both variables are present. A variable set to the
        empty string still counts as set.
    """
    # Compare against the None singleton by identity (PEP 8), not with `!=`.
    if os.getenv("KAGGLE_KERNEL_RUN_TYPE") is not None:
        return "kaggle"
    if os.getenv("COLAB_RELEASE_TAG") is not None:
        return "colab"
    return "local"


ENV = get_environment()

# Reject environments that have no import path before attempting any import.
if ENV == "colab":
    raise NotImplementedError("Not yet implemented")
if ENV not in ("kaggle", "local"):
    raise ValueError("Unknown environment")

# Both supported environments bind the same six names; only the package path
# differs between them.
if ENV == "kaggle":
    # On Kaggle the project is imported through its absolute package path.
    from jeffdshen.aimo2.predict import MetaLLM
    from jeffdshen.aimo2.systems import WeightedEnsemble
    from jeffdshen.aimo2.config import (
        MODELS,
        SYSTEM_PARAMS,
        get_validation_data,
        load_llm,
    )
else:
    # ENV == "local": sibling modules are imported relatively.
    # NOTE(review): relative imports need a known parent package -- confirm
    # this path works when the notebook is executed directly.
    from .predict import MetaLLM
    from .systems import WeightedEnsemble
    from .config import (
        MODELS,
        SYSTEM_PARAMS,
        get_validation_data,
        load_llm,
    )

In [1]:
# Load the validation set. Later cells read `data.df` (its "id" and "problem"
# columns) and `data.correct_answers` from the returned object.
data = get_validation_data()

In [None]:
# Key of the MODELS entry to load; change this one constant to swap models.
MODEL_KEY = "deepseek-r1-distill-qwen-32b-awq"
llm = load_llm(MODELS[MODEL_KEY])

In [None]:
# Keys into SYSTEM_PARAMS, in the order their parameter sets are handed to the
# ensemble in the run cell.
SYSTEMS = (
    ["r1_v1a", "r1_v1b", "r1_v1c"]  # r1_v1* entries
    + ["r1_v2a", "r1_v2b", "r1_v2c"]  # r1_v2* entries
)

In [None]:
# Run the ensemble over the validation problems while the request log is open.
# Mode "w" truncates any existing request_log.jsonl on every run. The encoding
# is pinned to UTF-8 so the log does not depend on the platform's default
# locale encoding.
with open("request_log.jsonl", "w", encoding="utf-8") as f:
    # One parameter set per name in SYSTEMS, in list order.
    system = WeightedEnsemble(
        system_params=[SYSTEM_PARAMS[name] for name in SYSTEMS],
        # The open file handle is passed as the ensemble's question log;
        # presumably it writes one record per question -- confirm in systems.
        question_log=f,
        correct_answers=data.correct_answers,
    )
    meta_llm = MetaLLM(
        llm=llm,
        system=system,
    )
    # Kept inside the `with` block so the log file is still open during
    # prediction.
    meta_llm.predict(data.df.get_column("id"), data.df.get_column("problem"))