In [None]:
import os
import time
import warnings
import re

import pandas as pd
import polars as pl

import torch
import kaggle_evaluation.aimo_2_inference_server

# Do not truncate long cell contents when pandas displays a DataFrame.
pd.set_option('display.max_colwidth', None)
# Wall-clock deadline for the whole notebook run. predict_for_question compares
# time.time() against this value and returns its fallback answer once it has passed.
cutoff_time = time.time() + (4 * 60 + 55) * 60  # 4 hours 55 minutes from now

In [None]:
import json
from datetime import datetime


def log_model_interaction(messages,
                          response,
                          log_file="model_interactions.jsonl"):
    """Append one prompt/response exchange to a JSON Lines log file.

    Each call writes a single line holding a JSON object with the keys
    ``timestamp`` (local time, ISO 8601), ``input_messages`` and
    ``model_response``. Non-ASCII characters are written as-is (UTF-8).

    Args:
        messages: Conversation that was sent to the model; must be
            JSON-serializable.
        response: Text the model produced for ``messages``.
        log_file: Path of the file to append to.
    """
    record = dict(
        timestamp=datetime.now().isoformat(),
        input_messages=messages,
        model_response=response,
    )
    with open(log_file, 'a', encoding='utf-8') as sink:
        sink.write(f"{json.dumps(record, ensure_ascii=False)}\n")

In [None]:
def format_time(seconds: float) -> str:
    """Render a duration given in seconds as a short human-readable string.

    The result has one of three shapes, depending on magnitude:

    * ``"HH:MM:SS.cc"`` when the duration is at least one hour,
    * ``"MM:SS.cc"`` when it is at least one minute,
    * ``"S.cc seconds"`` otherwise.

    Args:
        seconds: Duration in seconds.

    Returns:
        The formatted duration, with the seconds field shown to two decimals.
    """
    # Round to whole centiseconds *before* splitting into fields. Splitting the
    # raw float first lets the seconds field round up to "60.00" on its own
    # (e.g. 119.999 used to render as "01:60.00" instead of "02:00.00").
    total_centiseconds = round(seconds * 100)
    hours, below_hour = divmod(total_centiseconds, 360_000)
    minutes, below_minute = divmod(below_hour, 6_000)
    seconds_remainder = below_minute / 100

    if hours > 0:
        return f"{hours:02d}:{minutes:02d}:{seconds_remainder:05.2f}"
    if minutes > 0:
        return f"{minutes:02d}:{seconds_remainder:05.2f}"
    return f"{seconds_remainder:.2f} seconds"

In [None]:
from functools import wraps


def problem_timer(func):
    """Decorator that reports how long each call to a per-problem solver took.

    The wrapped callable must accept ``question`` and ``id_`` as its first two
    arguments. After every call a line of the form
    ``Problem <id_> completed in: <duration>`` is printed, and the wrapped
    function's return value is passed through unchanged. If the wrapped
    function raises, nothing is printed and the exception propagates.
    """

    @wraps(func)
    def timed(question: str, id_: str, *args, **kwargs):
        began_at = time.time()
        outcome = func(question, id_, *args, **kwargs)
        took = time.time() - began_at
        print(f"Problem {id_} completed in: {format_time(took)}")
        return outcome

    return timed

In [None]:
class CodeTimer:
    """Context manager that prints how long the enclosed block took.

    On exit it prints ``<description> completed in: <duration>`` (preceded by a
    blank line). The message is printed whether or not the block raised, and
    exceptions are never suppressed (``__exit__`` returns ``None``).

    After use, ``start`` and ``end`` hold the ``time.perf_counter()`` readings
    and ``duration`` holds their difference in seconds.
    """

    def __init__(self, description="Code block"):
        self.description = description

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        stopped_at = time.perf_counter()
        self.end = stopped_at
        self.duration = stopped_at - self.start
        pretty = format_time(self.duration)
        print(f"\n{self.description} completed in: {pretty}")

In [None]:
# Import vLLM and configure the process environment; CodeTimer prints how long
# this took when the block exits.
with CodeTimer("VLLM Model & CUDA Setup"):
    from vllm import LLM, SamplingParams 

    # Silence all Python warnings from this point on.
    warnings.simplefilter('ignore')

    # Expose GPUs 0-3 to CUDA and turn off tokenizer-library parallelism.
    # NOTE(review): torch and vllm are already imported when these variables
    # are set - confirm CUDA has not been initialised yet, otherwise
    # CUDA_VISIBLE_DEVICES has no effect.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# Load the model into vLLM; CodeTimer prints how long initialisation took.
with CodeTimer("Model initialization"):
    # Number of CUDA devices torch can see; only used for swap_space below.
    num_gpus = torch.cuda.device_count()

    # Location of the model weights inside the Kaggle input directory.
    llm_model_pth = '/kaggle/input/qwen_qwen2.5_math_72bawq2/transformers/72b/3'

    # NOTE(review): the path name suggests an AWQ-quantised checkpoint -
    # confirm that dtype=torch.bfloat16 is supported for it.
    llm = LLM(
        llm_model_pth,
        dtype=torch.bfloat16,
        max_num_seqs=16,
        max_model_len=4096, #model max 16384
        trust_remote_code=True,
        # NOTE(review): hard-coded to 4 rather than derived from num_gpus;
        # this matches the four devices listed in CUDA_VISIBLE_DEVICES above.
        tensor_parallel_size=4,
        swap_space=max(1, num_gpus // 2),
        gpu_memory_utilization=0.97,
        seed=2024,
        enforce_eager=True,        
    )

In [None]:
# Tokenizer of the loaded model; batch_message_generate uses it to render
# conversations into prompt strings via apply_chat_template.
tokenizer = llm.get_tokenizer()

In [None]:
def extract_python_code(text):
    """Collect the contents of every ```python fenced block found in ``text``.

    Whitespace directly inside the fences is dropped. When several blocks are
    present their contents are joined with a blank line between them.

    Args:
        text: Text that may contain Markdown code fences tagged ``python``.

    Returns:
        The concatenated code, or an empty string when no block is found.
    """
    fenced_block = re.compile(r'```python\s*(.*?)\s*```', re.DOTALL)
    snippets = [found.group(1) for found in fenced_block.finditer(text)]
    return "\n\n".join(snippets)

In [None]:
import keyword


def process_python_code(query):
    """Instrument model-written code so that top-level assignments print their values.

    The code is prefixed with imports of ``math``, ``numpy as np`` and
    ``sympy as sp``. Then, after every line that

    * does not start with a space (i.e. is not indented),
    * contains an ``=`` character, and
    * has balanced ``()`` and ``[]`` counts on that line,

    a guarded ``print`` is inserted for each comma-separated name to the left
    of the first ``=`` that is a valid, non-keyword identifier. The print shows
    at most the first 100 characters of the value and ignores any error.

    This is a purely textual heuristic: a line such as ``x == 1`` also counts
    as containing ``=``.

    Args:
        query: Python source code as a single string.

    Returns:
        The instrumented source code.
    """
    preamble = "import math\nimport numpy as np\nimport sympy as sp\n"
    instrumented = []
    for line in (preamble + query).strip().split("\n"):
        instrumented.append(line)
        if line.startswith(" ") or "=" not in line:
            continue
        # TODO: use some AST to parse code
        brackets_balanced = (line.count("(") == line.count(")")
                             and line.count("[") == line.count("]"))
        if not brackets_balanced:
            continue
        left_hand_side = line.split("=")[0].strip()
        for raw_name in left_hand_side.split(","):
            name = raw_name.strip()
            if keyword.iskeyword(name) or not name.isidentifier():
                continue
            instrumented.append(
                f'\ntry:\n    print(f"{name}={{str({name})[:100]}}")\nexcept:\n    pass\n'
            )
    return "\n".join(instrumented)

In [None]:
def extract_boxed_text(text):
    """Return the final non-empty ``\\boxed{...}`` payload found in ``text``.

    The last non-empty match is used because a response may contain an empty
    ``\\boxed{}`` or an intermediate boxed value before the final answer;
    returning the first match would discard the real answer in those cases.

    Limitations: matching is single-line and stops at the first ``}``, so a
    nested payload such as ``\\boxed{\\frac{1}{2}}`` is cut short.

    Args:
        text: Model output to search.

    Returns:
        The text inside the last non-empty box, or ``""`` when there is none.
    """
    pattern = r'oxed{(.*?)}'
    for candidate in reversed(re.findall(pattern, text)):
        if candidate:
            return candidate
    return ""

In [None]:
from collections import Counter
import random


def select_answer(answers):
    """Pick the most frequent integer answer and reduce it modulo 1000.

    Only candidates that ``int()`` accepts and whose ``int`` and ``float``
    values agree are counted (so ``"42"`` counts, ``"42.0"`` and ``"abc"`` do
    not). Each vote carries a tiny random bonus (< 0.001) so that ties between
    equally frequent answers are broken at random.

    Args:
        answers: Iterable of candidate answers, typically strings.

    Returns:
        The winning answer modulo 1000, or the fallback ``210`` when no
        candidate is a valid integer.
    """
    votes = Counter()
    for answer in answers:
        try:
            as_int = int(answer)
            if as_int == float(answer):
                votes[as_int] += 1 + random.random() / 1_000
        except (ValueError, TypeError, OverflowError):
            # Not an integer-valued candidate (text, None, inf, ...): ignore it.
            # Only conversion errors are swallowed, so KeyboardInterrupt and
            # genuine bugs are no longer hidden.
            continue
    if not votes:
        return 210
    # Highest weight wins; the value itself is the final tie-breaker.
    winner, _ = max(votes.items(), key=lambda item: (item[1], item[0]))
    return winner % 1000

In [None]:
import tempfile
import subprocess


class PythonREPL:
    """Run a snippet of Python source in a fresh ``python3`` subprocess.

    Calling an instance writes the code to ``tmp.py`` inside a temporary
    directory, executes it, and returns ``(succeeded, output)``:

    * exit code 0: ``(True, <stripped stdout>)``
    * non-zero exit: ``(False, <stdout, if any, then stderr>)`` with the
      temporary file path replaced by ``<temporary_file>``
    * timeout: ``(False, "Execution timed out after N seconds.")``
    """

    def __init__(self, timeout=5):
        # Maximum run time of the subprocess, in seconds.
        self.timeout = timeout

    def __call__(self, query):
        with tempfile.TemporaryDirectory() as scratch_dir:
            script_path = os.path.join(scratch_dir, "tmp.py")
            with open(script_path, "w", encoding="utf-8") as script:
                script.write(query)

            try:
                completed = subprocess.run(
                    ["python3", script_path],
                    capture_output=True,
                    check=False,
                    text=True,
                    timeout=self.timeout,
                )
            except subprocess.TimeoutExpired:
                return False, f"Execution timed out after {self.timeout} seconds."

            printed = completed.stdout.strip()
            if completed.returncode == 0:
                return True, printed

            # Hide the throw-away script location from the traceback.
            errors = completed.stderr.strip().replace(script_path,
                                                      "<temporary_file>")
            if printed:
                return False, f"{printed}\n{errors}"
            return False, errors

In [None]:
# Decoding settings used by batch_message_generate for every llm.generate call.
# NOTE(review): max_tokens=3072 against max_model_len=4096 leaves about 1024
# tokens for the prompt, and conversations grow with each round - confirm that
# later rounds cannot exceed the context length.
sampling_params = SamplingParams(
    temperature=0.7,
    # min_p=0.01,
    top_p=0.8,
    #skip_special_tokens=True,
    max_tokens=3072,
    # Stop as soon as a code fence is closed and keep the fence in the output:
    # extract_python_code only matches blocks that have their closing fence.
    stop=["```\n"],
    include_stop_str_in_output=True,
)


def batch_message_generate(list_of_messages) -> list[list[dict]]:
    """Generate one assistant reply for every conversation in the batch.

    Each conversation is rendered with the tokenizer's chat template, all
    prompts are sent to the model in one ``llm.generate`` call, and the first
    output of each request is printed, logged via ``log_model_interaction``
    and appended to its conversation as an ``assistant`` message.

    Args:
        list_of_messages: Conversations, each a list of
            ``{'role': ..., 'content': ...}`` dicts. Modified in place.

    Returns:
        The same ``list_of_messages`` object, each conversation one message longer.
    """
    prompts = []
    for conversation in list_of_messages:
        prompts.append(
            tokenizer.apply_chat_template(conversation=conversation,
                                          tokenize=False,
                                          add_generation_prompt=True))

    generations = llm.generate(
        prompts=prompts,
        sampling_params=sampling_params,
        use_tqdm=True,
    )

    for conversation, prompt, generation in zip(list_of_messages, prompts,
                                                generations):
        print("\n=== Input Prompt ===")
        print(prompt)
        print("==================\n")

        reply = generation.outputs[0].text
        print("=== Model Response ===")
        print(reply)
        print("=====================\n")

        # Log before appending so the logged input excludes the new reply.
        log_model_interaction(conversation, reply)
        conversation.append({'role': 'assistant', 'content': reply})

    return list_of_messages

In [None]:
def batch_message_filter(
        list_of_messages) -> tuple[list[list[dict]], list[str]]:
    """Split conversations into still-open ones and extracted answers.

    The last message of each conversation is searched with
    ``extract_boxed_text``. A conversation whose last message yields a
    non-empty boxed answer is dropped and its answer collected; every other
    conversation is kept for another round.

    Args:
        list_of_messages: Conversations, each a list of message dicts.

    Returns:
        ``(conversations_to_continue, extracted_answers)``.
    """
    still_open, answers = [], []
    for conversation in list_of_messages:
        boxed = extract_boxed_text(conversation[-1]['content'])
        if not boxed:
            still_open.append(conversation)
            continue
        answers.append(boxed)
    return still_open, answers

In [None]:
def batch_message_execute(list_of_messages) -> list[list[dict]]:
    """Run the Python code from each conversation's last message and feed back the output.

    For every conversation the fenced Python blocks of the last message are
    extracted, instrumented with ``process_python_code`` and executed with
    ``PythonREPL``. The result (or the text of any exception raised while
    running) is appended as a ``user`` message wrapped in an ``output`` fence.

    Progress is printed as single characters: ``c`` when execution starts, then
    ``o`` (success), ``e`` (script failed) or ``f`` (runner raised), followed
    by the executed code and its output.

    Args:
        list_of_messages: Conversations, each a list of message dicts.
            Modified in place.

    Returns:
        The same ``list_of_messages`` object.
    """
    for conversation in list_of_messages:
        code = process_python_code(
            extract_python_code(conversation[-1]['content']))
        # print('\n\n' + python_code + '\n\n')
        try:
            print('c', end='')
            succeeded, output = PythonREPL()(code)
            print('o' if succeeded else 'e', end='')
        except Exception as exc:
            print('f', end='')
            output = str(exc)
        for chunk in (code, "", output, "\n\n"):
            print(chunk)
        feedback = {
            'role': 'user',
            'content': "```output\n" + output + "\n```"
        }
        conversation.append(feedback)
    print()
    return list_of_messages

In [None]:
from enum import Enum


class MessageStyle(Enum):
    """Prompting style that create_starter_messages uses to build a conversation."""
    # System prompt asks for step-by-step reasoning only.
    COT = "chain_of_thought"
    # System prompt asks to combine natural-language reasoning with programs.
    TIR = "tool_integrated_reasoning"
    # Choose between the two styles above based on the conversation index.
    ALTERNATE = "alternate"


def create_starter_messages(question, index, style=MessageStyle.ALTERNATE):
    """Build the opening system and user messages for one conversation.

    Args:
        question: Problem statement to put in the user message.
        index: Position of this conversation in the batch; only consulted in
            ``ALTERNATE`` mode, where odd indices get the chain-of-thought
            prompt and even indices the tool-integrated prompt.
        style: Which prompt style to use.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts (system, user).
    """

    def opening(system_prompt, user_prompt):
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    # https://github.com/QwenLM/Qwen2.5-Math?tab=readme-ov-file#-hugging-face-transformers
    cot_message = opening(
        "Please reason step by step, and put your final answer within \\boxed{}.",
        question,
    )
    tir_message = opening(
        "Please integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.",
        question + "\n\nBegin your answer by importing math and sympy.",
    )

    if style == MessageStyle.COT:
        use_cot = True
    elif style == MessageStyle.TIR:
        use_cot = False
    else:  # ALTERNATE
        cycle_size = 2  # Preserved for alternate mode
        use_cot = index % cycle_size == 1
    return cot_message if use_cot else tir_message

In [None]:
@problem_timer
def predict_for_question(question: str, id_: str) -> int:
    """Answer one problem by sampling several conversations and voting.

    Returns the fallback answer 210 straight away when either
    * ``KAGGLE_IS_COMPETITION_RERUN`` is unset/empty and ``id_`` is not one of
      the two ids used for local runs, or
    * the global ``cutoff_time`` has already passed.

    Otherwise six chain-of-thought conversations are started and taken through
    up to four rounds of generate -> filter -> execute. Boxed answers collected
    in any round are passed to ``select_answer``.

    Args:
        question: Problem statement.
        id_: Problem identifier.

    Returns:
        The selected answer as returned by ``select_answer``.
    """
    fallback_answer = 210
    local_run_ids = ("1acac0", "480182")
    if not os.getenv('KAGGLE_IS_COMPETITION_RERUN') and id_ not in local_run_ids:
        return fallback_answer
    if time.time() > cutoff_time:
        return fallback_answer

    sanity_hint = (
        "\nImportant: If your calculations result in a number greater than 1e6, "
        "this likely indicates an error - please review your approach. "
        "For calculations of $x \\bmod m$, ensure your result $r$ satisfies $0 \\leq r < m$."
    )
    question = question + sanity_hint

    num_messages = 6
    max_rounds = 4
    # Only CoT messages
    conversations = []
    for position in range(num_messages):
        conversations.append(
            create_starter_messages(question, position, MessageStyle.COT))

    collected_answers = []
    for _ in range(max_rounds):
        conversations = batch_message_generate(conversations)
        conversations, newly_boxed = batch_message_filter(conversations)
        collected_answers += newly_boxed
        if not conversations:
            break
        # NOTE: this also runs after the final generation round, although its
        # output is never fed back to the model in that case.
        conversations = batch_message_execute(conversations)

    print(collected_answers)
    answer = select_answer(collected_answers)
    print(answer)
    print("\n\n")
    return answer

In [None]:
# Replace this function with your inference code.
# The function should return a single integer between 0 and 999, inclusive.
# Each prediction (except the very first) must be returned within 30 minutes of the question being provided.
def predict(id_: pl.DataFrame,
            question: pl.DataFrame) -> pl.DataFrame | pd.DataFrame:
    """Inference-server callback: answer the single problem it is handed.

    Args:
        id_: Frame whose first item is the problem id.
        question: Frame whose first item is the problem statement.

    Returns:
        A polars DataFrame with the columns ``id`` and ``answer``.
    """
    problem_id = id_.item(0)
    print("------")
    print(problem_id)
    problem_text = question.item(0)
    answer = predict_for_question(problem_text, problem_id)
    print(problem_text)
    print("------\n\n\n")
    return pl.DataFrame({'id': problem_id, 'answer': answer})

In [None]:
# predict_for_question("Triangle $ABC$ has side length $AB = 120$ and circumradius $R = 100$. Let $D$ be the foot of the perpendicular from $C$ to the line $AB$. What is the greatest possible length of segment $CD$?", "1acac0")

In [None]:
# Write a copy of the competition's reference problems, without the 'answer'
# column, to ./reference.csv; the local gateway below reads that file.
pd.read_csv(
    '/kaggle/input/ai-mathematical-olympiad-progress-prize-2/reference.csv'
).drop('answer', axis=1).to_csv('reference.csv', index=False)

In [None]:
# Hand the predict callback to the competition's inference server.
inference_server = kaggle_evaluation.aimo_2_inference_server.AIMO2InferenceServer(
    predict)

# In a competition rerun (environment variable set) serve the real evaluation;
# otherwise run the local gateway against the answer-stripped reference.csv.
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('reference.csv', ))

In [None]:
"""
import gc  # garbage collector

# clean memory (RAM and GPU memory) to avoid memory leaks and out-of-memory errors (e.g., due to PyTorch tensor parallelism)
# and improve performance (e.g., by reducing memory fragmentation) and stability (e.g., by avoiding memory leaks) of the VLLM model
def clean_memory(deep=False):
    gc.collect()  # garbage collector (RAM)
    if deep:
        # memory allocator (RAM) for PyTorch tensors
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    # memory allocator (GPU) for PyTorch tensors
    torch.cuda.empty_cache()


# delete the VLLM model to free up GPU memory
del llm

# clean memory (RAM and GPU memory) to avoid memory leaks and out-of-memory errors
clean_memory(deep=True)
"""

In [None]:
# Modified file handling code
def save_and_display_files():
    """Gather files from the Kaggle working directory and show download links.

    Steps:
      1. Return early with a message if ``/kaggle/working`` does not exist.
      2. Copy every file under it into ``/kaggle/working/outputs``. Hidden
         files and any directory whose path contains ``outputs`` are skipped.
         Copies are flat, so files with the same name overwrite one another.
      3. Display a ``FileLink`` for each copied file.
      4. Zip the outputs directory to ``/kaggle/working/outputs.zip`` and
         display a link to it.

    Copy, link and zip failures are reported with ``print`` and do not raise.
    The links use relative paths (``outputs/<name>``, ``outputs.zip``).
    """
    import os
    from IPython.display import HTML, FileLink, display
    import shutil

    # Ensure we're in the Kaggle working directory
    kaggle_root = "/kaggle/working"
    if not os.path.exists(kaggle_root):
        print("Error: Not running in Kaggle environment")
        return

    # Create a new directory for our outputs
    bundle_dir = os.path.join(kaggle_root, "outputs")
    os.makedirs(bundle_dir, exist_ok=True)

    # Copy every relevant file, remembering the ones that succeeded
    copied_names = []
    for folder, _, names in os.walk(kaggle_root):
        # Skip the outputs directory itself
        if "outputs" in folder:
            continue
        for name in names:
            # Skip hidden / temp files
            if name.startswith('.'):
                continue
            source = os.path.join(folder, name)
            try:
                shutil.copy2(source, os.path.join(bundle_dir, name))
            except Exception as err:
                print(f"Error copying {source}: {err}")
            else:
                copied_names.append(name)

    # Show one link per copied file
    if not copied_names:
        print("No files were found to copy")
    else:
        print("\nFiles available for download:")
        for name in sorted(copied_names):
            try:
                link = FileLink(os.path.join("outputs", name),
                                result_html_prefix=f"{name}: ")
                display(link)
            except Exception as err:
                print(f"Error creating link for {name}: {err}")

    # Create zip file of all outputs
    try:
        shutil.make_archive(os.path.join(kaggle_root, "outputs"), 'zip',
                            bundle_dir)
        print("\nAll files are also available in a single zip:")
        display(
            FileLink("outputs.zip", result_html_prefix="Download all files: "))
    except Exception as err:
        print(f"Error creating zip file: {err}")


# Collect the files written to the working directory (for example the
# interaction log and reference.csv) and display download links for them.
save_and_display_files()