In [1]:
!pip install -q --no-index --upgrade --find-links=file:///kaggle/input/download-packages-and-model/packages lmdeploy

In [2]:
from lmdeploy import pipeline, TurbomindEngineConfig
# Token budget shared by the engine configuration below and by the prompt-length
# checks in the solving loop.
max_seq_length = 4096

# Build the inference pipeline from the locally stored AWQ model; both the session
# length and the prefill token cap are tied to max_seq_length.
pipe = pipeline(
    "/kaggle/input/download-packages-and-model/MathGenie-InterLM-20B-AWQ",
    backend_config=TurbomindEngineConfig(
        model_format='awq',
        cache_max_entry_count=0.6,
        quant_policy=4,
        tp=2,
        session_len=max_seq_length,
        max_prefill_token_num=max_seq_length,
    ),
    stream_response=False,
)

2024-04-29 11:21:46.301106: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 11:21:46.301253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 11:21:46.446809: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tok



In [3]:
import nest_asyncio
# Apply the nest_asyncio patch once, before the pipeline is invoked further down.
# NOTE(review): presumably needed so the pipeline can run inside the notebook's
# already-running event loop — confirm against the lmdeploy version in use.
nest_asyncio.apply()

In [4]:
import traceback
import sys
import signal
from contextlib import contextmanager, redirect_stdout
import resource
import re
import gc
import torch
import sys
import sympy as sp
import math
import numpy as np
from io import StringIO
import os
# A limit of 0 disables Python's cap on the number of digits allowed in
# int <-> str conversions, so very large integers can still be converted.
sys.set_int_max_str_digits(0)

# True exactly when the KAGGLE_IS_COMPETITION_RERUN environment variable is set
# to a non-empty value; False otherwise.
PRIVATE = bool(os.getenv('KAGGLE_IS_COMPETITION_RERUN'))
        
class TimeoutException(Exception):
    """Signals that a guarded piece of code exceeded its time budget."""

@contextmanager
def memory_limit(limit, type=resource.RLIMIT_AS):
    """Context manager that temporarily replaces a resource's soft limit.

    Args:
        limit: soft limit to apply while the ``with`` body runs.
        type: ``resource.RLIMIT_*`` constant to adjust (address space by default).

    The hard limit is left as it was, and the soft limit that was in effect on
    entry is put back on exit, including when the body raises.
    """
    original_limits = resource.getrlimit(type)
    # Only the soft half of the (soft, hard) pair is swapped out.
    resource.setrlimit(type, (limit, original_limits[1]))
    try:
        yield
    finally:
        resource.setrlimit(type, original_limits)


# Executed code can run for a very long time; this guard interrupts it.
@contextmanager
def time_limit(seconds):
    """Raise ``TimeoutException`` in the ``with`` body after ``seconds`` elapse.

    Args:
        seconds: time budget; may be an int or a float (fractions of a second
            are honoured). A value of 0 arms no timer.

    Raises:
        TimeoutException: inside the guarded body, when the timer fires.

    Relies on SIGALRM, so it has to be used from the main thread. The SIGALRM
    handler that was installed on entry is reinstated on exit, so the timeout
    handler does not leak into code that runs after the ``with`` block.
    """
    def signal_handler(signum, frame):
        raise TimeoutException(f"Execution time exceeded {seconds} seconds")

    # signal.signal returns the handler being replaced; keep it for the restore.
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    try:
        signal.setitimer(signal.ITIMER_REAL, seconds)
        yield
    finally:
        # Disarm the timer first so it cannot fire into the restored handler.
        signal.setitimer(signal.ITIMER_REAL, 0)
        # A handler not installed from Python is reported as None, which
        # signal.signal() would reject; fall back to the default action.
        signal.signal(
            signal.SIGALRM,
            previous_handler if previous_handler is not None else signal.SIG_DFL,
        )

# Run a code snippet with exec/eval under a time limit and a memory limit.
# NOTE: the snippet runs inside this process; the limits are a guard against
# runaway code, not a security sandbox.
def run_code(code_string, run_globals=None):
    """Execute a snippet and return its result, its printed output, or a traceback.

    Args:
        code_string: Python source to run.
        run_globals: dict used as the snippet's globals. Passing the same dict
            to successive calls keeps variables alive between them. A fresh
            dict is used when omitted.

    Returns:
        - the value of the final statement, when that statement is an
          expression evaluating to something other than None;
        - otherwise the text the snippet wrote to stdout, if any;
        - otherwise None when the snippet ended in an expression, or the string
          'No execution results' when it did not;
        - when the snippet raises an ``Exception`` (including a timeout), the
          formatted traceback with the offending source line inserted after
          each ``File "<string>", line N, in <module>`` entry.

    Every statement is executed exactly once: the trailing expression is split
    off with ``ast`` and evaluated on its own instead of being re-run.
    """
    import ast  # used only by this function, so imported locally

    if run_globals is None:
        run_globals = {}
    # Execute the same stripped text that code_lines is built from, so the line
    # numbers reported in tracebacks index code_lines correctly.
    source = code_string.strip()
    code_lines = source.split('\n')
    stdout_buffer = StringIO()
    try:
        with time_limit(10):
            with memory_limit(1 << 30):
                tree = ast.parse(source, filename='<string>')
                trailing_expr = None
                if tree.body and isinstance(tree.body[-1], ast.Expr):
                    # Detach the last expression so it can be evaluated for its value.
                    trailing_expr = ast.Expression(tree.body.pop().value)
                value = None
                with redirect_stdout(stdout_buffer):
                    exec(compile(tree, '<string>', 'exec'), run_globals)
                    if trailing_expr is not None:
                        value = eval(compile(trailing_expr, '<string>', 'eval'), run_globals)
        if value is not None:
            return value
        printed = stdout_buffer.getvalue()
        if printed:
            return printed
        if trailing_expr is not None:
            return value
        return 'No execution results'
    except Exception:
        tb = traceback.format_exc(limit=4)

        def _annotate(match):
            # Append the source line the traceback entry points at, when it exists.
            lineno = int(match.group(1))
            if 1 <= lineno <= len(code_lines):
                return f"{match.group(0)}\n    {code_lines[lineno - 1]}"
            return match.group(0)

        return re.sub(r'File "<string>", line (\d+), in <module>', _annotate, tb)

# Simple regex, copied from Olga's notebook. Thanks :)
def parse_answer(text):
    r"""Return the contents of the last ``\boxed{...}`` found in ``text``.

    Only boxes whose contents consist solely of digits, whitespace and dots are
    recognised. The captured text is returned unchanged as a string; None is
    returned when no such box is present.
    """
    last_boxed = None
    for match in re.finditer(r'\\boxed\{([\d\s.]+)\}', text):
        last_boxed = match.group(1)
    return last_boxed


if not PRIVATE:
    import pandas as pd

    class train_env():
        """Local stand-in for the evaluation environment, backed by train.csv.

        It exposes the two members the solving loop uses: ``iter_test()``,
        which yields one problem at a time, and ``predict()``, which records
        the answer for that problem and unlocks the next one.
        """

        def __init__(self, randomize=False):
            """Load the training problems, optionally in shuffled order."""
            # The attribute keeps its original spelling so existing references
            # to it keep working.
            self.randomlize = randomize

            self.df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
            # Keep the true answers aside and blank the column that gets predicted.
            self.df['ground_truth'] = self.df['answer']
            self.df['answer'] = -1

            if self.randomlize:
                self.df = self.df.reset_index().sample(frac=1).reset_index(drop=True)

            self.predict_called = True
            self.counter = 0
            self.len = len(self.df)

        def iter_test(self):
            """Yield ``(problem_frame, submission_frame)`` for each row in turn.

            Both frames contain the single current row. If ``predict()`` was
            not called since the previous item, a reminder is printed and None
            is yielded instead of advancing.
            """
            while self.counter < self.len:
                if self.predict_called:
                    self.predict_called = False
                    yield (self.df.loc[[self.counter]][['id','problem']]),(self.df.loc[[self.counter]][['id','answer']])
                else:
                    print("You must call `predict()` successfully before you can continue with `iter_test()`")
                    yield None

        def predict(self, answer):
            """Record ``answer['answer']`` for the current row and advance.

            ``answer`` is typically the one-row submission frame handed out by
            ``iter_test()``; any mapping whose 'answer' entry is a scalar is
            accepted as well.
            """
            submitted = answer['answer']
            # A one-row frame gives a Series here; unwrap it to a scalar.
            if isinstance(submitted, pd.Series):
                submitted = submitted.iloc[0]
            # .loc writes into the existing cell. Plain df[row, col] = ... would
            # instead add a brand-new column keyed by the tuple (row, col).
            self.df.loc[self.counter, 'answer'] = submitted
            self.predict_called = True
            self.counter += 1

    env = train_env(randomize=True)
    iter_test = env.iter_test()
else:
    # Set up the evaluation API
    import aimo

    env = aimo.make_env()
    iter_test = env.iter_test()
    
def clean_globals(code_globals):
    """Remove every entry from ``code_globals`` except a small keep-list.

    The keys 'sp', 'math' and '__builtins__' are left in place; all others are
    deleted from the dict in place, after which a garbage collection is forced.
    """
    preserved = ('sp', 'math', '__builtins__')
    stale_names = [name for name in code_globals if name not in preserved]
    for name in stale_names:
        del code_globals[name]
    gc.collect()

In [5]:
# Prompt templates as (prompt_type, template) pairs. Each template is filled in
# with str.format(problem=...), which is why literal braces are written doubled
# ({{ }}). The commented-out entries are alternative prompts that are currently
# disabled; only the last 'text' template is active.
prompts = [
#     ('code', "<|system|><|text|>Below is a math problem. Please solve it step by step.\nPut your final answer in: final_answer = '\\boxed{{' + answer + '}}'.<|endofblock|><|endofmessage|><|user|><|text|>{problem} The answer should be given as a non-negative modulo 1000.<|endofblock|><|endofmessage|><|assistant|><|code|>"),
#     ('code', "<|system|><|text|>Below is a math problem. Please solve it step by step.\nPut your final answer in: final_answer = '\\boxed{{' + answer + '}}'.<|endofblock|><|endofmessage|><|user|><|text|>{problem} The answer should be given as a non-negative modulo 1000.<|endofblock|><|endofmessage|><|assistant|><|code|>import math\nimport sympy as sp\n\n# Alright let's solve this problem step by step.\n# Step 1:\n"),
#     ('text', "<|system|><|text|>Below is a math problem. Please solve it step by step.\nPut your final answer within \\boxed{{}}.<|endofblock|><|endofmessage|><|user|><|text|>{problem} The answer should be given as a non-negative modulo 1000.<|endofblock|><|endofmessage|><|assistant|><|text|>To tackle this problem, let's start by analyzing the constraints and deducing a strategy step by step.\nStep 1: Understanding the Constraints"),
    ('text', '<|system|><|text|>Below is a math problem. Please solve it step by step.\nPut your final answer within \\boxed{{}}.<|endofblock|><|endofmessage|><|user|><|text|>{problem} The answer should be given as a non-negative modulo 1000.<|endofblock|><|endofmessage|><|assistant|>')
]

In [6]:
from lmdeploy import  GenerationConfig
from collections import Counter
# --- Configuration for the solving loop ---
solution_iters = 5        # attempts per prompt template (votes per problem)
max_code_blocks = 40      # upper bound on generated blocks per attempt
max_prompt_len = max_seq_length - 256  # stop extending the prompt beyond this many tokens
verify_suffix = "<|text|>Now when we have our answer let's verify it using python code.\nTo verify the answer we have to:\n"
verbose=(0 if PRIVATE else 2)
total_answers=[]

# Every template is repeated solution_iters times; each repetition is one vote.
solution_prompts = [val for val in prompts for _ in range(solution_iters)]
code_globals = {}
for test, sample_submission in iter_test:
    # The problem statement is the same for every attempt, so read it once here.
    problem_text = test['problem'].values[0]
    # majority voting
    answers = []
    for j, (prompt_type, prompt_template) in enumerate(solution_prompts):
        answer = -1  # -1 marks "no answer found" for this attempt
        # Start each attempt from a cleaned globals dict for run_code.
        if code_globals:
            clean_globals(code_globals)
        # template for model.
        prompt = prompt_template.format(problem=problem_text)

        if verbose > 1:
            print('-------------')
            print(prompt)

        # Series of generation rounds; each round produces one new block.
        is_text_last=(prompt_type=='code')
        for k in range(max_code_blocks):
            try:
                prompt_len = len(pipe.tokenizer.encode(prompt))
                if prompt_len >= max_prompt_len:
                    if verbose >= 1:
                        print(f'[Debug] Max len reached')
                    break
                if verbose >= 1:
                    print(f'[Debug] Prompt length: {prompt_len}')

                # Alternative sampling settings tried before:
                # top_k=40, temperature=0.8, top_p=0.5,repetition_penalty=1.1
                gen_config = GenerationConfig(
                    max_new_tokens=min(1024, max_seq_length - prompt_len - 32),
                    temperature=0.2,
                    top_k=(1 if is_text_last else 20),
                    stop_words=['<|endofblock|>'],
                    skip_special_tokens=False,
                )
                response = pipe([prompt], gen_config=gen_config)[0]
                if len(response.text.strip()) == 0:
                    break

                new_block = response.text + "<|endofblock|>"
                code_output = ""
                is_text_last = new_block.startswith('<|text|>')
                # new block is a code block
                if new_block.startswith('<|code|>') or prompt_type == 'code':
                    # we got new code to run
                    code_text = new_block.replace('<|code|>', '').replace('<|endofblock|>', '').strip()

                    # Run with the shared globals so variables persist across code runs.
                    code_result = str(run_code(code_text, code_globals))
                    if len(pipe.tokenizer.encode(code_result)) > 512:
                        code_result = f"Error: output is too long. Try to reduce number of printed items, consider using slice.\n{code_result[:50]}..."
                    code_output = f"<|execution|>{code_result}<|endofblock|><|{prompt_type}|>"

                # The prompt already holds the whole context; append the new block
                # and, if any, the execution result.
                prompt += new_block + code_output

                if verbose > 1:
                    print(new_block)
                    print(code_output)

                block_answer = parse_answer(new_block + code_output)
                if block_answer is not None:
                    answer = int(block_answer)
                    break
            except Exception as exc:
                # Best effort: a failed round must not abort the attempt, but it is
                # reported instead of being hidden. Catching Exception rather than
                # everything lets KeyboardInterrupt/SystemExit stop the cell.
                if verbose >= 1:
                    print(f'[Debug] Block {k} failed: {exc!r}')
        answers.append(answer)
    # Among the two most frequent votes, take the first that is not the -1
    # "no answer" marker; fall back to 0 when there is none.
    mostvoted = (list(filter(lambda x: (x[0] != -1), Counter(answers).most_common(2)))+[(0,0)])[0][0]
    sample_submission['answer'] = mostvoted
    print(f'Q: {problem_text}\nA: {mostvoted}\n')
    env.predict(sample_submission)

-------------
<|system|><|text|>Below is a math problem. Please solve it step by step.
Put your final answer within \boxed{}.<|endofblock|><|endofmessage|><|user|><|text|>Suppose that we roll four 6-sided fair dice with faces numbered 1 to~6. Let $a/b$ be the probability that the highest roll is a 5, where $a$ and $b$ are relatively prime positive integers. Find $a + b$. The answer should be given as a non-negative modulo 1000.<|endofblock|><|endofmessage|><|assistant|>
[Debug] Prompt length: 103
<|text|> Alright, let's solve this step by step.

First, let's consider the ways in which we can get the highest roll to be a 5:

1. All dice show a 5.
2. Three dice show a number other than 5, and one die shows a 5.
3. Two dice show a number other than 5, and two dice show a 5.
4. One die shows a number other than 5, and three dice show a 5.
5. All dice show a number other than 5, and one die shows a 5.

Now, let's calculate the probability for each of these cases:

1. Probability that all di

In [None]:
# Debug helper: re-run the most recently generated code block. run_code takes
# the globals dict as its second argument, so pass the shared one.
run_code(code_text, code_globals)