In [1]:
# credits: https://www.kaggle.com/code/aatiffraz/prompt-prediction-w-mixtral-mistral7b-gemma-llama

In [2]:
!pip install -U /kaggle/input/bitsandbytes-0-42-0-py3-none-any-whl/bitsandbytes-0.42.0-py3-none-any.whl -qq

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig

# Directory of the Mixtral 8x7B Instruct checkpoint attached as a Kaggle input.
MODEL_PATH = "/kaggle/input/mixtral/pytorch/8x7b-instruct-v0.1-hf/1"

# Load the weights in 4-bit NF4 with double quantization; matrix multiplications
# are computed in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Gradient checkpointing is deliberately NOT enabled on the config: it trades
# compute for memory during the backward pass of training and has no effect on
# inference-only generation, which is all this notebook does.
config = AutoConfig.from_pretrained(MODEL_PATH)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# device_map="auto" lets the loader place the quantized layers across the
# available devices. trust_remote_code is not passed, so no Python code shipped
# with the checkpoint is executed.
# NOTE(review): assumes the installed transformers version supports Mixtral
# natively - confirm against the Kaggle image.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    quantization_config=quantization_config,
    config=config,
)

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [4]:
import pandas as pd
from tqdm import tqdm
# Read the competition test set shipped with the Kaggle input.
df = pd.read_csv(
    '/kaggle/input/ai-mathematical-olympiad-prize/test.csv'
)

# Start from the assumption that this is a private (scored) run; the row-count
# check in the following cell may flip this flag.
PRIVATE = True

df.head()

Unnamed: 0,id,problem
0,000aaa,What is $1-1$?
1,111bbb,What is $0\times10$?
2,222ccc,Solve $4+x=4$ for $x$.


In [5]:
# A test set with fewer than 5 rows is treated as a placeholder: switch to the
# labelled training set and mark the run as non-private so that predictions can
# be compared against the known answers later on.
if df.shape[0] < 5:
    PRIVATE = False
    df = pd.read_csv(
        '/kaggle/input/ai-mathematical-olympiad-prize/train.csv'
    )
df.head()

Unnamed: 0,id,problem,answer
0,229ee8,"Let $k, l > 0$ be parameters. The parabola $y ...",52
1,246d26,Each of the three-digits numbers $111$ to $999...,250
2,2fc4ad,Let the `sparkle' operation on positive intege...,702
3,430b63,What is the minimum value of $5x^2+5y^2-8xy$ w...,800
4,5277ed,There exists a unique increasing geometric seq...,211


In [6]:
def gen_prompt(problem):
    """Build the instruction-style prompt for a single problem.

    Args:
        problem: The problem statement; it is interpolated verbatim under the
            ``### Instruction:`` header.

    Returns:
        The prompt string: a generic task preamble, the problem under
        ``### Instruction:``, and a ``### Response:`` line asking for
        step-by-step reasoning ending with a single number on the last line.
    """
    preamble = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
    )
    instruction = f"### Instruction:\n{problem}"
    response_cue = (
        "### Response: Let's think step by step. "
        "The final response should be a single number in the last line of your response."
    )
    # Sections are separated by three newlines; the prompt also starts and ends
    # with a single newline.
    return "\n" + preamble + "\n\n\n" + instruction + "\n\n\n" + response_cue + "\n"

In [7]:
import gc
# Name of the accelerator device intended for the inference tensors.
device = "cuda"

In [8]:
def naive_parse(answer):
    """Return the last run of consecutive ASCII digits in ``answer`` as an int.

    The text is scanned from the end: trailing non-digit characters are
    skipped, then digits are collected until the first non-digit is reached.
    Signs, decimal points and thousands separators count as ordinary
    non-digits, so ``"3.14"`` yields ``14`` and ``"-5"`` yields ``5``.

    Args:
        answer: Text to search (the caller passes the last line of a model
            response).

    Returns:
        The integer value of the last digit run.

    Raises:
        ValueError: If ``answer`` contains no digit at all.
    """
    digits = []
    for ch in reversed(list(answer)):
        if ch in '0123456789':
            digits.append(ch)
        elif digits:
            # First non-digit after the number started: the run is complete,
            # so there is no need to look at the rest of the text.
            break

    if not digits:
        # Fail with an explicit message instead of the opaque int('') error.
        raise ValueError(f"no digits found in answer text: {answer!r}")

    # Digits were gathered back-to-front; restore reading order before parsing.
    return int(''.join(reversed(digits)))

In [9]:
# One predicted answer per row of df, in row order. Any failure for a problem
# is recorded as 0 so that the list always lines up with df.
answers = []

# Iterate over the problems positionally so the loop does not depend on df
# having a default 0..n-1 index.
for i, problem in enumerate(tqdm(df['problem'].tolist())):
    try:
        query_prompt = gen_prompt(problem)
        messages = [{"role": "user", "content": query_prompt}]

        # Token ids of the chat-formatted prompt, moved to the inference device.
        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

        # Greedy decoding (do_sample=False), no gradients needed.
        with torch.no_grad():
            encoded_output = model.generate(
                inputs,
                max_new_tokens=1500,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        # generate() returns prompt + continuation; keep only the newly
        # generated tokens. Slicing by prompt length is robust, whereas removing
        # the prompt by string replacement silently fails whenever the decoded
        # prompt differs from the original text (e.g. whitespace changes).
        generated_tokens = encoded_output[0][inputs.shape[-1]:]
        decoded_output = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        print(i)
        print(decoded_output)

        # The prompt asks for the final number on the last line of the response.
        answer = naive_parse(decoded_output.split('\n')[-1])
        print(answer)
        # Answers are reduced modulo 1000 before being stored.
        answer = int(answer) % 1000
        print(answer)
        answers.append(answer)

    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to 0 so a
        # single failure does not abort the whole run.
        print(e)
        answers.append(0)

    finally:
        # Release memory after every problem, including failed ones (an
        # out-of-memory error is exactly when cleanup matters most). Collect
        # Python garbage first so freed tensors can be returned by empty_cache.
        gc.collect()
        torch.cuda.empty_cache()

  0%|          | 0/10 [00:00<?, ?it/s]2024-04-18 15:04:14.177288: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-18 15:04:14.177431: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-18 15:04:14.473344: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


0
First, we need to find the points of intersection $A$ and $B$ between the parabola and the line $y = 4$. This gives us the following system of equations:

$4 = kx^2 - 2kx + l$ and $y = 4 \Rightarrow 4 = y \Rightarrow 4 = kx^2 - 2kx + l$

Rearranging the second equation, we get $l = 4 - kx^2 + 2kx$. Substituting this into the first equation, we have:

$4 = kx^2 - 2kx + (4 - kx^2 + 2kx)$

Simplifying, we get:

$0 = -kx^2 + 2kx + 4$

Dividing through by $-k$, we have:

$x^2 - \frac{2}{k}x - \frac{4}{k} = 0$

This is a quadratic equation. We can solve it using the quadratic formula:

$x = \frac{-(-2/k) \pm \sqrt{(-2/k)^2 - 4(-1)(-4/k)}}{2(-1)}$

$x = \frac{2/k \pm \sqrt{4/k^2 + 16/k}}{2}$

Let $x_1$ and $x_2$ be the two solutions. Then, the points of intersection are $A(x_1, 4)$ and $B(x_2, 4)$. The sum of the squares of the distances from $A$ and $B$ to the origin is:

$d(A, 0)^2 + d(B, 0)^2 = x_1^2 + y_1^2 + x_2^2 + y_2^2 = x_1^2 + 4 + x_2^2 + 4 = x_1^2 + x_2^2 + 8$

Since the points a

 10%|█         | 1/10 [04:04<36:37, 244.18s/it]

1
To solve this problem, let's first notice that the sum of any two (not necessarily different) yellow numbers should be a blue number. This implies that the sum of any two yellow numbers should be unique, otherwise, we can replace one of them with a new blue number.

Now, let's consider the smallest and the largest possible blue numbers: 112 (the smallest even three-digit number) and 999. We can't have any yellow numbers greater than or equal to 112 because they can't form a unique sum with another yellow number without creating a blue number less than 112.

Let's see the largest possible sum of two yellow numbers that is still less than 112:
99 + 11 = 110

We can fit 9 nines and 11 ones in the given range (111-999) as yellow numbers:
99, 99, 99, 99, 99, 99, 99, 99, 11, 11, 11, ..., 11

The sum of these 12 yellow numbers is:
9 * 90 + 3 * 11 = 810 + 33 = 843

Since 843 is greater than 112, we can't fit more than 12 yellow numbers. However, we can replace one of the nines with a 112 and

 20%|██        | 2/10 [06:26<24:34, 184.30s/it]

2
To solve this problem, we need to find the special numbers that eventually lead to a number less than 6 through the sparkle operation. The sparkle operation involves calculating the sum of the digits of a number and taking its factorial. We need to find the special numbers such that no matter how many times we perform the sparkle operation, the resulting number will always be less than 6.

We can start by finding the factorial of numbers up to 10, as any number greater than 10 will eventually result in a number greater than or equal to 10 after the sparkle operation. The factorials of numbers up to 10 are as follows:

1! = 1
2! = 2
3! = 6
4! = 24
5! = 120
6! = 720
7! = 5040
8! = 40320
9! = 362880
10! = 3628800

We can see that the factorial of any number greater than 3 will result in a number greater than or equal to 6. Therefore, the special numbers can only be composed of the digits 0, 1, 2, and 3.

Furthermore, since the sum of the digits of any number greater than 6 will result i

 30%|███       | 3/10 [08:35<18:32, 158.99s/it]

3
To solve this problem, we can use the method of Lagrange multipliers. First, let's rewrite the equation $|x-2y| + |y-2x| = 40$ as two equations, one for the case when $x-2y \geq 0$ and $y-2x \geq 0$, and another for the case when $x-2y \leq 0$ and $y-2x \leq 0$. This gives us:

$x - 2y + y - 2x = 40$ for the first case, which simplifies to $3(x-y) = 40$, or $x-y = 40/3$.

And $-x + 2y - y + 2x = 40$ for the second case, which simplifies to $x+y = 40/3$.

Now, we want to find the minimum value of $5x^2 + 5y^2 - 8xy$ subject to these constraints. We introduce a Lagrange multiplier $\lambda$ and form the Lagrangian:

$L(x,y,\lambda) = 5x^2 + 5y^2 - 8xy - \lambda(|x-2y| + |y-2x| - 40/3)$

We then take the gradient of $L$ with respect to $x, y,$ and $\lambda$, and set them equal to zero:

$\nabla L = (10x - 8y - \lambda \cdot sign(x-2y), 10y - 8x - \lambda \cdot sign(y-2x), |x-2y| + |y-2x| - 40/3) = 0$

where $sign$ is the sign function.

Solving these equations is a bit involved, but we 

 40%|████      | 4/10 [11:58<17:39, 176.51s/it]

4
First, we need to understand what a geometric sequence is. It is a sequence of numbers where each term after the first is found by multiplying the previous term by a fixed, non-zero number.

Since it's an increasing geometric sequence of 2-digit positive integers, let's start with the smallest such number, which is 10. 

The next number in the sequence would be 10*r (where r is the common ratio). Since this number must also be a 2-digit positive integer, the smallest possible value for r would be 2/1 = 2 (the smallest prime number that ensures the sequence increases).

So, the second number is 10*2 = 20.

The third number is 20*2 = 40, but 40 is not a 2-digit positive integer, so we need to adjust our ratio. The next 2-digit positive integer after 2 is 3, so let's use that as our new ratio.

The third number is then 20*3 = 60, and the fourth number is 60*3 = 180.

The fifth number is 180*3 = 540, but 540 is not a 2-digit positive integer, so we made an error in our assumption that 3 

 50%|█████     | 5/10 [14:01<13:04, 156.96s/it]

5
To find the number of positive integers $m$ that result in 4 distinct solutions for the equation $|\ |x-1|\ -2|\ =\frac{m}{100}$, we first need to analyze the given equation.

The given equation can be simplified as follows:

1. $| |x-1| -2 |\ = \frac{m}{100}$
2. $|x-1| - 2 = \pm \frac{m}{100}$ (since $|y| = y$ if $y \geq 0$ and $|y| = -y$ if $y < 0$)
3. $|x-1| = 2 \pm \frac{m}{100}$

Now, let's consider the two possible cases for $|x-1|$:

Case 1: $x - 1 \geq 0 \Rightarrow x \geq 1$

* In this case, $|x-1| = x-1$
* So, $x - 1 = 2 \pm \frac{m}{100}$
* This leads to $x = 3 \pm \frac{m}{100}$

Case 2: $x - 1 < 0 \Rightarrow x < 1$

* In this case, $|x-1| = -(x-1) = 1-x$
* So, $1-x = 2 \pm \frac{m}{100}$
* This leads to $x = -1 \pm \frac{m}{100}$

For both cases to have solutions, the RHS must be greater than or equal to 1 and less than or equal to 3.

1. If $x = 3 + \frac{m}{100}$, then $3 + \frac{m}{100} \geq 1 \Rightarrow \frac{m}{100} \geq -2 \Rightarrow m \geq -200$
2. If $x = -1 +

 60%|██████    | 6/10 [17:40<11:52, 178.05s/it]

6
To find the probability that the highest roll is 5, we need to consider all possible scenarios where at least one of the dice shows a 5 and none of them show a 6.

First, let's calculate the total number of outcomes when rolling four 6-sided dice. This is $6^4 = 1296$.

Now, let's calculate the number of favorable outcomes:
- One die can show 5: There are 4 dice that can be chosen to show 5, and the remaining 3 dice must show numbers between 1 and 4. The number of ways to do this is $4 \times 4^3 = 256$.
- Two dice can show 5: There are $C(4,2) = 6$ ways to choose 2 dice out of 4, and the remaining 2 dice must show numbers between 1 and 4. The number of ways to do this is $6 \times 4^2 = 96$.
- Three dice can show 5: There is only $C(4,3) = 4$ ways to choose 3 dice out of 4, and the remaining die must show a number between 1 and 4. The number of ways to do this is $4 \times 4^1 = 16$.

So, the total number of favorable outcomes is $256 + 96 + 16 = 368$.

However, we also need to excl

 70%|███████   | 7/10 [20:51<09:07, 182.43s/it]

7
To find the area of the convex polygon, we first need to determine the set of points that satisfy the given equation. The equation equals zero, so we are looking for points where the expression inside the parentheses equals zero. This expression can be broken down into two separate expressions, both of which must equal zero:

1. $(\left | x+y \right | - 10)^2 + ( \left | x-y \right | - 10)^2 = 0$
2. $(\left | x \right | - 8)^2 + ( \left | y \right | - 8)^2 = 0$

Let's analyze each expression separately.

Expression 1:
$(\left | x+y \right | - 10)^2 = 0 \Rightarrow \left | x+y \right | = 10$
This expression represents a pair of lines, $x+y=10$ and $x+y=-10$.

Expression 2:
$(\left | x \right | - 8)^2 = 0 \Rightarrow \left | x \right | = 8$
This expression represents two lines, $x=8$ and $x=-8$.

Now, let's find the intersection points of these lines:

1. $x+y=10$, $x=8 \Rightarrow y=10-8=2$
2. $x+y=10$, $x=-8 \Rightarrow y=10-(-8)=18$
3. $x-y=8$, $x=8 \Rightarrow y=8-8=0$
4. $x-y=-8$,

 80%|████████  | 8/10 [24:22<06:22, 191.45s/it]

8
First, let's find the coordinates of points P and Q. Since A is the origin, the coordinates of P are (1/2, 0) and the coordinates of Q are (0, 1/24).

Next, we need to find the equations of the lines DP and BQ. The slope of DP is (0 - 1/2)/(1 - 0) = -1/2, so the equation of DP is y = -1/2(x - 1). The equation of BQ can be found using the fact that it passes through the point (1, 1) and has a slope of (1 - 1/24)/(0 - 1) = -1/23. So the equation of BQ is y = -1/23(x - 1).

Now we can find the points where DP and BQ intersect the horizontal and vertical lines passing through the midpoints of the sides of the square. The midpoints of the sides have coordinates (1/2, 1/2), (1/2, 0), (0, 1/2), and (1, 1/2). Solving the system of equations formed by DP and BQ with the equations of the midpoints, we find that the intersection points are (3/5, 2/5), (2/5, 3/25), (23/50, 27/50), and (27/50, 23/50).

Now we can find the areas of the four regions. The regions are bounded by the lines DP, BQ, and

 90%|█████████ | 9/10 [27:15<03:05, 185.62s/it]

9
First, let's find a general formula for $f(n)$. We know that $f(2n)=2f(n)+1$. By applying this equation recursively, we can find that $f(2^kn)=2^kf(n)+(2^k-1)$.

Now, we want to find $f(n)$ when $n=2^k$, so that $f(n)$ is as large as possible while still being a power of $2$. This happens when $k=\lfloor \log_2 n \rfloor$, so $f(100)=f(2^k)=2^kf(1)+(2^k-1)=100f(1)+99$.

Next, we need to find $f(1)$. Since $f(f(f(n)))=8n-7$, then $f(f(2))=8(2)-7=1$. Thus, $f(1)=1$ because $f$ maps positive integers to positive integers.

Now, we can find $f(100)=100f(1)+99=100(1)+99=199$. The final answer is $f(100)=199$.
199
199


100%|██████████| 10/10 [28:38<00:00, 171.82s/it]


In [10]:
# Store the predictions in the `answer` column (replacing it if already present).
df = df.assign(answer=answers)

In [11]:
# Write only the id/answer columns, with a header row and without the index.
df.loc[:, ['id', 'answer']].to_csv("submission.csv", index=False, header=True)

In [12]:
# Preview the first five submission rows.
df.loc[:, ['id', 'answer']].head(5)

Unnamed: 0,id,answer
0,229ee8,2
1,246d26,13
2,2fc4ad,36
3,430b63,67
4,5277ed,880


In [13]:
if not PRIVATE:
    # Reload the labelled training data: the `answer` column of the working
    # frame was overwritten with predictions earlier, so the ground truth has
    # to be read again before it can be compared with the model output.
    df = pd.read_csv(
        '/kaggle/input/ai-mathematical-olympiad-prize/train.csv'
    ).assign(model_answer=answers)
    # Row-wise exact match between ground truth and prediction.
    df['match'] = df['answer'].eq(df['model_answer'])
    print(f'{df["match"].sum()} matches in {len(df)} examples')

1 matches in 10 examples
