In [1]:
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from tqdm import tqdm
import re
import numpy as np


## Data Loading

In [2]:
from datasets import load_dataset
import pandas as pd

# Login using e.g. `huggingface-cli login` to access this dataset
aime = load_dataset("Maxwell-Jia/AIME_2024")
aime_df = pd.DataFrame(aime['train'])
aime_df.head(5)

Unnamed: 0,ID,Problem,Solution,Answer
0,2024-II-4,"Let $x,y$ and $z$ be positive real numbers tha...","Denote $\log_2(x) = a$, $\log_2(y) = b$, and $...",33
1,2024-II-12,"Let $O(0,0), A(\tfrac{1}{2}, 0),$ and $B(0, \t...",Begin by finding the equation of the line $\ov...,23
2,2024-I-4,Jen enters a lottery by picking $4$ distinct n...,This is a conditional probability problem. Bay...,116
3,2024-I-3,Alice and Bob play the following game. A stack...,Let's first try some experimentation. Alice ob...,809
4,2024-I-8,Eight circles of radius $34$ are sequentially ...,Draw an altitude from both end circles of the ...,197


### Test Problem Statement

Let $x,y$ and $z$ be positive real numbers that satisfy the following system of equations: 
$$[\log_2\left({x \over yz}\right) = 1/ 2 ]$$
$$[\log_2\left({y \over xz}\right) = {1 / 3}]$$
$$[\log_2\left({z \over xy}\right) = {1 \over 4}]$$
Then the value of $\left|\log_2(x^4y^3z^2)\right|$ is $\tfrac{m}{n}$ where $m$ and $n$ are relatively prime positive integers. Find $m+n$.

In [None]:
print(aime_df.iloc[0]['Problem'])

Let $x,y$ and $z$ be positive real numbers that satisfy the following system of equations: 
\[\log_2\left({x \over yz}\right) = {1 \over 2}\]
\[\log_2\left({y \over xz}\right) = {1 \over 3}\]
\[\log_2\left({z \over xy}\right) = {1 \over 4}\]
Then the value of $\left|\log_2(x^4y^3z^2)\right|$ is $\tfrac{m}{n}$ where $m$ and $n$ are relatively prime positive integers. Find $m+n$.


In [58]:
to_markdown(aime_df.iloc[0]['Solution'])

> Denote $\log_2(x) = a$, $\log_2(y) = b$, and $\log_2(z) = c$.
> 
> Then, we have:
> $a-b-c = \frac{1}{2}$,
> $-a+b-c = \frac{1}{3}$,
> $-a-b+c = \frac{1}{4}$.
> 
> Now, we can solve to get $a = \frac{-7}{24}, b = \frac{-9}{24}, c = \frac{-5}{12}$.
> Plugging these values in, we obtain $|4a + 3b + 2c|  = \frac{25}{8} \implies \boxed{033}$.

In [91]:
print(aime_df.iloc[0]['Solution'])

Denote $\log_2(x) = a$, $\log_2(y) = b$, and $\log_2(z) = c$.

Then, we have:
$a-b-c = \frac{1}{2}$,
$-a+b-c = \frac{1}{3}$,
$-a-b+c = \frac{1}{4}$.

Now, we can solve to get $a = \frac{-7}{24}, b = \frac{-9}{24}, c = \frac{-5}{12}$.
Plugging these values in, we obtain $|4a + 3b + 2c|  = \frac{25}{8} \implies \boxed{033}$.


In [None]:
# model_names = ["deepscaler-1.5B", "DeepSeek-R1-Distill-Qwen-1.5B"]
# for m_name in model_names:
#     print(f"Evaluating raw performance for {m_name}")
#     model, tokenizer = load_model_and_tokenizer(m_name)
#     acc = evaluate_model_raw(model, tokenizer, pd.DataFrame(aime['train']), num_samples=1)
#     print(f"{m_name} raw accuracy: {acc:.2f}%\n")

## Experiments

### Unsloth experiments

In [4]:
from utils.benchmark import *

INFO 05-05 03:15:00 [__init__.py:239] Automatically detected platform cuda.


In [8]:
from functools import partial
evaluate_performance_math(
    aime_df,
    foo_model,
    '{}',
    metric='best',
    num_samples=1
)

100%|██████████| 30/30 [00:00<00:00, 11201.74it/s]


0.03333333333333333

In [22]:
row = aime_df.iloc[0]
question = row['Problem']
ground_truth = row['Answer']

#### Vanilla R1-Distill-QWEN-1.5B

20%

In [None]:
from unsloth import is_bfloat16_supported
from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Can increase for longer reasoning traces
lora_rank = 16 # Larger rank = smarter, but slower

# model_id = "meta-llama/meta-Llama-3.1-8B-Instruct"
# model_id = "mistralai/Mistral-7B-v0.1"
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# import gc
# gc.collect()
# torch.cuda.empty_cache()

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = max_seq_length,
    load_in_4bit = True, # False for LoRA 16bit
    fast_inference = True, # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.6, # Reduce if out of memory
    # max_logprobs = 1000
)

vanilla_1p5B = FastMathModel(
    model,
    tokenizer,
    sampling_params=SamplingParams(
                    temperature=1.0,
                    top_p=0.9,
                    max_tokens=20000)
)

# Deprecated Naive Experiment: SLOW!!
# system_prompt = r'''Assume you are a professional mathematician.{}
# Please show your step by step reasoning in <thinking> tag, keep your 
# solution concise skipping arithmetic details, and generate your answer as 
# a number in <answer> tag.'''
# prompt = system_prompt.format(question)
# inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# sampling_params = SamplingParams(
#     temperature = 0.5,
#     top_p = 0.99,
#     max_tokens = 2048,
# )

# outputs = model.generate(**inputs,
#                         #  streamer = text_streamer,
#                         # sampling_params=sampling_params, 
#                          max_new_tokens = 2048)

# # outputs = model.fast_generate(inputs, sampling_params=sampling_params)

# generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
#vanilla
vanilla_1p5B_perf = evaluate_performance_math(
                    aime_df,
                    vanilla_1p5B.generate,
                    get_prompt_template(),
                    metric='best',
                    num_samples=1
                )
print(f"Vanilla 1.5B performance: {vanilla_1p5B_perf*100:.4f}%")

In [12]:
print(f"Vanilla 1.5B performance: {vanilla_1p5B_perf*100:.4f}%")

Vanilla 1.5B performance: 20.0000%


#### Less Thought Switch Model

In [19]:
from vllm.logits_process import LogitsProcessor

class LessThoughtSwitchModel(FastMathModel):
    def __init__(self, model, tokenizer, sampling_params=None, max_tokens=20000):
        if sampling_params is None:
            sampling_params = SamplingParams(
                temperature = 0.6,
                repetition_penalty = 1.1,
                top_p = 0.9,
                max_tokens = max_tokens,
                logits_processors = [self._penalize_thought_switch]
            )
        super().__init__(model, tokenizer, sampling_params)
        self.penalized_tokens = self._thought_switch_penalty()
        if sampling_params.logits_processors is None:
            sampling_params.logits_processors = [self._penalize_thought_switch]
        self.sampling_params = sampling_params
        

    def _thought_switch_penalty(self):
        custom_penalties = {
            "Wait": 3.0,  # Strong penalty for "Wait"
            "wait": 3.0,
            "Hmm": 3.0,   # Strong penalty for hesitation markers
            "hmm": 3.0
        }
        # Get token IDs for penalized tokens
        penalized_tokens = {}
        for word, penalty in custom_penalties.items():
            token_ids = self.tokenizer.encode(word, add_special_tokens=False)
            for token_id in token_ids:
                penalized_tokens[token_id] = penalty
        return penalized_tokens

    def _penalize_thought_switch(self, token_ids, logits):
        for token_id, penalty in self.penalized_tokens.items():
            logits[token_id] -= penalty
        return logits

In [None]:
# # import gc
# # gc.collect()
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = model_id,
#     max_seq_length = max_seq_length,
#     load_in_4bit = True, # False for LoRA 16bit
#     fast_inference = True, # Enable vLLM fast inference
#     max_lora_rank = lora_rank,
#     gpu_memory_utilization = 0.6, # Reduce if out of memory
#     # max_logprobs = 1000
# )

lessThoughtSw_1p5B = LessThoughtSwitchModel(
                                model,
                                tokenizer,
                                max_tokens=20000,
                            )
# sampling_params = SamplingParams(
#     temperature = 0.6,
#     repetition_penalty = 1.1,
#     top_p = 0.9,
#     max_tokens = max_tokens,
#     logits_processors = [self._thought_switch_penalty]
# )

# Less Thought Switch Model
lessTSW_1p5B_perf = evaluate_performance_math(
                    aime_df,
                    lessThoughtSw_1p5B.generate,
                    get_prompt_template(),
                    metric='best',
                    num_samples=1
                )

Processed prompts: 100%|██████████| 1/1 [00:28<00:00, 28.10s/it, est. speed input: 8.83 toks/s, output: 77.67 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:51<00:00, 51.25s/it, est. speed input: 5.46 toks/s, output: 74.48 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:25<00:00, 25.42s/it, est. speed input: 8.85 toks/s, output: 76.09 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:40<00:00, 40.43s/it, est. speed input: 4.92 toks/s, output: 75.69 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:51<00:00, 51.59s/it, est. speed input: 3.78 toks/s, output: 75.64 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:11<00:00, 11.57s/it, est. speed input: 15.73 toks/s, output: 77.52 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:34<00:00, 34.72s/it, est. speed input: 5.10 toks/s, output: 75.15 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:11<00:00, 11.32s/it, est. speed input: 15.20 toks/s, output: 77.78 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:11<00:00, 

#### Penalized Original Example

In [25]:
custom_penalties = {
    "Wait": 3.0,  # Strong penalty for "Wait"
    "wait": 3.0,
    "Hmm": 3.0,   # Strong penalty for hesitation markers
    "hmm": 3.0
}
# Get token IDs for penalized tokens
penalized_tokens = {}
for word, penalty in custom_penalties.items():
    token_ids = tokenizer.encode(word, add_special_tokens=False)
    for token_id in token_ids:
        penalized_tokens[token_id] = penalty


def penalize_thought_switch(token_ids, logits):
    for token_id, penalty in penalized_tokens.items():
        logits[token_id] -= penalty
    return logits


In [26]:
system_prompt = r'''Assume you are a professional mathematician.{}
Please show your step by step reasoning in <think> tag. keep your 
solution concise skipping arithmetic details, and generate your answer as 
a number in <answer> tag.
Constraint: Please keep your response in the following format:
<think>
[your reasoning]
</think>
[your derivation, keep this short] 
<answer>
[succinct answer]
</answer>
'''
prompt = system_prompt.format(question)

text = tokenizer.apply_chat_template([
    {"role" : "user", "content" : prompt},
], tokenize = False, add_generation_prompt = True)


from vllm import SamplingParams
sampling_params = SamplingParams(
    temperature = 0.5,
    repetition_penalty=1.1,
    # frequency_penalty=0.1,
    top_p = 0.9,
    max_tokens = 20000,
    logits_processors=[penalize_thought_switch]
)
output = model.fast_generate(
    [text],
    sampling_params = sampling_params,
    lora_request = None,
)[0].outputs[0].text

Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processed prompts: 100%|██████████| 2/2 [01:19<00:00, 39.64s/it, est. speed input: 6.11 toks/s, output: 55.16 toks/s]


In [27]:
to_markdown(output)

> Okay, so I have this problem here with three logarithmic equations involving variables x, y, and z. The goal is to find the absolute value of log base 2 of (x⁴y³z²). Hmm, let's see how to approach this.
> 
> First, the equations given are:
> 
> 1. log₂(x / yz) = 1/2
> 2. log₂(y / xz) = 1/3
> 3. log₂(z / xy) = 1/4
> 
> I remember that logarithms can be converted into exponents if needed, which might help simplify things. Let me rewrite each equation using exponent form instead of logs.
> 
> Starting with the first equation:
> log₂(x / yz) = 1/2 means that 2^(1/2) = x / yz. So,
> √2 = x / yz → x = √2 * yz.
> 
> Second equation:
> log₂(y / xz) = 1/3 ⇒ 2^(1/3) = y / xz. Thus,
> 2^(1/3) = y / xz → y = 2^(1/3) * xz.
> 
> Third equation:
> log₂(z / xy) = 1/4 ⇒ 2^(1/4) = z / xy. Hence,
> 2^(1/4) = z / xy → z = 2^(1/4) * xy.
> 
> Now, we have expressions for x, y, and z in terms of each other. It seems like substitution might work here. Maybe express all variables in terms of one variable and solve?
> 
> Let me try expressing everything in terms of x. From the first equation, x = √2 * yz. If I can express y and z in terms of x, then perhaps substitute back.
> 
> But looking at the second equation, y = 2^(1/3)*xz. Similarly, from the third equation, z = 2^(1/4)*xy.
> 
> This seems cyclical because each variable depends on the product of two others. Maybe multiplying them together could lead somewhere? Let's think about it.
> 
> If I multiply all three original logarithmic equations:
> 
> log₂(x / yz) + log₂(y / xz) + log₂(z / xy) = 1/2 + 1/3 + 1/4.
> 
> Recall that log(a) + log(b) + log(c) = log(abc), but wait, actually, each term is log of something, so when adding logs, it becomes the log of the product. However, since these are individual logs set equal to specific values, maybe another approach would be better.
> 
> Alternatively, maybe convert each equation into exponential form and then take the ratio or something to eliminate variables.
> 
> Let me define:
> 
> Equation 1: log₂(x / yz) = 1/2 ⇒ x / yz = 2^{1/2} ⇒ x = yz * sqrt(2).
> 
> Equation 2: log₂(y / xz) = 1/3 ⇒ y / xz = 2^{1/3} ⇒ y = xz * 2^{1/3}.
> 
> Equation 3: log₂(z / xy) = 1/4 ⇒ z / xy = 2^{1/4} ⇒ z = xy * 2^{1/4}.
> 
> So now, each variable is expressed in terms of products. Let me write them again:
> 
> x = yz * sqrt(2),
> y = xz * 2^{1/3},
> z = xy * 2^{1/4}.
> 
> This looks similar to a cyclic dependency. Maybe substituting one into another will help.
> 
> From Equation 1: x = yz * sqrt(2).
> 
> Plug this into Equation 2: y = (yz * sqrt(2)) * z * 2^{1/3}.
> 
> Simplify: y = yz² * sqrt(2) * 2^{1/3}.
> 
> Divide both sides by y (assuming y ≠ 0, which it isn't):
> 
> 1 = z² * sqrt(2) * 2^{1/3}.
> 
> Similarly, from Equation 3: z = (yz * sqrt(2)) * y * 2^{1/4}.
> 
> Which simplifies to: z = y²z * sqrt(2) * 2^{1/4}.
> 
> Divide both sides by z:
> 
> 1 = y² * sqrt(2) * 2^{1/4}.
> 
> Now, we have two equations:
> 
> 1 = z² * sqrt(2) * 2^{1/3}   ...(A)
> 1 = y² * sqrt(2) * 2^{1/4}   ...(B)
> 
> Let me compute the constants:
> 
> sqrt(2) is 2^{1/2}, so 2^{1/2} * 2^{1/3} = 2^{5/6}; similarly,
> 
> sqrt(2) * 2^{1/4} = 2^{1/2} * 2^{1/4} = 2^{3/4}.
> 
> Thus, Equations become:
> 
> 1 = z² * 2^{5/6}  ...(A)
> 1 = y² * 2^{3/4}   ...(B)
> 
> Therefore, solving for z² and y²:
> 
> z² = 2^{-5/6}
> y² = 2^{-3/4}
> 
> Taking square roots:
> 
> z = 2^{-5/12}
> y = 2^{-3/8}
> 
> Since all variables are positive, no issues with signs.
> 
> Now, recall from Equation 1: x = yz * sqrt(2).
> 
> Compute yz:
> 
> yz = (2^{-3/8})(2^{-5/12}) = 2^{-3/8 -5/12}. Let's calculate the exponent:
> 
> Convert to common denominator: 24.
> 
> -3/8 = -9/24; -5/12 = -10/24. Sum: -19/24.
> 
> So yz = 2^{-19/24}.
> 
> Multiply by sqrt(2) = 2^{1/2}:
> 
> x = 2^{-19/24} * 2^{1/2} = 2^{-19/24 + 12/24} = 2^{-7/24}.
> 
> Thus, x = 2^{-7/24}.
> 
> Now, having found x, y, z in terms of powers of 2:
> 
> x = 2^{-7/24},
> 
> y = 2^{-3/8} = 2^{-9/24},
> 
> z = 2^{-5/12} = 2^{-10/24}.
> 
> To compute log₂(x⁴y³z²), let's compute the expression inside:
> 
> x⁴y³z² = (2^{-7/24})⁴ * (2^{-9/24})³ * (2^{-10/24})².
> 
> Exponent rules:
> 
> When multiplying same bases, add exponents.
> 
> Compute exponents:
> 
> For x⁴: (-7/24)*4 = -28/24 = -7/6.
> 
> For y³: (-9/24)*3 = -27/24 = -9/8.
> 
> For z²: (-10/24)*2 = -20/24 = -5/12.
> 
> Add them up:
> 
> -7/6 - 9/8 -5/12.
> 
> Find a common denominator, which is 24.
> 
> Convert each fraction:
> 
> -7/6 = -28/24,
> 
> -9/8 = -27/24,
> 
> -5/12 = -10/24.
> 
> Sum: (-28 -27 -10)/24 = (-65)/24.
> 
> Thus, x⁴y³z² = 2^{-65/24}.
> 
> Therefore, log₂(x⁴y³z²) = log₂(2^{-65/24}) = -65/24.
> 
> The absolute value is | -65/24 | = 65/24.
> 
> Check if 65 and 24 are coprime. Prime factors of 65 are 5 and 13; 24 is 2³×3¹. No common factors, so m=65, n=24. Therefore, m+n=89.
> 
> **Final Answer**
> \boxed{89}
> </think>
> 
> Given the system of equations involving logarithms:
> 
> 1. \(\log_2\left(\frac{x}{yz}\right) = \frac{1}{2}\)
> 2. \(\log_2\left(\frac{y}{xz}\right) = \frac{1}{3}\)
> 3. \(\log_2\left(\frac{z}{xy}\right) = \frac{1}{4}\)
> 
> We convert these logarithmic equations to their exponential forms:
> 
> 1. \(x = yz \cdot \sqrt{2}\)
> 2. \(y = xz \cdot 2^{1/3}\)
> 3. \(z = xy \cdot 2^{1/4}\)
> 
> Next, we express each variable in terms of the others and substitute:
> 
> From the first equation, \(x = yz \cdot \sqrt{2}\).
> 
> Substituting into the second equation:
> \[ y = (yz \cdot \sqrt{2}) \cdot z \cdot 2^{1/3} \]
> \[ 1 = z^2 \cdot \sqrt{2} \cdot 2^{1/3} \]
> 
> From the third equation, \(z = xy \cdot 2^{1/4}\).
> 
> Substituting into the second equation:
> \[ 1 = y^2 \cdot \sqrt{2} \cdot 2^{1/4} \]
> 
> Converting the constants:
> \[ \sqrt{2} = 2^{1/2} \]
> \[ 2^{1/2} \cdot 2^{1/3} = 2^{5/6} \]
> \[ 2^{1/2} \cdot 2^{1/4} = 2^{3/4} \]
> 
> Thus, we get:
> \[ 1 = z^2 \cdot 2^{5/6} \]
> \[ 1 = y^2 \cdot 2^{3/4} \]
> 
> Solving for \(z\) and \(y\):
> \[ z = 2^{-5/12} \]
> \[ y = 2^{-3/8} \]
> 
> Expressing \(x\), \(y\), and \(z\) in terms of powers of 2:
> \[ x = 2^{-7/24} \]
> \[ y = 2^{-9/24} \]
> \[ z = 2^{-10/24} \]
> 
> Calculating \(x^4 y^3 z^2\):
> \[ x^4 y^3 z^2 = (2^{-7/24})^4 \cdot (2^{-9/24})^3 \cdot (2^{-10/24})^2 \]
> \[ = 2^{-28/24} \cdot 2^{-27/24} \cdot 2^{-20/24} \]
> \[ = 2^{-65/24} \]
> 
> Thus, \(\log_2(x^4 y^3 z^2) = -65/24\). Taking the absolute value, we get:
> \[ |\log_2(x^4 y^3 z^2)| = \frac{65}{24} \]
> 
> Since 65 and 24 are coprime, \(m = 65\) and \(n = 24\). Therefore, \(m + n = 89\).
> 
> \[
> \boxed{89}
> \]

In [None]:
# testing changing logprobs
from vllm import SamplingParams
sampling_params = SamplingParams(
    temperature = 0.8,
    # repetition_penalty=1.1,
    # frequency_penalty=0.1,
    top_p = 0.9,
    max_tokens = 10,
    logprobs=2,
)
output = model.fast_generate(
    [text],
    sampling_params = sampling_params,
    lora_request = None,
)

In [21]:
system_prompt = r'''Assume you are a professional mathematician.{}
Please show your step by step reasoning in <think> tag. keep your 
solution concise skipping arithmetic details, and generate your answer as 
a number in <answer> tag.
Constraint: Please keep your response in the following format:
<think>
[your reasoning]
</think>
[your derivation, keep this short] 
<answer>
[succinct answer]
</answer>
'''
prompt = system_prompt.format(question)

text = tokenizer.apply_chat_template([
    {"role" : "user", "content" : prompt},
], tokenize = False, add_generation_prompt = True)


from vllm import SamplingParams
sampling_params = SamplingParams(
    temperature = 0.5,
    repetition_penalty=1.1,
    # frequency_penalty=0.1,
    top_p = 0.9,
    max_tokens = 40000,

)
output = model.fast_generate(
    [text],
    sampling_params = sampling_params,
    lora_request = None,
)[0].outputs[0].text


NameError: name 'question' is not defined

In [84]:
to_markdown(output)

> Okay, so I have this problem here with three logarithmic equations involving variables x, y, and z. The goal is to find the absolute value of log base 2 of (x⁴y³z²), which will give me a fraction m/n where m and n are coprime positive integers. Then, I need to add m and n together for the final answer.
> 
> First, let's write down the given equations:
> 
> 1. log₂(x / yz) = 1/2
> 2. log₂(y / xz) = 1/3
> 3. log₂(z / xy) = 1/4
> 
> I remember that logarithms can be converted into exponential form to make them easier to work with. So, if log₂(A) = B, then A = 2^B. Let me apply that to each equation.
> 
> Starting with the first equation:
> log₂(x / yz) = 1/2  
> This means that x / yz = 2^(1/2)  
> Which simplifies to x = yz * sqrt(2)
> 
> Second equation:
> log₂(y / xz) = 1/3  
> So, y / xz = 2^(1/3)  
> Thus, y = xz * 2^(1/3)
> 
> Third equation:
> log₂(z / xy) = 1/4  
> Therefore, z / xy = 2^(1/4)  
> Hence, z = xy * 2^(1/4)
> 
> Now, I have expressions connecting x, y, and z. It seems like substitution might help here. Let me see if I can express all variables in terms of one variable or relate them in some way.
> 
> Looking at the first equation, x = yz * sqrt(2). From the second equation, y = xz * 2^(1/3). If I substitute x from the first equation into the second, maybe I can get an expression for y in terms of z.
> 
> Substituting x:
> y = (yz * sqrt(2)) * z * 2^(1/3)
> Simplify:
> y = yz² * sqrt(2) * 2^(1/3)
> 
> Divide both sides by y (assuming y ≠ 0):
> 1 = z² * sqrt(2) * 2^(1/3)
> 
> Let me compute sqrt(2) * 2^(1/3). Since sqrt(2) is 2^(1/2), multiplying it by 2^(1/3) gives 2^(1/2 + 1/3) = 2^(5/6).
> 
> So,
> 1 = z² * 2^(5/6)
> 
> Therefore,
> z² = 2^(-5/6)
> 
> Taking square roots on both sides:
> z = 2^(-5/12)
> 
> Since z is positive, we don't consider the negative root.
> 
> Similarly, now that I have z, perhaps I can find y and x.
> 
> From the first equation, x = yz * sqrt(2). Plugging z = 2^(-5/12):
> 
> x = y * 2^(-5/12) * 2^(1/2)
> Combine exponents:
> x = y * 2^(-5/12 + 6/12) = y * 2^(1/12)
> 
> From the third equation, z = xy * 2^(1/4). Plug in z and x in terms of y:
> 
> 2^(-5/12) = (y * 2^(1/12)) * y * 2^(1/4)
> Simplify RHS:
> = y² * 2^(1/12 + 1/4) = y² * 2^(1/12 + 3/12) = y² * 2^(4/12) = y² * 2^(1/3)
> 
> Thus,
> 2^(-5/12) = y² * 2^(1/3)
> 
> Divide both sides by 2^(1/3):
> y² = 2^(-5/12 - 1/3) = 2^(-5/12 - 4/12) = 2^(-9/12) = 2^(-3/4)
> 
> Take square root:
> y = 2^(-3/8)
> 
> Again, since y is positive.
> 
> Now, having found y, x, z in terms of powers of 2:
> 
> x = y * 2^(1/12) = [2^(-3/8)] * 2^(1/12) = 2^(-3/8 + 1/12)
> Find common denominator for exponents:
> -3/8 = -9/24, 1/12 = 2/24, so total exponent is (-9 + 2)/24 = -7/24
> Thus, x = 2^(-7/24)
> 
> Similarly, z = 2^(-5/12) = 2^(-10/24) = 2^(-5/12)
> 
> And y = 2^(-3/8) = 2^(-9/24)
> 
> Now, I need to compute |log₂(x⁴y³z²)|.
> 
> Compute each term inside the log:
> 
> x⁴ = [2^(-7/24)]⁴ = 2^(-28/24) = 2^(-7/6)
> y³ = [2^(-9/24)]³ = 2^(-27/24) = 2^(-9/8)
> z² = [2^(-10/24)]² = 2^(-20/24) = 2^(-5/6)
> 
> Multiply these together:
> x⁴y³z² = 2^(-7/6) * 2^(-9/8) * 2^(-5/6) = 2^(-7/6 -9/8 -5/6)
> 
> Compute the sum of exponents:
> 
> Convert all to 24 denominators:
> -7/6 = -28/24
> -9/8 = -27/24
> -5/6 = -20/24
> 
> Total exponent: -28/24 -27/24 -20/24 = (-28 -27 -20)/24 = (-75)/24 = -25/8
> 
> Thus, x⁴y³z² = 2^(-25/8)
> 
> Now, take log base 2:
> log₂(x⁴y³z²) = log₂(2^(-25/8)) = -25/8
> 
> The absolute value is |-25/8| = 25/8.
> 
> So, m = 25, n = 8. They are coprime. Therefore, m + n = 25 + 8 = 33.
> 
> **Final Answer**
> \boxed{33}
> </think>
> 
> Given the system of equations involving logarithms:
> 
> 1. \(\log_2\left(\frac{x}{yz}\right) = \frac{1}{2}\)
> 2. \(\log_2\left(\frac{y}{xz}\right) = \frac{1}{3}\)
> 3. \(\log_2\left(\frac{z}{xy}\right) = \frac{1}{4}\)
> 
> We convert these logarithmic equations into exponential form:
> 
> 1. \(x = yz \cdot \sqrt{2}\)
> 2. \(y = xz \cdot 2^{1/3}\)
> 3. \(z = xy \cdot 2^{1/4}\)
> 
> Next, we express each variable in terms of another:
> 
> From the first equation, substituting \(x\) from the second equation:
> \[ y = (yz \cdot \sqrt{2}) \cdot z \cdot 2^{1/3} \]
> \[ y = yz^2 \cdot 2^{5/6} \]
> \[ 1 = z^2 \cdot 2^{5/6} \]
> \[ z = 2^{-5/12} \]
> 
> Using \(z = 2^{-5/12}\) in the first equation:
> \[ x = y \cdot 2^{-5/12} \cdot \sqrt{2} \]
> \[ x = y \cdot 2^{1/12} \]
> 
> From the third equation:
> \[ z = xy \cdot 2^{1/4} \]
> \[ 2^{-5/12} = (y \cdot 2^{1/12}) \cdot y \cdot 2^{1/4} \]
> \[ 2^{-5/12} = y^2 \cdot 2^{1/3} \]
> \[ y = 2^{-3/8} \]
> 
> Expressing \(x\) and \(y\) in terms of \(z\):
> \[ x = 2^{-7/24} \]
> \[ y = 2^{-9/24} \]
> \[ z = 2^{-10/24} \]
> 
> To find \(\left|\log_2(x^4y^3z^2)\right|\):
> 
> Calculate each term:
> \[ x^4 = 2^{-28/24} \]
> \[ y^3 = 2^{-27/24} \]
> \[ z^2 = 2^{-20/24} \]
> 
> Combine these:
> \[ x^4y^3z^2 = 2^{-28/24 - 27/24 - 20/24} = 2^{-75/24} = 2^{-25/8} \]
> 
> Taking the logarithm:
> \[ \log_2(x^4y^3z^2) = -25/8 \]
> 
> Taking the absolute value:
> \[ \left|\log_2(x^4y^3z^2)\right| = 25/8 \]
> 
> Thus, \(m = 25\) and \(n = 8\), giving \(m + n = 33\).
> 
> \[
> \boxed{33}
> \]

In [43]:
[method_name for method_name in dir(model) if callable(getattr(model, method_name))]

['__call__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_apply',
 '_assisted_decoding',
 '_autoset_attn_implementation',
 '_backward_compatibility_gradient_checkpointing',
 '_beam_search',
 '_call_impl',
 '_check_and_enable_flash_attn_2',
 '_check_and_enable_flex_attn',
 '_check_and_enable_sdpa',
 '_constrained_beam_search',
 '_contrastive_search',
 '_convert_head_mask_to_5d',
 '_copy_lm_head_original_to_resized',
 '_create_repo',
 '_dispatch_accelerate_model',
 '_dola_decoding',
 '_expand_inputs_for_generation',
 '_fix_state_dict_key_on_load',
 '_fix_state_dict_key_on_save',
 '_fix_state_dict_keys_on_load',
 '_fix_state_dict_keys_on_save',
 '_from_config',
 '_get_back

> Okay, so I have this problem here where I need to find the absolute value of log base 2 of (x⁴y³z²). The problem gives me three equations involving logarithms with base 2, and it involves x, y, z, which are positive real numbers. Let me try to figure this out step by step.
> 
> First, let me write down the equations:
> 
> 1. log₂(x / (y z)) = 1/2
> 2. log₂(y / (x z)) = 1/3
> 3. log₂(z / (x y)) = 1/4
> 
> Hmm, so each equation is a logarithm of a fraction where one variable is in the numerator and the product of the other two is in the denominator. Maybe I can rewrite these using logarithm properties to separate the variables. 
> 
> I remember that log₂(a/b) = log₂a - log₂b, so maybe I can apply that here.
> 
> Let me rewrite each equation:
> 
> 1. log₂x - log₂(y z) = 1/2
> 2. log₂y - log₂(x z) = 1/3
> 3. log₂z - log₂(x y) = 1/4
> 
> Now, I can further break down the logs in the denominators using log₂(ab) = log₂a + log₂b.
> 
> So, applying that:
> 
> 1. log₂x - (log₂y + log₂z) = 1/2
> 2. log₂y - (log₂x + log₂z) = 1/3
> 3. log₂z - (log₂x + log₂y) = 1/4
> 
> Let me denote log₂x = a, log₂y = b, and log₂z = c. That might make it easier to handle. So now, I can rewrite the equations in terms of a, b, c.
> 
> 1. a - (b + c) = 1/2
> 2. b - (a + c) = 1/3
> 3. c - (a + b) = 1/4
> 
> Okay, so now I have a system of three linear equations:
> 
> 1. a - b - c = 1/2
> 2. -a + b - c = 1/3
> 3. -a - b + c = 1/4
> 
> Hmm, this is a system of three equations with three variables: a, b, c. I can solve this system to find a, b, c, and then use them to find log₂(x⁴y³z²), which is 4a + 3b + 2c.
> 
> Let me write down the equations:
> 
> Equation 1: a - b - c = 1/2
> 
> Equation 2: -a + b - c = 1/3
> 
> Equation 3: -a - b + c = 1/4
> 
> Hmm, solving this system. Let me try adding them up or manipulating them to eliminate variables.
> 
> First, let me add all three equations together:
> 
> Equation 1 + Equation 2 + Equation 3:
> 
> (a - b - c) + (-a + b - c) + (-a - b + c) = 1/2 + 1/3 + 1/4
> 
> Let me compute the left side:
> 
> a - b - c - a + b - c - a - b + c
> 
> Simplify term by term:
> 
> a - a - a = -a
> 
> -b + b - b = -b
> 
> -c - c + c = -c
> 
> So overall, -a - b - c
> 
> Right side:
> 
> 1/2 + 1/3 + 1/4
> 
> Let me compute that. Let me find a common denominator. The least common denominator for 2,3,4 is 12.
> 
> So:
> 
> 1/2 = 6/12
> 
> 1/3 = 4/12
> 
> 1/4 = 3/12
> 
> Adding them: 6/12 + 4/12 + 3/12 = 13/12
> 
> So, we have:
> 
> - (a + b + c) = 13/12
> 
> Which implies:
> 
> a + b + c = -13/12
> 
> Wait, but a, b, c are log₂x, log₂y, log₂z, which are logs of positive real numbers, so they should be positive. But here, a + b + c is negative? That seems contradictory. Maybe I made a mistake in adding.
> 
> Wait, let me check the addition:
> 
> Equation 1: a - b - c = 1/2
> 
> Equation 2: -a + b - c = 1/3
> 
> Equation 3: -a - b + c = 1/4
> 
> Adding them:
> 
> a - b - c - a + b - c - a - b + c
> 
> So for a: 1 -1 -1 = -1
> 
> For b: -1 +1 -1 = -1
> 
> For c: -1 -1 +1 = -1
> 
> So total: -a - b - c = 13/12
> 
> Thus, a + b + c = -13/12
> 
> But that's negative, but a, b, c are positive since they are logs of positive numbers.
> 
> Hmm, that suggests something is wrong. Did I add correctly?
> 
> Wait, maybe I should try solving the system step by step instead of adding all three.
> 
> Let me write the equations again:
> 
> 1. a - b - c = 1/2
> 
> 2. -a + b - c = 1/3
> 
> 3. -a - b + c = 1/4
> 
> Let me label them as Eq1, Eq2, Eq3.
> 
> Maybe I can subtract Eq1 from Eq2.
> 
> Wait, actually, let me try solving step by step.
> 
> First, let's consider Eq1 and Eq2.
> 
> From Eq1: a - b - c = 1/2
> 
> From Eq2: -a + b - c = 1/3
> 
> Let me add Eq1 and Eq2 to eliminate a:
> 
> (a - b - c) + (-a + b - c) = 1/2 + 1/3
> 
> Simplify:
> 
> a - b - c - a + b - c = 1/2 + 1/3
> 
> Which is: (-b + b) + (-c - c) = 1/2 + 1/3
> 
> So, 0 - 2c = 1/2 + 1/3
> 
> Wait, 1/2 + 1/3 is 5/6. So:
> 
> -2c = 5/6
> 
> Thus, c = (-5/6)/2 = -5/12
> 
> Wait, c is log₂z, which is a log of a positive number, so c must be positive. So c = -5/12? That's negative. That can't be right. I must have messed up something.
> 
> Wait, maybe I should try subtracting equations differently. Let's see.
> 
> Wait, perhaps I made a mistake in adding Eq1 and Eq2. Let me try it again.
> 
> From Eq1: a - b - c = 1/2
> 
> From Eq2: -a + b - c = 1/3
> 
> Adding them:
> 
> a - b - c - a + b - c = 1/2 + 1/3
> 
> Wait, that's still:
> 
> a - a = 0
> 
> -b + b = 0
> 
> -c - c = -2c
> 
> Right side: 1/2 + 1/3 = 5/6
> 
> So, -2c = 5/6 => c = -5/12
> 
> Same result. Hmm. Negative c. But c is log₂z, which is positive. So, that's impossible. Therefore, maybe I have a mistake in adding the equations.
> 
> Wait, no. Actually, in the original step, when I added Eq1, Eq2, and Eq3, I think I did that correctly, but when adding the first two equations, maybe I made an error.
> 
> Wait, let's re-examine the addition:
> 
> Eq1: a - b - c = 1/2
> 
> Eq2: -a + b - c = 1/3
> 
> Adding them:
> 
> a - b - c - a + b - c = 1/2 + 1/3
> 
> Simplify:
> 
> 0a + 0b - 2c = 5/6
> 
> So, -2c = 5/6 => c = -5/12. So, same result.
> 
> Hmm, this suggests that perhaps the system is inconsistent? Or perhaps I have an error in setting up the equations.
> 
> Wait, let me go back.
> 
> Original equations:
> 
> 1. log₂(x / (y z)) = 1/2 => log₂x - log₂y - log₂z = 1/2
> 
> 2. log₂(y / (x z)) = 1/3 => log₂y - log₂x - log₂z = 1/3
> 
> 3. log₂(z / (x y)) = 1/4 => log₂z - log₂x - log₂y = 1/4
> 
> So, that's correct. So, the three equations are:
> 
> 1. a - b - c = 1/2
> 
> 2. -a + b - c = 1/3
> 
> 3. -a - b + c = 1/4
> 
> So, yeah, it's correct. So, when I added them, I got c = -5/12, which is negative, but that can't be. So, maybe the system is inconsistent, but the problem states that such x, y, z exist. So, something must be wrong in my approach.
> 
> Wait, another thought. Maybe I should subtract the equations instead of adding them. Let me try subtracting.
> 
> Let me subtract Eq1 and Eq2.
> 
> Wait, no, let's try a different approach.
> 
> Let me write the three equations:
> 
> 1. a - b - c = 1/2
> 
> 2. -a + b - c = 1/3
> 
> 3. -a - b + c = 1/4
> 
> Let me try solving for a, b, c.
> 
> Let me denote:
> 
> From Eq1: a = 1/2 + b + c
> 
> From Eq2: -a + b - c = 1/3
> 
> Substitute a from Eq1 into Eq2:
> 
> -(1/2 + b + c) + b - c = 1/3
> 
> Simplify:
> 
> -1/2 - b - c + b - c = 1/3
> 
> Which simplifies to:
> 
> -1/2 -2c = 1/3
> 
> So, -2c = 1/3 + 1/2 = (2 + 3)/6 = 5/6
> 
> Thus, c = -5/12
> 
> Same as before. Hmm. That's negative.
> 
> Wait, maybe let's try solving another pair.
> 
> Let me subtract Eq2 from Eq1.
> 
> Wait, Eq1: a - b - c = 1/2
> 
> Eq2: -a + b - c = 1/3
> 
> If I add Eq1 and Eq2, as I did before, I get c = -5/12. So, same result.
> 
> Alternatively, if I consider adding Eq1 and Eq3:
> 
> Eq1: a - b - c = 1/2
> 
> Eq3: -a - b + c = 1/4
> 
> Adding them:
> 
> a - b - c - a - b + c = 1/2 + 1/4
> 
> Simplify:
> 
> (-2b) = 3/4
> 
> Thus, b = (-3/4)/2 = -3/8
> 
> Wait, that can't be because b = log₂y, which must be positive. So, b = -3/8. Negative. That's a problem.
> 
> Wait, so perhaps my approach is wrong.
> 
> Wait, maybe I should consider that maybe I made a mistake in the equation setup.
> 
> Let me go back.
> 
> Original equations:
> 
> 1. log₂(x / (y z)) = 1/2
> 
> So, that is log₂x - log₂y - log₂z = 1/2
> 
> Which is a - b - c = 1/2
> 
> Similarly, equation 2: log₂y - log₂x - log₂z = 1/3 => -a + b - c = 1/3
> 
> Equation 3: log₂z - log₂x - log₂y = 1/4 => -a - b + c = 1/4
> 
> So that's correct.
> 
> Wait, but when I add all three equations, I get a + b + c = -13/12, which is negative. But a, b, c are positive. So, seems impossible.
> 
> But in the problem statement, it says that x, y, z are positive real numbers that satisfy the system. So, this suggests that the system is consistent, but when solving, we get negative values.
> 
> Wait, but perhaps the error is in assuming that all variables are positive. Let me check the original logarithms:
> 
> log₂(x / (y z)) = 1/2
> 
> Since x, y, z are positive, the argument of the logarithm is positive, so this is okay.
> 
> But, if x, y, z are positive, but the system when solving seems to require a + b + c negative. Maybe I need to think differently.
> 
> Wait, perhaps instead of solving the system as equations for a, b, c, maybe it's better to express the ratios.
> 
> Wait, another approach: Let me denote u = log₂x, v = log₂y, w = log₂z.
> 
> Then the system becomes:
> 
> 1. u - v - w = 1/2
> 
> 2. -u + v - w = 1/3
> 
> 3. -u - v + w = 1/4
> 
> So, it's the same as before. So, same problem.
> 
> Wait, perhaps I can write the system as:
> 
> 1. u - v - w = 1/2
> 
> 2. -u + v - w = 1/3
> 
> 3. -u - v + w = 1/4
> 
> So, adding the three equations:
> 
> (1 + 2 + 3): (-u - v - w) - (u + v + w) = (1/2 + 1/3 + 1/4)
> 
> Wait, no, better:
> 
> Wait, let me think. If I have:
> 
> Equation1: u - v - w = 1/2
> 
> Equation2: -u + v - w = 1/3
> 
> Equation3: -u - v + w = 1/4
> 
> Let me denote them as Eq1, Eq2, Eq3.
> 
> Now, let me try to solve for u, v, w.
> 
> Let me write the equations:
> 
> From Eq1: u = v + w + 1/2
> 
> From Eq2: -u + v - w = 1/3 => u = -v + w + 1/3
> 
> From Eq3: -u - v + w = 1/4 => u = -v + w - 1/4
> 
> Wait, now we have three expressions for u:
> 
> 1. From Eq1: u = v + w + 1/2
> 
> 2. From Eq2: u = -v + w + 1/3
> 
> 3. From Eq3: u = -v + w - 1/4
> 
> So, since all equal to u, set them equal to each other.
> 
> From 1 and 2:
> 
> v + w + 1/2 = -v + w + 1/3
> 
> Simplify:
> 
> v + w + 1/2 = -v + w + 1/3
> 
> Subtract w from both sides:
> 
> v + 1/2 = -v + 1/3
> 
> Bring -v to left:
> 
> v + v + 1/2 = 1/3
> 
> 2v = 1/3 - 1/2
> 
> Compute 1/3 - 1/2: that's -1/6
> 
> So, 2v = -1/6 => v = -1/12
> 
> But v = log₂y, which is supposed to be positive. But v = -1/12. That's negative, which is impossible.
> 
> Wait, that's the same problem as before. So, same result.
> 
> Hmm, this suggests that the system is inconsistent? But the problem states that such x, y, z exist. So, maybe I made a mistake in the setup.
> 
> Wait, let me double-check the original problem:
> 
> "Assume you are a professional mathematician. Let x, y and z be positive real numbers that satisfy the following system of equations:
> 
> log₂(x / yz) = 1/2
> 
> log₂(y / xz) = 1/3
> 
> log₂(z / xy) = 1/4
> 
> Then the value of |log₂(x⁴y³z²)| is m/n where m and n are relatively prime positive integers. Find m + n."
> 
> So, that's correct.
> 
> Wait, but if solving gives a negative value, perhaps I need to check if there's a mistake in the way I set up the equations.
> 
> Wait, another approach: Let's express all logs in terms of log₂x, log₂y, log₂z.
> 
> Let me denote u = log₂x, v = log₂y, w = log₂z.
> 
> Then,
> 
> log₂(x / y z) = u - v - w = 1/2
> 
> log₂(y / x z) = v - u - w = 1/3
> 
> log₂(z / x y) = w - u - v = 1/4
> 
> So, equations:
> 
> 1. u - v - w = 1/2
> 
> 2. -u + v - w = 1/3
> 
> 3. -u - v + w = 1/4
> 
> Let me write them as:
> 
> 1. u - v - w = 1/2
> 
> 2. -u + v - w = 1/3
> 
> 3. -u - v + w

In [16]:
to_markdown(tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, ''))

TypeError: argument 'ids': 'RequestOutput' object cannot be converted to 'Sequence'

In [9]:
# system_prompt = r'''Assume you are a professional mathematician.{}\n
# Please generate your answer as a number in <answer> tag.\n'''

system_prompt = r'''Assume you are a professional mathematician.{}
Please show your step by step reasoning in <thinking> tag, keep your 
solution concise skipping arithmetic details, and generate your answer as 
a number in <answer> tag.'''
prompt = system_prompt.format(question)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate with nucleus sampling (top-p sampling)
outputs = model.generate(
    **inputs,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.9, 
    temperature=1.0,
    num_return_sequences=10,
    num_beams=10,
    repetition_penalty=1.2,
    length_penalty=0,
    renormalize_logits=True
)

# If multiple samples, use majority vote (here we simply pick the first for raw eval)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

NotImplementedError: Make sure that a `_reorder_cache` function is correctly implemented in transformers.models.qwen2.modeling_qwen2 to enable beam search for <class 'transformers.models.qwen2.modeling_qwen2.Qwen2ForCausalLM'>

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)


# system_prompt = r'''Assume you are a professional mathematician.{}\n
# Please show your step by step reasoning in <thinking> tag, keep your solution concise skipping arithmetic details, and generate your answer as a number in <answer> tag.\n'''
system_prompt = r'''Assume you are a professional mathematician.{}
Please generate your answer as a number in <answer> tag.
'''
prompt = system_prompt.format(question)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(**inputs, 
                        #  streamer = text_streamer, 
                         max_new_tokens = 2048)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [9]:
to_markdown(tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, ''))

> 
> \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n

#### transformers experiment

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


# Configure 4-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.float16,
)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [9]:
# m_name = "DeepSeek-R1-Distill-Qwen-1.5B"
# model, tokenizer = load_model_and_tokenizer(m_name)
model.eval()
row = aime_df.iloc[0]
question = row['Problem']
ground_truth = row['Answer']
# system_prompt = r'''Assume you are a professional mathematician.{}\n
# Please show your step by step reasoning in <thinking> tag, keep your solution concise skipping arithmetic details, and generate your answer as a number in <answer> tag.\n'''
system_prompt = r'''Assume you are a professional mathematician.{}\n
Please generate your answer as a number in <answer> tag.\n'''
prompt = system_prompt.format(question)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate with nucleus sampling (top-p sampling)
outputs = model.generate(
    **inputs,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.9, 
    temperature=1.0,
    num_return_sequences=1,
    num_beams=1,
    repetition_penalty=1.2,
    #length_penalty=0,
    renormalize_logits=True
)

# If multiple samples, use majority vote (here we simply pick the first for raw eval)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [10]:

to_markdown(tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, ''))

> <think>
> Okay, so I have this problem here with three logarithmic equations involving variables x, y, z, all positive real numbers. And I need to find the absolute value of log base 2 of (x⁴ y³ z²), expressed as m/n where m and n are coprime, then sum them up for my final answer.
> 
> Let me write down the given equations first:
> 
> 1) log₂( x / (yz) ) = 1/2
> 
> 2) log₂( y / (xz) ) = 1/3
> 
> 3) log₂( z / (xy) ) = 1/4
> 
> And we want |log₂(x⁴ y³ z²)| which would just be something like log terms inside or outside? Well, since they're asking for an absolute value regardless, maybe it simplifies positively?
> 
> First thought, these logs can be rewritten using properties of logarithms. Maybe express everything in exponentials... Hmm, yes!
> 
> So let's take each equation step by step.
> 
> Equation 1:
> log₂( x/(yz) ) = 1/2  
> Which means, converting from log form:  
> x/(yz) = 2^(1/2) => sqrt(2)
> 
> Similarly,
> 
> Equation 2:
> log₂(y/(xz)) = 1/3  
> => y/(xz) = 2^(1/3) 
> 
> Equation 3:
> log₂(z/(xy)) = 1/4  
> => z/(xy) = 2^(1/4 )
> 
> Alright, now rewrite these into fractions instead of roots if needed but maybe multiplication is easier when dealing with ratios.
> 
> From Equation 1:  
> x = yz * sqrt(2) ... Let me note that expression (A). So x equals y times z multiplied by root two.
> 
> Equation 2:  
> y = xz * 2^{1/3} ...(B)
> Equation 3:  
> z = xy * 2^{1/4 }...(C)
> 
> Now, substitute (A) into Equations B and C...
> 
> Starting with substitution into Equation B:
> 
> We know from A, x = y*z*sqrt(2).
> 
> Plug into Equation B: y = x*z*(2)^{1/3}
> 
> Substitute x: y = ( y z sqrt(2) )* z * (2)^{1/3 }
> 
> Simplify the right side: multiply constants together and same bases: y = y * z² * (sqrt(2))*(2^{1/3})
> 
> Compute exponents on 2:
> 
> sqrt(2)=2^{1/2}, multiplying by another 2^{1/3}: total exponent becomes 5/6.
> 
> Thus: y= y * z² * 2^{5/6}. Let me subtract y to both sides: 0=y(z² -1)* 2^{5/6}. Since y is non-zero because all variables are positive reals, therefore we must have z² =1 ⇒ z =1 (√ isn't applicable due to positivity only). So from here,z=1.
> 
> Wait hold on; Wait after substituting x into eq B led us to conclude that either y cancels out or leads to z squared equaling one? Which gives z=1 considering positivity.
> 
> Yes, indeed.
> 
> So z=1. Alright.
> 
> With z known as 1, go back to Equation A:x = y*1*√2= √2*y
> 
> That relates x and y directly.
> 
> Moving forward, plug z=1 into Equation C: c is also relevant.
> 
> Equation C: z=(xy)*(2^{1/4}). But z was found as 1.
> 
> Therefore:
> 
> 1= x*y*(2^{1/4}) → which implies xy =2^{-1/4}= 1 over square root fourth power.
> 
> But since we had established earlier relation between x and y, i.e., x= sqrt(2)y, plugging into above: 
> 
> (xy)= (sqrt(2) y)( y )= sqrt(2)*y² =1/(2^{1/4})  
> 
> Hence, solve for y²:
> 
> Multiply both sides by 2^{1/4}: sqrt(2)*y² *2^{1/4} ?
> 
> Wait no, wait more precisely: From xy = [1]/[2^{1/4}], while x=sqrt(2)y.
> 
> So replace x with sqrt(2)y: Therefore:
> 
> (sqrt(2)y)(y)= sqrt(2)y² = 1/[2^{1/4}] 
> 
> Divide sqrt(2) denominator is 2^{1/2}; numerator has factor sqrt(2): which may help cancel denominators.
> 
> Expressed differently: compute how does sqrt(2)/2^{1/4} look?
> 
> Note that sqrt(2) is 2^{1/2}, divided by 2^{1/4} is equivalent to 2^{(1/

In [22]:
model.eval()
row = aime_df.iloc[0]
question = row['Problem']
ground_truth = row['Answer']
# system_prompt = r'''Assume you are a professional mathematician.{}\n
# Please show your step by step reasoning in <thinking> tag, keep your solution concise skipping arithmetic details, and generate your answer as a number in <answer> tag.\n'''
system_prompt = r'''Assume you are a professional mathematician.{}\n
Please generate your answer as a number in <answer> tag.\n'''
prompt = system_prompt.format(question)
judge_prompt = r"""
now we based on the response in <response>, could you judge its correctness in terms of closeness to goal from 1 to 10 in <score>.
"""

answer = tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, '')
judge_string = 'Q:' + prompt + r'\n' + r'<response>' + answer + r'<\response>' + '\n\n\n' + judge_prompt
inputs = tokenizer(judge_string, return_tensors="pt").to(model.device)

# Generate with nucleus sampling (top-p sampling)
outputs = model.generate(
    **inputs,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.9, 
    temperature=1.0,
    num_return_sequences=1,
    num_beams=1,
    repetition_penalty=1.2,
    #length_penalty=0,
    renormalize_logits=True
)

# If multiple samples, use majority vote (here we simply pick the first for raw eval)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [23]:
to_markdown(tokenizer.decode(outputs[0], skip_special_tokens=True).replace(judge_string, ''))

> </think>
> 
> The user provided mathematical reasoning towards finding \( |\log_2(x^4 y^3 z^2)| \). Through successive substitutions and transformations, solutions were derived systematically, ultimately yielding precise numerical results within constraints. Based on the detailed thought processes, confident conclusions emerge upon evaluation.
> 
> 
> \boxed{\dfrac{8}{7}}