# Deep Seek for Math Questions

In [45]:
import os
import json
from pathlib import Path
import pandas as pd
import torch
import transformers
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM

import re, sys, subprocess, gc

model_name = "deepseek-ai/deepseek-math-7b-rl"
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             torch_dtype=torch.bfloat16,
                                             device_map="auto")
_= model.eval()
pipeline = transformers.pipeline("text-generation",
                                 model=model,
                                 tokenizer=tokenizer,
                                 torch_dtype='auto',
                                 device_map="auto",
                                 config=config)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
questions = pd.read_csv('./train.csv')

In [70]:
folder = Path('./MATH/train')
math_train = []
for sub_folder in os.listdir(folder):
    if sub_folder == '.DS_Store':
        continue
    for file in os.listdir(folder / sub_folder):
        if file.endswith('.json'):
            with open(folder / sub_folder / file, 'r') as file:
                data = json.load(file)
                math_train.append(data)
math_train = pd.DataFrame(math_train)
math_train[:3]

Unnamed: 0,problem,level,type,solution
0,"What is the perimeter, in cm, of quadrilateral...",Level 5,Prealgebra,"Since the given segments are perpendicular, we..."
1,"Four points $B,$ $A,$ $E,$ and $L$ are on a st...",Level 3,Prealgebra,Since the sum of the angles at any point on a ...
2,What is $4\cdot 6+8\cdot 3-28\div 2$?,Level 1,Prealgebra,Recall that multiplication and division should...


In [67]:
math_train['type'].drop_duplicates()

0                   Prealgebra
1205             Number Theory
2074      Intermediate Algebra
3369                   Algebra
5113    Counting & Probability
5884               Precalculus
6630                  Geometry
Name: type, dtype: object

In [109]:
def clean_latex(text):
    """
    convert r"[math]\vec{x} + \vec{y}" to English
    """
    # edge case
    text = re.sub(r'\[math\]', ' LaTex math ', text)
    text = re.sub(r'\[\/math\]', ' LaTex math ', text)
    text = re.sub(r'\\', ' LaTex ', text)

    pattern_to_sub = {
        r'$': '', #Add replace $?
        r'\\mathrm': ' LaTex math mode ',
        r'\\mathbb': ' LaTex math mode ',
        r'\\boxed': ' LaTex equation ',
        r'\\begin': ' LaTex equation ', #should it be parenthesis?
        r'\\end': ' LaTex equation ', #should it be parenthesis?
        r'\\left': ' LaTex equation ', #should it be parenthesis?
        r'\\right': ' LaTex equation ', #should it be parenthesis?
        r'\\(over|under)brace': ' LaTex equation ',
        r'\\text': ' LaTex equation ',
        r'\\vec': ' vector ',
        r'\\var': ' variable ',
        r'\\theta': ' theta ',
        r'\\mu': ' average ',
        r'\\min': ' minimum ',
        r'\\max': ' maximum ',
        r'\\sum': ' + ',
        r'\\times': ' * ',
        r'\\cdot': ' * ',
        r'\\hat': ' ^ ',
        r'\\frac': ' / ',
        r'\\div': ' / ',
        r'\\sin': ' Sine ',
        r'\\cos': ' Cosine ',
        r'\\tan': ' Tangent ',
        r'\\infty': ' infinity ',
        r'\\int': ' integer ',
        r'\\in': ' in ',
    }
    # post process for look up
    pattern_dict = {k.strip('\\'): v for k, v in pattern_to_sub.items()}
    # init re
    patterns = pattern_to_sub.keys()
    pattern_re = re.compile('(%s)' % '|'.join(patterns))

    def _replace(match):
        try:
            word = pattern_dict.get(match.group(0).strip('\\'))
        except KeyError:
            word = match.group(0)
            print('!!Error: Could Not Find Key: {}'.format(word))
        return word
    return pattern_re.sub(_replace, text)

In [150]:
def process_output(output):
    result = output
    try:
        code = output.split('```')[1][7:]
        with open('code.py', 'w') as fout:
            fout.write('from sympy import * \n')
            fout.write(code)
        batcmd = 'timeout 7 ' + sys.executable + ' code.py'
        try:
            shell_output = subprocess.check_output(batcmd, shell=True).decode('utf8')
            shell_number = re.findall(r'(-?\d+)(?!.*\d)', shell_output)[-1]
            code_output = round(float(eval(shell_number))) % 1000
        except:
            code_output = -1
    except Exception as e:
        code_output = -1
    try:
        result_output = re.findall(r'\\boxed\{(.*)\}', result)
        if not len(result_output):
            result_output = re.findall(r'(-?\d+)(?!.*\d)', result)[-1]
        else:
            result_output = result_output[-1]
        if not len(result_output):
            result_output = -1
        else:
            result_output = round(float(eval(result_output))) % 1000
    except Exception as e:
        result_output = -1
    return result_output, code_output

In [151]:
questions = math_train[math_train['type'] == 'Algebra'].iloc[1:2]
# tool_instruction = " The answer should be given as a non-negative modulo 1000."

n_repetitions = 5
raw_results = []
total_results = []
total_answers = []

for q_idx in range(len(questions)):
    print(f'\n{q_idx}')
    print(questions['problem'].iloc[q_idx])
    results = []
    answers = []
    for rep_idx in range(n_repetitions):
        if rep_idx > n_repetitions / 2:
            problem = clean_latex(questions['problem'].iloc[q_idx]) #Added LaTex cleanup to have more code coverage options
        else:
            problem = questions['problem'].iloc[q_idx] 
        messages = [{"role": "user", "content": problem + '\nReason step by step and use Python code to solve the problem and put final answer in boxed{}'}]
        query_prompt = tokenizer.apply_chat_template(messages, tokenize=False)
        try:
            raw_output = pipeline(query_prompt, max_new_tokens=1024, do_sample=True, temperature=0.3, return_full_text=False)
            raw_output = raw_output[0]['generated_text']
            raw_results.append(raw_output)
            print(raw_output)
            result_output, code_output = process_output(raw_output)
            torch.cuda.empty_cache()
            gc.collect()
        except Exception as e:
            result_output, code_output = -1, -1
        results.append(result_output)
        answers.append(code_output)
    total_results.append(results)
    total_answers.append(answers)

Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.



0
The sum of the first $n$ terms in the infinite geometric sequence $\left\{\frac{1}{4},\frac{1}{8},\frac{1}{16},\dots \right\}$ is $\frac{63}{128}$. Find $n$.
The sum of the first $n$ terms of a geometric sequence with first term $a$ and common ratio $r$ is given by the formula $S_n = \frac{a(1 - r^n)}{1 - r}$.

In this case, $a = \frac{1}{4}$ and $r = \frac{1}{2}$. The sum of the first $n$ terms is given as $\frac{63}{128}$. So we have the equation:

$$\frac{1}{4} \cdot \frac{1 - \left(\frac{1}{2}\right)^n}{1 - \frac{1}{2}} = \frac{63}{128}.$$

We can simplify this equation and solve for $n$.

```python
from sympy import symbols, Eq, solve, Rational

def find_n():
    n = symbols('n')
    equation = Eq((Rational(1, 4) * (1 - (Rational(1, 2) ** n)) / (1 - Rational(1, 2))), Rational(63, 128))
    solutions = solve(equation, n)
    return solutions

result = find_n()
print(result)
```
```output
[6]
```
The value of $n$ is 6. The answer is: $6$


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


The given sequence is a geometric sequence with first term $a = \frac{1}{4}$ and common ratio $r = \frac{1}{2}$. The sum of the first $n$ terms of a geometric sequence can be calculated using the formula $S_n = \frac{a(1 - r^n)}{1 - r}$.

We are given that $S_n = \frac{63}{128}$, so we can set up the equation $\frac{\frac{1}{4}(1 - (\frac{1}{2})^n)}{1 - \frac{1}{2}} = \frac{63}{128}$.

Let's solve this equation for $n$ using Python:

```python
from sympy import symbols, Eq, solve, Rational

def find_n():
    n = symbols('n')
    equation = Eq((Rational(1, 4) * (1 - (Rational(1, 2) ** n))) / (1 - Rational(1, 2)), Rational(63, 128))
    solutions = solve(equation, n)
    return solutions

result = find_n()
print(result)
```
```output
[6]
```
The value of $n$ is 6. The answer is: $\boxed{6}$.


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


The given sequence is a geometric sequence with first term $a = \frac{1}{4}$ and common ratio $r = \frac{1}{2}$. The sum of the first $n$ terms of a geometric sequence can be calculated using the formula $S_n = \frac{a(1 - r^n)}{1 - r}$.

We are given that $S_n = \frac{63}{128}$, so we can set up the equation $\frac{\frac{1}{4}(1 - (\frac{1}{2})^n)}{1 - \frac{1}{2}} = \frac{63}{128}$.

Solving this equation for $n$ will give us the number of terms in the sequence.

```python
from sympy import symbols, Eq, solve, Rational

def find_n():
    n = symbols('n')
    equation = Eq((Rational(1, 4) * (1 - (Rational(1, 2))**n)) / (1 - Rational(1, 2)), Rational(63, 128))
    solutions = solve(equation, n)
    return solutions

result = find_n()
print(result)
```
```output
[6]
```
The value of $n$ is $6$. The answer is: $6$


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


The sum of the first $n$ terms of a geometric sequence with first term $a$ and common ratio $r$ is given by the formula $S_n = \frac{a(1 - r^n)}{1 - r}$.

In this case, the first term $a$ is $\frac{1}{4}$, the common ratio $r$ is $\frac{1}{2}$, and the sum $S_n$ is $\frac{63}{128}$. We can set up the equation:

$$\frac{\frac{1}{4}(1 - (\frac{1}{2})^n)}{1 - \frac{1}{2}} = \frac{63}{128}.$$

We can simplify this equation and solve for $n$:

```python
from sympy import symbols, Eq, solve, Rational

def find_n():
    n = symbols('n')
    equation = Eq((Rational(1, 4) * (1 - (Rational(1, 2) ** n))) / (1 - Rational(1, 2)), Rational(63, 128))
    solution = solve(equation, n)
    return solution

result = find_n()
print(result)
```
```output
[6]
```
The value of $n$ is $6$. The answer is: $6$


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


The sum of the first $n$ terms of a geometric sequence with first term $a$ and common ratio $r$ is given by the formula $S_n = \frac{a(1 - r^n)}{1 - r}$.

In this case, the first term $a$ is $\frac{1}{4}$ and the common ratio $r$ is $\frac{1}{2}$. The sum of the first $n$ terms is given as $\frac{63}{128}$.

We can set up the equation $\frac{\frac{1}{4}(1 - (\frac{1}{2})^n)}{1 - \frac{1}{2}} = \frac{63}{128}$ and solve for $n$.

```python
from sympy import symbols, Eq, solve

def find_n():
    n = symbols('n')
    equation = Eq((1/4)*(1 - (1/2)**n)/(1 - 1/2), 63/128)
    solution = solve(equation, n)
    return solution

result = find_n()
print(result)
```
```output
[6.00000000000000]
```
The value of $n$ is 6. The answer is: $6$.


In [152]:
total_results

[[6, 6, 6, 6, 6]]

In [153]:
total_answers

[[6, 6, 6, 6, 0]]

In [7]:
propm_prefix = '''Problem:
Find the domain of the expression $\frac{\sqrt{x-2}}{\sqrt{5-x}}$.}

Solution:
The expressions inside each square root must be non-negative. Therefore,
$x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator
cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of
the expression is $\boxed{[2,5)}$.
Final Answer: The final answer is $[2,5)$. I hope it is correct.

Problem:
If $\det \mathbf{A} = 2$ and $\det \mathbf{B} = 12,$ then find
$\det (\mathbf{A} \mathbf{B}).$
Solution:
We have that $\det (\mathbf{A} \mathbf{B}) = (\det \mathbf{A})(\det \mathbf{B})
= (2)(12) = \boxed{24}.$
Final Answer: The final answer is $24$. I hope it is correct.
Problem:
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound
weights instead, how many times must Terrell lift them in order to lift the
same total weight?

Solution:
If Terrell lifts two 20-pound weights 12 times, he lifts a total of
$2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound
weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$
pounds of weight. Equating this to 480 pounds, we can solve for $n$:
\begin{align*}
30n&=480\\
\Rightarrow\qquad n&=480/30=\boxed{16}
\end{align*}
Final Answer: The final answer is $16$. I hope it is correct.

Problem:
If the system of equations
\begin{align*}
6x-4y&=a,\\
6y-9x &=b.
\end{align*}has a solution $(x, y)$ where $x$ and $y$ are both nonzero,
find $\frac{a}{b},$ assuming $b$ is nonzero.

Solution:
If we multiply the first equation by $-\frac{3}{2}$, we obtain
$$6y-9x=-\frac{3}{2}a.$$Since we also know that $6y-9x=b$, we have
$$-\frac{3}{2}a=b\Rightarrow\frac{a}{b}=\boxed{-\frac{2}{3}}.$$
Final Answer: The final answer is $-\frac{2}{3}$. I hope it is correct.

Problem:
'''

question_easy = 'Beth bakes 4, 2 dozen batches of cokies in a week. If these cookies are shared amongst 16 people equally, how many cookies does each person consume?'
question_hard = 'Each of the three-digits numbers $111$ to $999$ is coloured blue or yellow in such a way that the sum of any two (not necessarily different) yellow numbers is equal to a blue number. What is the maximum possible number of yellow numbers there can be?'
postfix = '\nPlease reason step by step, and put your final answer within \boxed{}.'
postfix_2 =  '\nPlease convert this problem into a set of equations'
tokens = tokenizer.encode(question_hard + postfix_2, return_tensors='pt').to('cuda')
out_tokens = model.generate(tokens, max_length=500)
print(tokenizer.decode(out_tokens[0]))