# Maths models for LLM Zoomcamp math competition

In [1]:
# Installing necessary packages
! pip install -qU langchain-openai langchain

In [2]:
# Necessary imports
import pandas as pd
import numpy as np
from langchain.prompts import PromptTemplate
from openai import OpenAI
from dotenv import load_dotenv
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor
import os
import re

load_dotenv()

True

# LLM design

## microsoft/rho-math-7b-interpreter-v0.1

## MathLLMs/MathCoder-CL-7B

## Qwen/Qwen2-Math-7B-Instruct

In [34]:
# Results: 0.73750

llm = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',
)

def get_answer(question):
    prompt = f"""Role:
You are an advanced AI system with exceptional mathematical reasoning and problem-solving capabilities, specifically designed to solve tricky math problems (whose answer is a non-negative integer) written in LaTeX format from the AI Mathematical Olympiad (AIMO) competition. Your task is to accurately analyze and solve intricate mathematical problems, demonstrating a deep understanding of mathematical concepts and a strong ability to apply logical reasoning strategies.

Instruction:
1. Carefully read and comprehend the problem statement provided in the "Problem" section.
2. In the "Solution" section, provide a solution of the problem with detailed explanation of your logical reasoning process. Keep in mind that answer must be a non-negative integer number.
3. At the end, create a "Answer" section where you will state only the final numerical or algebraic answer, without any additional text or narrative.

Problem:
...

Solution:
...

Answer:
...

{question}

Step-by-step solution and final answer:"""

    response = llm.chat.completions.create(
        model='qwen2-math-7b-instruct',
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )
    return response.choices[0].message.content

## deepseek-ai/deepseek-math-7b-rl

In [35]:
# Results: 0.45000

llm = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',
)

def get_answer(question):
    prompt = f"""Role:
You are an advanced AI system with exceptional mathematical reasoning and problem-solving capabilities, specifically designed to solve tricky math problems (whose answer is a non-negative integer) written in LaTeX format from the AI Mathematical Olympiad (AIMO) competition. Your task is to accurately analyze and solve intricate mathematical problems, demonstrating a deep understanding of mathematical concepts and a strong ability to apply logical reasoning strategies.

Instruction:
1. Carefully read and comprehend the problem statement provided in the "Problem" section.
2. In the "Solution" section, provide a solution of the problem with detailed explanation of your logical reasoning process. Keep in mind that answer must be a non-negative integer number.
3. At the end, create a "Answer" section where you will state only the final numerical or algebraic answer, without any additional text or narrative.

Problem:
...

Solution:
...

Answer:
...

{question}

Step-by-step solution and final answer:"""

    response = llm.chat.completions.create(
        model='deepseek-math-7b',
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )
    return response.choices[0].message.content

# Retrieval design

#### Function to Extract Answer with

In [20]:
import re

def extract_numerical_answer(text):
    # Look for patterns like "Final answer: X" or "The answer is X" at the end of the text
    match = re.search(r'(?:final answer|the answer is)[:\s]*([+-]?\d*\.?\d+)', text, re.IGNORECASE)
    if match:
        return float(match.group(1))
    else:
        # If no clear final answer, look for the last number in the text
        numbers = re.findall(r'[+-]?\d*\.?\d+', text)
        return float(numbers[-1]) if numbers else None

#### Function to Prepare Dataset and Get Answers with

We will use a pool executor to make it faster (`max_workers=6` controls how many concurrent queries we send to OpenAI)

In [21]:
from tqdm.auto import tqdm

from concurrent.futures import ThreadPoolExecutor

pool = ThreadPoolExecutor(max_workers=6)

def map_progress(pool, seq, f):
    results = []

    with tqdm(total=len(seq)) as progress:
        futures = []

        for el in seq:
            future = pool.submit(f, el)
            future.add_done_callback(lambda p: progress.update())
            futures.append(future)

        for future in futures:
            result = future.result()
            results.append(result)

    return results

Now we create a function for processing each row:

In [22]:
def process_row(row):
    problem_id = row['problem_id']
    problem_text = row['problem_text']

    llm_reasoning = get_answer(problem_text)

    numerical_answer = extract_numerical_answer(llm_reasoning)

    return {
        'problem_id': problem_id,
        'problem_text': problem_text,
        'llm_reasoning': llm_reasoning,
        'answer': numerical_answer
    }

At this point, let's load the train data - so we can use it for evaluation our solution offline (without submitting to Kaggle):

In [23]:
df_train = pd.read_csv('/home/dmitry/Documents/Programming/Python/llm-zoomcamp/kaggle-competition/train.csv')
df_train.head()

Unnamed: 0,problem_id,problem_text,answer
0,2374,Find the value of the expression $\dfrac{17}{5...,1.6
1,4723,"In a company of 30 people, 25 use the social n...",24.0
2,7135,The number of road traffic accidents (RTAs) in...,32.0
3,5814,Find the value of the expression $\dfrac{2\str...,256.0
4,9237,A traveler from Moscow wants to visit four cit...,53.0


In [24]:
rows = df_train.head().to_dict(orient='records')
process_row(rows[0])

{'problem_id': 2374,
 'problem_text': 'Find the value of the expression $\\dfrac{17}{5} :\\dfrac{34}{3} +1.3$.',
 'llm_reasoning': "**Problem:**\nFind the value of the expression $\\dfrac{17}{5} :\\dfrac{34}{3} +1.3$.\n\n**Solution:**\n\nTo solve this problem, we need to follow the order of operations (PEMDAS). First, let's simplify the fraction division:\n\n$\\dfrac{17}{5} :\\dfrac{34}{3}$ = $\\dfrac{\\dfrac{17}{5}}{\\dfrac{34}{3}}$\n\nNow, we can cancel out common factors between the numerator and denominator of each fraction:\n\n= $\\dfrac{17/5}{34/3}$\n\n= $\\dfrac{1}{2}$\n\nNext, add 1.3 to this result:\n\n$\\dfrac{1}{2} + 1.3$\n\n= $0.5 + 1.3$\n\n= $1.8$\n\nTherefore, the value of the expression is 1.8.\n\n**Answer:**\n1.8",
 'answer': 1.8}

In [25]:
results = map_progress(pool, rows, process_row)
df_results = pd.DataFrame(results)
df_results

  0%|          | 0/5 [00:00<?, ?it/s]

Unnamed: 0,problem_id,problem_text,llm_reasoning,answer
0,2374,Find the value of the expression $\dfrac{17}{5...,**Problem:** Find the value of the expression ...,1.8
1,4723,"In a company of 30 people, 25 use the social n...",Problem:\n...\n\nSolution:\n\nLet's analyze ea...,4.0
2,7135,The number of road traffic accidents (RTAs) in...,Problem:\n...\n\nThe problem states that the n...,32.0
3,5814,Find the value of the expression $\dfrac{2\str...,**Problem:** Find the value of the expression ...,256.0
4,9237,A traveler from Moscow wants to visit four cit...,Problem:\n...\n\nSolution:\n\nTo solve this pr...,4563.0


In [26]:
df_train.head()

Unnamed: 0,problem_id,problem_text,answer
0,2374,Find the value of the expression $\dfrac{17}{5...,1.6
1,4723,"In a company of 30 people, 25 use the social n...",24.0
2,7135,The number of road traffic accidents (RTAs) in...,32.0
3,5814,Find the value of the expression $\dfrac{2\str...,256.0
4,9237,A traveler from Moscow wants to visit four cit...,53.0


Let's evaluate the answer on these 5 examples:

In [27]:
import pandas as pd
import numpy as np


class ParticipantVisibleError(Exception):
    # If you want an error message to be shown to participants, you must raise the error as a ParticipantVisibleError
    # All other errors will only be shown to the competition host. This helps prevent unintentional leakage of solution data.
    pass


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    '''
    Accuracy that works with multiple correct answers.
    '''
    solution = solution.set_index(row_id_column_name, drop=True)
    submission = submission.set_index(row_id_column_name, drop=True)
    submission = submission.loc[solution.index]
        
    target_column = 'answer'
    assert target_column in solution.columns
    assert target_column in submission.columns

    # This fix is needed because submission is loaded with default parameters
    # Pandas magically converts string column into float
    def fix_suffix(value):
        if value.endswith('.0'):
            return value[:-2]
        else:
            return value
        
    submission[target_column] = submission[target_column].astype(str)
    submission[target_column] = submission[target_column].apply(fix_suffix)
    
    
    def convert_to_list(value):
        values = [v.strip() for v in value.strip().lstrip('[').rstrip(']').split(',')]
        return values

    solution[target_column] = solution[target_column].astype(str).apply(convert_to_list)

    correct = [
        submit_answer in correct_answer
        for correct_answer, submit_answer in zip(
            solution[target_column].values, 
            submission[target_column].values
        )
    ]
            
    return np.mean(correct)

In [28]:
score(df_train.head(), df_results, 'problem_id')

0.4

Prepare submission

In [29]:
def prepare_prompts_and_get_answers(df):
    rows = df.to_dict(orient='records')
    results = map_progress(pool, rows, process_row)
    return pd.DataFrame(results)

In [30]:
df_train_results = prepare_prompts_and_get_answers(df_train)
df_train_results.head()

  0%|          | 0/100 [00:00<?, ?it/s]

Unnamed: 0,problem_id,problem_text,llm_reasoning,answer
0,2374,Find the value of the expression $\dfrac{17}{5...,**Problem:** Find the value of the expression ...,8.8
1,4723,"In a company of 30 people, 25 use the social n...",Problem:\n...\n\nSolution:\n\nLet's analyze ea...,13.0
2,7135,The number of road traffic accidents (RTAs) in...,Problem:\n...\n\nThe problem states that the n...,32.0
3,5814,Find the value of the expression $\dfrac{2\str...,**Problem:**\nFind the value of the expression...,8192.0
4,9237,A traveler from Moscow wants to visit four cit...,Problem:\n...\n\nSolution:\n\nTo solve this pr...,6.0


In [31]:
score(df_train, df_train_results, 'problem_id')

0.33

In [32]:
df_test = pd.read_csv('/home/dmitry/Documents/Programming/Python/llm-zoomcamp/kaggle-competition/test.csv')

df_test_results = prepare_prompts_and_get_answers(df_test)

submission = df_test_results[['problem_id', 'answer']]
submission.to_csv('submission.csv', index=False)

  0%|          | 0/100 [00:00<?, ?it/s]