# Setting up

In [2]:
import os

# The literal below is a placeholder; never commit a real key to the notebook.
# Only fill it in when no usable key is already configured, so a real key
# supplied through the environment is not clobbered by the placeholder.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = "sk-your-api-key"

In [3]:
import pandas as pd

# Test split of the competition data (path as used in the Kaggle notebook environment).
TEST_CSV_PATH = '/kaggle/input/llm-zoomcamp-2024-competition/test.csv'
test_df = pd.read_csv(TEST_CSV_PATH)

In [4]:
! pip install langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-0.1.19-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain-core<0.3.0,>=0.2.24 (from langchain-openai)
  Downloading langchain_core-0.2.24-py3-none-any.whl.metadata (6.2 kB)
Collecting openai<2.0.0,>=1.32.0 (from langchain-openai)
  Downloading openai-1.37.1-py3-none-any.whl.metadata (22 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langsmith<0.2.0,>=0.1.75 (from langchain-core<0.3.0,>=0.2.24->langchain-openai)
  Downloading langsmith-0.1.94-py3-none-any.whl.metadata (13 kB)
Collecting packaging<25,>=23.2 (from langchain-core<0.3.0,>=0.2.24->langchain-openai)
  Downloading packaging-24.1-py3-none-any.whl.metadata (3.2 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.24->langchain-openai)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.many

In [5]:
! pip install langchain

Collecting langchain
  Downloading langchain-0.2.11-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Downloading langchain-0.2.11-py3-none-any.whl (990 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.3/990.3 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading langchain_text_splitters-0.2.2-py3-none-any.whl (25 kB)
Installing collected packages: langchain-text-splitters, langchain
Successfully installed langchain-0.2.11 langchain-text-splitters-0.2.2


# LLM choice and Prompts

This code sets up a system for solving math problems using a language model (LLM). Here's a brief overview:

1. `get_answer(question)`: Builds a prompt asking the LLM to solve a math problem step-by-step, sends it to the model, and returns the model's response.

2. `parse_answer(question, message)`: Sends a second prompt asking the LLM to analyze and, if needed, fix the given solution, then state the final answer in a fixed closing sentence; returns the model's response.

3. `extract_numerical_answer(text)`: Uses regular expressions to extract the numerical answer from the LLM's response, falling back to the last number in the text (or `1.0` if there is none).

In [6]:
from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Single chat model instance used by every prompt helper in this notebook.
# max_tokens is not passed, so the client's own default applies.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.5)

In [7]:
import re

def get_answer(question):
    """Ask the chat model for a step-by-step solution to one problem.

    Args:
        question: Problem statement; interpolated into the prompt as-is.

    Returns:
        Whatever ``llm.invoke`` returns for the prompt (the caller in this
        notebook reads its ``.content`` attribute).
    """
    solve_prompt = f"""Please solve the following high school math problem step by step. Explain your reasoning clearly and provide the final answer.

{question}

Step-by-step solution and final answer:"""

    return llm.invoke(solve_prompt)

def parse_answer(question, message):
    """Ask the chat model to review a proposed solution and restate the answer.

    The prompt instructs the model to close with the sentence
    "Hence, the final answer is [numeric string]".

    Args:
        question: Original problem statement.
        message: Solution text to be analyzed (and corrected if needed).

    Returns:
        Whatever ``llm.invoke`` returns for the review prompt (the caller in
        this notebook reads its ``.content`` attribute).
    """
    # NOTE: the prompt text (including its whitespace) is sent verbatim to the model.
    review_prompt = f""" Please analyze the solution for a given problem, fix it if needed, and then provide the final answer.         
Your response should end in the format: 'Hence, the final answer is [numeric string].
    
Q: {question}

Solution: {message}

Analysis: 
    
    """

    return llm.invoke(review_prompt)


def extract_numerical_answer(text):
    """Extract the final numeric answer from an LLM response.

    Resolution order:
      1. A number that follows an explicit marker -- "final answer: X",
         "the final answer is X" or "the answer is X". When the marker occurs
         several times, the LAST occurrence wins, because a review typically
         quotes the earlier answer before stating the corrected one.
      2. Otherwise, the last number found anywhere in the text.
      3. ``1.0`` when the text is empty or contains no number at all.

    Thousands separators ("11,287.6") are accepted, and light Markdown/LaTeX
    decoration between the marker and the number (``**42**``, ``$42$``,
    ``\\( \\boxed{42} \\)``) is skipped.

    Args:
        text: Raw response text (may be empty or ``None``).

    Returns:
        The extracted answer as a ``float``.
    """
    if not text:
        return 1.0

    # Either a thousands-grouped number ("11,287.6") or a plain integer/decimal
    # ("42", "-0.5", ".5"). Only non-capturing groups, so findall yields whole matches.
    number = r'[+-]?(?:\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d*\.?\d+)'
    # "final answer" optionally followed by "is" also covers "the final answer is".
    marker = r'(?:final\s+answer|the\s+answer\s+is)(?:\s+is)?'
    # Separators and Markdown/LaTeX wrappers that may sit before the number.
    wrapper = r'(?:[\s:=*$`\\(\[{]|boxed)*'

    candidates = re.findall(marker + wrapper + '(' + number + ')', text, re.IGNORECASE)
    if not candidates:
        # No explicit marker: fall back to the last number in the text.
        candidates = re.findall(number, text)

    if not candidates:
        return 1.0
    return float(candidates[-1].replace(',', ''))

# Run the loop

In [8]:
from tqdm import tqdm

# One record per test problem; reset here so re-running the cell starts clean.
GLOBAL_SOLUTIONS_FOR_SUBMISSION = []

for idx, row in tqdm(test_df.iterrows(), total=len(test_df)):
    question = row['problem_text']

    # Pass 1: step-by-step solution from the model.
    response_1 = get_answer(question).content.strip()
    # Pass 2: the model reviews pass 1 and restates the final answer.
    response_2 = parse_answer(question, response_1).content.strip()

    numerical_answer = extract_numerical_answer(response_2)

    record = {
        'problem_id': row['problem_id'],
        'llm_out_1': response_1,
        'llm_out_2': response_2,
        'answer': numerical_answer,
    }
    GLOBAL_SOLUTIONS_FOR_SUBMISSION.append(record)

    # Rewrite the CSV after every problem so partial progress is on disk
    # if the loop is interrupted.
    checkpoint_df = pd.DataFrame(GLOBAL_SOLUTIONS_FOR_SUBMISSION)
    checkpoint_df.to_csv('baseline_answers_with_double_check.csv', index=False)
    

100%|██████████| 100/100 [23:35<00:00, 14.15s/it]


# Save the outputs for further analysis and create submission file

In [9]:
# Full record (both LLM outputs plus the parsed answer) for later analysis.
answers_df = pd.DataFrame(GLOBAL_SOLUTIONS_FOR_SUBMISSION)
answers_df.to_csv('baseline_answers_with_double_check.csv', index=False)

# The submission file keeps only the id and the parsed numeric answer.
submission_columns = ['problem_id', 'answer']
df = answers_df.loc[:, submission_columns]
df.to_csv('submission.csv', index=False)

In [10]:
# Preview the first five rows of the submission frame.
df.head(n=5)

Unnamed: 0,problem_id,answer
0,11919,12.0
1,8513,11287.6
2,7887,4.0
3,5272,6.0
4,8295,13.0


# What Next?

**Using an LLM for a double check and parsing** is a useful approach, albeit at the cost of additional API calls.
This method is valuable because our regex parser `extract_numerical_answer()` is simplistic and may miss correct answers.
So far, my best solution comes from constructing few-shot prompts using problems from `train.csv`.
I recommend looking at these resources:

* https://www.promptingguide.ai/techniques/cot
    - Example prompt: https://github.com/FranxYao/chain-of-thought-hub/blob/main/gsm8k/lib_prompt/prompt_original.txt
* https://www.promptingguide.ai/techniques/consistency
    - Example prompt: https://tylerburleigh.com/blog/2023/12/04/#prompt-functions