In [None]:
import os

# Input dataset, output directory, and API key file; relative paths are resolved
# against the current working directory.
data_path = "../../data/gsm8k/test_demos.json"
result_path = "../../result/analog"
keys_file_path = "../../utils/raw_keys.txt"

# exist_ok=True makes directory creation idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(result_path, exist_ok=True)

# Base name (without extension) of the JSON file that stores the model outputs.
suffix = "gsm8k_gpt35_fewshot"

## Load dataset

In [None]:
import json

# Read the dataset JSON. The encoding is given explicitly so decoding does not depend
# on the platform's locale default (the results file is likewise read with 'utf8').
with open(data_path, 'r', encoding='utf8') as f:
    raw_data = json.load(f)
    

In [None]:
from tqdm import tqdm
import json

# Convert every raw record into the dict consumed by the prompt-building code:
#   'Question' / 'Answer' - copied from the record's 'problem' / 'answer' fields
#   'Demos_Q'             - the record's demo problems, each as "Question: ...\n\n"
#   'Demos_QA'            - the same demos with their solutions appended as "Answer: ..."
data = []

for raw_item in tqdm(raw_data):
    item = {
        'Question': raw_item['problem'],
        'Answer': raw_item['answer'],
    }

    demos = raw_item['demos']
    item['Demos_Q'] = ''.join(
        f"Question: {demo['problem']}\n\n" for demo in demos
    )
    item['Demos_QA'] = ''.join(
        f"Question: {demo['problem']}\nAnswer: {demo['solution']}\n\n" for demo in demos
    )

    data.append(item)

In [None]:
from utils.openai import OpenAIKey, create_response_chat

# Chat model name passed as the first argument of every create_response_chat call.
MODEL = "gpt-3.5-turbo"
# Built from the keys file path; its process_error method is invoked whenever a request raises.
# NOTE(review): presumably manages the OpenAI API keys listed in that file - confirm in utils.openai.
openai_key = OpenAIKey(keys_file_path)

## Analog Method

In [None]:
# Prompt template for the analogical few-shot method, filled in with str.format:
#   {seed_demonstration} - the pre-formatted demonstration block (item["Demos_QA"])
#   {Question}           - the problem to solve (item['Question'])
# `{{}}` is the str.format escape for a literal `{}`, and `\\boxed` is an escaped
# backslash, so the rendered prompt contains `\boxed{}`.
# There is deliberately no line break between {seed_demonstration} and
# "# Initial Problem:": every demo in Demos_QA already ends with "\n\n".
analog_template = """Your task is to tackle mathematical problems. When presented with a math problem, recall relevant problems as examples. Afterward, proceed to solve the initial problem.

# Demonstration:
You can refer to these demonstration to give your reasoning process.
{seed_demonstration}# Initial Problem:
{Question}

# Instruction:
## Relevant Problems:
Recall three examples of math problems that are relevant to the initial problem. Your problems should be distinct from each other and from the initial problem (e.g., involving different numbers and names). For each problem:
- After "Question: ", describe the problem.
- After "Answer: ", explain the solution and enclose the ultimate answer in \\boxed{{}}.

## Solve the Initial Problem:
Question: Copy and paste the initial problem here. 
Answer: Explain the solution and enclose the ultimate answer in \\boxed{{}} here."""

In [None]:
# Render one prompt per dataset item by filling the template with that item's
# demonstrations and question.
prompt_list = [
    analog_template.format(
        seed_demonstration=entry["Demos_QA"],
        Question=entry['Question'],
    )
    for entry in data
]

# Show the first rendered prompt as a sanity check.
print(prompt_list[0])

In [None]:
from tqdm import tqdm

# Maximum number of request attempts per prompt before giving up on it.
MAX_ATTEMPTS = 20
# Placeholder stored when every attempt fails, so result_list keeps one entry per prompt.
FAILED_RESULT = 'None'

result_list = []

# Query the model once per prompt, retrying on any exception up to MAX_ATTEMPTS times.
for prompt in tqdm(prompt_list):
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            result = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=1024,
                temperature=0
            )
        except Exception as e:
            # Record the placeholder on the final failed attempt, then hand the
            # error to the key helper (same order of side effects as before).
            if attempt == MAX_ATTEMPTS:
                result_list.append(FAILED_RESULT)
            openai_key.process_error(e)
        else:
            result_list.append(result)
            break
                

In [None]:
# Persist the model outputs as a JSON list. The encoding is stated explicitly so the
# file is written with the same codec ('utf8') that is used to read it back.
with open(os.path.join(result_path, f"{suffix}.json"), "w", encoding="utf8") as f:
    json.dump(result_list, f, indent=4)

## Evaluation


In [None]:
# Reload the saved outputs from disk; result_list is rebound to the file's contents.
saved_results_file = os.path.join(result_path, f"{suffix}.json")
with open(saved_results_file, mode='r', encoding='utf8') as fh:
    result_list = json.load(fh)
# Report how many outputs were loaded.
print(len(result_list))

In [None]:
from utils.evaluate import evaluate_gsm8k

# evaluate_gsm8k receives the model outputs and the dataset items; the value it
# returns is printed followed by a percent sign.
accuracy = evaluate_gsm8k(result_list, data)
print(f"Accuracy: {accuracy}%")