In [None]:
import os
import sys
sys.path.append('../../')

# Relative paths, resolved against the kernel's working directory.
data_path = "../../data/gsm8k/test_demos.json"  # benchmark file read by the dataset-loading cell
result_path = "../../result/self_icl"           # directory that receives every JSON written below
keys_file_path = "../../utils/raw_keys.txt"     # handed to OpenAIKey


# exist_ok=True creates the directory (and any missing parents) and is a no-op
# when it already exists, without the race of a separate os.path.exists() check.
os.makedirs(result_path, exist_ok=True)

# Prefix shared by all result files written under result_path.
suffix = "gsm8k_gpt35"

## Load dataset

In [None]:
import json

# Read the benchmark file. The explicit UTF-8 encoding matches the other JSON
# reads in this notebook and avoids depending on the platform default encoding.
with open(data_path, 'r', encoding='utf8') as f:
    raw_data = json.load(f)

# One flag per example: 0 = use generated demonstrations,
# 1 = fall back to the zero-shot prompt in step 3.
skip_list = [0] * len(raw_data)

In [None]:
from tqdm import tqdm
import json

# Flatten every raw record into the four fields used downstream:
# the question, its gold answer, and the provided demos rendered both
# as questions only and as question/answer pairs.
data = []

for record in tqdm(raw_data):
    entry = {
        'Question': record['problem'],
        'Answer': record['answer'],
    }

    demos = record['demos']
    entry['Demos_Q'] = ''.join(
        f"Question: {demo['problem']}\n\n" for demo in demos
    )
    entry['Demos_QA'] = ''.join(
        f"Question: {demo['problem']}\nAnswer: {demo['solution']}\n\n" for demo in demos
    )

    data.append(entry)

# Show the first converted example as a sanity check.
data[0]

In [None]:
from utils.openai import OpenAIKey, create_response_chat

# Chat model name passed as the first argument to every create_response_chat call below.
MODEL = "gpt-3.5-turbo"
# Key handler built from the keys file; its process_error() is invoked whenever an API call raises.
# NOTE(review): presumably constructing OpenAIKey also configures the credentials used by
# create_response_chat (the object is never passed to it) — confirm in utils.openai.
openai_key = OpenAIKey(keys_file_path)

## Step 1: Pseudo Query


In [None]:
# Step-1 prompt: shows the test question via {Question} and asks the model for 3 new
# questions, each introduced by "Question: " so they can be picked out line by line later.
step1_template = """Following is an example instance for the task: mathematical questions solving. Please come up with 3 new, diverse, and creative questions for the task.

# Question:
{Question}

# Instruction:
Please come up with 3 new, diverse, and creative questions for the task.
For each question:
- After "Question: ", write your generated question here."""

In [None]:
# One step-1 prompt per test example, in dataset order.
prompt_list = [
    step1_template.format(Question=example['Question'])
    for example in data
]

# Preview the first prompt.
print(prompt_list[0])

In [None]:
import re
from tqdm import tqdm

# Query the model once per step-1 prompt, retrying up to 5 times on errors.
# Exactly one entry is appended per prompt so that raw_step1_result_list stays
# index-aligned with data / skip_list.
raw_step1_result_list = []

for i in tqdm(range(len(prompt_list))):
    try_times = 0
    # Placeholder kept if every attempt fails; it contains no "Question" line,
    # so the step-1 post-processing flags this example as skipped.
    response = 'None'
    while try_times < 5:
        try: 
            response = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt_list[i]}
                ],
                max_tokens=512,
                temperature=0.0
            )
            # print(response)
            break
        except Exception as e:
            # Count the failed attempt; without this increment a persistent
            # error would keep the loop spinning forever.
            try_times += 1
            openai_key.process_error(e)

    raw_step1_result_list.append(response)

In [None]:
# Persist the raw step-1 model responses.
step1_output_path = os.path.join(result_path, f"{suffix}_step1.json")
with open(step1_output_path, "w") as output_file:
    json.dump(raw_step1_result_list, output_file, indent=4)

In [None]:
# Reload the raw step-1 responses from disk (lets later cells run without re-querying the API).
step1_input_path = os.path.join(result_path, f"{suffix}_step1.json")
with open(step1_input_path, 'r', encoding='utf8') as step1_file:
    raw_step1_result_list = json.load(step1_file)

In [None]:
import re

step1_result_list = []

def extract_key_lines(text):
    """Return the generated-question lines found in a raw step-1 response.

    "Question 1:" / "Question 2:" / "Question 3:" prefixes are normalized to
    "Question:", and a question whose text starts on the line after its
    "Question:" label is joined onto that label. A line is kept when, after
    stripping, it starts with "question" (case-insensitive) and is longer
    than 20 characters.

    Args:
        text: Raw model response as a single string.

    Returns:
        A list of the kept lines in their original order. The list is empty
        when nothing matches (previously an empty string was returned in that
        case; both are falsy and have length 0, so callers behave the same).
    """
    for numbered_label in ('Question 1:', 'Question 2:', 'Question 3:'):
        text = text.replace(numbered_label, 'Question:')
    # Pull a question that begins on the next line up onto its label line.
    text = text.replace('Question:\n', 'Question: ').replace('Question: \n', 'Question: ')

    query_lines = []
    for line in text.split('\n'):
        line = line.strip()
        # The length filter drops bare labels and other too-short fragments.
        if len(line) > 20 and re.match(r'^(question)', line, re.IGNORECASE):
            query_lines.append(line)

    return query_lines

# Turn every raw step-1 response into at most 3 distinct pseudo questions.
for i, raw_response in enumerate(raw_step1_result_list):
    # dict.fromkeys removes duplicates while keeping the model's original order.
    # list(set(...)) ordered the lines by string hash, so which questions
    # survived the [:3] cut could change from one kernel session to the next.
    clean_result_list = list(dict.fromkeys(extract_key_lines(raw_response)))

    if len(clean_result_list) >= 1:
        step1_result_list.append(clean_result_list[:3])
    else:
        # No usable question: keep a placeholder and mark the example so that
        # later steps fall back to the zero-shot prompt.
        step1_result_list.append(['None'])
        skip_list[i] = 1

In [None]:
# Number of examples flagged as skipped so far (flags are 0/1, so the sum is a count).
sum(skip_list)

In [None]:
# Persist the skip flags as they stand after step 1.
step1_skip_path = os.path.join(result_path, f"{suffix}_step1_skip.json")
with open(step1_skip_path, "w") as skip_file:
    json.dump(skip_list, skip_file, indent=4)

## Step 2: Pseudo Label


In [None]:
# Step-2 prompt: {Question} is filled with one extracted step-1 line (which already starts
# with "Question"); the model is asked to answer after "Answer: " and box the final result.
# The doubled backslash and doubled braces survive str.format as a literal \boxed{}.
step2_template = """Your task is to tackle mathematical problems step by step.

# Instruction: Solve the following problem step by step.
{Question}
- After "Answer: ", explain the solution and enclose the ultimate answer in \\boxed{{}}."""

In [None]:
# For every example, build one step-2 prompt per pseudo question.
# zip with data is kept so the result is truncated to the shorter of the two inputs.
prompt_list = [
    [step2_template.format(Question=pseudo_question) for pseudo_question in pseudo_questions]
    for pseudo_questions, _example in zip(step1_result_list, data)
]

# Preview the first prompt of the first example.
print(prompt_list[0][0])

In [None]:
# Number of examples that have a step-2 prompt group.
len(prompt_list)

In [None]:
# Answer every pseudo question with the model. Skipped examples get the
# placeholder 'None'; all others get the list of answers that succeeded.
raw_step2_result_list = []

for i in tqdm(range(len(prompt_list))):
    if skip_list[i] == 1:
        raw_step2_result_list.append('None')
        continue

    demo_candidate = []

    for query_prompt in prompt_list[i]:
        # Up to 10 attempts per question; a question that never succeeds adds nothing.
        for attempt in range(1, 11):
            try:
                result = create_response_chat(
                    MODEL,
                    prompt_input=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": query_prompt}
                    ],
                    max_tokens=512,
                    temperature=0.0
                )
            except Exception as e:
                # The error handler runs only when another attempt will follow.
                if attempt < 10:
                    openai_key.process_error(e)
            else:
                demo_candidate.append(result)
                break

    raw_step2_result_list.append(demo_candidate)

In [None]:
# Persist the raw step-2 model answers.
step2_output_path = os.path.join(result_path, f"{suffix}_step2.json")
with open(step2_output_path, "w") as output_file:
    json.dump(raw_step2_result_list, output_file, indent=4)

In [None]:
# Reload the raw step-2 answers from disk.
step2_input_path = os.path.join(result_path, f"{suffix}_step2.json")
with open(step2_input_path, 'r', encoding='utf8') as step2_file:
    raw_step2_result_list = json.load(step2_file)

In [None]:
import re

# Assemble one demonstration string per example from its pseudo questions and answers.
step2_result_list = []

for i in range(len(data)):
    if skip_list[i] == 1:
        step2_result_list.append('None')
        continue

    questions = step1_result_list[i]
    answers = raw_step2_result_list[i]

    # A missing answer means some question never got a response: skip the example.
    if len(questions) != len(answers):
        step2_result_list.append('None')
        skip_list[i] = 1
        continue

    demo_parts = []
    for question, answer in zip(questions, answers):
        # Collapse the answer onto one line, dropping blank lines.
        stripped_lines = (line.strip() for line in answer.split('\n'))
        flat_answer = ' '.join(line for line in stripped_lines if line)
        demo_parts.append(question + '\n' + flat_answer + '\n\n')

    step2_result_list.append(''.join(demo_parts))

In [None]:
# Number of examples flagged as skipped after step 2 (flags are 0/1, so the sum is a count).
sum(skip_list)

In [None]:
# Persist the skip flags as they stand after step 2.
step2_skip_path = os.path.join(result_path, f"{suffix}_step2_skip.json")
with open(step2_skip_path, "w") as skip_file:
    json.dump(skip_list, skip_file, indent=4)

## Step 3: Response Generation


In [None]:
# Reload the post-step-2 skip flags from disk.
step2_skip_path = os.path.join(result_path, f"{suffix}_step2_skip.json")
with open(step2_skip_path, "r") as skip_file:
    skip_list = json.load(skip_file)

In [None]:
# Step-3 prompt with demonstrations. {checked_demonstration} receives the step-2 demo
# string, whose entries each end with a blank line, so "# Instruction" lands on its own
# line right after it. {Question} receives the original test question.
step3_template = """Your task is to tackle mathematical problems step by step. You can refer to these demonstration to give your reasoning process.

# Demonstration:
{checked_demonstration}# Instruction: Solve the following problem step by step.
Question: {Question}
Answer: Explain the solution and enclose the ultimate answer in \\boxed{{}} here."""

In [None]:
# Fallback prompt without demonstrations, used for examples whose skip flag is 1.
zeroshot_template = """Your task is to tackle mathematical problems step by step.

# Instruction: Solve the following problem step by step.
Question: {Question}
Answer: Explain the solution and enclose the ultimate answer in \\boxed{{}} here."""

In [None]:
# Final prompts: with generated demonstrations when available, zero-shot otherwise.
prompt_list = []

for i in range(len(data)):
    use_demonstrations = skip_list[i] != 1
    question = data[i]['Question']

    if use_demonstrations:
        prompt = step3_template.format(
            checked_demonstration=step2_result_list[i],
            Question=question,
        )
    else:
        prompt = zeroshot_template.format(Question=question)

    prompt_list.append(prompt)

# Preview the first final prompt.
print(prompt_list[0])

In [None]:
# Ask the model for the final answer to every test question.
# Exactly one entry is appended per prompt: the response, or 'None' after 10 failed attempts.
result_list = []

for final_prompt in tqdm(prompt_list):
    for attempt in range(1, 11):
        try:
            result = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": final_prompt}
                ],
                max_tokens=512,
                temperature=0
            )
        except Exception as e:
            # On the last attempt, record the placeholder before handling the error.
            if attempt == 10:
                result_list.append('None')
            openai_key.process_error(e)
        else:
            result_list.append(result)
            break

In [None]:
# Persist the final model responses.
step3_output_path = os.path.join(result_path, f"{suffix}_step3.json")
with open(step3_output_path, "w") as output_file:
    json.dump(result_list, output_file, indent=4)

## Evaluation


In [None]:
# Reload the final responses from disk and report how many there are.
step3_input_path = os.path.join(result_path, f"{suffix}_step3.json")
with open(step3_input_path, 'r', encoding='utf8') as step3_file:
    result_list = json.load(step3_file)
print(len(result_list))

In [None]:
from utils.evaluate import evaluate_gsm8k

# Score the final responses against the dataset and print the result as a percentage.
accuracy = evaluate_gsm8k(result_list, data)
print(f"Accuracy: {accuracy}%")