# Program of Thoughts 

Program-of-Thoughts (PoT) prompting has the LLM express its reasoning steps as programming-language statements, 
which are then executed by an external interpreter such as Python.

While Chain-of-Thought (CoT) uses the LLM for both reasoning and computation, PoT uses the LLM only for reasoning: the computations are written as code and delegated to the interpreter instead of being carried out in plain text.


- <https://medium.com/ai-advances/next-generation-in-chain-of-thought-program-of-thoughts-5c6ca75ee4fa>
- <https://github.com/TIGER-AI-Lab/Program-of-Thoughts>
- <https://arxiv.org/pdf/2211.12588>


In [5]:
from openai import OpenAI
# OpenAI-compatible client aimed at a server listening on localhost:11434.
# NOTE(review): presumably a local Ollama instance, for which the API key is
# only a placeholder value -- confirm.
client = OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="ollama",
)

In [6]:
# Load the two prompt templates used below: the few-shot PoT prompt and the
# prompt that maps a computed prediction onto one of the answer options.
# Context managers close the file handles deterministically, and the explicit
# encoding keeps the result independent of the platform's default locale.
with open("./pot_prompt.txt", "r", encoding="utf-8") as prompt_file:
    pot_prompt = prompt_file.read()
with open("./pot_choices_prompt.txt", "r", encoding="utf-8") as prompt_file:
    pot_choices_prompt = prompt_file.read()

In [7]:
from typing import Dict, Any
def create_reader_request(example: Dict[str, Any]) -> str:
    """Format one example as the two comment lines appended to the PoT prompt.

    Args:
        example: Mapping that provides at least the keys ``"question"`` and
            ``"options"``; both values are interpolated with ``str()``.

    Returns:
        ``"# Question: ..."`` and ``"# Answer option: ..."`` joined by a single
        newline, without a trailing newline.

    Raises:
        KeyError: If ``"question"`` or ``"options"`` is missing.
    """
    question_line = f'# Question: {example["question"]}'
    options_line = f'# Answer option: {example["options"]}'
    return "\n".join((question_line, options_line))

In [19]:
import json
# Read the AQuA test set: one JSON object per line.
# The file is closed by the context manager, and it is decoded as UTF-8
# explicitly because the questions contain non-ASCII characters.
# split("\n") is kept (rather than splitlines()) so that only real newlines
# separate records.
with open("./data/aqua_test.jsonl", "r", encoding="utf-8") as data_file:
    data = data_file.read().strip().split("\n")
examples = [json.loads(item) for item in data]
print(len(examples))
print(examples[0])

254
{'question': 'A car is being driven, in a straight line and at a uniform speed, towards the base of a vertical tower. The top of the tower is observed from the car and, in the process, it takes 10 minutes for the angle of elevation to change from 45° to 60°. After how much more time will this car reach the base of the tower?', 'options': ['A)5(√3 + 1)', 'B)6(√3 + √2)', 'C)7(√3 – 1)', 'D)8(√3 – 2)', 'E)None of these'], 'rationale': 'Explanation :\nLet the height of the building be h. Initially, he was at an angle of 450. tan 45 = h/distance between car and tower. h = distance between car and tower (since tan 45 = 1).\nNow, after 10 minutes, it travelled a certain distance, and angle changed to 600.\ntan 60 = h/x x = h/√3\nSo, in 10 minutes, it has travelled a distance of h – x = h - h/√3.\n10 minutes = h *( 1 – 1√3)\nh can be travelled in 10 / (1 – 1√3).\nTo travel a distance of x, which is h/√3, it takes :\nh = 10 / (1 – 1/√3)\nh / √3 = 10/ √3 * (1 – 1/√3). Multiply numerator and d

In [27]:
######## Greedy = just one run
# Single-turn chat request: the few-shot PoT prompt followed by the first
# test question, separated by one newline.
example = examples[0]
user_content = "\n".join([pot_prompt, create_reader_request(example)])
messages = [{"role": "user", "content": user_content}]

# Greedy decoding: temperature 0.0 and a single sample.
# Settings left disabled here: model 'code-davinci-002', max_tokens=256,
# stop=['\n\n'].
result = client.chat.completions.create(
    model='llama3.2:latest',
    messages=messages,
    temperature=0.0,
    top_p=1,
    n=1,
    logprobs=1,
)

In [28]:
# Show the raw text of the first returned choice.
greedy_reply = result.choices[0].message.content
print(greedy_reply)

Here is the Python code that solves all the given questions and stores the results in variables named `ans`:

```python
from sympy import Symbol, simplify, solve_it

# Question: In a flight of 600 km, an aircraft was slowed down due to bad weather. Its average speed for the trip was reduced by 200 km/hr and the time of flight increased by 30 minutes.
duration = Symbol('duration', positive=True)
delay = 30 / 60
total_disntace = 600
original_speed = total_disntace / duration
reduced_speed = total_disntace / (duration + delay)
solution = solve_it(original_speed - reduced_speed - 200, duration)
ans = solution[duration]

# Question: M men agree to purchase a gift for Rs. D. If 3 men drop out how much more will each have to contribute towards the purchase of the gift?
M = Symbol('M')
D = Symbol('D')
cost_before_dropout = D / M
cost_after_dropout = D / (M - 3)
ans=simplify(cost_after_dropout - cost_before_dropout)

# Question: A sum of money at simple interest amounts to Rs. 815 in 3 years an

In [8]:
######## Self-consistency
def prompt_for_choice(question: str, options: str, prediction: str) -> str:
    """Ask the model which answer option is closest to a computed prediction.

    The request is retried every 3 seconds until it succeeds, so this call
    blocks for as long as the endpoint keeps failing.

    Args:
        question: Text of the question.
        options: The answer options, interpolated into the prompt as-is.
        prediction: The value obtained from the generated program.

    Returns:
        The model's reply up to the first newline, stripped of surrounding
        whitespace; an empty string if the reply carries no text.
    """
    # Imported locally so the function works even when the cell that imports
    # `sleep` at module level has not been run yet.
    from time import sleep

    prompt = f'{pot_choices_prompt}\nQuestion: {question}\nOptions: {options}\nPrediction: {prediction}\nClosest Option: '
    messages = [{"role": "user", "content": prompt}]

    while True:
        try:
            result = client.chat.completions.create(
                model='llama3.2:latest',  # the original script used 'code-davinci-002'
                messages=messages,
                max_tokens=256,
                temperature=0.0,
                top_p=1,
                # Only the first choice is read below, and decoding is greedy,
                # so one sample is enough; log-probabilities are not used here.
                n=1,
                stop=['\n'],
            )
            break
        except Exception:
            # Best-effort retry, e.g. while the server is busy or starting up.
            sleep(3)

    # Chat completions carry their text in `message.content`, which may be None.
    content = result.choices[0].message.content
    return (content or '').strip()

In [None]:
########### BACKUP ##########
from typing import Dict, Any
import os
import json
from tqdm import tqdm
from datetime import datetime
import openai
from time import sleep
import sympy
from sympy.solvers import solve
from sympy import Symbol
import math
import argparse
from tool import simplify_ans, safe_execute
from sympy import simplify
from collections import Counter

# Command-line options of the evaluation script.
parser = argparse.ArgumentParser()
parser.add_argument("--key", default='OPENAI_KEY', type=str)    # name of the env var holding the API key
parser.add_argument("--start", default=0, type=int)             # slice start into the test set
parser.add_argument("--end", default=-1, type=int)              # slice end into the test set
parser.add_argument("--greedy", default=False, action='store_true')
parser.add_argument("--dry_run", default=False, action='store_true')

# parse_known_args() returns unrecognised arguments instead of exiting, so the
# cell also runs when the process was started with arguments this parser does
# not define (parse_args() would terminate with "unrecognized arguments").
args = parser.parse_known_args()[0]
# Hard-coded overrides: whatever was passed on the command line for these four
# options is replaced, so only the first example is processed, in
# self-consistency mode, with real API calls.
args.start = 0
args.end = 1
args.greedy = False
args.dry_run = False

def parse_api_result(result):
    """Return the completion texts in *result*, highest total log-probability first.

    Args:
        result: Mapping with a ``'choices'`` sequence; every choice provides
            ``'text'`` and ``'logprobs'['token_logprobs']`` (an iterable of
            numbers that is summed to score the choice).

    Returns:
        A list with the ``'text'`` of every choice, ordered by descending
        score. Choices with equal scores keep their original relative order.
    """
    scored = [
        (choice['text'], sum(choice['logprobs']['token_logprobs']))
        for choice in result['choices']
    ]
    # list.sort is stable, also with reverse=True, so ties stay in input order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [text for text, _ in scored]

def safe_execute(code_string: str, keys=None):
    """Run a generated program with a 5-second limit and read back its results.

    SECURITY: ``code_string`` is passed to ``exec`` unchanged. The time limit
    and the exception handler below do not sandbox the code; only run this on
    output you are prepared to execute on this machine.

    Args:
        code_string: Python source to execute.
        keys: Optional iterable of variable names to read back. When ``None``,
            only the variable ``ans`` is read.

    Returns:
        The value bound to ``ans`` (or a list with the values of ``keys``),
        with ``None`` for names the program did not define. ``None`` is also
        returned when the program raises an exception or exceeds the limit.
    """
    # The block relies on the third-party `func_timeout` package, but this file
    # never imports it; import it here so the name is always bound.
    import func_timeout

    def execute(x):
        try:
            # Collect the program's variables in an explicit dict. Reading
            # them back through locals() after a bare exec() is not reliable:
            # since Python 3.13 (PEP 667) such writes are no longer visible.
            # Module globals are still passed so the program can use the names
            # imported at the top of this file.
            namespace = {}
            exec(x, globals(), namespace)
            if keys is None:
                return namespace.get('ans', None)
            return [namespace.get(k, None) for k in keys]
        except Exception:
            # Generated programs frequently fail; treat that as "no answer".
            return None

    try:
        ans = func_timeout.func_timeout(5, execute, args=(code_string,))
    except func_timeout.FunctionTimedOut:
        ans = None

    return ans

def _request_completions(prompt: str, temperature: float, n: int):
    """Request ``n`` completions for ``prompt``, retrying every 3 s until success.

    Every failure is printed before sleeping so that a permanent error (bad
    key, unavailable API) is visible instead of looping silently.
    """
    while True:
        try:
            # NOTE(review): `openai.Completion` is the legacy interface, while
            # the top of this notebook imports the newer `OpenAI` client
            # class -- confirm the installed package still provides it.
            return openai.Completion.create(
                engine='code-davinci-002',
                prompt=prompt,
                api_key=os.getenv(args.key),
                max_tokens=256,
                temperature=temperature,
                top_p=1,
                n=n,
                stop=['\n\n'],
                logprobs=1
            )
        except Exception as err:
            print(f'Completion request failed ({err!r}); retrying in 3s')
            sleep(3)


if __name__ == "__main__":
    # Load the AQuA test set (one JSON object per line); blank lines are skipped.
    with open('data/aqua_test.jsonl', encoding='utf-8') as f:
        aqua_test = [json.loads(line) for line in f if line.strip()]

    now = datetime.now()
    dt_string = now.strftime("%m_%d_%H_%M")

    correct, wrong = 0, 0
    aqua_test = aqua_test[args.start:args.end]
    if args.greedy:
        # Greedy decoding: one sample at temperature 0.
        filename = f'outputs/aqua_s{args.start}_e{args.end}_{dt_string}.jsonl'
        temperature, n_samples = 0.0, 1
    else:
        # Self-consistency decoding: 30 samples at temperature 0.3.
        filename = f'outputs/aqua_sc_s{args.start}_e{args.end}_{dt_string}.jsonl'
        temperature, n_samples = 0.3, 30

    # Make sure the output directory exists before opening the result file.
    os.makedirs('outputs', exist_ok=True)

    # The context manager closes the result file even if an example fails.
    with open(filename, 'w') as writer:
        writer.write(json.dumps({'demonstration': pot_prompt}) + '\n')
        for example in tqdm(aqua_test):
            full_prompt = pot_prompt + "\n"
            full_prompt += create_reader_request(example)
            if args.dry_run:
                print(full_prompt)
                print('=======================')
                continue

            result = _request_completions(full_prompt, temperature, n_samples)

            # Execute every generated program and vote over the simplified
            # answers; programs that produce no answer do not vote.
            result_counter = Counter()
            codes = parse_api_result(result)
            for r in codes:
                ans = safe_execute(r)
                pred = simplify_ans(ans)
                if pred is not None:
                    result_counter.update([pred])
            print(result_counter)

            if result_counter:
                prediction = result_counter.most_common(1)[0][0]
            else:
                prediction = None

            if prediction is None:
                # No program produced an answer: fall back to option 'A'.
                chosen_option = 'A'
            else:
                chosen_option = prompt_for_choice(
                    example['question'], example['options'], prediction)

            if chosen_option == example['correct']:
                correct += 1
            else:
                wrong += 1

            tmp = {'question': example['question'],
                   'generated': codes,
                   'generated_prediction': str(prediction),
                   'options': example['options'],
                   'answer': example['correct'],
                   'prediction': chosen_option}

            writer.write(json.dumps(tmp) + '\n')

    print()
    scored = correct + wrong
    if scored:
        print(correct / scored)
    else:
        # Dry runs and empty slices score nothing; avoid dividing by zero.
        print('No examples were scored.')