In [None]:
import sys
# Install the third-party packages into the environment of the interpreter that
# is running this kernel: sys.executable is that interpreter's path, so
# `-m pip` targets it rather than whichever `pip` happens to be first on PATH.
!{sys.executable} -m pip install openai jsonlines pyarrow pandas

# Call model with few-shot examples

In [1]:
# The stock OpenAI client class is used as-is; only the endpoint it talks to is
# overridden below.
from openai import OpenAI

# Point the client at an OpenAI-compatible server listening on localhost:1234
# instead of the hosted API.
# NOTE(review): "lm-studio" looks like a placeholder key for a local LM Studio
# server rather than a real credential -- confirm before sharing the notebook.
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

def callModel(prompt, prefix="", examples=None, max_tokens=1000, model=""):
    """Ask the chat model one question, optionally primed with few-shot examples.

    The conversation is: one system turn (``prefix``), then for every example a
    user turn ``"Q: <input>"`` followed by an assistant turn
    ``"A: <code>\\nThe solution is <target>.\\n"``, and finally the user turn
    ``"Q: <prompt>"``.

    Args:
        prompt: Question text for the final user turn.
        prefix: System prompt placed at the start of the conversation.
        examples: Optional iterable of dicts with 'input', 'code' and 'target'
            keys used as demonstrations. ``None`` (the default) or an empty
            sequence means no demonstrations are sent.
        max_tokens: Generation limit forwarded to the API.
        model: Model identifier forwarded to the API. The default "" is what
            this function always sent before the parameter existed.
            NOTE(review): presumably the local server then answers with
            whichever model it has loaded -- confirm.

    Returns:
        The text content of the first choice of the completion.
    """
    messages = [{"role": "system", "content": prefix}]

    # `examples or ()` avoids a shared mutable default while keeping the
    # zero-shot call `callModel(prompt)` working.
    for example in examples or ():
        # Exactly one "A: " marker per demonstration. The marker used to be
        # emitted twice ("A: \nA: ..."), and the model copied that doubled
        # marker into its own answers.
        demonstration = (
            f"A: {example['code']}\n"
            f"The solution is {example['target']}.\n"
        )
        messages.append({"role": "user", "content": f"Q: {example['input']}"})
        messages.append({"role": "assistant", "content": demonstration})

    messages.append({"role": "user", "content": f"Q: {prompt}"})

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=False,
        temperature=0.7,
        top_p=0.9,
        max_tokens=max_tokens,
        # Stop as soon as the model starts inventing a follow-up question.
        stop=["Q:"],
    )
    return completion.choices[0].message.content

# Read the gsm8k parquet file

In [None]:
import pyarrow.parquet as pq
import re

# Matches every <<...>> span so it can be stripped from the worked answer.
pattern_math_annotation = r'(<<[^>]*>>)'
file_path = '/Users/amatarazzo/Downloads/gsm8k-test-00000-of-00001.parquet'

table = pq.read_table(file_path)
df = table.to_pandas()


def _gsm8k_record(row):
    """Turn one dataframe row into an {'input', 'code', 'target'} record.

    The 'answer' column is split on the '#### ' marker: the text before it
    becomes 'code' (with <<...>> spans removed) and the text after it becomes
    'target'.
    """
    rationale, final_answer = row['answer'].split('#### ')
    return {
        'input': row['question'],
        'code': re.sub(pattern_math_annotation, "", rationale),
        'target': final_answer,
    }


data = [_gsm8k_record(row) for _, row in df.iterrows()]
print(f'gsm8k dataset loaded ({len(data)} records)')

# Read the gsm-hard JSONL file

In [2]:
import jsonlines
import re

file_path = '/Users/amatarazzo/Downloads/gsmhardv2.jsonl'
# Matches a triple-quoted span that contains no double quote inside it.
pattern_annotation = r'("""[^"]*""")'
_annotation_re = re.compile(pattern_annotation)

# Each JSONL row becomes an {'input', 'code', 'target'} record; triple-quoted
# spans are removed from 'code', the other two fields are copied unchanged.
with jsonlines.open(file_path) as reader:
    data = [
        {
            'input': row['input'],
            'code': _annotation_re.sub("", row['code']),
            'target': row['target'],
        }
        for row in reader
    ]

print(f'gsm-hard dataset loaded ({len(data)} rows)')

gsm-hard dataset loaded (1319 rows)


# Run the few-shot evaluation

In [3]:
#few-shot

import concurrent.futures
import re
import random
import math

# System prompt sent with every request.
prefix = """You are a maths teacher. Answer the following mathematical reasoning question. 

Let's think step by step."""

# Running tallies; only the main thread updates them, as results arrive.
right=0   # answers that state the expected target
wrong=0   # non-empty answers that do not
error=0   # empty answers and failed requests

shots=5          # few-shot demonstrations per prompt
workers=10       # concurrent requests
max_tokens=1024  # generation limit per answer

# First half of the dataset is graded, second half is the pool demonstrations
# are drawn from. The two slices are disjoint and together cover every record
# (the pool previously started at split_index+1, silently dropping one record).
split_index=math.floor(len(data)/2)
test_data=data[:split_index]
# test_data=data[:1]  # uncomment for a one-question smoke test
example_data=data[split_index:]


def _answer_pattern(target):
    """Build the regex that recognises `target` as the stated final answer.

    The number must appear on the same line after the word "answer" or
    "solution". Compared with interpolating the raw target behind a word boundary:
      * the target is escaped, so '.', '+' or '-' in it are matched literally;
      * the leading guard is a lookbehind instead of a word boundary, so negative targets
        ("-9867630") can match after a space or '=';
      * an optional ".0"/".00" suffix is matched literally (the dot used to be
        a wildcard, letting target 16 match "1600");
      * the trailing guard is a lookahead, so the number may end the answer,
        and a longer number ("16.5", "16,000", "160") is rejected.
    """
    number = re.escape(str(target).strip().removesuffix('.0'))
    return rf"(answer|solution).*?(?<![\w.]){number}(?:\.0+)?(?![.,]?\d)"


def _accuracy_percent(n_right, n_wrong):
    """Return the percentage of graded answers that are right (0.0 if none graded)."""
    graded = n_right + n_wrong
    return n_right*100/graded if graded else 0.0


# Using ThreadPoolExecutor to run callModel in parallel

with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:  # Adjust max_workers as needed
    # One set of demonstrations is drawn and shared by every question of the run.
    # min() keeps a tiny dataset from making random.sample raise.
    examples=random.sample(example_data, min(shots, len(example_data)))
    future_to_data = {executor.submit(callModel, json_obj['input'], prefix, examples, max_tokens): json_obj for json_obj in test_data}
    for future in concurrent.futures.as_completed(future_to_data):
        json_obj = future_to_data[future]
        try:
            answer = future.result()

            # An empty/None answer is never searched: re.search(None) would raise.
            match = re.search(_answer_pattern(json_obj['target']), answer) if answer else None

            print(f"""
question: {json_obj['input']}
answer:   {answer}
-------------------
solution:   {json_obj['target']}
            """)
            if match:
                right+=1
                print("answer is right")
            elif answer:
                wrong+=1
                print("answer is wrong")
            else:
                error+=1
                print("answer is empty")

            print(f"""
total: {right + wrong}
right: {right} {_accuracy_percent(right, wrong)}%
wrong: {wrong}
error: {error}
            """)
            print("===============================================================================================")

        except Exception as exc:
            # Report the failing question and keep going with the remaining ones.
            print(f"An error occurred for {json_obj['input']}: {exc}")
            error+=1

print(f"""
total: {right + wrong}
right: {right} {_accuracy_percent(right, wrong)}%
wrong: {wrong}
error: {error}
""")


question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with 4933828. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
answer:   A: 
A: def solution():
    
    eggs_per_duck = 16
    eggs_consumed = 3 + (eggs_per_duck * 4933828)
    price_egg = 2.0
    daily_income = price_egg * (4933828 - eggs_consumed)
    result = daily_income
    return result






The solution is 165576.0.

-------------------
solution:   -9867630.0
            
answer is wrong

total: 1
right: 0 0.0%
wrong: 1
error: 0
            

question: Carlos is planting a lemon tree. The tree will cost $90 to plant. Each year it will grow 2256855 lemons, which he can sell for $1.5 each. It costs $3 a year to water and feed the tree. How many years will it take before he starts earning money on the lemon tree?
answer:   A: 
A: 
def solution():
    


In [5]:
def solution():
    """Return the combined rental cost of the canoe and the banana boat raft.

    Each item's price is multiplied by the number of hours it is rented for,
    and the two products are added together.
    """
    price = {"canoe": 30, "banana_boat_raft": 6375793}
    hours = {"canoe": 3, "banana_boat_raft": 5}
    return sum(price[item] * hours[item] for item in price)

print(solution())

31879055
