In [29]:
# Set up API key and do the necessary imports
import os
from taskgen import *

#os.environ['OPENAI_API_KEY'] = ""

## DATA LOADING

In [30]:
import datetime
import json

# train_data.jsonl stores one JSON dictionary per line, so every line is
# decoded on its own instead of parsing the file with a single json.load().
with open('train_data.jsonl', 'r') as file:
    selected_problems = list(map(json.loads, file))

# Skip the first 64 problems; only the remainder is used from here on.
selected_problems = selected_problems[64:]

# Name for the current experiment, built from day, month, year, hour, minute.
now = datetime.datetime.now()
experiment_name = now.strftime("%d_%m_%Y_%H_%M")
# The timestamp-based name is overridden with a fixed one right away, so every
# file name derived from experiment_name uses this fixed prefix.
experiment_name = "09_09_2024_11_52"


## TaskGen Agent And Functions 

In [31]:

def python_run_tool(code_snippet: str) -> str:
    '''Runs code_snippet and outputs the result of all print statements'''
    # Parameters:
    #   code_snippet: Python source code to execute.
    # Returns:
    #   Everything the snippet printed to stdout, or the string
    #   "Error: <exception message>" if the snippet raised an Exception.
    import sys
    import io
    import datetime
    import dateutil
    import sympy

    # Disable file access
    def restricted_open(*args, **kwargs):
        raise PermissionError("File access is restricted")

    # NOTE(security): this is NOT a real sandbox. `__import__` is exposed
    # unrestricted, so the snippet can still import os/io/subprocess and reach
    # the file system that way. Only run code you would trust to run directly.
    restricted_builtins = {
        'print': print,
        'range': range,
        'len': len,
        'int': int,
        'float': float,
        'str': str,
        'list': list,
        'dict': dict,
        'set': set,
        'tuple': tuple,
        'abs': abs,
        'min': min,
        'max': max,
        'sum': sum,
        'any': any,
        'all': all,
        'sorted': sorted,
        'zip': zip,
        'map': map,
        'filter': filter,
        '__import__': __import__,
        'datetime': datetime,  # Allow access to datetime module
        'open': restricted_open,  # Override open to restrict file access
        'sympy': sympy,
        # The key used to be 'dateutil:' (stray colon), which meant the name
        # `dateutil` was never actually available to the snippet.
        'dateutil': dateutil,
    }

    # Use ONE namespace dict for both globals and locals. With separate dicts,
    # top-level names of the snippet end up in the locals dict, while functions
    # defined by the snippet look names up in the globals dict, so a function
    # could not see the snippet's own variables, imports or other functions.
    namespace = {'__builtins__': restricted_builtins}

    # Capture the output
    old_stdout = sys.stdout
    sys.stdout = io.StringIO()
    try:
        exec(code_snippet, namespace)
        output = sys.stdout.getvalue()
    except Exception as e:
        # Callers detect failures by the "Error" prefix, keep the format stable.
        output = f"Error: {e}"
    finally:
        # Restore the original stdout
        sys.stdout = old_stdout

    return output

# LLM-backed code generator. It is called with a single instruction string and
# its result is read through the 'Output Code' key declared in output_format.
# `Function` is not defined in this notebook; presumably it comes from the
# `from taskgen import *` star import - confirm.
# The text below is the prompt sent to the model, so it is runtime behavior:
# do not reformat or reword it casually.
python_generator_tool = Function('''Generate code based only on <instruction: str> without additional context.
    Ensure that you define all variables and list out all imports.
    You can only import the following modules: datetime, dateutil
    You are not able to use the Equipped Functions using this tool.
    Never use the function datetime.date.today() or datetime.datetime.now() as the tasks always give a static date, which does not depend on the current date.
    Ensure the required output is a single print statement and only contains the date in the required format but never print anything else besides a date. For example: print("2014/01/01").''',
                                     output_format = {'Output Code': 'type: code'}, fn_name = 'python_generator_tool',model="gpt-4o")



# LLM-backed debugger. It is called with (instruction, python_code, error_msg),
# matching the three <...> placeholders in the prompt, and its fixed code is
# read through the 'Corrected Code' key declared in output_format.
# The prompt text is runtime behavior: do not reformat or reword it casually.
python_debug_tool = Function('''Debugs Python Code and returns corrected code. Ensures the required output is a single print statement and only contains the date in the required format but never print anything else besides a date. For example: print("2014/01/01").
Instruction: <instruction: str>
Current Code: <python_code: str>
Error Message: <error_msg: str>''',
                                 output_format = {'Thoughts': 'How to correct code', 'Corrected Code': 'type: code'}, fn_name = 'python_debug_tool',model="gpt-4o")




# Uses LLM to generate Code
def python_generate_and_run_code_tool(shared_variables, instruction: str) -> str:
    ''' Generates and runs code based on instruction. Returns 1) the result of all print statements in code, or error messages, and 2) the code '''
    # Parameters:
    #   shared_variables: mapping that may hold the calling agent under the key
    #       'agent'; when present, the agent's overall task and completed
    #       subtasks are prepended to the instruction as context.
    #   instruction: natural-language description of the code to generate.
    # Returns:
    #   The tuple (output, python_code): the captured stdout (or "Error: ..."
    #   message) of the LAST code version that was executed, and that code.
    # NOTE(review): the docstring and the `-> str` annotation are deliberately
    # left as they were even though a tuple is returned; presumably TaskGen
    # reads both when the function is registered as a tool - confirm before
    # changing either.
    max_debug_rounds = 3

    # Append context to tool
    if shared_variables and 'agent' in shared_variables:
        instruction = f"Context: {shared_variables['agent'].overall_task}\nPrevious Subtasks: {shared_variables['agent'].subtasks_completed}\nInstruction: {instruction}"

    # Generate Code
    python_code = python_generator_tool(instruction)['Output Code']

    # Run and Debug Code.
    # Every corrected version is executed before it can be returned, so
    # `output` always belongs to `python_code`. Previously the code produced by
    # the final debug round was returned and logged without ever being run,
    # paired with the error message of the version before it.
    output = python_run_tool(python_code)
    for _ in range(max_debug_rounds):
        if not output.startswith("Error"):
            break
        debugged_code = python_debug_tool(instruction, python_code, output)
        python_code = debugged_code['Corrected Code']
        output = python_run_tool(python_code)

    # Log the output and python code in the experiment's outputs file
    # (append mode, so earlier entries are kept).
    with open(f'{experiment_name}_outputs.txt', 'a') as file:
        file.write(f"New Experiment:\nInstruction: {instruction}\nOutput: {output}\nPython Code: {python_code}\n\n\n")

    return output, python_code

In [32]:
# Agent for the Date Problem Solver.
# The second argument is the agent description that is passed on to the model,
# i.e. runtime prompt text.
# NOTE(review): the prompt contains the typo "tasdk" (for "task"); it is left
# untouched here because changing prompt text changes what the model receives.

agent = Agent('Date Problem Solver', 
'''You are an expert skilled in solving a wide range of date problems, being able to reason about correct dates and formats when given a tasdk. Your task is to analyze the given problem, break it down into manageable steps, and provide a comprehensive solution with clear, logical reasoning. Ensure that your explanations are thorough, using relevant principles. Importantly, present the final answer clearly and explicitly, such that no further calculations are required from the user. Please note: do not use any large language models to derive the solution, you are free to use the tools given to you for the solution like dateutil library; all steps should be manually explainable. When using code to solve the problem, only print out the final answer in its format, so e.g. 24/12/1976, no further text! You should only use code to solve the problem if it is necessary and should not be the primary method of solving the problem.
''',model="gpt-4o").assign_functions(
    [python_generate_and_run_code_tool])


# Assign the shared variables for agent so code tool can get context of the task:
# python_generate_and_run_code_tool reads shared_variables['agent'].overall_task
# and .subtasks_completed to build the context it prepends to each instruction.
agent.shared_variables['agent'] = agent


### Example: Date Reasoning Problem

In [33]:
# Pick the first problem of the (already sliced) problem list as an example.
example_prob = selected_problems[0]

# Leftover from an earlier version that loaded the example from a file; unused.
#with open(example_prob, 'r') as file:
#    example_data = json.load(file)

# Last expression of the cell: displays the question text of the example.
example_prob["question"]

'The first day of 2019 is a Tuesday, and today is the first Monday of 2019. What is the date one week from today in MM/DD/YYYY?'

#### Simple example of running the agent on the problem

In [34]:
# Run the agent on every selected problem, compare its answer with the
# reference answer and record the verdict on screen, in the experiment log file
# and in the correct_tasks / incorrect_tasks id lists.
correct = False

correct_tasks = []
incorrect_tasks = []

for example_prob in selected_problems:
    # Start every problem from a clean agent state.
    agent.reset()
    soln = agent.run(example_prob["question"])

    # Take the prediction from the first result entry. Only the lookup errors
    # this access can produce are caught (empty or missing result, missing
    # key); a bare `except:` would also swallow KeyboardInterrupt.
    try:
        prediction = soln[0]["output_1"]
    except (IndexError, KeyError, TypeError):
        prediction = "Something went wrong. Got no output. #ERROR"
    # Remove line breaks; str() guards against a non-string output value,
    # which would otherwise crash on .replace().
    prediction = str(prediction).replace("\n", "")

    true_answer = example_prob["answer"].replace("\n", "")

    # The prediction counts as correct when it occurs inside the reference
    # answer. An empty prediction must be rejected explicitly, because
    # `"" in some_string` is always True and would be scored as a hit.
    correct = bool(prediction) and prediction in true_answer

    log_line = f"ID: {example_prob['id']}: Prediction: {prediction} - True Answer: {true_answer} - Correct: {correct}"
    print(log_line)

    # Append the same line to the experiment log file.
    with open(f"{experiment_name}_log.txt", "a") as file:
        file.write(log_line + "\n")

    if correct:
        correct_tasks.append(example_prob["id"])
    else:
        incorrect_tasks.append(example_prob["id"])


[1m[30mObservation: The first day of 2019 is a Tuesday, and today is the first Monday of 2019. We need to find the date one week from today in MM/DD/YYYY format.[0m
[1m[32mThoughts: To determine the date one week from today, we first need to identify the date of the first Monday of 2019. Then, we can add 7 days to this date to find the required date.[0m
[1m[34mSubtask identified: Calculate the date of the first Monday of 2019.[0m
Calling function python_generate_and_run_code_tool with parameters {'instruction': 'Calculate the date of the first Monday of 2019.'}
> {'output_1': '01/08/2019\n', 'output_2': 'import datetime\n\n# Given the first day of 2019 is a Tuesday\nfirst_day_of_2019 = datetime.date(2019, 1, 1)\n\n# Calculate the first Monday of 2019\nfirst_monday_of_2019 = first_day_of_2019 + datetime.timedelta(days=(7 - first_day_of_2019.weekday() + 1) % 7)\n\n# Calculate the date one week from the first Monday of 2019\none_week_from_first_monday = first_monday_of_2019 + dat

In [35]:
# Ad-hoc check on the values left over from the last loop iteration.
# NOTE(review): unlike the loop, this compares the raw output without removing
# "\n" first, so it can be False where the loop's check is True - confirm.
soln[0]["output_1"] in example_prob["answer"]

False

In [37]:
def calculate_accuracy(log_file_path):
    """Compute the percentage of log entries that are marked as correct.

    Every line containing "Correct:" counts as one evaluated problem; the
    entry is a hit when the text after the LAST "Correct:" on that line is
    exactly "True" (ignoring surrounding whitespace).

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        The accuracy as a float percentage in [0, 100], or 0.0 when the file
        has no line containing "Correct:".

    Raises:
        OSError: If the log file cannot be opened.
    """
    correct_count = 0
    total_count = 0

    with open(log_file_path, 'r') as file:
        for line in file:
            if "Correct:" not in line:
                continue
            total_count += 1
            # The verdict is the last field of the line. Splitting from the
            # right keeps this working when the logged prediction or answer
            # text itself contains "Correct:".
            verdict = line.rsplit("Correct:", 1)[1].strip()
            if verdict == "True":
                correct_count += 1

    if total_count == 0:
        return 0.0
    # Accuracy as a percentage
    return (correct_count / total_count) * 100

calculate_accuracy(f"{experiment_name}_log.txt")

69.11764705882352