# Agent

In [1]:
from __future__ import annotations
from typing import Optional
import os
import openai
import warnings
os.environ["OPENAI_API_KEY"] = "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"
# openai.organization = "org-IpivVcfezFhHNrRgT5zhR0Ng"
openai.api_key = os.getenv("OPENAI_API_KEY")

OPENAI_LLMS = [
    'gpt-3.5-turbo',
    'text-ada-001'
]
AVAILABLE_LLMS = {
    'OpenAI': OPENAI_LLMS
}

### Prompt Templates
# Feel free to alter them here in order to use less tokens or get better results
def initial_task_prompt(task_prompt):
    prompt = 'You are an expert Data Scientist. Below, you will be given a task to solve.'
    prompt += ' Your answer can take only two forms:'
    prompt += '\n1. If the code to solve the task fits in one prompt: ONLY contain Python code. Any comments must be #commented.'
    prompt += '\n2. Otherwise: ONLY bullet points numbered in format 1. ,'
    prompt += ' where each bullet point is a smaller step to complete main task you were given.'
    prompt += '\nHere is your task:\n'
    prompt += task_prompt
    return prompt

### Classes

class LLM:
    '''
    If the user just wants to prompt an LLM, they can instantiate one without an Agent.

    Otherwise an LLM can only belong to one Agent.
    Reciprocally, an Agent can only have one LLM.
    '''
    def __init__(self, new_model_name = None, agent: Optional[Agent]= None, var_dict: dict= {}):
        self.agent = agent
        self.model_provider = None
        self._model_name = None
        self._write_prompt_code = None
        self.var_dict = var_dict
        self.prompts_sent = []
        self.code_run = []

        if new_model_name is not None:
            self.set_model_name(new_model_name)

    @property
    def model_name(self):
        return self._model_name
    
    def set_model_name(self, new_model_name):
        self._model_name = new_model_name
        self._initialize_model()

    def _initialize_model(self):
        # Raise error if no model_name to initialize
        if self._model_name is None:
            message = self._error_message(f'Empty model_name to initialize LLM')
            raise ValueError(message)

        # Let's try to find the model_name in the ones that are implemented
        model_provider = None
        for llm_provider, models in AVAILABLE_LLMS.items():
            if self._model_name in models:
                model_provider = llm_provider
                break
        
        # If no model provider was found for the current model_name, raise error
        if model_provider is None:
            message = self._error_message(f"model_name '{self._model_name}' not found in AVAILABLE_LLMS")
            raise ValueError(message)

        self.model_provider = model_provider
        self._initialize_prompt_code()

    def _initialize_prompt_code(self):
        '''
        This method creates the function that writes code to prompt the LLM based on what provider we get it from.
        If it's an OpenAI LLM, we use its API, if its Llama, we use llama_cpp, and so on.
        '''
        if self.model_name == 'text-ada-001':
            def write_prompt_code(prompt):
                return f'''
response = openai.ChatCompletion.create(
    model={repr(self._model_name)},
    prompt={repr(prompt)}
)
'''
        elif self.model_name == 'gpt-3.5-turbo':
            def write_prompt_code(prompt):
                # code = 'response = openai.ChatCompletion.create('
                # code += f'\n    model={repr(self._model_name)},'
                # code += '\n    messages=['
                # code += '\n        {"system": "You are a Senior Data Analyst and expert in Pandas. You are always concise and precise."},'
                # code += f'        {{"role": "user", "content": {}}},'
                # code += '\n    ])'
                A = self._model_name
                B = prompt
                code = f'''response = openai.ChatCompletion.create(
    model='{A}',
    messages=[
        {{"role": "user", "content": {repr(B)}}},
    ]
)'''
                return code
            self._write_prompt_code = write_prompt_code
        else:
            message = self._err_msg(f'No prompt_code function was implemented for model_provider \'{self.model_provider}\'')
            raise ValueError(message)
    def _err_msg(self, message):
        '''
        Appends the agent name on error message if this LLM has an agent.
        '''
        if self.agent:
            return message + f" for LLM of Agent {self.agent.name}."
        else:
            return message + '.'
    
    @property
    def prompt_code(self, ):
        '''
        The user can ask an LLM for an example of what is its prompt code
        '''
        if self._write_prompt_code is None:
            return None
        else:
            return self._write_prompt_code("YOUR_PROMPT_HERE")

    def is_model_initialized(self):
        # Do we have a model_name?
        if self._model_name  is None:
            message = self._err_msg(f'Model not initialized: No model_name')
            raise ValueError(message)
        if self.model_provider  is None:
            message = self._err_msg(f'Model not initialized: No model_provider')
            raise ValueError(message)
        if self._write_prompt_code is None:
            message = self._err_msg(f'Model not initialized: No prompt_code')
            raise ValueError(message)

    def prompt(self, prompt):
        '''
        Prompts the LLM with the given message.
        The most important function in this class.

        In order to do this, the model must have been correctly initialized.
        '''
        self.is_model_initialized()

        # Let's write the code to prompt the LLM
        prompt_code = self._write_prompt_code(prompt)

        # Execute the code having var_dict available in the namespace
        exec(prompt_code, globals(), self.var_dict)

        # Let's log the prompts and code
        self.prompts_sent.append(prompt)

class Task:
    def __init__(self, name, request_prompt, agent = None):
        self.name = name
        self.request_prompt = request_prompt
        self.solution = None
        self.is_done = False
        self.agent = None

        if agent is not None:
            self.assign_agent(agent)
    
    def assign_agent(self, agent):
        # Does this task already have an Agent?
        if self.agent is not None:
            # Is it the same agent as the new one?
            if agent != self.agent:
                raise ValueError(f'Trying to assign Agent {agent.name} to Task {self.name}, but the latter already belongs to {self.agent.name}.')
        # The task now has a new Agent
        self.agent = agent
        # The agent also gets assigned this task
        if self.agent.task != self:
            agent.assign_task(self)

class Response:
    def __init__(self, rtype, object_dict = None, message = None, plan = None, code = None):
        self.rtype = rtype
        self.object_dict = object_dict
        self.message = message
        self.plan = plan
        self.code = code

class Agent:
    '''
    The class that solves tasks for Pamboo.
    It either solves the task itself by writing and running code,
    or it creates a plan and delegates tasks to other Agents.

    An Agent can only have one LLM. Think of the LLM as the "brain" of the Agents.
    '''
    def __init__(self, name: str, llm_name: str, task: Optional[Task]=None, var_dict: dict={}, verbose=True):
        self.name = name
        self.llm_name = llm_name
        self.llm_responses = []
        self.var_dict = var_dict
        self.verbose=verbose
        self.task = None

        # Initializing LLM
        self._initialize_llm()

        # If a task was already provided, start responding right away
        if task is not None:

            self.assign_task(task)

            self.response = self.respond_task()
    
    def assign_task(self, new_task):
        # Does the agent already have a task?
        if self.task is not None:
            # Is the old task different from the new one?
            if self.task.name != new_task.name:
                if not self.task.is_done:
                    # If old task is not done, raise error
                    raise ValueError(f'Trying to assign task {new_task.name} to Agent {self.name}, but the latter is already working on task {self.task.name}.')
                else:
                    message = f'\nAgent {self.name} is being given new task {new_task.name}, which will overwrite completed task {self.task.name}.'
                    message += '\nMake sure that:'
                    message += '\n1 - You don\'t care if reference to old task is lost'
                    message += '\n2 - This behavior is desired'
                    message += '\n3 - var_dict and llm_name are properly set for the new task.'
                    warnings.warn(message, RuntimeWarning)
                    # The old task does not belong to this agent anymore
                    # But did it still belong to it?
                    old_task_agent = self.task.agent
                    if not old_task_agent == self:
                        raise ValueError(f'Old task of Agent {self.name} belonged to another agent {old_task_agent}.')
                    # If old task did still belong to this agent, now it doesn't anymore
                    self.task.agent = None
        # The agent now has a new task
        self.task = new_task
        # If task is still the same, we just check if it is correctly assigned to this agent as well
        if self.task.agent != self:
            new_task.assign_agent(self)

    def _make_initial_prompt(self):

        # Can the agent solve the task right away, or does it need to break it into smaller tasks and delegate?
        # Let's create the prompt to know
        if self.verbose:
            self.speak(f'Can I solve task {self.task.name} right away or do I create a plan?')

        # Creating the initial prompt
        prompt = initial_task_prompt(self.task.request_prompt)

        # Prompting the llm
        self.llm.prompt(prompt)

    def respond_task(self):
        '''
        Here the agent either solves the task or creates a plan to solve it.
        If it has to create a plan, it will delegate the tasks to other agents.
        It will also know what steps need to be done before other steps and delegate accordingly.

        This is a loop that only ends in the following cases:
        1 - Agent found a correct solution by itself
        2 - Agent found a correct solution by delegating tasks to other agents
        2 - Agent decided he has failed at the task
        3 - User interrupted
        '''
        if not self.task:
            raise ValueError(f'Agent {self.name} cannot start responding to task because it is not set.')
        
        # Make the initial attempt to solve or create plan
        self._make_initial_prompt()

        # Get LLM response from initial prompt
        self.llm_responses.append(self.llm.var_dict['response']['choices'][0]['message']['content'])

        # Solving task of printing
        self.speak('Here is the solution to the task:\n')
        exec(self.llm_responses[0],globals(),self.var_dict)
        # while not self.task.is_done:

        #     # Case 1: Agent solved the task - need to check it
        #     if self.response.rtype == 'solution':
        #         print(f'\nAgent {self.name}: Task Solved.')
        #         return

        #     # Case 2: Agent created a plan to solve the task - need to delegate tasks
        #     if self.response.rtype == 'plan':
        #         print(f'\nAgent {self.name}: Plan devised.')
        #         return
            
        #     # Case 3: Agent failed to create solution or plan
        #     if self.response.rtype == 'failed':
        #         print(f'\nAgent {self.name}: Response Failed.')
        #         return

    def has_task(self):
        if self.task is None:
            return False
        else:
            return True

    def _initialize_llm(self):
        self.llm = LLM(self.llm_name, self, self.var_dict)

    def speak(self, msg):
        print(f'\n{self.name}: {msg}')

class Pamboo:
    '''
    This is the class users interact with.
    It receives the main task prompt from the user and gets the ball rolling to solve it by:
    1 - Creating a Task object for the main task
    2 - Creating a Root Agent which is assigned the main task

    When all agents have finished solving the task and have confirmed it is done,
    it then tells the user what was done and where to find the results.
    '''
    def __init__(self, task_prompt: str, llm_name: str='gpt-3.5-turbo'):
        self.main_task = Task('main_task', task_prompt)
        self.root_agent = Agent('root', llm_name, self.main_task)

# if __name__ == '__main__':
pb = Pamboo('say hello world')



root: Can I solve task main_task right away or do I create a plan?

root: Here is the solution to the task:

Hello, World!


In [15]:
vd = pb.root_agent.llm.var_dict
vd

{'response': <OpenAIObject chat.completion id=chatcmpl-7PkoUpcboi3tpyneChGo2HBwWgEC8 at 0x7fba64064d10> JSON: {
   "choices": [
     {
       "finish_reason": "stop",
       "index": 0,
       "message": {
         "content": "print(\"hello world\")",
         "role": "assistant"
       }
     }
   ],
   "created": 1686372506,
   "id": "chatcmpl-7PkoUpcboi3tpyneChGo2HBwWgEC8",
   "model": "gpt-3.5-turbo-0301",
   "object": "chat.completion",
   "usage": {
     "completion_tokens": 5,
     "prompt_tokens": 99,
     "total_tokens": 104
   }
 }}

In [16]:
vd['response']['choices'][0]['message']['content']

'print("hello world")'

In [20]:
import pandas as pd
df = pd.DataFrame({
    'a': [0,1,2,3,4],
    'b': [10,10,10,20,20],
    'c': ['dog','dog','cat','dog','cat']
})
df.to_csv('test.csv')

In [21]:
print(df[df["c"] == "dog"]["a"].mean())

1.3333333333333333


In [22]:
df[df["c"] == "dog"]

Unnamed: 0,a,b,c
0,0,10,dog
1,1,10,dog
3,3,20,dog


In [1]:
import pandas as pd
df = pd.read_csv('test.csv')
df

Unnamed: 0.1,Unnamed: 0,a,b,c
0,0,0,10,dog
1,1,1,10,dog
2,2,2,10,cat
3,3,3,20,dog
4,4,4,20,cat


In [2]:
print(df[df["c"] == "dog"]["a"].mean())

1.3333333333333333


In [6]:
from sklearn.datasets import load_iris

df = pd.DataFrame(load_iris(as_frame=True))

print(df.shape)
df.head()

ValueError: All arrays must be of the same length

In [16]:
import pandas as pd
from sklearn.datasets import load_iris

# Load the Iris dataset
iris_data = load_iris(as_frame=True)

# Create a Pandas DataFrame from the dataset
iris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)

# Add the target (species) column to the DataFrame
iris_df['target'] = iris_data.target

# Replace target integer values with actual target names (species names)
target_names = iris_data.target_names
iris_df['species'] = iris_df['target'].map({i: name for i, name in enumerate(target_names)})

# Display the first few rows of the DataFrame
iris_df.head()


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,species
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [17]:
iris_data.data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [18]:
iris_data.target.head()


0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

In [23]:
import pandas as pd
from sklearn.datasets import load_diabetes

# Load the Diabetes dataset
diabetes_data = load_diabetes(as_frame=True)

# Create a Pandas DataFrame from the dataset
diabetes_df = diabetes_data.data

# Add the target (diabetes progression) column to the DataFrame
diabetes_df['target'] = diabetes_data.target

# Display the first few rows of the DataFrame
diabetes_df.head()


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [25]:
diabetes_df.to_csv('diabetes.csv')

In [2]:
from pamboo import Pamboo

pb = Pamboo('Load diabetes.csv and print the average for age')

Pamboo: Starting work on the following task:
Load diabetes.csv and print the average for age

root: Can I solve task main_task right away or do I create a plan?

root: Here is the solution to the task:

-7.284268713603969e-18

root: Code to achieve solution:

import pandas as pd

# Load the dataset
data = pd.read_csv('diabetes.csv')

# Calculate the average age
average_age = data['age'].mean()

# Print the average age
print(average_age)



In [3]:
pb.root_agent.var_dict.keys()

dict_keys(['response', 'pd', 'data', 'average_age'])

In [4]:
pb.root_agent.var_dict['data'].head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [46]:
for i, response in enumerate(pb.root_agent.llm_responses):
    print(f'--- Response {i} ' + '-'*50)
    print(response)

--- Response 0 --------------------------------------------------

import pandas as pd

# Load diabetes.csv into a DataFrame
df = pd.read_csv('diabetes.csv')

# Calculate the average for age column
average_age = df['age'].mean()

# Print the average for age
print(average_age)



In [38]:
pb.root_agent.var_dict['data'].head()

Unnamed: 0.1,Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [5]:
pb2 = Pamboo('Load diabetes.csv, run a regression of \"target\" on all other variables. Print the coefficients and their confidence intervals.')

Pamboo: Starting work on the following task:
Load diabetes.csv, run a regression of "target" on all other variables. Print the coefficients and their confidence intervals.

root: Can I solve task main_task right away or do I create a plan?

root: Here is the solution to the task:

                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        152.1335      2.576     59.061      0.000     147.071     157.196
age          -10.0099     59.749     -0.168      0.867    -127.446     107.426
sex         -239.8156     61.222     -3.917      0.000    -360.147    -119.484
bmi          519.8459     66.533      7.813      0.000     389.076     650.616
bp           324.3846     65.422      4.958      0.000     195.799     452.970
s1          -792.1756    416.680     -1.901      0.058   -1611.153      26.802
s2           476.7390    339.030      1.406      0.160    -189.620    1143.098
s3     

In [61]:
df = pd.read_csv('diabetes.csv')
df.head()
# diabetes_df.index.name = 'index'
# diabetes_df.head()
# diabetes_df.to_csv('diabetes.csv',index=False)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
