# Install necessary libraries

In [48]:
!pip install openai groq python-dotenv pillow



In [51]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [43]:
from openai import OpenAI
import os
import json
from groq import Groq
import json
from typing import List, Dict, Any, Callable
import ast
import io
import sys
from dotenv import load_dotenv
from pprint import pprint

import subprocess
import tempfile



In [2]:
# Read the key/value pairs of the local dotenv file into the process environment.
# The path is relative to the notebook's working directory.

path = "../.env.local"
load_dotenv(dotenv_path=path)

True

In [3]:
pip install pillow pytesseract

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Pull the provider API keys out of the environment (each is None when unset).
groq_api_key = os.environ.get('GROQ_API_KEY')
openrouter_api_key = os.environ.get('OPENROUTER_API_KEY')
openai_api_key = os.environ.get('OPENAI_API_KEY')

In [5]:
# Initialize the LLM clients. Each client is handed its API key explicitly so
# all three providers are configured the same way.
groq = Groq(api_key=groq_api_key)
openrouter = OpenAI(
    base_url='https://openrouter.ai/api/v1',  # OpenRouter exposes an OpenAI-compatible API
    api_key=openrouter_api_key,
)
# When api_key is None the OpenAI client falls back to the OPENAI_API_KEY
# environment variable, so this behaves like the bare OpenAI() call.
openai = OpenAI(api_key=openai_api_key)

**SOURCE**:
* [GitHub Repo](https://github.com/team-headstart/Agent-Workshop)

To create our AI Agent, we will define the following functions:

1. **Planner**: This function takes a user's query and breaks it down into smaller, manageable subtasks. It returns these subtasks as a list, where each one is either a reasoning task or a code generation task.

2. **Reasoner**: This function provides reasoning on how to complete a specific subtask, considering both the overall query and the results of any previous subtasks. It returns a short explanation on how to proceed with the current subtask.

3. **Actioner**: Based on the reasoning provided for a subtask, this function decides whether the next step requires generating code or more reasoning. It then returns the chosen action and any necessary details to perform it.

4. **Evaluator**: This function checks if the result of the current subtask is reasonable and aligns with the overall goal. It returns an evaluation of the result and indicates whether the subtask needs to be retried.

5. **generate_and_execute_code**: This function generates and executes Python code based on a given prompt and memory of previous steps. It returns both the generated code and its execution result.

6. **executor**: Depending on the action decided by the `actioner`, this function either generates and executes code or returns reasoning. It handles the execution of tasks based on the action type.

7. **final_answer_extractor**: After all subtasks are completed, this function gathers the results from previous steps to extract and provide the final answer to the user's query.

8. **autonomous_agent**: This is the main function that coordinates the process of answering the user's query. It manages the entire sequence of planning, reasoning, action, evaluation, and final answer extraction to produce a complete response.

![](../public/images/digram.png)
![](../public/images/workflow.png)

In [75]:
def _build_chat_kwargs(model, prompt, json_mode):
    """Assemble the keyword arguments for a single-turn chat completion call."""
    kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}]
    }
    if json_mode:
        # Ask the provider to constrain the reply to a JSON object.
        kwargs["response_format"] = {"type": "json_object"}
    return kwargs


def get_llm_response(client, prompt, openai_model="gpt-4o-mini", json_mode=False):
    """Send a single-turn prompt to an LLM provider and return the reply text.

    Args:
        client: "openai" to use the OpenAI client, or "groq" to try a list of
            Groq models in order and fall back to OpenRouter if all fail.
        prompt: The user message to send.
        openai_model: Model name used only when client is "openai".
        json_mode: When True, request a JSON-object response format.

    Returns:
        The content of the first choice's message.

    Raises:
        ValueError: If client is neither "openai" nor "groq".
        RuntimeError: If no provider produced a response object.
        Exception: Whatever the OpenAI call (or the OpenRouter fallback) raised;
            the error is printed and then re-raised unchanged.
    """
    response = None  # Stays None until some provider answers

    if client == "openai":
        try:
            response = openai.chat.completions.create(
                **_build_chat_kwargs(openai_model, prompt, json_mode)
            )
        except Exception as e:
            print(f"Error with OpenAI client: {e}")
            raise  # Propagate error with its original traceback

    elif client == "groq":
        models = [
            "llama-3.1-8b-instant",
            "llama-3.1-70b-versatile",
            "llama3-70b-8192",
            "llama3-8b-8192",
            "gemma2-9b-it"
        ]

        # Try each Groq model in turn; the first one that answers wins.
        for model in models:
            try:
                response = groq.chat.completions.create(
                    **_build_chat_kwargs(model, prompt, json_mode)
                )
                break  # Exit loop on success
            except Exception as e:
                print(f"Error with Groq model '{model}': {e}")

        if response is None:  # All Groq models failed -> OpenRouter fallback
            try:
                response = openrouter.chat.completions.create(
                    **_build_chat_kwargs(
                        "meta-llama/llama-3.1-8b-instruct:free", prompt, json_mode
                    )
                )
            except Exception as e:
                print(f"Error with fallback model: {e}")
                raise  # Propagate error if fallback also fails

    else:
        raise ValueError(f"Invalid client: {client}")

    if response is None:
        raise RuntimeError("No response was generated by any model.")

    return response.choices[0].message.content


In [92]:
def planner(user_query) -> List[str]:
    """Break a user query into an ordered list of subtasks using the Groq LLM.

    Args:
        user_query: The question the agent should answer.

    Returns:
        The list stored under the "subtasks" key of the LLM's JSON reply, or an
        empty list when the reply cannot be parsed or has an unexpected shape.
    """
    prompt = f"""Given the user's query: '{user_query}', break down the query into as few subtasks as possible in order to answer the question.

    Each subtask should be either a reasoning task or a code generation task. Never duplicate a task.

    Here are the only 2 actions that can be taken for each subtask:
    - generate_code: This action involves generating Python code and executing it in order to make a calculation or verification
    - reasoning: This action involves providing reasoning for what to do to complete the subtask

    Each subtask should begin with either "reasoning" or "generate_code".

    Keep in mind the overall goal of answering the user's query throughout the planning process.

    Return the result as a JSON object with a single "subtasks" key whose value is a list of strings, where each string is a subtask.

    Here is an example JSON response:
    {{"subtasks": ["subtask1", "subtask2", "subtask3"]}}
"""
    # Fetch response from LLM
    data = get_llm_response('groq', prompt, json_mode=True)

    try:
        # Parse the content into JSON
        response = json.loads(data)
        pprint({"Parsed Response:": response})
        subtasks = response['subtasks']
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        # TypeError covers a None reply and a top-level JSON value that is not an object
        print(f"Error while processing LLM response: {e}")
        return []

    if not isinstance(subtasks, list):
        # Callers iterate over the result, so anything but a list is unusable
        print(f"Error while processing LLM response: 'subtasks' is not a list: {subtasks!r}")
        return []

    print(subtasks)
    return subtasks

In [77]:
def evaluate_responses(prompt, reasoning_prompt=False, openai_model="gpt-4o-mini"):
    """Send the same prompt to OpenAI and Groq and print both replies.

    Args:
        prompt: The base prompt to send.
        reasoning_prompt: Optional extra instruction text. Any truthy value is
            formatted into the prompt after a blank line and followed by a
            period, so pass a string rather than True.
        openai_model: Model name forwarded to the OpenAI call.

    Returns:
        None. The two replies are printed with pprint.
    """
    if reasoning_prompt:
        # Assign rather than append, so the base prompt appears exactly once.
        prompt = f"{prompt}\n\n{reasoning_prompt}."

    openai_response = get_llm_response('openai', prompt, openai_model)
    groq_response = get_llm_response('groq', prompt)

    pprint(f'OpenAI Response: {openai_response}')
    pprint(f'\n\nGroq Response: {groq_response}')

In [78]:
query = "How many r's are in the word 'strawberry' ?"
# planner() takes only the user query; the LLM provider is selected inside it.
subtasks = planner(query)

{'Parsed Response:': {'subtasks': ['reasoning: Extract the specific word of '
                                   "interest from the user's query",
                                   'generate_code: Count the occurrences of '
                                   "the letter 'r' in the word 'strawberry'",
                                   'reasoning: Return the count as the final '
                                   'answer',
                                   'generate_code: Create a Python function to '
                                   'count occurrences of a letter in a word']}}
["reasoning: Extract the specific word of interest from the user's query", "generate_code: Count the occurrences of the letter 'r' in the word 'strawberry'", 'reasoning: Return the count as the final answer', 'generate_code: Create a Python function to count occurrences of a letter in a word']


In [79]:
# reasoning_prompt is formatted into the prompt as text, so pass the actual
# instruction instead of True (which would append the literal word "True").
evaluate_responses(query, reasoning_prompt="Think step by step before giving the final answer")

'OpenAI Response: The word "strawberry" contains 2 \'r\'s.'
"\n\nGroq Response: The word 'strawberry' has 2 'r's."


In [80]:
def reasoner(user_query: str, subtasks: List[str], current_subtask: str, memory: List[Dict[str, Any]]) -> str:
    """Ask the Groq LLM for short reasoning on how to carry out one subtask.

    Args:
        user_query: The overall question being answered.
        subtasks: Every planned subtask (JSON-serialised into the prompt).
        current_subtask: The subtask to reason about now.
        memory: Records of earlier steps (JSON-serialised into the prompt).

    Returns:
        The value under the 'reasoning' key of the LLM's JSON reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply has no 'reasoning' key.
    """
    # Serialise the structured inputs first so the template below stays readable.
    subtasks_json = json.dumps(subtasks)
    memory_json = json.dumps(memory)

    prompt = f"""Given the user's query (long-term goal): '{user_query}'

    Here are all the subtasks to complete in order to answer the user's query:
    <subtasks>
    {subtasks_json}
    </subtasks>

    Here is the short-term memory (result of previous subtasks):
    
        {memory_json}

    The current subtask to complete is:
    <current_subtask>
    {current_subtask}
    </current_subtask>

    - Provide concise reasoning on how to execute the current subtask, considering previous results and subtasks.
    - Prioritize explicit details over assumed patterns.
    - Avoid unnecessary complications in problem-solving.

    Return the result as a JSON object with 'reasoning' as a key.

    Example JSON response:
    {{
        "reasoning": "2 sentences max on how to complete the current subtask."
    }}
    """
    raw_reply = get_llm_response('groq', prompt, json_mode=True)
    return json.loads(raw_reply)['reasoning']

In [82]:
def actioner(user_query: str, subtasks: List[str], current_subtask: str, reasoning: str, memory: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Ask the Groq LLM which action to take for the current subtask.

    Args:
        user_query: The overall question being answered.
        subtasks: Every planned subtask (JSON-serialised into the prompt).
        current_subtask: The subtask being worked on.
        reasoning: The reasoning text produced for this subtask.
        memory: Records of earlier steps. They are included in the prompt so
            the model can actually consider previous results, as the prompt asks.

    Returns:
        The parsed JSON reply. The prompt requests an object with 'action' and
        'parameters' keys, but the reply is returned as-is without validation.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    prompt = f"""Given the user's query (long-term goal): '{user_query}'

    The subtasks are:
    <subtasks>
    {json.dumps(subtasks)}
    </subtasks>

    The current subtask is:
    <current_subtask>
    {current_subtask}
    </current_subtask>

    The reasoning for this subtask is:
    <reasoning>
    {reasoning}
    </reasoning>

    Here is the short-term memory (result of previous subtasks):
    <memory>
    {json.dumps(memory)}
    </memory>

    Determine the most appropriate action to take:
    - If the task requires a calculation or verification through code, use the 'generate_code' action.
    - If the task requires reasoning without code or calculations, use the 'reasoning' action.

    Consider the overall goal and previous results when determining the action.

    Return the result as a JSON object with 'action' and 'parameters' keys.
    The 'parameters' key should always be a dictionary with 'prompt' as a key.

    Example JSON responses:
    {{
        "action": "generate_code",
        "parameters": {{"prompt": "Write a function to calculate the area of a circle."}}
    }}
    {{
        "action": "reasoning",
        "parameters": {{"prompt": "Explain how to complete the subtask."}}
    }}
    """
    response = get_llm_response('groq', prompt, json_mode=True)
    response_json = json.loads(response)
    return response_json


In [85]:
def _strip_code_fences(text: str) -> str:
    """Return text without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text  # Not fenced: leave the model output untouched
    lines = stripped.splitlines()[1:]  # Drop the opening ``` / ```python line
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]  # Drop the closing fence
    return "\n".join(lines)


def generate_and_execute_code(prompt: str, user_query: str, memory: List[Dict[str, Any]], timeout_seconds: float = 5) -> Dict[str, Any]:
    """Have the Groq LLM write Python for a task, run it, and report the outcome.

    Args:
        prompt: Description of the task the code should implement.
        user_query: The overall question being answered (given as context).
        memory: Records of earlier steps (JSON-serialised into the prompt).
        timeout_seconds: Maximum run time of the generated script.

    Returns:
        A dict with 'generated_code' (the code that was executed, with any
        Markdown fence removed) and 'execution_result' (stripped stdout on
        success, otherwise a string starting with "Error").

    NOTE: the generated code runs in a plain child process with the same
    privileges as the notebook. This is NOT a sandbox; only the timeout bounds
    it. Do not run it on prompts you do not trust.
    """
    code_generation_prompt = f"""
    Generate Python code to implement the following task: '{prompt}'

    Here is the overall goal of answering the user's query: '{user_query}'

    Keep in mind the results of the previous subtasks, and use them to complete the current subtask.
    <memory>
    {json.dumps(memory)}
    </memory>

    Here are the guidelines for generating the code:
    - Return only the Python code, without any explanations or markdown formatting.
    - The code should always print or return a value.
    - Don't include any backticks or code blocks in your response. Do not include ```python or ``` in your response, just give me the code.
    - Do not ever use the input() function in your code, use defined values instead.
    - Do not ever use NLP techniques in your code, such as importing nltk, spacy, or any other NLP library.
    - Don't ever define a function in your code, just generate the code to execute the subtask.
    - Don't ever provide the execution result in your response, just give me the code.
    - If your code needs to import any libraries, do it within the code itself.
    - The code should be self-contained and ready to execute on its own.
    - Prioritize explicit details over assumed patterns.
    - Avoid unnecessary complications in problem-solving.
    """

    # Step 1: Generate the code using the LLM. Models sometimes wrap the code
    # in a Markdown fence despite the instructions, which would be a syntax error.
    generated_code = _strip_code_fences(get_llm_response("groq", code_generation_prompt))
    print(f"\n\nGenerated Code: start|{generated_code}|END\n\n")

    # Step 2: Write the generated code to a temporary file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(generated_code.encode("utf-8"))

    try:
        # Step 3: Execute the code in a separate process, using the same
        # interpreter as the kernel rather than whatever "python" is on PATH
        result = subprocess.run(
            [sys.executable, temp_file_path],
            capture_output=True,         # Capture stdout and stderr
            text=True,                   # Decode output as text
            timeout=timeout_seconds      # Prevent infinite loops
        )

        # Step 4: Check the result
        if result.returncode == 0:  # Successful execution
            execution_output = result.stdout.strip()
        else:  # Error during execution
            execution_output = f"Error: {result.stderr.strip()}"

    except subprocess.TimeoutExpired:
        execution_output = "Error: Code execution timed out."

    except Exception as e:
        execution_output = f"Error during execution: {str(e)}"

    finally:
        # Step 5: Clean up the temporary file
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

    # Step 6: Return the result
    return {
        "generated_code": generated_code,
        "execution_result": execution_output
    }




def executor(action: str, parameters: Dict[str, Any], user_query: str, memory: List[Dict[str, Any]]) -> Any:
    """Carry out the action chosen for a subtask.

    - "reasoning": returns parameters["prompt"] unchanged.
    - "generate_code": announces the task, then returns the result of
      generate_and_execute_code for parameters["prompt"].
    - anything else: returns a "not implemented" message string.
    """
    if action == "reasoning":
        return parameters["prompt"]

    if action != "generate_code":
        return f"Action '{action}' not implemented"

    task_prompt = parameters["prompt"]
    print(f"Generating code for: {task_prompt}")
    return generate_and_execute_code(task_prompt, user_query, memory)

In [86]:
def evaluator(user_query: str, subtasks: List[str], current_subtask: str, action_info: Dict[str, Any], execution_result: Dict[str, Any], memory: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Ask the Groq LLM to judge the outcome of the current subtask.

    Args:
        user_query: The overall question being answered.
        subtasks: Every planned subtask (JSON-serialised into the prompt).
        current_subtask: The subtask whose outcome is being judged.
        action_info: The chosen action, formatted into the prompt with str().
        execution_result: What the action produced, formatted with str().
        memory: Records of earlier steps (JSON-serialised into the prompt).

    Returns:
        The parsed JSON reply. The prompt requests 'evaluation' (string) and
        'retry' (boolean) keys, but the reply is returned without validation.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    subtasks_json = json.dumps(subtasks)
    memory_json = json.dumps(memory)

    prompt = f"""Given the user's query (long-term goal): '{user_query}'

    The subtasks to complete to answer the user's query are:
    
        {subtasks_json}
    

    The current subtask to complete is:
    
        {current_subtask}
    

    The result of the current subtask is:
    
        {action_info}
    

    The execution result of the current subtask is:
    
        {execution_result}
    

    Here is the short-term memory (result of previous subtasks):
    
        {memory_json}
    

    Evaluate if the result is a reasonable answer for the current subtask, and makes sense in the context of the overall query.

    Return a JSON object with 'evaluation' (string) and 'retry' (boolean) keys.

    Example JSON response:
    {{
        "evaluation": "The result is a reasonable answer for the current subtask.",
        "retry": false
    }}
    """

    raw_reply = get_llm_response("groq", prompt, json_mode=True)
    verdict = json.loads(raw_reply)
    return verdict

def final_answer_extractor(user_query: str, subtasks: List[str], memory: List[Dict[str, Any]]) -> str:
    """Ask the Groq LLM to distil the final answer from the recorded steps.

    Args:
        user_query: The overall question being answered.
        subtasks: Every planned subtask (JSON-serialised into the prompt).
        memory: Records of all completed steps (JSON-serialised into the prompt).

    Returns:
        The value under the 'finalAnswer' key of the LLM's JSON reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply has no 'finalAnswer' key.
    """
    prompt = f"""Given the user's query (long-term goal): '{user_query}'

    The subtasks completed to answer the user's query are:
    
        {json.dumps(subtasks)}
    

    The memory of the thought process (short-term memory) is:
    
        {json.dumps(memory)}
    

    Extract the final answer that directly addresses the user's query, from the memory.
    Provide only the essential information without unnecessary explanations.

    Return a JSON object with 'finalAnswer' as a key.

    Here is an example JSON response:
    {{
        "finalAnswer": "The final answer to the user's query, addressing all aspects of the question, based on the memory provided"
    }}
    """

    # The example above must itself be valid JSON (no trailing comma), because
    # the model is asked to imitate it while replying in JSON mode.
    response = json.loads(get_llm_response("groq", prompt, json_mode=True))
    return response["finalAnswer"]

In [93]:
def autonomous_agent(user_query: str, max_retries: int = 1) -> str:
    """Answer a query by planning subtasks and running each through the agent loop.

    For every subtask returned by planner(), the loop calls reasoner(),
    actioner(), executor() and evaluator(), records the step in memory, and
    repeats the subtask while the evaluation asks for a retry (up to
    max_retries attempts). Finally final_answer_extractor() is called with
    the accumulated memory.

    Args:
        user_query: The question to answer.
        max_retries: Maximum number of attempts per subtask.

    Returns:
        The final answer produced by final_answer_extractor().
    """
    memory = []
    subtasks = planner(user_query)

    print("User Query:", user_query)
    print(f"Subtasks: {subtasks}")

    for subtask in subtasks:
        for attempt in range(max_retries):

            reasoning = reasoner(user_query, subtasks, subtask, memory)
            action_info = actioner(user_query, subtasks, subtask, reasoning, memory)

            print(f"\n\n ****** Action Info: {action_info} ****** \n\n")

            execution_result = executor(action_info["action"], action_info["parameters"], user_query, memory)

            print(f"\n\n ****** Execution Result: {execution_result} ****** \n\n")
            evaluation = evaluator(user_query, subtasks, subtask, action_info, execution_result, memory)

            step = {
                "subtask": subtask,
                "reasoning": reasoning,
                "action": action_info,
                # Keep what the action produced; without it later subtasks and
                # the final answer extraction never see any computed output.
                "execution_result": execution_result,
                "evaluation": evaluation
            }
            memory.append(step)

            print(f"\n\nSTEP: {step}\n\n")

            # A reply without a usable 'retry' flag counts as "no retry needed".
            needs_retry = isinstance(evaluation, dict) and bool(evaluation.get("retry", False))
            if not needs_retry:
                break

            if attempt == max_retries - 1:
                print(f"Max retries reached for subtask: {subtask}")

    final_answer = final_answer_extractor(user_query, subtasks, memory)
    return final_answer

## Sample Query # 1

In [95]:
# Baseline: send the riddle straight to the OpenAI model, without the agent loop.
query = (
    "The surgeon, who is the boy's father, says, "
    "'I can't operate on this boy, he's my son!' "
    "Who is the surgeon to the boy?"
)
result = get_llm_response("openai", query)
print(result)

The surgeon is the boy's mother. The scenario highlights a common assumption that surgeons are male, but in this case, the surgeon is a woman.


In [97]:
# Same riddle, this time answered through the full agent pipeline.
query = (
    "The surgeon, who is the boy's father, says, "
    "'I can't operate on this boy, he's my son!' "
    "Who is the surgeon to the boy?"
)
result = autonomous_agent(query)
print("FINAL ANSWER: ", result)

{'Parsed Response:': {'subtasks': ['reasoning: Identify the entities in the '
                                   "user's query, such as the surgeon and the "
                                   'boy.',
                                   'reasoning: Determine the relationship '
                                   'between the surgeon and the boy in the '
                                   'given context.',
                                   'reasoning: Use the identified relationship '
                                   "to determine the surgeon's role to the "
                                   'boy.',
                                   'generate_code: Analyze the constructed '
                                   'relationship to output the role of the '
                                   'surgeon to the boy.']}}
["reasoning: Identify the entities in the user's query, such as the surgeon and the boy.", 'reasoning: Determine the relationship between the surgeon and the boy in the given con

## Sample Query # 2

In [98]:
# Baseline: send the puzzle straight to the OpenAI model, without the agent loop.
prompt = (
    "The Bear Puzzle: A hunter leaves his tent. "
    "He travels 5 steps due south, 5 steps due east, and 5 steps due north. "
    "He arrives back at his tent, and sees a brown bear inside it. "
    "What color was the bear?"
)

result = get_llm_response("openai", prompt)
print(result)

The bear was white. The only place on Earth where it's possible to travel 5 steps south, then 5 steps east, and finally 5 steps north and end up back at the original starting point (the tent) is at the North Pole. Since polar bears are the only bears that live in the Arctic region, and they have white fur, the bear inside the tent would be white.


In [100]:
# Same puzzle through the agent pipeline. The explicit "\n" keeps the line
# break (and the space before it) that this version of the prompt contains.
prompt = (
    "The Bear Puzzle: A hunter leaves his tent. "
    "He travels 5 steps due south, 5 steps due east, and 5 steps due north. \n"
    "He arrives back at his tent, and sees a brown bear inside it. "
    "What color was the bear?"
)

result = autonomous_agent(prompt)
print(result)

{'Parsed Response:': {'subtasks': ['reasoning: Understand the problem '
                                   'statement and identify any assumptions',
                                   'reasoning: Identify the implications of '
                                   "the hunter's movements on the bear's "
                                   'location',
                                   'reasoning: Deduce the necessary conditions '
                                   'for the bear to be in the tent when the '
                                   'hunter returns',
                                   'reasoning: Conclude the color of the bear '
                                   'based on the absence of any relevant '
                                   'information about its color in the '
                                   'surroundings',
                                   "generate_code: Observe that the bear's "
                                   "color was not affected by the hunter's "
      