In [None]:
! pip install openai groq

Collecting groq
  Downloading groq-0.12.0-py3-none-any.whl.metadata (13 kB)
Downloading groq-0.12.0-py3-none-any.whl (108 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.9/108.9 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.12.0


In [None]:
from openai import OpenAI
from google.colab import userdata
import os
import json
from groq import Groq
import json
from typing import List, Dict, Any, Callable
import ast
import io
import sys

# Read each provider's API key from the Colab `userdata` store and mirror it into
# the process environment, so the clients below can pick it up via os.getenv.
groq_api_key = userdata.get("GROQ_API_KEY")
os.environ['GROQ_API_KEY'] = groq_api_key

openrouter_api_key = userdata.get("OPENROUTER_API_KEY")
os.environ['OPENROUTER_API_KEY'] = openrouter_api_key

openai_api_key = userdata.get("OPENAI_API_KEY")
os.environ['OPENAI_API_KEY'] = openai_api_key

# Module-level clients shared by get_llm_response.
groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))

# OpenRouter is reached through the OpenAI SDK by overriding the base URL.
openrouter_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY")
)

openai_client = OpenAI(
    base_url="https://api.openai.com/v1",
    api_key=os.getenv("OPENAI_API_KEY")
)

In [None]:
def get_llm_response(client, prompt, openai_model="o1-preview", json_mode=False):
    """Send a single-turn user prompt to an LLM provider and return the reply text.

    Args:
        client: Provider selector, either "openai" or "groq".
        prompt: Text sent as the only user message.
        openai_model: Model name used when ``client`` is "openai".
        json_mode: When True, ask the provider for a JSON object via
            ``response_format``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        ValueError: If ``client`` is neither "openai" nor "groq".
        Exception: Errors from the OpenAI call, or from the OpenRouter
            fallback call, propagate to the caller unchanged.
    """

    def build_request(model):
        # Every provider receives the same request shape; only the model differs.
        request = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}]
        }
        if json_mode:
            request["response_format"] = {"type": "json_object"}
        return request

    if client == "openai":
        response = openai_client.chat.completions.create(**build_request(openai_model))

    elif client == "groq":
        models = ["llama-3.1-8b-instant", "llama-3.1-70b-versatile", "llama3-70b-8192", "llama3-8b-8192", "gemma2-9b-it"]

        # Try each Groq model in order and keep the first successful reply.
        response = None
        for model in models:
            try:
                response = groq_client.chat.completions.create(**build_request(model))
                break
            except Exception as e:
                print(f"Error: {e}")

        if response is None:
            # Every Groq model failed. Previously the per-model handler swallowed
            # all errors, so this OpenRouter fallback was unreachable and the
            # function crashed on an unbound `response`.
            response = openrouter_client.chat.completions.create(
                **build_request("meta-llama/llama-3.1-8b-instruct:free")
            )

    else:
        raise ValueError(f"Invalid client: {client}")

    return response.choices[0].message.content


def evaluate_responses(prompt, reasoning_prompt=False, openai_model="o1-preview"):
    """Print the OpenAI and Groq answers to the same prompt, one after the other.

    A truthy ``reasoning_prompt`` is appended to ``prompt`` after a blank line
    and followed by a period; ``openai_model`` is forwarded to the OpenAI call only.
    """
    full_prompt = f"{prompt}\n\n{reasoning_prompt}." if reasoning_prompt else prompt

    # OpenAI is queried first, then Groq with its default model rotation.
    answer_openai = get_llm_response("openai", full_prompt, openai_model)
    answer_groq = get_llm_response("groq", full_prompt)

    print(f"OpenAI Response: {answer_openai}")
    print(f"\n\nGroq Response: {answer_groq}")

In [None]:
# Baseline: send the letter-counting question to both providers with no extra reasoning prompt.
prompt1 = "How many r's are in the word 'strawberry'?"
evaluate_responses(prompt1)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Same question, overriding the OpenAI model (the default is "o1-preview").
evaluate_responses(prompt1, openai_model="gpt-3.5-turbo")

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Second probe: a decimal-comparison question sent to both providers.
prompt2 = "9.9 or 9.11, which number is bigger?"
evaluate_responses(prompt2)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Re-run the first question with a plan-then-solve instruction appended to the prompt.
reasoning_prompt = "Let's first understand the problem and devise a plan to solve it. Then, let's carry out the plan and solve the problem step by step."
evaluate_responses(prompt1, reasoning_prompt)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
def planner(user_query: str) -> List[str]:
    """Ask the Groq-backed LLM to break the user's query into ordered subtasks.

    Args:
        user_query: The question the agent ultimately has to answer.

    Returns:
        The list stored under the "subtasks" key of the model's JSON reply.

    Raises:
        KeyError: If the reply has no "subtasks" key.
        ValueError: If "subtasks" is not a list (for example a bare string,
            which the caller would otherwise iterate character by character).
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # The prompt asks for a JSON *object* so it agrees with both the example
    # below and the json_object response format requested from the provider.
    prompt = f"""Given the user's query: '{user_query}', break down the query into as few subtasks as possible in order to answer the question.
    Each subtask is either a calculation or reasoning step. Never duplicate a task.

    Here are the only 2 actions that can be taken for each subtask:
        - generate_code: This action involves generating Python code and executing it in order to make a calculation or verification.
        - reasoning: This action involves providing reasoning for what to do to complete the subtask.

    Each subtask should begin with either "reasoning" or "generate_code".

    Keep in mind the overall goal of answering the user's query throughout the planning process.

    Return the result as a JSON object with a 'subtasks' key whose value is a list of strings, where each string is a subtask.

    Here is an example JSON response:

    {{
        "subtasks": ["Subtask 1", "Subtask 2", "Subtask 3"]
    }}
    """
    response = json.loads(get_llm_response("groq", prompt, json_mode=True))
    print(response)

    subtasks = response["subtasks"]
    if not isinstance(subtasks, list):
        raise ValueError(f"Planner expected a list of subtasks, got: {subtasks!r}")
    return subtasks

def reasoner(user_query: str, subtasks: List[str], current_subtask: str, memory: List[Dict[str, Any]]) -> str:
    """Ask the Groq-backed LLM how to carry out the current subtask.

    The prompt includes the overall query, the full subtask list, and the memory
    of earlier steps (both JSON-encoded). Returns the value found under the
    "reasoning" key of the model's JSON reply.
    """
    subtasks_json = json.dumps(subtasks)
    memory_json = json.dumps(memory)

    reasoning_request = f"""Given the user's query (long-term goal): '{user_query}'

    Here are all the subtasks to complete in order to answer the user's query:

        {subtasks_json}


    Here is the short-term memory (result of previous subtasks):

        {memory_json}


    The current subtask to complete is:

        {current_subtask}


    - Provide concise reasoning on how to execute the current subtask, considering previous results.
    - Prioritize explicit details over assumed patterns
    - Avoid unnecessary complications in problem-solving

    Return the result as a JSON object with 'reasoning' as a key.

    Example JSON response:
    {{
        "reasoning": "2 sentences max on how to complete the current subtask."
    }}
    """

    raw_reply = get_llm_response("groq", reasoning_request, json_mode=True)
    parsed_reply = json.loads(raw_reply)
    return parsed_reply["reasoning"]

def actioner(user_query: str, subtasks: List[str], current_subtask: str, reasoning: str, memory: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Ask the Groq-backed LLM which action to take for the current subtask.

    The prompt carries the overall query, the subtask list, the current subtask,
    the reasoning produced for it, and the memory of earlier steps. Returns the
    model's JSON reply parsed into a dict; the prompt requests 'action' and
    'parameters' keys, but the reply is not validated here.
    """
    subtasks_json = json.dumps(subtasks)
    memory_json = json.dumps(memory)

    action_request = f"""Given the user's query (long-term goal): '{user_query}'

    The subtasks are:

        {subtasks_json}


    The current subtask is:

        {current_subtask}


    The reasoning for this subtask is:

        {reasoning}


    Here is the short-term memory (result of previous subtasks):

        {memory_json}


    Determine the most appropriate action to take:
        - If the task requires a calculation or verification through code, use the 'generate_code' action.
        - If the task requires reasoning without code or calculations, use the 'reasoning' action.

    Consider the overall goal and previous results when determining the action.

    Return the result as a JSON object with 'action' and 'parameters' keys.  The 'parameters' key should always be a dictionary with 'prompt' as a key.

    Example JSON responses:

    {{
        "action": "generate_code",
        "parameters": {{"prompt": "Write a function to calculate the area of a circle."}}
    }}

    {{
        "action": "reasoning",
        "parameters": {{"prompt": "Explain how to complete the subtask."}}
    }}
    """

    raw_reply = get_llm_response("groq", action_request, json_mode=True)
    return json.loads(raw_reply)

def evaluator(user_query: str, subtasks: List[str], current_subtask: str, action_info: Dict[str, Any], execution_result: Dict[str, Any], memory: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Ask the Groq-backed LLM whether the current subtask's result is acceptable.

    Args:
        user_query: The overall question being answered.
        subtasks: All planned subtasks.
        current_subtask: The subtask that was just executed.
        action_info: The action dict chosen for this subtask.
        execution_result: What executing that action produced.
        memory: Steps recorded for earlier subtasks.

    Returns:
        The model's JSON reply as a dict. The 'retry' key is always present and
        always a bool: a missing key becomes False, and string values such as
        "false"/"true" are converted, so callers can index it safely.
    """
    # Serialize every structured value as JSON (not Python repr) so the prompt
    # is consistent with the JSON-encoded subtasks and memory sections.
    # default=str keeps this from failing on values that are not JSON-native.
    action_json = json.dumps(action_info, default=str)
    result_json = json.dumps(execution_result, default=str)

    prompt = f"""Given the user's query (long-term goal): '{user_query}'

    The subtasks to complete to answer the user's query are:

        {json.dumps(subtasks)}


    The current subtask to complete is:

        {current_subtask}


    The result of the current subtask is:

        {action_json}


    The execution result of the current subtask is:

        {result_json}


    Here is the short-term memory (result of previous subtasks):

        {json.dumps(memory)}


    Evaluate if the result is a reasonable answer for the current subtask, and makes sense in the context of the overall query.

    Return a JSON object with 'evaluation' (string) and 'retry' (boolean) keys.

    Example JSON response:
    {{
        "evaluation": "The result is a reasonable answer for the current subtask.",
        "retry": false
    }}
    """

    response = json.loads(get_llm_response("groq", prompt, json_mode=True))

    # Normalize 'retry': models sometimes omit it or return it as a string,
    # and a non-empty string like "false" would otherwise count as truthy.
    retry = response.get("retry", False)
    if isinstance(retry, str):
        retry = retry.strip().lower() == "true"
    response["retry"] = bool(retry)
    return response

def final_answer_extractor(user_query: str, subtasks: List[str], memory: List[Dict[str, Any]]) -> str:
    """Ask the Groq-backed LLM to distil the final answer from the recorded steps.

    Args:
        user_query: The overall question being answered.
        subtasks: The subtasks that were executed.
        memory: The steps recorded while executing them.

    Returns:
        The value under the "finalAnswer" key of the model's JSON reply.

    Raises:
        KeyError: If the reply has no "finalAnswer" key.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # The example object below must itself be valid JSON (no trailing comma),
    # otherwise it models exactly the malformed output JSON mode rejects.
    prompt = f"""Given the user's query (long-term goal): '{user_query}'

    The subtasks completed to answer the user's query are:

        {json.dumps(subtasks)}


    The memory of the thought process (short-term memory) is:

        {json.dumps(memory)}


    Extract the final answer that directly addresses the user's query, from the memory.
    Provide only the essential information without unnecessary explanations.

    Return a JSON object with 'finalAnswer' as a key.

    Here is an example JSON response:
    {{
        "finalAnswer": "The final answer to the user's query, addressing all aspects of the question, based on the memory provided"
    }}
    """

    response = json.loads(get_llm_response("groq", prompt, json_mode=True))
    return response["finalAnswer"]


def generate_and_execute_code(prompt: str, user_query: str, memory: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Have the Groq-backed LLM write Python for a subtask, run it, and capture what it prints.

    Args:
        prompt: Description of the code to generate for the current subtask.
        user_query: The overall question, included in the prompt for context.
        memory: Steps recorded for earlier subtasks, JSON-encoded into the prompt.

    Returns:
        A dict with "generated_code" (the model's reply, unmodified) and
        "execution_result" (the stripped text the code printed). If the code
        raises, the captured output ends with an "Error: <Type>: <message>" line
        instead of the exception propagating.
    """
    code_generation_prompt = f"""

    Generate Python code to implement the following task: '{prompt}'

    Here is the overall goal of answering the user's query: '{user_query}'

    Keep in mind the results of the previous subtasks, and use them to complete the current subtask.

        {json.dumps(memory)}



    Here are the guidelines for generating the code:
        - Return only the Python code, without any explanations or markdown formatting.
        - The code should always print or return a value
        - Don't include any backticks or code blocks in your response. Do not include ```python or ``` in your response, just give me the code.
        - Do not ever use the input() function in your code, use defined values instead.
        - Do not ever use NLP techniques in your code, such as importing nltk, spacy, or any other NLP library.
        - Don't ever define a function in your code, just generate the code to execute the subtask.
        - Don't ever provide the execution result in your response, just give me the code.
        - If your code needs to import any libraries, do it within the code itself.
        - The code should be self-contained and ready to execute on its own.
        - Prioritize explicit details over assumed patterns
        - Avoid unnecessary complications in problem-solving
    """

    generated_code = get_llm_response("groq", code_generation_prompt)


    print(f"\n\nGenerated Code: start|{generated_code}|END\n\n")

    # SECURITY NOTE(review): this executes model-generated code in-process with
    # full interpreter privileges. Only run it in a disposable environment.
    #
    # A single fresh dict serves as both globals and locals. With a bare exec()
    # inside a function, top-level names land in a separate locals mapping, so
    # generator expressions and lambdas in the generated code cannot see them
    # and fail with NameError. It also stops generated code from reading or
    # clobbering this function's variables.
    namespace = {"__name__": "__main__"}

    buffer = io.StringIO()
    old_stdout = sys.stdout
    sys.stdout = buffer
    try:
        exec(generated_code, namespace)
    except Exception as error:
        # Still redirected here, so the error text becomes part of the captured
        # output and the evaluator can see the failure.
        print(f"Error: {type(error).__name__}: {error}")
    finally:
        # Always restore stdout; otherwise one failing snippet would silence
        # every later print in the notebook.
        sys.stdout = old_stdout

    output = buffer.getvalue()

    print(f"\n\n***** Execution Result: |start|{output.strip()}|end| *****\n\n")

    return {
        "generated_code": generated_code,
        "execution_result": output.strip()
    }

def executor(action: str, parameters: Dict[str, Any], user_query: str, memory: List[Dict[str, Any]]) -> Any:
    """Carry out the chosen action and return its result.

    "reasoning" returns the prompt text as-is, "generate_code" delegates to
    generate_and_execute_code, and any other action name yields a
    "not implemented" message string.
    """
    if action == "reasoning":
        return parameters["prompt"]

    if action != "generate_code":
        return f"Action '{action}' not implemented"

    code_task = parameters["prompt"]
    print(f"Generating code for: {code_task}")
    return generate_and_execute_code(code_task, user_query, memory)



def autonomous_agent(user_query: str, max_retries: int = 1) -> str:
    """Answer a query by planning subtasks and running a reason/act/evaluate loop over them.

    For each planned subtask the agent asks for reasoning, picks an action,
    executes it, and has the result evaluated. Every attempt is appended to
    memory, which is passed to later steps and to the final-answer extractor.

    Args:
        user_query: The question to answer.
        max_retries: Maximum attempts per subtask (values below 1 are treated
            as 1). The default of 1 means each subtask is tried once.

    Returns:
        The final answer string produced by final_answer_extractor.
    """
    attempts_allowed = max(1, max_retries)

    memory = []
    subtasks = planner(user_query)

    print("User Query:", user_query)
    print(f"Subtasks: {subtasks}")

    for subtask in subtasks:
        for attempt in range(attempts_allowed):

            reasoning = reasoner(user_query, subtasks, subtask, memory)
            action_info = actioner(user_query, subtasks, subtask, reasoning, memory)

            print(f"\n\n ****** Action Info: {action_info} ****** \n\n")

            execution_result = executor(action_info["action"], action_info["parameters"], user_query, memory)

            print(f"\n\n ****** Execution Result: {execution_result} ****** \n\n")
            evaluation = evaluator(user_query, subtasks, subtask, action_info, execution_result, memory)

            # Record the execution result too: without it, later subtasks and the
            # final-answer extractor never see what the generated code computed.
            step = {
                "subtask": subtask,
                "reasoning": reasoning,
                "action": action_info,
                "execution_result": execution_result,
                "evaluation": evaluation
            }
            memory.append(step)

            print(f"\n\nSTEP: {step}\n\n")

            # A reply without a 'retry' key is treated as "no retry needed".
            if not evaluation.get("retry", False):
                break

            if attempt == attempts_allowed - 1:
                print(f"Max retries reached for subtask: {subtask}")

    final_answer = final_answer_extractor(user_query, subtasks, memory)
    return final_answer



In [None]:
# Run the full plan -> reason -> act -> evaluate loop on the letter-counting question.
query = "How many r's are in strawberry?"
result = autonomous_agent(query)
print("FINAL ANSWER: ", result)

{'subtasks': ["reasoning: Identify the string 'strawberry' which contains the letters we want to count.", "generate_code: Calculate the number of 'r's and 'R's in the string 'strawberry'.", "generate_code: Combine the counts of lower-case 'r's and upper-case 'R's to get the total count."]}
User Query: How many r's are in strawberry?
Subtasks: ["reasoning: Identify the string 'strawberry' which contains the letters we want to count.", "generate_code: Calculate the number of 'r's and 'R's in the string 'strawberry'.", "generate_code: Combine the counts of lower-case 'r's and upper-case 'R's to get the total count."]


 ****** Action Info: {'action': 'reasoning', 'parameters': {'prompt': "Identify the string 'strawberry' which contains the letters we want to count."}} ****** 




 ****** Execution Result: Identify the string 'strawberry' which contains the letters we want to count. ****** 




STEP: {'subtask': "reasoning: Identify the string 'strawberry' which contains the letters we wan

In [None]:

# Display the value currently bound to `result` (set by the most recent agent run).
result

'The final answer is 2.'

In [None]:
# Send a riddle straight to the OpenAI client (default model), bypassing the agent loop.
query = "The surgeon, who is the boy's father, says, 'I can't operate on this boy, he's my son!' Who is the surgeon to the boy?"
result = get_llm_response("openai", query)
print(result)


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Bear puzzle, asked directly of the OpenAI client (default model).
prompt = "The Bear Puzzle: A hunter leaves his tent. He travels 5 steps due south, 5 steps due east, and 5 steps due north. He arrives back at his tent, and sees a brown bear inside it. What color was the bear?"

result = get_llm_response("openai", prompt)
print(result)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Same bear puzzle, this time routed through the multi-step agent.
prompt = "The Bear Puzzle: A hunter leaves his tent. He travels 5 steps due south, 5 steps due east, and 5 steps due north. He arrives back at his tent, and sees a brown bear inside it. What color was the bear?"

result = autonomous_agent(prompt)
print(result)


Error: Error code: 400 - {'error': {'message': "Failed to generate JSON. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'json_validate_failed', 'failed_generation': '{\n   "subtasks": [\n       "reasoning: Determine the initial position of the hunter and the boundaries of the camp.",\n       "reasoning: Analyze the movement of the hunter to determine the location where the bear would originally be if the hunter hadn\'t returned to camp.",\n       "reasoning: Deduce that since the hunter arrives back at camp, the bear must be in the same location the hunter left it in ("in the bear\'s den"),",\n       "reasoning: Knowing that the hunter arrives back at camp, whether the camp is on firm land or on ice on land of melting ice, it does not make a difference",\n       "reasoning: There are two pieces of information that would interfere with the outcome \'if the hunter and the bear did not meet in the middle of the camp\', they\