# Building your first OpenAI Assistant

In [1]:
from dotenv import load_dotenv
import json
import logging
from openai import  AsyncOpenAI
from openai.types.beta import Thread
from openai.types.beta.threads import Run
from pydantic import BaseModel
from typing import Dict, List

In [2]:
# Load settings from a local .env file before any OpenAI client is created.
# NOTE(review): override=True presumably lets values from .env replace variables
# that are already set in the process environment — confirm against python-dotenv.
load_dotenv(override=True)

# logging.basicConfig(level=logging.INFO)
# log = logging.getLogger(__name__)

True

## 1. Define the Assistant class

In [3]:
class AssistantResult(BaseModel):
    """Value returned by ``Assistant.run`` once a run has completed."""

    # Text taken from the last message of the completed run.
    response: str
    # Id of the thread the exchange happened in; ``Assistant.run`` accepts it
    # as ``thread_id`` to keep working on the same thread.
    thread_id: str

class MaxTurnsReachedException(Exception):
    """Raised when the agent loop runs out of turns before the run finishes."""

    def __init__(self):
        message = "Reached maximum number of turns"
        Exception.__init__(self, message)

In [4]:
class Assistant:
    """Drive an existing OpenAI Assistant through a function-calling loop.

    The assistant itself is referenced by ``assistant_id``; this class posts
    the user message, polls the run, executes the local callables the model
    asks for and feeds their outputs back until the run completes.
    """

    # Run statuses in which the run is still active, i.e. a cancel request
    # makes sense (Assistants API run lifecycle).
    _CANCELLABLE_STATUSES = ("queued", "in_progress", "requires_action")

    def __init__(self, assistant_id: str, tools: Dict[str, callable]):
        """Store the configuration; no request is sent to OpenAI here.

        Args:
            assistant_id: Id of the assistant to run.
            tools: Maps a function name requested by the model to the
                awaitable callable implementing it. Each callable is invoked
                as ``await fn(**arguments)``.
        """
        self.client = AsyncOpenAI()
        self.assistant_id = assistant_id
        self.assistant = None  # filled lazily by retrieve_assistant()
        self.tools = tools

    async def retrieve_assistant(self):
        """Fetch the assistant object on first use and return the cached copy afterwards."""
        if self.assistant is None:
            self.assistant = await self.client.beta.assistants.retrieve(self.assistant_id)
        return self.assistant

    async def _execute_tool_call(self, tool, thread_id: str) -> dict:
        """Run one requested function call and build its tool-output entry.

        Any failure (malformed JSON arguments, unknown function name, error
        raised by the function) is turned into an error string that is sent
        back to the model instead of aborting the whole run.
        """
        name = tool.function.name
        try:
            # Parsing happens inside the try so that invalid JSON produced by
            # the model is reported back like any other tool error.
            args = json.loads(tool.function.arguments) if tool.function.arguments else {}
            print(f"Running function {name} with args {args} for thread {thread_id}")
            func_output = await self.tools[name](**args)
            print(f"Function outputs: {func_output}")
        except Exception as e:  # deliberately broad: the model gets to see the error
            print(f"Error in running function {name}: {e}")
            func_output = f"Error in running function {name}: {e}"

        # OpenAI needs the output of the function call against the tool_call_id
        return {"tool_call_id": tool.id, "output": str(func_output)}

    @staticmethod
    def _first_text(messages) -> str:
        """Return the first text block of the first listed message, or "" if there is none."""
        for message in messages.data[:1]:
            for content in message.content:
                if content.type == "text":
                    return content.text.value
        return ""

    async def run(self, query: str, thread_id: str = None, max_turns: int = 5) -> AssistantResult:
        """Send ``query`` to the assistant and run the agent loop until it answers.

        Args:
            query: User message appended to the thread.
            thread_id: Existing thread to continue; a new thread is created
                when it is None.
            max_turns: Maximum number of loop iterations. Each iteration checks
                the run status once and, if required, executes one batch of
                function calls.

        Returns:
            AssistantResult with the thread id and the text of the run's last
            message (empty string when that message has no text block).

        Raises:
            MaxTurnsReachedException: The loop ended without the run being
                observed as completed. A still-active run is cancelled first.
            Exception: The run ended in any status other than "completed" or
                "requires_action" (for example "failed").
        """
        await self.retrieve_assistant()

        print(f"Running assistant with thread_id {thread_id}")

        if thread_id is None:
            thread: Thread = await self.client.beta.threads.create()
            print(f"Created new thread with id {thread.id}")
        else:
            thread: Thread = await self.client.beta.threads.retrieve(thread_id)
            print(f"Retrieved thread with id {thread.id}")

        print(f"Sending query to thread {thread.id}: {query}")
        await self.client.beta.threads.messages.create(
            thread_id=thread.id, role="user", content=query
        )

        run: Run = await self.client.beta.threads.runs.create_and_poll(
            thread_id=thread.id,
            assistant_id=self.assistant_id,
        )

        for turn in range(max_turns):

            # Fetch the last message of this run on every turn; printing it
            # shows the assistant's intermediate messages between tool calls.
            messages = await self.client.beta.threads.messages.list(
                thread_id=thread.id,
                run_id=run.id,
                order="desc",
                limit=1,
            )
            print(f"Fetched last message from thread {thread.id}: {messages}")

            # "completed" is the successful terminal state: return the LLM response.
            if run.status == "completed":
                print(f"Run completed for thread {thread.id}")
                return AssistantResult(
                    thread_id=thread.id, response=self._first_text(messages)
                )

            # "requires_action": the LLM asked for one or more function calls.
            # Execute them in order and submit all outputs in a single request.
            if run.status == "requires_action":
                func_tool_outputs = []
                for tool in run.required_action.submit_tool_outputs.tool_calls:
                    func_tool_outputs.append(
                        await self._execute_tool_call(tool, thread.id)
                    )

                run = await self.client.beta.threads.runs.submit_tool_outputs_and_poll(
                    thread_id=thread.id, run_id=run.id, tool_outputs=func_tool_outputs
                )

                # The status produced by the submission is examined on the next turn.
                continue

            # Any other status ends the loop with an error.
            if run.status == "failed":
                print(
                    f"OpenAIFunctionAgent turn-{turn+1} | Run failure reason: {run.last_error}"
                )

            raise Exception(
                f"Failed to generate text due to: {run.last_error}"
            )

        # Turn limit reached. Cancel only a run that is still active, and use
        # thread.id: the thread_id argument is None when the thread was created here.
        if run.status in self._CANCELLABLE_STATUSES:
            await self.client.beta.threads.runs.cancel(run.id, thread_id=thread.id)
        raise MaxTurnsReachedException()

    async def cancel_thread_run(self, thread_id: str):
        """Cancel the most recent run of the given thread, if the thread has any run.

        Args:
            thread_id: Id of the thread whose latest run should be cancelled.
        """
        thread: Thread = await self.client.beta.threads.retrieve(thread_id)

        # Await the listing first, then index into the returned page.
        runs = await self.client.beta.threads.runs.list(
            thread_id=thread.id, order="desc", limit=1
        )
        if not runs.data:
            return  # nothing to cancel

        run: Run = runs.data[0]
        await self.client.beta.threads.runs.cancel(run.id, thread_id=thread.id)

## 2. Define the Assistant Tools

In [5]:
import os
import shlex
import subprocess


# define an isolated working directory for the agent
AGENT_WORKING_DIRECTORY = "./workdir"


async def list_repositories(directory: str = None) -> str:
    """List the entries of the agent's working directory.

    Args:
        directory: Accepted because the function schema declares it, but
            ignored on purpose: the listing always comes from
            AGENT_WORKING_DIRECTORY so the agent cannot inspect other paths.

    Returns:
        The output of ``ls -lat`` on the working directory reduced by awk to
        columns 6-9 of every line. Empty when ``ls`` writes nothing to stdout.
    """
    listing_command = (
        "ls -lat " + AGENT_WORKING_DIRECTORY + " | awk '{print $6, $7, $8, $9}'"
    )
    # The context manager closes the pipe (and reaps the shell) deterministically.
    with os.popen(listing_command) as pipe:
        return pipe.read()


async def run_git_command(command_arguments: str) -> str:
    """Run ``git <command_arguments>`` and return its output or an error text.

    The argument string is produced by the LLM, so it is tokenised with
    ``shlex.split`` and passed to git as an argument list without a shell.
    Shell syntax such as ``;``, ``|``, ``&&``, redirects or ``$(...)`` reaches
    git as literal arguments and is never executed.

    Args:
        command_arguments: Everything that follows ``git`` on the command
            line, e.g. ``'log -1 --pretty=format:"%H"'``.

    Returns:
        The stripped stdout of git on success. On failure, a description of
        the error followed by git's stderr when there is any. Errors are
        returned as text instead of being raised.
    """
    try:
        result = subprocess.run(
            ["git", *shlex.split(command_arguments)],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Append git's own message so the caller can see why the command failed.
        details = (e.stderr or "").strip()
        return f"{e}\n{details}" if details else str(e)
    except Exception as e:  # e.g. unbalanced quotes, git executable missing
        return str(e)
    return result.stdout.strip()

In [6]:
# {
#   "name": "run_git_command",
#   "description": "Execute a git command and retrieve the result or error message. Example usage: 'run_git_command(command_arguments=\"status\")' to check the status of the git repository.",
#   "strict": true,
#   "parameters": {
#     "type": "object",
#     "properties": {
#       "command_arguments": {
#         "type": "string",
#         "description": "The git command arguments to execute, e.g. 'status', 'commit -m \"message\"', 'push origin main'"
#       }
#     },
#     "additionalProperties": false,
#     "required": [
#       "command_arguments"
#     ]
#   }
# }

# {
#   "name": "list_repositories",
#   "description": "Lists repositories in the specified directory",
#   "strict": true,
#   "parameters": {
#     "type": "object",
#     "properties": {
#       "directory": {
#         "type": "string",
#         "description": "The directory path to list repositories from (default is None)"
#       }
#     },
#     "additionalProperties": false,
#     "required": [
#       "directory"
#     ]
#   }
# }

## 3. Create the OpenAI Assistant using the OpenAI Playground

1. Navigate to [OpenAI Assistant Playground](https://platform.openai.com/playground/assistants)
2. Create a new Assistant and name it following the format: `<name_first_letter>_<surname>_first_assistant`
3. Define the System Instructions
4. Select gpt-4o-mini as the model
5. Generate a new Function definition

## 4. Instantiate the Assistant

In [7]:
# You are an AI agent for managing and executing git actions on code repositories.
# Your focus is to perform the requested operations running git commands on the requested repositories.

# # Example session

# Question: Give the hash of the last commit for the github repository at https://github.com/grosa1/pyszz.git

# (print this thought)
# First I need to look if the repository exists in my working directory, otherwise I have to clone it. Second, I have to extract the commit log using the git action.

# (now call the function) 
# list_repositories(workdir)

# (you receive the function output)
# Oct 20 01:05 grosa1_pyszz

# (print this thought)
# I have the repository in ./workdir/grosa1_pyszz. Now I will extract the commit log.

# (now call the function) 
# run_git_command: --git-dir=./workdir/grosa1_pyszz/.git log -1 --pretty=format:"%H"

# (you receive the function output)
# 579e555005986be6a7249000510c07d3981485a3

# (You then output)
# The requested commit hash is 579e555005986be6a7249000510c07d3981485a3.

# # Notes
# - You can only operate in the directory './workdir' with git repositories. Make sure to follow these rules:
#     - If a repository is not present, clone it there from github. This must be performed only once during the session. If a local copy already exists, do not clone it again.
#     - Use the existing repositories, but update them with a pull before usage if the last update time is more than 1 hour ago (when listing the repositories, you can see the last update time).
#     - To handle complex requests, you can run multiple git commands in a sequence (e.g. perform several actions of git log commands to compose the information you need).
#     - The naming format for the repositories is the same as the one used in the example above, e.g. 'owner_repositoryname'.

In [8]:
# Id of the assistant to run (presumably the one created in the Playground in section 3).
ASSISTANT_ID = "asst_K5m3YODjebQvZjwomkOJlXEg"

# Function name requested by the model -> local coroutine that implements it.
TOOLS = {
    "run_git_command": run_git_command,
    "list_repositories": list_repositories,
}

In [9]:
# Build the agent wrapper once; both query cells below use this instance.
assistant = Assistant(assistant_id=ASSISTANT_ID, tools=TOOLS)

## 5. Use the Assistant

In [None]:
# Task for the agent: find bug-fix commits of docker/genai-stack that reference an issue number.
query = """
Consider the github repository docker/genai-stack.
Return the commit message and hash for the commits that fix a bug and reference a specific issue tag (contains tag # with number).
Exclude the commits that are not referenced to an issue tag.
"""

# No thread_id is passed, so run() creates a new thread.
# Top-level await: this cell only runs inside a notebook / async REPL.
res = await assistant.run(query=query, max_turns=5)
print("###########################################\n")
print(f"Thread ID: {res.thread_id}")
print(f"Response:\n{res.response}")

Running assistant with thread_id None
Created new thread with id thread_R7Np4mVGt34b49IqvUWKtSgd
Sending query to thread thread_R7Np4mVGt34b49IqvUWKtSgd: 
Consider the github repository docker/genai-stack.
Return the commit message and hash for the commits that fix a bug and reference a specific issue tag (contains tag # with number).
Exclude the commits that are not referenced to an issue tag.

Fetched last message from thread thread_R7Np4mVGt34b49IqvUWKtSgd: AsyncCursorPage[Message](data=[Message(id='msg_2VHaNrSUseZ91A9zBcfYCUln', assistant_id='asst_K5m3YODjebQvZjwomkOJlXEg', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="First, I need to check if the repository `docker/genai-stack` exists in my working directory. If it doesn't, I will clone it. Then, I will extract the commit log to find the commits that fix a bug and reference a specific issue tag.\n\nNow, I will list the repositories in the working directory."), type='text')], created

In [None]:
# Task for the agent: find fix commits of DarkCaster/Perpetual whose message names
# the commit that introduced the bug; the prompt also suggests a search strategy.
query = """
Return 2 or 3 fix commits hashes and messages that fixed a specific github issue for the repository DarkCaster/Perpetual.
Specifically, the commit must refer to the commit hash that introduced the fixed bug.

An example is a commits having as message: "Fix bug introduced in 3a4f5e6".

An approach could be:
- First look for commits that have a message that contains the words "fix" and "bug"
- Discard those that do not contain the word "introduced" and the commit hash
- Manually check if the commit hash is teh reference to the commit that introduced the bug which is fixed
"""

# No thread_id is passed, so run() creates a new thread.
# Top-level await: this cell only runs inside a notebook / async REPL.
res = await assistant.run(query=query, max_turns=5)
print("###########################################\n")
print(f"Thread ID: {res.thread_id}")
print(f"Response:\n{res.response}")

Running assistant with thread_id None
Created new thread with id thread_ohWixO8WzFWBSrLxiah4i5rw
Sending query to thread thread_ohWixO8WzFWBSrLxiah4i5rw: 
Return 2 or 3 fix commits hashes and messages that fixed a specific github issue for the repository DarkCaster/Perpetual .
Specifically, the commit must refer to the commit hash that introduced the fixed bug.

An example is a commits having as message: "Fix bug introduced in 3a4f5e6".

An approach could be:
- First look for commits that have a message that contains the words "fix" and "bug"
- Discard those that do not contain the word "introduced" and the commit hash
- Manually check if the commit hash is teh reference to the commit that introduced the bug which is fixed

Fetched last message from thread thread_ohWixO8WzFWBSrLxiah4i5rw: AsyncCursorPage[Message](data=[Message(id='msg_iPN8x0GIS5vniF6TKvlH9zRm', assistant_id='asst_K5m3YODjebQvZjwomkOJlXEg', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotatio