In [1]:
# Installations

%pip install --upgrade pip -q
%pip install mypy -q

# %pip install numpy matplotlib pandas scipy -q
# %pip install setuptools wandb -q

%pip install openai python-dotenv -q

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# https://stackoverflow.com/questions/63142182/to-what-extent-does-google-colab-support-python-typing

from IPython.core.magic import register_cell_magic
from IPython import get_ipython
from mypy import api

@register_cell_magic
def mypy(line, cell):
  for output in api.run(['-c', '\n' + cell] + line.split()):
    if output and not output.startswith('Success'):
      raise TypeError(output)

  get_ipython().run_cell(cell)

In [3]:
from dataclasses import dataclass, field
from enum import Enum

from openai import OpenAI
from openai.types.chat import (
    ChatCompletionMessage,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
    ChatCompletionAssistantMessageParam,
    ChatCompletionToolMessageParam,
    ChatCompletionFunctionMessageParam,
  )

from openai.types.chat.completion_create_params import ResponseFormat
from openai._types import NotGiven

from dotenv import dotenv_values

# WANDB = False

# if WANDB:
#   from wandb.integration.openai import autolog
#   autolog({"project": "First RAG App"})

client = OpenAI(api_key=dotenv_values("../.env")["OPENAI_API_KEY"])

RawConversationMessage = ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam | ChatCompletionAssistantMessageParam | ChatCompletionToolMessageParam | ChatCompletionFunctionMessageParam

class Role(Enum):
  System = "system"
  User = "user"
  Assistant = "assistant"
  Tool = "tool"

class AiModel(Enum):
  Gpt3_5Turbo = "gpt-3.5-turbo"

class AssistantTool(Enum):
   CodeInterpreter = "code_interpreter"
   FileSearch = "file_search"
   Function = "function"

@dataclass(frozen=True, order=True, slots=True)
class Message():
  role: Role
  content: str

  def into(self) -> RawConversationMessage:
    return {"role": self.role.value, "content": self.content} # type:ignore

  @classmethod
  def from_raw(cls, raw: ChatCompletionMessage) -> "Message":
    return cls(Role(raw.role), raw.content or "")

def system(message: str) -> Message:
  return Message(Role.System, message)

def user(message: str) -> Message:
  return Message(Role.User, message)

def respond(messages: list[Message], response_format: ResponseFormat | NotGiven=NotGiven(), model: AiModel = AiModel.Gpt3_5Turbo) -> Message:
  raw_messages = [message.into() for message in messages]

  return Message.from_raw(
    client.chat.completions.create(
      model=model.value,
      messages=raw_messages,
      response_format=response_format
    ).choices[0].message
  )

@dataclass(slots=True)
class Chatbot:
    messages: list[Message] = field(default_factory=list)

    def respond(self, response_format: ResponseFormat | NotGiven=NotGiven()) -> Message:
        response = respond(self.messages, response_format = response_format)
        self.messages.append(response)
        return response

    def converse(self, chat: Message, response_format: ResponseFormat | NotGiven=NotGiven()) -> Message:
        self.messages.append(chat)
        return self.respond(response_format=response_format)

In [4]:
from time import sleep
from pprint import pprint

frankenstien_file = client.files.create(
    file=open("./frankenstien.txt", "rb"),
    purpose="assistants"
)

processed_documents = client.beta.vector_stores.create(
    name="Frankenstien Documents"
)

vector_store_file = client.beta.vector_stores.files.create(
  vector_store_id=processed_documents.id,
  file_id=frankenstien_file.id
)

assistant = client.beta.assistants.create(
    name="Frank(enstien)",
    instructions="""
You are a librarian who'd like to answer questions about the book Frankenstien by Mary Shelley.
The book is included in a file for your reference; 
when you answer, you cite your source in the book and explain why you're correct.
If you don't know the answer, say so.
""",
    model=AiModel.Gpt3_5Turbo.value,
    tools=[{"type": "file_search"}],
    tool_resources={"file_search": {"vector_store_ids": [processed_documents.id]}}
)

thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=f"What is the first words Victor Frankenstein speaks?"
)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id,
)

# Wait for Run to Complete
while run.status == "in_progress" or run.status == "queued":
  sleep(1)
  print(run.status)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )

# Collect Messages from the Thread
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

client.beta.assistants.delete(assistant.id)
client.beta.vector_stores.delete(processed_documents.id)
client.files.delete(frankenstien_file.id)
client.beta.threads.delete(thread.id)

pprint(messages)

messages

queued
in_progress
in_progress
in_progress
SyncCursorPage[Message](data=[Message(id='msg_QAhSfbhQKO42FVpqXlFOthUY', assistant_id='asst_8AdQfwoHc1q2WoXurPu4r9Ov', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[FileCitationAnnotation(end_index=407, file_citation=FileCitation(file_id='file-LsiTe1nwVdzfrKgrlWkqnmRN'), start_index=395, text='【4:2†source】', type='file_citation')], value='The first words spoken by Victor Frankenstein in the book "Frankenstein" by Mary Shelley are, "Cursed be the day, abhorred devil, in which you first saw light! Cursed (although I curse myself) be the hands that formed you! You have made me wretched beyond expression. You have left me no power to consider whether I am just to you or not. Begone! Relieve me from the sight of your detested form"【4:2†source】.'), type='text')], created_at=1722303860, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_GuyBTi7rtWCLZ38

SyncCursorPage[Message](data=[Message(id='msg_QAhSfbhQKO42FVpqXlFOthUY', assistant_id='asst_8AdQfwoHc1q2WoXurPu4r9Ov', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[FileCitationAnnotation(end_index=407, file_citation=FileCitation(file_id='file-LsiTe1nwVdzfrKgrlWkqnmRN'), start_index=395, text='【4:2†source】', type='file_citation')], value='The first words spoken by Victor Frankenstein in the book "Frankenstein" by Mary Shelley are, "Cursed be the day, abhorred devil, in which you first saw light! Cursed (although I curse myself) be the hands that formed you! You have made me wretched beyond expression. You have left me no power to consider whether I am just to you or not. Begone! Relieve me from the sight of your detested form"【4:2†source】.'), type='text')], created_at=1722303860, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_GuyBTi7rtWCLZ38ORzmMqyOe', status=None, thread_id='thread_

In [8]:
from typing import Literal

A = str | int
A = Literal["asdf"] | Literal[True]

In [None]:
from pprint import pprint
from typing import Literal, Union
from types import FunctionType, NoneType

def get_temperature(location: str, unit: Literal["celsius"] | Literal["farenheit"] = "celsius") -> str:
    """
    Get the current temperature at a specified location.
    """
    
    return f"Weather: {location}:{unit}"
    
def non_annotated(a: Literal[None] | Literal[123]) -> str:
    "asdf"
    return None

def to_openai_tool(function: FunctionType):
    parameters = getattr(function, "__annotations__", None)
    
    assert parameters != None and len(parameters) != 0, "The function must have type annotations to generate tool data!"
    assert parameters.pop("return") == str, "The return type of the function must be a string!" 
    
    def type_to_json_schema(parameter_type):
        SCHEMA_TYPE_MAPPING = {
            str: "string",
            int: "number",
            float: "number",
            dict: "object",
            list: "array",
            bool: "boolean",
            None: "null"
        }
        
        schema_type = SCHEMA_TYPE_MAPPING.get(parameter_type, None)
        
        if schema_type != None:
            return {"type": schema_type}
        
        error_message = f"Unconvertable type annotation [{parameter_type}]"
        
        maybe_union_type = getattr(parameter_type, "__origin__", None)
        assert maybe_union_type != None and maybe_union_type == Union, error_message
        
        def parse_argument(argument):
            maybe_argument_type = getattr(argument, "__origin__", None)
            assert maybe_argument_type != None and maybe_argument_type == Literal, error_message 
            
            maybe_single_argument = argument.__args__
            
            assert len(maybe_single_argument) == 1, error_message
            
            return maybe_single_argument[0]
        
        arguments = [parse_argument(argument) for argument in parameter_type.__args__]
        
        def parse_argument_type(argument):
            argument_type = type(argument)
            
            if argument_type != NoneType:
                assert SCHEMA_TYPE_MAPPING.get(argument_type, None) != None, error_message
            
            return argument_type
        
        argument_types = [parse_argument_type(argument) for argument in arguments]
        
        enum_type = {"enum": arguments}
        
        arguments_type_iter = iter(argument_types)
        first_argument_type = next(arguments_type_iter)
        
        if all((first_argument_type == argument_type for argument_type in argument_types)):
            enum_type.update(type_to_json_schema(first_argument_type))
        
        return enum_type
    
    parameters = {
        parameter_name: type_to_json_schema(parameter_type) for 
        parameter_name, parameter_type in parameters.items()
    }
    
    description = getattr(function, "__doc__", None)
    
    assert description != None, "The function must have a docstring!"
    
    return {
        "type": "function",
        "function": {
            "name": function.__name__,
            "description": description.strip(),
            "parameters": {
                "type": "object",
                "properties": parameters
            }
        }
    }
    
pprint(to_openai_tool(non_annotated))