In [1]:
# Installations

%pip install --upgrade pip -q
%pip install mypy -q

# %pip install numpy matplotlib pandas scipy -q
# %pip install setuptools wandb -q

%pip install openai python-dotenv -q

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# https://stackoverflow.com/questions/63142182/to-what-extent-does-google-colab-support-python-typing

from IPython.core.magic import register_cell_magic
from IPython import get_ipython
from mypy import api

@register_cell_magic
def mypy(line, cell):
  """Type-check a cell with mypy and run it only when the check is clean.

  Used as the ``%%mypy`` cell magic.

  Args:
    line: Text following ``%%mypy`` on the magic line; it is split on
      whitespace and handed to mypy as extra command-line arguments.
    cell: Source code of the cell body.

  Raises:
    TypeError: If mypy reports anything other than a "Success" message on
      stdout/stderr, or exits with a non-zero status.
  """
  # mypy.api.run returns (stdout, stderr, exit_status). Only the first two are
  # strings; the exit status is an int and must not be fed to str.startswith.
  # The leading newline is kept from the original invocation (presumably so
  # reported line numbers account for the magic line -- TODO confirm).
  stdout, stderr, exit_status = api.run(['-c', '\n' + cell] + line.split())

  for output in (stdout, stderr):
    if output and not output.startswith('Success'):
      raise TypeError(output)

  # Covers the case where mypy fails without writing any report text.
  if exit_status:
    raise TypeError(f'mypy exited with status {exit_status}')

  get_ipython().run_cell(cell)

In [2]:
from dataclasses import dataclass, field
from enum import Enum

from openai import OpenAI
from openai.types.chat import (
    ChatCompletionMessage,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
    ChatCompletionAssistantMessageParam,
    ChatCompletionToolMessageParam,
    ChatCompletionFunctionMessageParam,
  )

from openai.types.chat.completion_create_params import ResponseFormat
from openai._types import NotGiven

from dotenv import dotenv_values

# WANDB = False

# if WANDB:
#   from wandb.integration.openai import autolog
#   autolog({"project": "First RAG App"})

# Shared OpenAI client; the API key is looked up in the `.env` file one directory up.
client = OpenAI(
  api_key=dotenv_values("../.env")["OPENAI_API_KEY"],
)

# Union of the message-param types imported from openai.types.chat above.
RawConversationMessage = (
  ChatCompletionSystemMessageParam
  | ChatCompletionUserMessageParam
  | ChatCompletionAssistantMessageParam
  | ChatCompletionToolMessageParam
  | ChatCompletionFunctionMessageParam
)

class Role(Enum):
  """Author of a chat message; each member's value is its role string."""

  System = "system"        # instructions that frame the conversation
  User = "user"            # input from the person chatting
  Assistant = "assistant"  # replies produced by the model
  Tool = "tool"            # tool output

class AiModel(Enum):
  """Model identifiers available to this notebook; the value is the model name string."""

  Gpt3_5Turbo = "gpt-3.5-turbo"  # the only model configured so far

class AssistantTool(Enum):
  """Tool type names; each member's value is the tool's `type` string."""

  CodeInterpreter = "code_interpreter"
  FileSearch = "file_search"
  Function = "function"

@dataclass(frozen=True, order=True, slots=True)
class Message:
  """Immutable chat message: an author role plus plain-text content.

  Note: ``order=True`` compares ``(role, content)`` tuples. ``Role`` is a plain
  ``Enum`` with no ordering, so ``<``/``>`` between two messages whose roles
  differ raises ``TypeError``; only equality is safe across roles.
  """

  role: Role
  content: str

  def into(self) -> RawConversationMessage:
    """Return this message as a ``{"role": ..., "content": ...}`` dict."""
    payload = {"role": self.role.value, "content": self.content}
    return payload # type:ignore

  @classmethod
  def from_raw(cls, raw: ChatCompletionMessage) -> "Message":
    """Build a Message from a ChatCompletionMessage; empty or missing content becomes ""."""
    text = raw.content if raw.content else ""
    return cls(role=Role(raw.role), content=text)

def system(message: str) -> Message:
  """Wrap `message` in a Message with the system role."""
  return Message(role=Role.System, content=message)

def user(message: str) -> Message:
  """Wrap `message` in a Message with the user role."""
  return Message(role=Role.User, content=message)

def respond(
  messages: list[Message],
  response_format: ResponseFormat | NotGiven=NotGiven(),
  model: AiModel = AiModel.Gpt3_5Turbo,
) -> Message:
  """Send the conversation to the chat completions endpoint and return the reply.

  Args:
    messages: Conversation so far; each entry is converted with `Message.into`.
    response_format: Passed through to the API call unchanged.
    model: Model to query; its enum value is sent as the model name.

  Returns:
    The message of the first choice in the completion, as a `Message`.
  """
  payload = [entry.into() for entry in messages]

  completion = client.chat.completions.create(
    model=model.value,
    messages=payload,
    response_format=response_format
  )

  # Only the first choice is used.
  first_choice = completion.choices[0]
  return Message.from_raw(first_choice.message)

@dataclass(slots=True)
class Chatbot:
  """Stateful wrapper around `respond` that keeps the running message history."""

  # Conversation history; both methods below append to it in call order.
  messages: list[Message] = field(default_factory=list)

  def respond(self, response_format: ResponseFormat | NotGiven=NotGiven()) -> Message:
    """Request a reply for the current history, record it, and return it."""
    # Inside a method body the bare name `respond` resolves to the module-level function.
    reply = respond(self.messages, response_format=response_format)
    self.messages.append(reply)
    return reply

  def converse(self, chat: Message, response_format: ResponseFormat | NotGiven=NotGiven()) -> Message:
    """Append `chat` to the history, then return the model's reply to it."""
    self.messages.append(chat)
    reply = self.respond(response_format=response_format)
    return reply

In [3]:
from time import sleep
from pprint import pprint

# Prompt for the assistant, kept byte-for-byte as originally written.
ASSISTANT_INSTRUCTIONS = """
You are a librarian who'd like to answer questions about the book Frankenstien by Mary Shelley.
The book is included in a file for your reference; 
when you answer, you cite your source in the book and explain why you're correct.
If you don't know the answer, say so.
"""

# Upload the book. The `with` block closes the local file handle once the
# upload call returns, instead of leaving it open for the kernel's lifetime.
with open("./frankenstien.txt", "rb") as book_file:
  frankenstien_file = client.files.create(
    file=book_file,
    purpose="assistants"
  )

# The remaining remote resources are created one at a time inside the `try`.
# They start as None so the `finally` clause deletes only what actually exists.
processed_documents = None
assistant = None
thread = None

try:
  processed_documents = client.beta.vector_stores.create(
    name="Frankenstien Documents"
  )

  # Attaching a file starts asynchronous ingestion. Poll until it finishes so
  # file_search has indexed content before the run is started.
  vector_store_file = client.beta.vector_stores.files.create_and_poll(
    vector_store_id=processed_documents.id,
    file_id=frankenstien_file.id
  )
  if vector_store_file.status != "completed":
    raise RuntimeError(
      f"Vector store ingestion ended with status {vector_store_file.status!r}: "
      f"{vector_store_file.last_error}"
    )

  assistant = client.beta.assistants.create(
    name="Frank(enstien)",
    instructions=ASSISTANT_INSTRUCTIONS,
    model=AiModel.Gpt3_5Turbo.value,
    tools=[{"type": "file_search"}],
    tool_resources={"file_search": {"vector_store_ids": [processed_documents.id]}}
  )

  thread = client.beta.threads.create()

  message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="What is the first words Victor Frankenstein speaks?"
  )

  run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
  )

  # Wait for Run to Complete
  while run.status == "in_progress" or run.status == "queued":
    sleep(1)
    print(run.status)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )

  # The loop also exits on failed/expired/cancelled/etc. Make that visible
  # rather than printing a thread with no answer and no explanation.
  if run.status != "completed":
    print(f"Run ended with status {run.status!r}: {run.last_error}")

  # Collect Messages from the Thread
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )

  pprint(messages)
finally:
  # Always clean up remote resources, even when a step above raised, so
  # nothing is left behind on the account. Deletion order matches the original.
  if assistant is not None:
    client.beta.assistants.delete(assistant.id)
  if processed_documents is not None:
    client.beta.vector_stores.delete(processed_documents.id)
  client.files.delete(frankenstien_file.id)
  if thread is not None:
    client.beta.threads.delete(thread.id)

queued
in_progress
SyncCursorPage[Message](data=[Message(id='msg_UpOxKHbBoCgkcLoL620tdn4i', assistant_id='asst_d21pbJKTSpxRO644q9LVkEyC', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='How can I assist you with the book "Frankenstein" by Mary Shelley? What specific information or questions do you have about the text?'), type='text')], created_at=1722217586, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_8yoJHyGzVZEOTX8IfV2RqWFI', status=None, thread_id='thread_0fjYnYsKPlTLjBaRdBlcg3K8')], object='list', first_id='msg_UpOxKHbBoCgkcLoL620tdn4i', last_id='msg_UpOxKHbBoCgkcLoL620tdn4i', has_more=False)
