In [1]:
from openai import OpenAI

import os
from dotenv import load_dotenv

# Load the environment variables from the .env file
load_dotenv()

True

In [2]:
# Retrieve the secrets from the loaded environment variables
openai_api_key = os.getenv('OPENAI_API_KEY')

client = OpenAI(api_key=openai_api_key)
client

<openai.OpenAI at 0x1076931f0>

In [6]:
## Create a file

file = client.files.create(
    file=open("complete_works_of_shakespeare.pdf", 'rb'),
    purpose='assistants'
)

file

FileObject(id='file-dbTzvUqIQS7HhKbdOgkjqSmo', bytes=77032153, created_at=1729710522, filename='complete_works_of_shakespeare.pdf', object='file', purpose='assistants', status='processed', status_details=None)

In [7]:
## Create a vector store from the file

vector_store = client.beta.vector_stores.create(
    name="Shakespeare Vector Store",
    file_ids=[file.id]
)

vector_store

VectorStore(id='vs_NdOQW1FfexbMlXKcGY8BCm9U', created_at=1729710583, file_counts=FileCounts(cancelled=0, completed=0, failed=0, in_progress=1, total=1), last_active_at=1729710583, metadata={}, name='Shakespeare Vector Store', object='vector_store', status='in_progress', usage_bytes=0, expires_after=None, expires_at=None)

In [8]:
## Create an Assistant

assistant = client.beta.assistants.create(
    name="Shakespeare AI Assistant",
    instructions="You are a Shakespeare plays expert who answers the question based on the Shakespeare plays inside the Shakespeare Vector Store.",
    model="gpt-4o-mini",
    tools=[{"type": "file_search"}],
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}
)

assistant

Assistant(id='asst_DpHaMsJIiZXqE7UU79Pl5N3D', created_at=1729710607, description=None, instructions='You are a Shakespeare plays expert who answers the question based on the Shakespeare plays inside the Shakespeare Vector Store.', metadata={}, model='gpt-4o-mini', name='Shakespeare AI Assistant', object='assistant', tools=[FileSearchTool(type='file_search', file_search=FileSearch(max_num_results=None, ranking_options=FileSearchRankingOptions(score_threshold=0.0, ranker='default_2024_08_21')))], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=None, file_search=ToolResourcesFileSearch(vector_store_ids=['vs_NdOQW1FfexbMlXKcGY8BCm9U'])), top_p=1.0)

In [9]:
## Create a Thread

thread = client.beta.threads.create()
thread

Thread(id='thread_SfbI8d6p7PFeI5ThjeqZNNY9', created_at=1729710668, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))

In [10]:
## Add a Message to the Thread

message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content="How many plays there are in total?"
)

message

Message(id='msg_K6LY2rY7fCTa8D0OHdP4vWXH', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='How many plays there are in total?'), type='text')], created_at=1729710697, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='user', run_id=None, status=None, thread_id='thread_SfbI8d6p7PFeI5ThjeqZNNY9')

In [11]:
## Add a Run to the Thread - this triggers the assistant to respond to the message

run = client.beta.threads.runs.create_and_poll(
  thread_id=thread.id,
  assistant_id=assistant.id,
)

run

Run(id='run_iBWoe4rGUeQnCUm12NSTPEzU', assistant_id='asst_DpHaMsJIiZXqE7UU79Pl5N3D', cancelled_at=None, completed_at=1729710728, created_at=1729710714, expires_at=None, failed_at=None, incomplete_details=None, instructions='You are a Shakespeare plays expert who answers the question based on the Shakespeare plays inside the Shakespeare Vector Store.', last_error=None, max_completion_tokens=None, max_prompt_tokens=None, metadata={}, model='gpt-4o-mini', object='thread.run', parallel_tool_calls=True, required_action=None, response_format='auto', started_at=1729710715, status='completed', thread_id='thread_SfbI8d6p7PFeI5ThjeqZNNY9', tool_choice='auto', tools=[FileSearchTool(type='file_search', file_search=FileSearch(max_num_results=None, ranking_options=FileSearchRankingOptions(score_threshold=0.0, ranker='default_2024_08_21')))], truncation_strategy=TruncationStrategy(type='auto', last_messages=None), usage=Usage(completion_tokens=187, prompt_tokens=98626, total_tokens=98813), temperatur

In [12]:
if run.status == 'completed': 
  messages = client.beta.threads.messages.list(thread_id=thread.id)
  print(messages)
else:
  print(run.status)

SyncCursorPage[Message](data=[Message(id='msg_aqxE7bgyJSO48Rd9w42xX4nB', assistant_id='asst_DpHaMsJIiZXqE7UU79Pl5N3D', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="The total number of plays written by William Shakespeare is generally considered to be 37. This includes tragedies, comedies, and histories, although the exact count may vary based on the classification of certain works and whether collaborative pieces are included. \n\nIf needed, please provide further details if you're looking for anything specific about these plays."), type='text')], created_at=1729710727, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_iBWoe4rGUeQnCUm12NSTPEzU', status=None, thread_id='thread_SfbI8d6p7PFeI5ThjeqZNNY9'), Message(id='msg_K6LY2rY7fCTa8D0OHdP4vWXH', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='How many plays ther

In [13]:
## A Loop: ask a question, get a response, ask another question, get another response

while True:
    run = client.beta.threads.runs.create_and_poll(thread_id=thread.id, assistant_id=assistant.id)
    if run.status=="completed":
        messages = client.beta.threads.messages.list(thread_id=thread.id)
        latest_message = messages.data[0]
        text = latest_message.content[0].text.value
        print(text)
        break;

The total number of plays attributed to William Shakespeare is 37. This number includes various genres such as tragedies, comedies, and histories【16:0†source】.
