In [1]:
# from open ai cookbook:
# https://cookbook.openai.com/examples/assistants_api_overview_python

# inits
from openai import OpenAI
from dotenv import load_dotenv
import os
import json

# Load environment variables via python-dotenv (typically from a local .env
# file), then build the OpenAI client from the key found in the environment.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

def show_json(obj):
    """Show an SDK object as a parsed JSON structure in the cell output.

    ``obj`` must expose ``model_dump_json()``. The JSON string it returns is
    parsed with ``json.loads`` and the resulting Python structure is handed to
    ``display``, which is expected to be available in the notebook namespace.
    """
    serialized = obj.model_dump_json()
    parsed = json.loads(serialized)
    display(parsed)

In [2]:
#step 1 - create an assistant with file search enabled
# Register an assistant that may use the file_search tool when answering.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Machine learning researcher",
    instructions="You are a machine learning researcher. Answer questions using the research paper.",
    tools=[{"type": "file_search"}],
)

In [3]:
# Step 2: Upload files and add them to a Vector Store
# https://platform.openai.com/docs/assistants/tools/file-search/step-2-upload-files-and-add-them-to-a-vector-store

vector_store = client.beta.vector_stores.create(name="memgpt_research")

# Ready the files for upload to OpenAI
file_paths = ["memgpt-paper.pdf"]
file_streams = [open(path, "rb") for path in file_paths]

# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion.
# The streams are closed in `finally` so the OS file handles are released
# whether or not the upload succeeds (the original cell left them open).
try:
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )
finally:
    for stream in file_streams:
        stream.close()

# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

# file = client.files.create(
#     file=open("memgpt-paper.pdf", "rb"),
#     purpose = "assistants"
# )
# print(file)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


In [4]:
# step 3: Update the assistant to use the new Vector Store
# To make the files accessible to your assistant, update the assistant’s tool_resources with the new vector_store id.

# Point the assistant's file_search tool at the vector store created earlier
# and keep the updated assistant object returned by the API.
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tool_resources={
        "file_search": {
            "vector_store_ids": [vector_store.id],
        },
    },
)

In [5]:
# step 4 - create a thread

# Upload the user provided file to OpenAI
# Open the paper inside a `with` block so the file handle is closed as soon as
# the upload call returns (the original passed an `open(...)` that was never closed).
with open("memgpt-paper.pdf", "rb") as paper:
    message_file = client.files.create(file=paper, purpose="assistants")

# Create a thread and attach the file to the message
thread = client.beta.threads.create(
  messages=[
    {
      "role": "user",
      "content": "Summarize the research paper",
      # Attach the new file to the message.
      "attachments": [
        { "file_id": message_file.id, "tools": [{"type": "file_search"}] }
      ],
    }
  ]
)

# Show the file_search resources that the API reports for the new thread.
print(thread.tool_resources.file_search)

# thread = client.beta.threads.create()
# print(thread)

ToolResourcesFileSearch(vector_store_ids=['vs_6QU1KDAL8rSyfxHRrQKNimBZ'])


In [6]:
# step 5: Create a run and check the output (without streaming)

# Use the create and poll SDK helper to create a run and poll the status of
# the run until it's in a terminal state.

run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id
)

messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))

# If the run produced no message, indexing messages[0] would raise a bare
# IndexError; report the run status and error instead.
if not messages:
    raise RuntimeError(
        f"Run {run.id} produced no messages (status={run.status!r}, last_error={run.last_error})"
    )

message_content = messages[0].content[0].text
annotations = message_content.annotations
citations = []
# Cache filenames per file id so a file cited several times is looked up only once.
filenames_by_id = {}
for index, annotation in enumerate(annotations):
    # Replace the inline annotation marker with a numbered reference like "[0]".
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    if file_citation := getattr(annotation, "file_citation", None):
        file_id = file_citation.file_id
        if file_id not in filenames_by_id:
            filenames_by_id[file_id] = client.files.retrieve(file_id).filename
        citations.append(f"[{index}] {filenames_by_id[file_id]}")

print(message_content.value)
print("\n".join(citations))

### Summary of Research Paper: MemGPT: Towards LLMs as Operating Systems

#### Abstract
MemGPT is proposed to overcome the limitations of large language models (LLMs), which typically have constrained context windows. Leveraging techniques from traditional operating systems (OS), such as hierarchical memory management and virtual memory paging, MemGPT manages different storage tiers to extend the effective context for LLMs. This allows for superior performance in tasks like extensive document analysis and long-term conversational interactions[0].

#### Introduction
Recent advances in LLMs, including transformer architectures, have significantly enhanced conversational AI. However, their limited fixed-length context windows restrict their effectiveness in long conversations and document analysis. Approaches to extend context length result in increasing computational costs, both in time and memory, due to the self-attention mechanism of transformers. Consequently, there is a need for alt