# Introduction

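This notebook shows how to summarize a PDF document using the OpenAI Assistants API: upload the file, create an assistant and a thread, run the assistant, and display its response.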

# Load OpenAI API key

In [None]:
import os
from dotenv import load_dotenv

# Read a local .env file (if present) into the process environment, then
# look up the key the OpenAI client expects.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Report only whether the key is present. No part of the secret is echoed,
# because cell output is stored in the notebook file and is easily shared.
if api_key:
    print("API key loaded.")
else:
    print("API key not loaded")

# Create OpenAI client

In [None]:
from openai import OpenAI

# Build the API client. OpenAI() is called without arguments, so credentials
# are expected to come from the environment (OPENAI_API_KEY).
try:
    openai_client = OpenAI()
    print("OpenAI client created successfully.")
except Exception as e:
    print(f"Failed to create OpenAI client: {e}")
    # Re-raise: every later cell needs `openai_client`, so carrying on would
    # only surface a less helpful NameError further down the notebook.
    raise

# Upload a PDF document for the OpenAI Assistants API

In [None]:
# Upload the PDF with purpose "assistants" so it can be attached to a message.
# The `with` block closes the local file handle as soon as the upload call
# returns (or raises), instead of leaving it open for the kernel's lifetime.
with open("Tech_Screening_Cloud_Architect.pdf", "rb") as pdf_file:
    file = openai_client.files.create(
        file=pdf_file,
        purpose="assistants"
    )
print("Uploaded file ID:", file.id)

In [None]:
# Show every stored file as "id | filename | purpose | status"
files = openai_client.files.list()
rows = (
    f"{item.id} | {item.filename} | {item.purpose} | {item.status}"
    for item in files.data
)
for row in rows:
    print(row)

# Create API assistant

In [None]:
# Settings for the assistant: a gpt-4o model with the file_search tool enabled
assistant_config = dict(
    name="Document Analyzer",
    instructions="You help summarize and analyze uploaded documents.",
    tools=[{"type": "file_search"}],
    model="gpt-4o",
)
assistant = openai_client.beta.assistants.create(**assistant_config)
print("Assistant ID:", assistant.id)

# Create a thread

In [None]:
# Open a new conversation thread and report its ID
threads_api = openai_client.beta.threads
thread = threads_api.create()
print("Thread ID:", thread.id)

# Send a message with attached file

In [None]:
# Attachment entry: the uploaded PDF, made available to the file_search tool
pdf_attachment = {
    "file_id": file.id,
    "tools": [{"type": "file_search"}],
}

# Post the user's request on the thread with the PDF attached
message = openai_client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Can you summarize this document and write the summary in markdown format?",
    attachments=[pdf_attachment],
)
print("Message ID:", message.id)

# Start a run of the assistant on the thread

In [None]:
# Start the assistant on the thread; the returned run is polled afterwards
run_params = {"thread_id": thread.id, "assistant_id": assistant.id}
run = openai_client.beta.threads.runs.create(**run_params)

# Poll for result

In [None]:
import time
# Statuses after which polling is pointless: the run has either ended or is
# waiting on input this notebook never provides. Waiting only for
# "completed"/"failed" would loop forever on any of the others.
finished_statuses = {
    "completed",
    "failed",
    "cancelled",
    "expired",
    "incomplete",
    "requires_action",
}

# Re-fetch the run once per second until it reaches one of those statuses.
while True:
    run_status = openai_client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    if run_status.status in finished_statuses:
        break
    time.sleep(1)

# Make a non-successful outcome visible instead of silently moving on.
if run_status.status != "completed":
    print(f"Run ended with status '{run_status.status}': {run_status.last_error}")

# Read the assistant’s response

In [None]:
from IPython.display import Markdown, display

def display_markdown(text):
    """Render ``text`` as Markdown in the cell output."""
    rendered = Markdown(text)
    display(rendered)

In [None]:
# Fetch the thread's messages and render them in reverse of the returned
# order (presumably oldest first, since the API lists newest first by
# default; confirm against the Messages API reference).
messages = openai_client.beta.threads.messages.list(thread_id=thread.id)
for msg in reversed(messages.data):
    # A message may carry zero or several content parts, and not all of them
    # are text; render every text part and skip the rest instead of assuming
    # that content[0] exists and is text.
    for part in msg.content:
        if part.type == "text":
            display_markdown(part.text.value)

# Delete attached files

In [None]:
# Delete only the file uploaded by this notebook. Removing every entry from
# files.list() would also wipe unrelated files stored under the same API key.
# Filtering the listing (rather than deleting file.id directly) keeps the cell
# safe to re-run after the file is already gone.
files = openai_client.files.list()
for f in files.data:
    if f.id == file.id:
        openai_client.files.delete(file_id=f.id)

In [None]:
# Check that the file uploaded by this notebook is gone. Other files stored
# under the same API key are ignored: they are not this notebook's concern.

files = openai_client.files.list()
leftover = [f for f in files.data if f.id == file.id]
if not leftover:
    print("The uploaded file has been deleted successfully.")
else:
    print("The uploaded file was not deleted:")
    for f in leftover:
        print(f"{f.id} | {f.filename} | {f.purpose} | {f.status}")