In [1]:
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
import os
from datetime import datetime
import time
import pathlib 

# Project root directory. Set the PROJECT_DIR environment variable (in the
# process environment, before this cell runs) to use the notebook on another
# machine; when it is unset the original local path is used unchanged.
project_dir = pathlib.Path(
    os.environ.get(
        "PROJECT_DIR",
        "/media/hawksley/E2B01BDEB01BB7CD/Users/hawks/Documents/Harvard/CSCI E-104 Advanced Deep Learning/Final Project",
    )
)
# Sub-path "data/raw" under the project root.
log_dir = project_dir / "data/raw"
# CSV file that a later cell opens and uploads to the OpenAI Files API.
csv_path = project_dir / "outputs/combined_logs.csv"

In [2]:
# 1. Load API key and create client
# Locate a .env file and load its variables into the process environment
# (the return value is deliberately discarded).
dotenv_file = find_dotenv()
_ = load_dotenv(dotenv_file)

# Every request carries this header, which opts in to v2 of the Assistants beta.
assistants_beta_headers = {"OpenAI-Beta": "assistants=v2"}
client = OpenAI(
    api_key=os.environ.get("OPENAI_TOKEN"),
    default_headers=assistants_beta_headers,
)

In [3]:
# 2. Upload your CSV file(s)
# Each path is opened in binary mode, uploaded with purpose "assistants",
# and the ID of the resulting file object is collected into file_ids.
upload_paths = [csv_path]  # Add more files as needed
file_ids = []
for upload_path in upload_paths:
    with open(upload_path, "rb") as handle:
        uploaded = client.files.create(file=handle, purpose="assistants")
    file_ids.append(uploaded.id)
print("File IDs:", file_ids)

File IDs: ['file-LdPLNS2aAgxLuYwevbmnV9']


In [10]:
# 3. Create the assistant (no file_ids needed)
# The current local date is embedded in the system instructions so the model
# has a fixed reference point for relative phrases such as "yesterday".
today = f"{datetime.now():%Y-%m-%d}"
instruction_parts = [
    "You are a detection log and date/time expert. Use Python and pandas to analyze the detection logs. ",
    "Only answer using your knowledge of the date and time and the detection logs.\n",
    f"Today's date is {today}. For any questions involving time, such as 'most recent', ",
    "'last', 'yesterday', 'last week', 'last month', 'this winter', or any other relative ",
    "date or time phrase, use this date as the reference for 'today'. ",
    "You have access to the detection logs as CSV files and can use Python and pandas to analyze them.",
]
instructions = "".join(instruction_parts)

assistant_settings = {
    "name": "AI Report Assistant",
    "instructions": instructions,
    "model": "gpt-3.5-turbo",  # alternative tried: "gpt-4o"
    "tools": [{"type": "code_interpreter"}],
}
assistant = client.beta.assistants.create(**assistant_settings)
print("Assistant ID:", assistant.id)

Assistant ID: asst_K79674mKbD9fBTp7jYrOPwl6


In [11]:
# 4. Create a thread
# Later cells post messages to this thread and start runs on it via thread.id.
thread = client.beta.threads.create()
print(f"Thread ID: {thread.id}")

Thread ID: thread_3PEVw0zLlwsWE8nzN7KWUzii


In [12]:
# 5. Send a message with the file attached
user_input = "What is the date and time of the last record for when a school bus was detected?"

# One attachment entry per uploaded file, each enabling the code_interpreter
# tool for that file.
attachments = [
    {"file_id": uploaded_id, "tools": [{"type": "code_interpreter"}]}
    for uploaded_id in file_ids
]
user_message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=user_input,
    attachments=attachments,
)
print("User message sent.")

User message sent.


In [13]:
# # 6. Run the assistant on the thread (disabled: the "Ask a new question" cell further down starts the run and polls it inline)
# run = client.beta.threads.runs.create(
#     thread_id=thread.id,
#     assistant_id=assistant.id
# )
# print("Run started. Waiting for completion...")

Run started. Waiting for completion...


In [14]:
# # 7. Poll for run completion
# while True:
#     run_status = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
#     if run_status.status == "completed":
#         break
#     elif run_status.status in {"failed", "cancelled", "expired"}:
#         raise RuntimeError(f"Run failed: {run_status.status}")
#     time.sleep(1)

In [15]:
# # 8. Retrieve and print the assistant's response
# messages = client.beta.threads.messages.list(thread_id=thread.id)
# for msg in messages.data:
#     for content in msg.content:
#         if hasattr(content, "text"):
#             print("Assistant response:", content.text.value)

Assistant response: The date and time of the last record for when a school bus was detected is on May 9, 2025, at 15:01:00.
Assistant response: The detection log contains information about different objects detected at various timestamps. To find the date and time of the last record for when a school bus was detected, we can filter the dataframe for rows where 'label_1' or 'label_2' is 'school bus' and then identify the record with the latest timestamp.

Let's proceed with this analysis.
Assistant response: What is the date and time of the last record for when a school bus was detected?


In [24]:
# Cell: Ask a new question (repeat as much as you want)
# Posts the question to the existing thread, starts a run with the assistant,
# polls until the run finishes, then prints the text parts of the newest
# message on the thread.
user_input = "Were any objects more likely to be detected at night?"
user_message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=user_input
)
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id
)
print("Run started. Waiting for completion...")

# Statuses from which this notebook cannot reach "completed". "incomplete" is a
# terminal status that was previously unhandled, so the loop would spin forever;
# "requires_action" cannot progress because nothing here submits tool outputs.
unsuccessful_statuses = {"failed", "cancelled", "expired", "incomplete", "requires_action"}
poll_interval_s = 1
max_wait_s = 600  # upper bound so a stuck run cannot hang the kernel indefinitely
deadline = time.monotonic() + max_wait_s

while True:
    run_status = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    if run_status.status == "completed":
        break
    elif run_status.status in unsuccessful_statuses:
        # Include the API-reported error detail when the run object carries one.
        error_detail = getattr(run_status, "last_error", None)
        suffix = f" ({error_detail})" if error_detail else ""
        raise RuntimeError(f"Run failed: {run_status.status}{suffix}")
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Run {run.id} still '{run_status.status}' after {max_wait_s} seconds"
        )
    time.sleep(poll_interval_s)

messages = client.beta.threads.messages.list(thread_id=thread.id)
latest_msg = messages.data[0]  # Most recent message
for content in latest_msg.content:
    # Only content parts that expose a .text attribute are printed.
    if hasattr(content, "text"):
        print("Assistant response:", content.text.value)

Run started. Waiting for completion...
Assistant response: The following objects were more likely to be detected at night:

- Racoon
- Newspaper
- Skunk
- Truck
- Police car
- Bench
- Snow plow
- Fire truck
- Bus
- Person

These objects showed a higher frequency of detection during nighttime hours (from 8:00 PM to 5:59 AM) compared to other objects in the detection logs.
