In [3]:
# ==================== SETTING UP THE MODEL FROM OPEN AI ====================

import os
import json
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import OpenAI
from openai import OpenAI
import time
from IPython.display import clear_output

# ==================== LOADING THE ENV VARIABLES =========================
# load_dotenv() reads a local .env file (if present) and exports its entries
# into os.environ, so the keys do not need to be copied over by hand.
# Re-assigning os.environ[K] = os.getenv(K) would raise a TypeError whenever
# K is unset, because os.getenv returns None and os.environ only accepts str.
load_dotenv()

# The OpenAI client below cannot work without this key, so fail early with a
# message that says exactly what is missing.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

# These keys are not needed by the OpenAI client itself; report missing ones
# instead of crashing the whole cell.
for optional_key in ("LANGCHAIN_API_KEY", "GROQ_API_KEY"):
    if not os.getenv(optional_key):
        print(f"Note: {optional_key} is not set; features that need it will be unavailable.")

# Only switch LangChain tracing on when there is a key to authenticate with.
if os.getenv("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"

# ==================== INITIATING THE CLIENT =============================
# OpenAI() picks up OPENAI_API_KEY from the environment.
client = OpenAI()

In [5]:
# Upload the dataset so the assistant's code interpreter can read it.
# The context manager guarantees the local file handle is closed once the
# upload call returns (or raises), instead of leaking it for the kernel's lifetime.
with open("./data/diabetes.csv", "rb") as dataset_handle:
    file = client.files.create(
        file=dataset_handle,
        purpose='assistants'
    )

# Create an assistant that has the code-interpreter tool enabled and the
# uploaded dataset attached to that tool.
assistant = client.beta.assistants.create(
    model="gpt-4o-mini-2024-07-18",
    instructions=(
        "You are an assitant answering questions about\n"
        "                  a Diabetes dataset."
    ),
    tools=[{"type": "code_interpreter"}],
    tool_resources={
        "code_interpreter": {"file_ids": [file.id]},
    },
)

# Open a new conversation thread and show the created object.
thread = client.beta.threads.create()
print(thread)

# Post the user's question to the thread and show the created message.
message = client.beta.threads.messages.create(
    content="the average age group of Diabetes positive people we have?",
    role="user",
    thread_id=thread.id,
)
print(message)

# Start a run so the assistant processes the thread.
run = client.beta.threads.runs.create(assistant_id=assistant.id, thread_id=thread.id)


# Poll the run until it reaches a state where further waiting cannot help.
# "incomplete" is a terminal status and "requires_action" needs caller input,
# so both must stop the loop; leaving them out can make the cell spin forever.
TERMINAL_RUN_STATUSES = {
    "completed",
    "cancelled",
    "expired",
    "failed",
    "incomplete",
    "requires_action",
}
POLL_INTERVAL_SECONDS = 5
# Client-side upper bound so a stuck run cannot hang the cell indefinitely.
MAX_WAIT_SECONDS = 600

status = run.status
start_time = time.time()
while status not in TERMINAL_RUN_STATUSES:
    if time.time() - start_time > MAX_WAIT_SECONDS:
        print(f"Stopped waiting after {MAX_WAIT_SECONDS} seconds; last status: {status}")
        break

    time.sleep(POLL_INTERVAL_SECONDS)
    run = client.beta.threads.runs.retrieve(
        thread_id=thread.id,
        run_id=run.id
    )
    status = run.status

    # Sample the clock once so minutes and seconds come from the same instant.
    elapsed_minutes, elapsed_seconds = divmod(int(time.time() - start_time), 60)

    # Clear the previous progress lines first, so the latest status stays
    # visible after the loop ends instead of being wiped by the next output.
    clear_output(wait=True)
    print("Elapsed time: {} minutes {} seconds".format(elapsed_minutes, elapsed_seconds))
    print(f'Status: {status}')

# Surface unsuccessful outcomes instead of silently carrying on.
if status != "completed":
    print(
        f"Run did not complete (status: {status}). "
        f"Details: {getattr(run, 'last_error', None)}"
    )


# Fetch every message on the thread and dump the listing as
# pretty-printed JSON.
messages = client.beta.threads.messages.list(thread_id=thread.id)

print(
    messages.model_dump_json(indent=2)
)

{
  "data": [
    {
      "id": "msg_Xp71Ntp46rKyAcY5RJQhKTh2",
      "assistant_id": "asst_L3zlx2obB7BSEMNHY9XGVaDW",
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "The average age of diabetes-positive individuals in the dataset is approximately **37.07 years**."
          },
          "type": "text"
        }
      ],
      "created_at": 1722452032,
      "incomplete_at": null,
      "incomplete_details": null,
      "metadata": {},
      "object": "thread.message",
      "role": "assistant",
      "run_id": "run_lSxDnYyPwrgzJGn9IuhhVZbU",
      "status": null,
      "thread_id": "thread_rWqhzgAwcFcO1YdoaYiyGolj"
    },
    {
      "id": "msg_b2wX8WCpnFBFdAwWHRh8Vk1O",
      "assistant_id": "asst_L3zlx2obB7BSEMNHY9XGVaDW",
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "Th