In [2]:
import io
import time
import os
import pandas as pd
import openai
import requests
from dotenv import load_dotenv
from openai import AzureOpenAI

In [None]:
# Load settings from the local "azure.env" file; override=True lets values in
# that file replace variables that are already set in the environment.
load_dotenv("azure.env", override=True)

# Model passed as `model` when the assistant is created.
# Alternative kept for reference: 'gpt-4-1106-preview'
MODEL_NAME: str = "gpt-35-turbo-0613"

# Remote CSV used both for the local sanity check and for the assistant upload.
URL: str = "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv"

# Azure OpenAI connection settings, stored on the `openai` module so that the
# client cell can read them back.
# NOTE: model availability and the Assistants API reference are documented at
# https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#assistants-preview
# https://learn.microsoft.com/en-us/azure/ai-services/openai/assistants-reference?tabs=python
openai.api_type = "azure"
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.api_base = os.environ.get("OPENAI_API_BASE")
openai.api_version = os.environ.get("OPENAI_API_VERSION")

# Echo the non-secret settings so a misconfigured environment is easy to spot.
print("Open AI version:", openai.__version__)
print("Open AI API Version:", openai.api_version)
print("Open AI API BASE:", os.environ.get("OPENAI_API_BASE"))

In [None]:
# Sanity check: read the CSV locally so the assistant's answers can be compared
# against values computed directly with pandas.
c = pd.read_csv(URL)

# head() returns the first 5 rows by default.
print(c.head())

# Count of distinct values in the "Country" column; shown as the cell output.
number_of_countries = c.loc[:, "Country"].nunique()
number_of_countries

In [5]:
# Build the Azure OpenAI client from the settings previously stored on the
# `openai` module (key, API version, and endpoint).
client_settings = {
    "api_key": openai.api_key,
    "api_version": openai.api_version,
    "azure_endpoint": openai.api_base,
}
client = AzureOpenAI(**client_settings)

In [6]:
# Create the assistant with the code interpreter tool enabled.
# The instruction lines are joined with newlines: adjacent string literals are
# concatenated without any separator, which previously glued sentences and the
# numbered steps together ("...data.You have...", "...steps:1. Write...").
assistant = client.beta.assistants.create(
    name=f"TEST - {MODEL_NAME}",
    instructions="\n".join(
        (
            "You are a helpful AI assistant who analyses data.",
            "You have access to a sandboxed environment for writing and testing code.",
            "When you are asked to do analysis you should follow these steps:",
            "1. Write the code.",
            "2. Anytime you write new code display a preview of the code to show your work.",
            "3. Run the code to confirm that it runs.",
            "4. If the code is successful return the result.",
            "5. If the code is unsuccessful display the error message and try to revise "
            "the code and rerun going through the steps from above again.",
        )
    ),
    tools=[{"type": "code_interpreter"}],
    model=MODEL_NAME,
)

In [7]:
# Download the remote CSV and upload it for use by the assistant.
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of uploading the
# error page as if it were the CSV.
response = requests.get(URL, timeout=30)
response.raise_for_status()
file_io = io.BytesIO(response.content)

# NOTE: the file name is not inferred from the URL and has to be set explicitly.
# It is deliberately different from the name in the URL to demonstrate this.
file = client.files.create(
    file=("countries_regions.csv", file_io),
    purpose="assistants",
)

In [8]:
# Open a new conversation thread and post the user's question, attaching the
# uploaded file to the message.
thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    role="user",
    content="How many regions are listed in the file?",
    file_ids=[file.id],
    thread_id=thread.id,
)

# Start a run of the assistant on the thread.
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Poll once per second; stop as soon as the run leaves the pending states.
while True:
    if run.status not in ('queued', 'in_progress', 'cancelling'):
        break
    time.sleep(1)
    run = client.beta.threads.runs.retrieve(
        run_id=run.id,
        thread_id=thread.id,
    )

In [None]:
# Print the conversation when the run completed; otherwise report why not.
if run.status == 'completed':
    messages = client.beta.threads.messages.list(thread_id=thread.id)

    # Keep text blocks only: the code interpreter can also return non-text
    # content blocks (e.g. images), which have no `.text` attribute.
    text = [
        bloc.text.value
        for message in messages
        for bloc in message.content
        if bloc.type == "text"
    ]

    # Print in the reverse of the order returned by the API
    # (the API lists the newest message first by default).
    for t in reversed(text):
        print(t)

elif run.status == 'requires_action':
    # Nothing is submitted from this notebook; say so instead of staying silent.
    print("Run requires action (tool outputs must be submitted before it can continue).")
else:
    print(run.status)
    # Show the error detail of a failed run when the run object carries one.
    last_error = getattr(run, "last_error", None)
    if last_error is not None:
        print(last_error)

In [None]:
# Show the code the assistant generated during the run.
run_steps = client.beta.threads.runs.steps.list(
    thread_id=thread.id,
    run_id=run.id
)

# One list of tool calls per "tool_calls" step. The step type is read from the
# step details directly rather than by unpacking the model's field iteration.
tool_calls = [
    step.step_details.tool_calls
    for step in run_steps
    if step.step_details.type == "tool_calls"
]

# Collect the input of every code interpreter call: all calls of a step are
# visited (not just the first), steps without calls are tolerated, and other
# tool types are skipped.
code_segments = [
    call.code_interpreter.input
    for calls in tool_calls
    for call in calls
    if call.type == "code_interpreter"
]

for code_segment in code_segments:
    print("------")
    print(code_segment)

In [86]:
# Remove the uploaded file.
# Only the file uploaded by this notebook is deleted. Listing and deleting
# every file would also remove unrelated files stored in the same resource.
print(file.id)
client.files.delete(file.id)