In [1]:
# Import Library
from openai import OpenAI
import os
import requests
from dotenv import load_dotenv

# Load environment variables via python-dotenv, then build the API client
# from whatever OPENAI_API_KEY resolves to in the process environment.
load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [3]:
# Step 1: Check existing containers and find active ones
# Requests the container listing through the SDK client and prints the
# returned page object as-is; the result stays in `containers` for later cells.
print("Checking existing containers...")
containers = client.containers.list()
print(containers)


Checking existing containers...
SyncCursorPage[ContainerListResponse](data=[ContainerListResponse(id='cntr_68314a508e8081919e5d1051f3fcbe27', created_at=1748060752, name='data-analysis-container', object='container', status='running', expires_after=ExpiresAfter(anchor='last_active_at', minutes=20), last_active_at=1748060947), ContainerListResponse(id='cntr_6831312c4e608191b358bef087b081be', created_at=1748054316, name='test-container', object='container', status='expired', expires_after=ExpiresAfter(anchor='last_active_at', minutes=20), last_active_at=1748060947), ContainerListResponse(id='cntr_68312ae94c608191a7ae842feb3da6cd', created_at=1748052713, name='test-container', object='container', status='expired', expires_after=ExpiresAfter(anchor='last_active_at', minutes=20), last_active_at=1748060947), ContainerListResponse(id='cntr_683129f7152881918ba879d28b5cfca7', created_at=1748052471, name='auto', object='container', status='expired', expires_after=ExpiresAfter(anchor='last_active

In [4]:
# Find first active (running) container, or create one if none exists.
#
# Iterate over the page object itself rather than `containers.data`: `.data`
# holds only the first page of results, while iterating the page lets the SDK
# fetch follow-up pages on demand. Without this, a running container that is
# not on the first page is missed and a new container is created needlessly.
active_container = None
for container in containers:
    print(f"Container {container.id}: {container.status}")
    if container.status == "running":
        active_container = container
        break

# Result of this cell: `container_id` names the container used by later cells.
if active_container is not None:
    container_id = active_container.id
    print(f"Using active container: {container_id}")
else:
    print("No active containers found. Creating new one...")
    container = client.containers.create(name="data-analysis-container")
    container_id = container.id
    print(f"Created new container: {container_id}")

Container cntr_68314a508e8081919e5d1051f3fcbe27: running
Using active container: cntr_68314a508e8081919e5d1051f3fcbe27


In [5]:
# Step 2: Upload file
# POST the local CSV to the container's files endpoint; the JSON reply carries
# the path under which the file is visible inside the container.
url = f"https://api.openai.com/v1/containers/{container_id}/files"
headers = {"Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"}

# Open the CSV in a context manager so the handle is closed as soon as the
# upload finishes instead of leaking for the lifetime of the kernel.
with open('data.csv', 'rb') as csv_file:
    files = {'file': ('data.csv', csv_file)}
    upload_response = requests.post(url, headers=headers, files=files)

# Fail loudly on a rejected upload (bad key, expired container, ...) rather
# than surfacing later as an opaque KeyError on 'path'.
upload_response.raise_for_status()
file_path = upload_response.json()['path']
print(f"File uploaded: {file_path}")

# Step 3: Analyze with code interpreter
# Point the code-interpreter tool at the container selected above and force a
# tool call so the model answers by running code against the uploaded file.
row_count_prompt = f"Analyze CSV at '{file_path}'. How many rows?"
code_interpreter_tool = {"type": "code_interpreter", "container": container_id}

response = client.responses.create(
    input=row_count_prompt,
    model="gpt-4.1-mini",
    tool_choice="required",
    tools=[code_interpreter_tool],
)


File uploaded: /mnt/data/56942508662ee703512c82c531286d53-data.csv


In [6]:
# Results
# Select code-interpreter calls by their `type` instead of assuming the call is
# always `response.output[0]`; other item kinds can precede it, and indexing
# blindly would then raise AttributeError on `.code`.
code_calls = [
    item for item in response.output
    if getattr(item, "type", None) == "code_interpreter_call"
]

print(f"Answer: {response.output_text}")
if code_calls:
    # Show every snippet the model ran, in execution order.
    executed_code = "\n\n".join(call.code or "" for call in code_calls)
    print(f"Code executed:\n{executed_code}")
else:
    print("Code executed:\n(no code interpreter call found in the response)")
print(f"Tokens used: {response.usage.total_tokens}")

Answer: The CSV file contains 1000 rows. Would you like me to perform any other analysis on this data?
Code executed:
import pandas as pd

# Load the CSV file
file_path = '/mnt/data/56942508662ee703512c82c531286d53-data.csv'
data = pd.read_csv(file_path)

# Get number of rows
num_rows = data.shape[0]
num_rows
Tokens used: 510


# Handling charts in an OpenAI container

In [11]:
# Ask the code interpreter to chart the uploaded CSV.
# The prompt is assembled separately so the request call stays compact; the
# text sent to the model is unchanged.
chart_prompt = (
    f"Analyze CSV at '{file_path}'. Plot a bar chart showing user breakdown by gender.\n"
    "Rotate x-axis labels by 45 degrees for readability. Increase figure width if needed.\n"
    "Add count labels on top of each bar."
)
code_interpreter_tool = {"type": "code_interpreter", "container": container_id}

response = client.responses.create(
    input=chart_prompt,
    model="gpt-4.1-mini",
    tool_choice="required",
    tools=[code_interpreter_tool],
)


In [12]:
# Extract file information from the response and download each generated file.
#
# Walk output items -> content parts -> annotations and keep the
# 'container_file_citation' entries. `getattr(..., None) or []` covers both
# items that lack the attribute and items where it is present but None, which
# would otherwise make the inner loops raise TypeError.
citations = [
    annotation
    for output in response.output
    for content in (getattr(output, 'content', None) or [])
    for annotation in (getattr(content, 'annotations', None) or [])
    if annotation.type == 'container_file_citation'
]

# The auth header is identical for every download, so build it once.
headers = {"Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"}

if not citations:
    print("No generated files were cited in the response.")

for index, annotation in enumerate(citations, start=1):
    file_id = annotation.file_id
    filename = annotation.filename
    print(f"Generated file: {filename} (ID: {file_id})")

    # Download the image file
    download_url = f"https://api.openai.com/v1/containers/{container_id}/files/{file_id}/content"
    file_response = requests.get(download_url, headers=headers)
    if file_response.status_code == 200:
        # Save the image locally. The first file keeps the original name;
        # later ones get a numeric suffix so they no longer overwrite it.
        # The extension follows the generated file, defaulting to .png.
        extension = os.path.splitext(filename or "")[1] or ".png"
        suffix = "" if index == 1 else f"_{index}"
        local_filename = f"gender_breakdown_chart{suffix}{extension}"
        with open(local_filename, 'wb') as f:
            f.write(file_response.content)
        print(f"Image saved as: {local_filename}")
    else:
        print(f"Failed to download file: {file_response.status_code}")

# Also display the response for reference
print(f"\nAnswer: {response.output_text}")
print(f"Tokens used: {response.usage.total_tokens}")

Generated file: cfile_68314d0f620481919d557c0e09a639ef.png (ID: cfile_68314d0f620481919d557c0e09a639ef)
Image saved as: gender_breakdown_chart.png

Answer: The bar chart shows the user breakdown by gender from the provided CSV file. The x-axis labels have been rotated by 45 degrees for better readability, and the figure width was increased to accommodate the labels. Count labels are displayed on top of each bar for clarity.

If you need any further analysis or adjustments, please let me know!
Tokens used: 3952


In [10]:
# Display the raw `response` object as the cell result, for inspection.
# NOTE(review): this cell's execution count (10) is lower than that of the
# cells above it, so its saved output presumably comes from an earlier run —
# re-run the notebook top to bottom to refresh it.
response

Response(id='resp_68314b9a4bc08191990e7d962070b0e20cdb1ad7bd57b962', created_at=1748061082.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4.1-mini-2025-04-14', object='response', output=[ResponseCodeInterpreterToolCall(id='ci_68314b9afc6c81919920e1e5ba0a1e300cdb1ad7bd57b962', code="import pandas as pd\n\n# Load the CSV file to examine its content\nfile_path = '/mnt/data/56942508662ee703512c82c531286d53-data.csv'\ndata = pd.read_csv(file_path)\n\n# Display the first few rows and columns to understand its structure\ndata.head(), data.columns", results=None, status='completed', type='code_interpreter_call', container_id='cntr_68314a508e8081919e5d1051f3fcbe27', outputs=None), ResponseOutputMessage(id='msg_68314b9d19448191bbae006e2a38f10f0cdb1ad7bd57b962', content=[ResponseOutputText(annotations=[], text='The dataset contains the following columns: id, first_name, last_name, email, gender, and ip_address. I will now plot a chart to show the user breakdown