# GPT-5 Basic Test Notebook (Azure OpenAI)

This notebook provides examples of the Chat Completions API and the Responses API to quickly validate the basic functionality of GPT-5 in Azure OpenAI. It uses the Azure OpenAI endpoint, key, and deployment name configured in the environment variables.

## Requirements

- Python 3.10+
- `pip install -r GPT-5/requirements.txt`
- `GPT-5/.env` 
  - AZURE_OPENAI_ENDPOINT
  - AZURE_OPENAI_API_KEY
  - AZURE_OPENAI_API_VERSION
  - AZURE_OPENAI_DEPLOYMENT_NAME

## GPT-5 series model comparision 
Reference: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions 

| Model          | Chat Completions API | Responses API        | Structured Outputs API | Function / Tool Calling | Context Window |
|---------------|-----------------------|----------------------|--------------------------|--------------------------------------|----------------|
| **gpt-5**      | ✅                   | ✅                   | ✅                     | ✅                                   | 400,000        |
| **gpt-5-mini** | ✅                   | ✅                   | ✅                     | ✅                                   | 400,000        |
| **gpt-5-chat** | ✅                   | ✅                    | X                      | X                                     | 128,000        |

> - gpt-5 and gpt-5-mini support Chat Completions API, Responses API, Structured Outputs API, and Function/Tool Calling (including parallel calls).
> - As of September 8, 2025, gpt-5-chat (Preview) supports Chat Completions API and Responses API, but does not support Structured Outputs or Function/Tool Calling.


## Chat Completions API Test

In [None]:
import os
import json
import openai
import base64
from openai import AzureOpenAI
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, EnvironmentCredential
from azure.keyvault.secrets import SecretClient
from io import BytesIO
import gradio as gr
import time

import json
load_dotenv(override=True)

azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY", "") if len(os.getenv("AZURE_OPENAI_API_KEY", "")) > 0 else None
azure_openai_gpt5_deployment_name = "gpt-5-mini" # can be "gpt-5" or "gpt-5-chat" or "gpt-5-mini"
azure_openai_gpt41_deployment_name = "gpt-4.1"
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-ada-002")
aoai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "") if len(os.getenv("AZURE_OPENAI_API_VERSION", "")) > 0 else None

try:
    client = AzureOpenAI(
        azure_endpoint=azure_openai_endpoint,
        api_key=azure_openai_key,
        api_version=aoai_api_version
    )
except (ValueError, TypeError) as e:
    print(e)

# Performance measurement utilities
class PerformanceTimer:
    def __init__(self, test_name):
        self.test_name = test_name
        self.start_time = None
        
    def __enter__(self):
        self.start_time = time.time()
        return self
        
    def __exit__(self, exc_type, exc_val, exc_tb):
        end_time = time.time()
        self.execution_time = end_time - self.start_time
        
    def get_time(self):
        return getattr(self, 'execution_time', None)

def measure_and_compare(test_name, gpt5_func, gpt41_func):
    """Generic performance comparison for any test function"""
    print(f"\n=== {test_name} Performance Comparison ===")
    
    results = {}
    
    # Test GPT-5
    print(f"\n--- {azure_openai_gpt5_deployment_name} Results ---")
    try:
        with PerformanceTimer("GPT-5") as timer:
            gpt5_result = gpt5_func()
        results["GPT-5"] = {
            'time': timer.get_time(),
            'result': gpt5_result,
            'success': True
        }
        print(f"Execution Time: {timer.get_time():.2f}s")
    except Exception as e:
        results["GPT-5"] = {'time': None, 'result': None, 'success': False, 'error': str(e)}
        print(f"Error: {e}")
    
    # Test GPT-4.1
    print(f"\n--- {azure_openai_gpt41_deployment_name} Results ---")
    try:
        with PerformanceTimer("GPT-4.1") as timer:
            gpt41_result = gpt41_func()
        results["GPT-4.1"] = {
            'time': timer.get_time(),
            'result': gpt41_result,
            'success': True
        }
        print(f"Execution Time: {timer.get_time():.2f}s")
    except Exception as e:
        results["GPT-4.1"] = {'time': None, 'result': None, 'success': False, 'error': str(e)}
        print(f"Error: {e}")
    
    # Comparison summary
    if results["GPT-5"]['success'] and results["GPT-4.1"]['success']:
        time_diff = results["GPT-5"]['time'] - results["GPT-4.1"]['time']
        faster_model = "GPT-5" if time_diff < 0 else "GPT-4.1"
        print("\n--- Performance Summary ---")
        print(f"{azure_openai_gpt5_deployment_name}: {results['GPT-5']['time']:.2f}s")
        print(f"{azure_openai_gpt41_deployment_name}: {results['GPT-4.1']['time']:.2f}s")
        print(f"Time Difference: {abs(time_diff):.2f}s ({faster_model} faster)")
    
    return results

# Print environment variables (excluding sensitive info)
print("=== Azure OpenAI Configuration ===")
print(f"Endpoint: {azure_openai_endpoint}")
print(f"API Version: {aoai_api_version}")
print(f"GPT-5 Deployment: {azure_openai_gpt5_deployment_name}")
print(f"GPT-4.1 Deployment: {azure_openai_gpt41_deployment_name}")
print(f"Embedding Deployment: {azure_openai_embedding_deployment}")
print(f"API Key set: {azure_openai_key is not None}")
print(f"Client initialized: {client is not None}")

=== Azure OpenAI Configuration ===
Endpoint: https://hyo-ai-foundry-pjt1-resource.openai.azure.com/
API Version: 2025-03-01-preview
GPT-5 Deployment: gpt-5
GPT-4.1 Deployment: gpt-4.1
Embedding Deployment: text-embedding-ada-002
API Key set: True
Client initialized: True


## Basic Chat Test

In [43]:
# 1) Basic Chat Test with Performance Comparison
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Tell me about Microsoft"}
]

def test_basic_chat_gpt5():
    response = client.chat.completions.create(
        model=azure_openai_gpt5_deployment_name,
        messages=messages,
        reasoning_effort = "low"  # low, medium, or high
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

def test_basic_chat_gpt41():
    response = client.chat.completions.create(
        model=azure_openai_gpt41_deployment_name,
        messages=messages
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

# Performance comparison
results = measure_and_compare("Basic Chat", test_basic_chat_gpt5, test_basic_chat_gpt41)


=== Basic Chat Performance Comparison ===

--- gpt-5 Results ---


Here’s a concise overview of Microsoft (MSFT):

- What it is: One of the world’s largest technology companies, founded in 1975 by Bill Gates and Paul Allen. Headquartered in Redmond, Washington. CEO: Satya Nadella (since 2014).

- Scale: More than 220,000 employees worldwide and annual revenue well over $200 billion. Market capitalization surpassed $3 trillion in 2024. Microsoft pays a quarterly dividend and is a component of major stock indices.

- Core businesses (reporting segments):
  - Productivity and Business Processes: Microsoft 365 (Office apps and cloud services), Teams, Outlook/Exchange, SharePoint, OneDrive; LinkedIn; Dynamics 365 (ERP/CRM).
  - Intelligent Cloud: Azure (IaaS, PaaS, AI/ML services), Windows Server, SQL Server, Enterprise services, GitHub and developer offerings.
  - More Personal Computing: Windows client, Surface devices, Xbox hardware/services (including Game Pass), Bing/Ads, Microsoft Edge.

- Cloud and AI:
  - Azure is a leading global cloud platform co

## Function Calling Test

In [44]:
# 2) Function Calling Performance Comparison
def get_weather(city: str):
    """Mock weather function"""
    weather_data = {
        "Seoul": "Clear, 22°C",
        "Busan": "Cloudy, 19°C"
    }
    return weather_data.get(city, f"{city}: Weather information not available")

# function schema
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get weather for a city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"}
            },
            "required": ["city"]
        }
    }
}]

# query include function calling
messages = [
    {"role": "user", "content": "Tell me the weather in Seoul"}
]

def test_function_calling_gpt5():
    response = client.chat.completions.create(
        model=azure_openai_gpt5_deployment_name,
        messages=messages,
        tools=tools,
        tool_choice="auto"
    )
    
    # function call check and execution
    if response.choices[0].message.tool_calls:
        tool_call = response.choices[0].message.tool_calls[0]
        function_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)
        
        print(f"function calling: {function_name}({arguments})")
        
        if function_name == "get_weather":
            result = get_weather(arguments["city"])
            print(f"function result: {result}")

            # generate final response with function result
            final_messages = messages.copy()
            final_messages.append(response.choices[0].message)
            final_messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })
            
            final_response = client.chat.completions.create(
                model=azure_openai_gpt5_deployment_name,
                messages=final_messages,
                reasoning_effort = "low"  # low, medium, or high
            )

            print(f"final response: {final_response.choices[0].message.content}")
            return final_response.choices[0].message.content
    else:
        print(f"direct response: {response.choices[0].message.content}")
        return response.choices[0].message.content

def test_function_calling_gpt41():
    response = client.chat.completions.create(
        model=azure_openai_gpt41_deployment_name,
        messages=messages,
        tools=tools,
        tool_choice="auto"
    )
    
    # function call check and execution
    if response.choices[0].message.tool_calls:
        tool_call = response.choices[0].message.tool_calls[0]
        function_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)
        
        print(f"function calling: {function_name}({arguments})")
        
        if function_name == "get_weather":
            result = get_weather(arguments["city"])
            print(f"function result: {result}")

            # generate final response with function result
            final_messages = messages.copy()
            final_messages.append(response.choices[0].message)
            final_messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })
            
            final_response = client.chat.completions.create(
                model=azure_openai_gpt41_deployment_name,
                messages=final_messages
            )

            print(f"final response: {final_response.choices[0].message.content}")
            return final_response.choices[0].message.content
    else:
        print(f"direct response: {response.choices[0].message.content}")
        return response.choices[0].message.content

# Performance comparison
results = measure_and_compare("Function Calling", test_function_calling_gpt5, test_function_calling_gpt41)


=== Function Calling Performance Comparison ===

--- gpt-5 Results ---


function calling: get_weather({'city': 'Seoul'})
function result: Clear, 22°C
final response: It’s currently clear in Seoul, around 22°C. Would you like a forecast for the next few days?
Execution Time: 5.94s

--- gpt-4.1 Results ---
function calling: get_weather({'city': 'Seoul'})
function result: Clear, 22°C
final response: The weather in Seoul is currently clear with a temperature of 22°C.
Execution Time: 0.95s

--- Performance Summary ---
gpt-5: 5.94s
gpt-4.1: 0.95s
Time Difference: 4.98s (GPT-4.1 faster)


## Structured Output Test with Json Schema

In [45]:
# 3) Structured Output Performance Comparison (Chat Completions API - JSON Schema)
json_schema = {
    "name": "product_summary",
    "schema": {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "category": {"type": "string"},
            "price_krw": {"type": "number"},
            "pros": {"type": "array", "items": {"type": "string"}},
            "cons": {"type": "array", "items": {"type": "string"}},
        },
        "required": ["title", "category", "price_krw", "pros", "cons"], 
        "additionalProperties": False,
    },
    "strict": True,
}

messages = [
    {"role": "user", "content": "Pick a gaming laptop model randomly and generate a summary JSON. Include price in Korean Won with approximate values."}
]

def test_structured_output_gpt5():
    response = client.chat.completions.create(
        model=azure_openai_gpt5_deployment_name,
        messages=messages,
        response_format={"type": "json_schema", "json_schema": json_schema},
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

def test_structured_output_gpt41():
    response = client.chat.completions.create(
        model=azure_openai_gpt41_deployment_name,
        messages=messages,
        response_format={"type": "json_schema", "json_schema": json_schema},
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

# Performance comparison
results = measure_and_compare("Structured Output (JSON Schema)", test_structured_output_gpt5, test_structured_output_gpt41)


=== Structured Output (JSON Schema) Performance Comparison ===

--- gpt-5 Results ---


{"title":"Lenovo Legion Pro 5 (16-inch, 2024, RTX 4070)","category":"Gaming Laptop","price_krw":2450000,"pros":["Strong 1440p gaming performance with RTX 4070 and latest Intel CPU","Bright 16-inch QHD+ 240Hz display with good color coverage","Sturdy build and comfortable keyboard with customizable RGB","Efficient cooling maintains sustained performance under load","Comprehensive port selection including USB-C and HDMI 2.1"],"cons":["Heavier and bulkier than some competitors","Mediocre battery life for everyday use","Fans can get loud under heavy gaming loads","No SD card slot; webcam quality is average"]}
Execution Time: 22.87s

--- gpt-4.1 Results ---
{"title":"ASUS ROG Strix G16","category":"Gaming Laptop","price_krw":2100000,"pros":["High-performance Intel Core i7-13650HX processor","NVIDIA GeForce RTX 4060 graphics card","Fast 165Hz FHD display","Effective cooling system","Stylish RGB keyboard"],"cons":["Relatively bulky and heavy chassis","Below-average battery life","No Thunderbo

## Responses API (stateful chat for 30days)

In [46]:
response = client.responses.create(   
  model=azure_openai_gpt5_deployment_name, 
  input="This is a test.",
)

print(response.model_dump_json(indent=2)) 

{
  "id": "resp_68be7e5ed01c819496fda3b2d3413829098ca9ac50ce8472",
  "created_at": 1757314654.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5",
  "object": "response",
  "output": [
    {
      "id": "rs_68be7e5f654c819480e49f6ba5b4b5f0098ca9ac50ce8472",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "msg_68be7e63703c8194be231385fb834c05098ca9ac50ce8472",
      "content": [
        {
          "annotations": [],
          "text": "I’m here and ready. What would you like to test or work on?",
          "type": "output_text",
          "logprobs": null
        }
      ],
      "role": "assistant",
      "status": "completed",
      "type": "message"
    }
  ],
  "parallel_tool_calls": true,
  "temperature": 1.0,
  "tool_choice": "auto",
  "tools": [],
  "top_p": 1.0,
  "background": false,
  "max_output_tokens": null,
  "max_

## Responses API - List previous chats

In [47]:
response = client.responses.input_items.list("resp_68be60657b208194a9c1549d18e9223a0d97914cd52a20b6")

print(response.model_dump_json(indent=2))

{
  "data": [
    {
      "id": "msg_68be60657c2c8194a339915244faadfe0d97914cd52a20b6",
      "content": [
        {
          "text": "This is a test.",
          "type": "input_text"
        }
      ],
      "role": "user",
      "status": "completed",
      "type": "message"
    }
  ],
  "has_more": false,
  "object": "list",
  "first_id": "msg_68be60657c2c8194a339915244faadfe0d97914cd52a20b6",
  "last_id": "msg_68be60657c2c8194a339915244faadfe0d97914cd52a20b6"
}


## Responses API - Upload PDF and analyze

In [48]:
# Responses API PDF Upload and Analysis Performance Comparison
# Upload a file with a purpose of "assistants"
try:
    file = client.files.create(
      file=open("AI_Brief_Aug2025.pdf", "rb"), # This assumes a .pdf file in the same directory as the executing script
      purpose="assistants"
    )
    print("File uploaded successfully:")
    print(file.model_dump_json(indent=2))
    file_id = file.id
    
    def test_responses_pdf_analysis_gpt5():
        response = client.responses.create(
            model=azure_openai_gpt5_deployment_name,
            input=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_file",
                            "file_id": file_id
                        },
                        {
                            "type": "input_text",
                            "text": "Summarize this PDF in English in three sentences.",
                        },
                    ],
                },
            ]
        )
        print("--- GPT-5 Responses API PDF Analysis ---")
        print(response.model_dump_json(indent=2))
        return response.output_text if hasattr(response, 'output_text') else str(response.output)

    def test_responses_pdf_analysis_gpt41():
        response = client.responses.create(
            model=azure_openai_gpt41_deployment_name,
            input=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_file",
                            "file_id": file_id
                        },
                        {
                            "type": "input_text",
                            "text": "Summarize this PDF in English in three sentences.",
                        },
                    ],
                },
            ]
        )
        print("--- GPT-4.1 Responses API PDF Analysis ---")
        print(response.model_dump_json(indent=2))
        return response.output_text if hasattr(response, 'output_text') else str(response.output)

    # Performance comparison
    results = measure_and_compare("Responses API PDF Analysis", test_responses_pdf_analysis_gpt5, test_responses_pdf_analysis_gpt41)
    
except FileNotFoundError:
    print("PDF file 'AI_Brief_Aug2025.pdf' not found. Please ensure the file exists in the current directory.")
except Exception as e:
    print(f"Error uploading or processing PDF: {e}")

File uploaded successfully:
{
  "id": "assistant-RxiLmzhQLga3Q9wyF444f2",
  "bytes": 1165436,
  "created_at": 1757314661,
  "filename": "AI_Brief_Aug2025.pdf",
  "object": "file",
  "purpose": "assistants",
  "status": "processed",
  "expires_at": null,
  "status_details": null
}

=== Responses API PDF Analysis Performance Comparison ===

--- gpt-5 Results ---
--- GPT-5 Responses API PDF Analysis ---
{
  "id": "resp_68be7e669b148194b126ff03024d0dc608d12bbf65b0e1b4",
  "created_at": 1757314664.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5",
  "object": "response",
  "output": [
    {
      "id": "rs_68be7e69ee708194981bae4863b7d7da08d12bbf65b0e1b4",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "msg_68be7e9410d08194882845785c05f2f408d12bbf65b0e1b4",
      "content": [
        {
          "annotations": [],
          "text

## Responses API - Background Task (async)

In [49]:
response = client.responses.create(
    model = azure_openai_gpt5_deployment_name,
    input = "Write me a bedtime story (1min)",
    background = True
)

print(response.model_dump_json(indent=2))

{
  "id": "resp_68be7ea578f88195a24185c89125699f0d9b46b05a1c0d6e",
  "created_at": 1757314725.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5",
  "object": "response",
  "output": [],
  "parallel_tool_calls": true,
  "temperature": 1.0,
  "tool_choice": "auto",
  "tools": [],
  "top_p": 1.0,
  "background": true,
  "max_output_tokens": null,
  "max_tool_calls": null,
  "previous_response_id": null,
  "prompt": null,
  "prompt_cache_key": null,
  "reasoning": {
    "effort": "medium",
    "generate_summary": null,
    "summary": null
  },
  "safety_identifier": null,
  "service_tier": "auto",
  "status": "queued",
  "text": {
    "format": {
      "type": "text"
    },
    "verbosity": null
  },
  "top_logprobs": null,
  "truncation": "disabled",
  "usage": null,
  "user": null,
  "content_filters": null,
  "store": true
}


In [50]:
from time import sleep

while response.status in {"queued", "in_progress"}:
    print(f"Current status: {response.status}")
    sleep(2)
    response = client.responses.retrieve(response.id)

print(f"Final status: {response.status}\nOutput:\n{response.output_text}")

Current status: queued


Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Current status: queued
Final status: completed
Output:
Each evening, when the sky turns the color of warm milk, a little moth named Luma lights her thimble-sized lantern. Inside it glows a drop of dew and a thread of moonlight, just enough to guide the sleepy world toward rest.

She floats over the meadow where a young fawn’s knees still feel springy. Luma hums a soft, leaf-hush lullaby, and the lantern gathers the last bright bounce from the fawn’s eyes. The fawn sighs, folds its legs, and dreams of clover.

By the stones, the brook keeps chattering about everywhere it has been. Luma dips the lantern close; the water takes 

## Responses API - Function Calling Test

In [51]:
# Responses API Function Calling Performance Comparison
tools_responses = [  
    {  
        "type": "function",  
        "name": "get_weather",  
        "description": "Get the weather for a location",  
        "parameters": {  
            "type": "object",  
            "properties": {  
                "location": {"type": "string"},  
            },  
            "required": ["location"],  
        },  
    }  
]

def test_responses_function_calling_gpt5():
    response = client.responses.create(  
        model=azure_openai_gpt5_deployment_name, 
        tools=tools_responses,
        input=[{"role": "user", "content": "What's the weather in San Francisco?"}],  
    )  
    
    print("--- GPT-5 Responses API Function Calling ---")
    print(response.model_dump_json(indent=2))
    
    # To provide output to tools, add a response for each tool call to an array passed  
    # to the next response as `input`  
    input_data = []  
    for output in response.output:  
        if output.type == "function_call":  
            match output.name:  
                case "get_weather":  
                    input_data.append(  
                        {  
                            "type": "function_call_output",  
                            "call_id": output.call_id,  
                            "output": '{"temperature": "70 degrees"}',  
                        }  
                    )  
                case _:  
                    raise ValueError(f"Unknown function call: {output.name}")  
      
    second_response = client.responses.create(  
        model=azure_openai_gpt5_deployment_name,  
        previous_response_id=response.id,  
        input=input_data  
    )  
    
    print("--- GPT-5 Second Response ---")
    print(second_response.model_dump_json(indent=2))
    return second_response.output_text if hasattr(second_response, 'output_text') else str(second_response.output)

def test_responses_function_calling_gpt41():
    response = client.responses.create(  
        model=azure_openai_gpt41_deployment_name, 
        tools=tools_responses,
        input=[{"role": "user", "content": "What's the weather in San Francisco?"}],  
    )  
    
    print("--- GPT-4.1 Responses API Function Calling ---")
    print(response.model_dump_json(indent=2))
    
    # To provide output to tools, add a response for each tool call to an array passed  
    # to the next response as `input`  
    input_data = []  
    for output in response.output:  
        if output.type == "function_call":  
            match output.name:  
                case "get_weather":  
                    input_data.append(  
                        {  
                            "type": "function_call_output",  
                            "call_id": output.call_id,  
                            "output": '{"temperature": "70 degrees"}',  
                        }  
                    )  
                case _:  
                    raise ValueError(f"Unknown function call: {output.name}")  
      
    second_response = client.responses.create(  
        model=azure_openai_gpt41_deployment_name,  
        previous_response_id=response.id,  
        input=input_data  
    )  
    
    print("--- GPT-4.1 Second Response ---")
    print(second_response.model_dump_json(indent=2))
    return second_response.output_text if hasattr(second_response, 'output_text') else str(second_response.output)

# Performance comparison
results = measure_and_compare("Responses API Function Calling", test_responses_function_calling_gpt5, test_responses_function_calling_gpt41) 


=== Responses API Function Calling Performance Comparison ===

--- gpt-5 Results ---


--- GPT-5 Responses API Function Calling ---
{
  "id": "resp_68be7ece91e881909ac86fca4027a6b60ec983fd2c219773",
  "created_at": 1757314766.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5",
  "object": "response",
  "output": [
    {
      "id": "rs_68be7ecf18b88190bdf782e2b3787c5b0ec983fd2c219773",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "arguments": "{\"location\":\"San Francisco\"}",
      "call_id": "call_B83Ij3z0huV1QQyIX9xhDLBN",
      "name": "get_weather",
      "type": "function_call",
      "id": "fc_68be7ed1b1d48190bb6dd2f0a2b58c8a0ec983fd2c219773",
      "status": "completed"
    }
  ],
  "parallel_tool_calls": true,
  "temperature": 1.0,
  "tool_choice": "auto",
  "tools": [
    {
      "name": "get_weather",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
       

## Responses API - Structured Output Test with Json Schema

In [52]:
# 5) Structured Output (Responses API - JSON Schema)
# json_schema = {
#     "name": "product_summary",
#     "schema": {
#         "type": "object",
#         "properties": {
#             "title": {"type": "string"},
#             "category": {"type": "string"},
#             "price_krw": {"type": "number"},
#             "pros": {"type": "array", "items": {"type": "string"}},
#             "cons": {"type": "array", "items": {"type": "string"}},
#         },
#         "required": ["title", "category", "price_krw"],
#         "additionalProperties": False,
#     },
#     "strict": True,
# }

# inputs = [
#     {"role": "user", "content": "Pick a gaming laptop model randomly and generate a summary JSON. Include price in Korean Won with approximate values."}
# ]

# response = client.responses.create(
#     model=azure_openai_gpt5_deployment_name,
#     input=inputs,
#     response_format={"type": "json_schema", "json_schema": json_schema},
# )

# print(response.model_dump_json(indent=2)) 

print("Responses API doen't support response_format.")

Responses API doen't support response_format.


## Responses API - Streaming output Test

In [53]:
# 4) Streaming Output Performance Comparison
def test_streaming_gpt5():
    try:
        print("--- GPT-5 Streaming ---")
        stream = client.chat.completions.create(
            model=azure_openai_gpt5_deployment_name,
            messages=[{"role": "user", "content": "Tell me a five-line joke."}],
            stream=True,
        )
        
        content = ""
        for chunk in stream:
            if chunk.choices:
                delta = chunk.choices[0].delta.content
                if delta:
                    print(delta, end="", flush=True)
                    content += delta
                    time.sleep(0.05)
        print()
        return content
        
    except Exception as e:
        print(f"Streaming error: {e}")
        return None

def test_streaming_gpt41():
    try:
        print("--- GPT-4.1 Streaming ---")
        stream = client.chat.completions.create(
            model=azure_openai_gpt41_deployment_name,
            messages=[{"role": "user", "content": "Tell me a five-line joke."}],
            stream=True,
        )
        
        content = ""
        for chunk in stream:
            if chunk.choices:
                delta = chunk.choices[0].delta.content
                if delta:
                    print(delta, end="", flush=True)
                    content += delta
                    time.sleep(0.05)
        print()
        return content
        
    except Exception as e:
        print(f"Streaming error: {e}")
        return None

# Performance comparison
results = measure_and_compare("Streaming Output", test_streaming_gpt5, test_streaming_gpt41)


=== Streaming Output Performance Comparison ===

--- gpt-5 Results ---
--- GPT-5 Streaming ---


I told my smart fridge to make ice, and it sent me a calendar invite instead.
The subject said, "Let's circle back on your chilling needs."
I replied, "Just freeze water."
It emailed, "Per my last message, please find the attached ice forecast."
Now my freezer is full of PDFs.
Execution Time: 17.57s

--- gpt-4.1 Results ---
--- GPT-4.1 Streaming ---
Why did the tomato turn red?  
It saw the salad dressing!  
The cucumber was so embarrassed,  
The carrot turned beet red,  
And the lettuce just leafed!
Execution Time: 2.52s

--- Performance Summary ---
gpt-5: 17.57s
gpt-4.1: 2.52s
Time Difference: 15.05s (GPT-4.1 faster)


## Responses API - Code Interpreter Test

In [54]:
# Responses API Code Interpreter Performance Comparison
instructions = "You are a personal math tutor. When asked a math question, write and run code using the python tool to answer the question."

tools_code_interpreter = [
    {
        "type": "code_interpreter",
        "container": {"type": "auto"}
    }
]

math_question = "I need to solve the equation 3x + 11 = 14. Can you help me?"

def test_responses_code_interpreter_gpt5():
    response = client.responses.create(
        model=azure_openai_gpt5_deployment_name,
        tools=tools_code_interpreter,
        instructions=instructions,
        input=math_question,
    )
    print("--- GPT-5 Responses API Code Interpreter ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

def test_responses_code_interpreter_gpt41():
    response = client.responses.create(
        model=azure_openai_gpt41_deployment_name,
        tools=tools_code_interpreter,
        instructions=instructions,
        input=math_question,
    )
    print("--- GPT-4.1 Responses API Code Interpreter ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

# Performance comparison
results = measure_and_compare("Responses API Code Interpreter", test_responses_code_interpreter_gpt5, test_responses_code_interpreter_gpt41) 


=== Responses API Code Interpreter Performance Comparison ===

--- gpt-5 Results ---


--- GPT-5 Responses API Code Interpreter ---
{
  "id": "resp_68be7ee943f081959fa0f78de2adb16e08b4776c58f6eeef",
  "created_at": 1757314793.0,
  "error": null,
  "incomplete_details": null,
  "instructions": "You are a personal math tutor. When asked a math question, write and run code using the python tool to answer the question.",
  "metadata": {},
  "model": "gpt-5",
  "object": "response",
  "output": [
    {
      "id": "rs_68be7eec19cc81959cf36e1c2d42fb8908b4776c58f6eeef",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "ci_68be7eeff2a0819585bedffb0a7f911908b4776c58f6eeef",
      "code": "# Solve 3x + 11 = 14\r\nx = (14 - 11) / 3\r\nx",
      "container_id": "cntr_68be7ee9f3ac8190a78cd3d9c3ec72eb0d07b8dd4686997a",
      "outputs": null,
      "status": "completed",
      "type": "code_interpreter_call"
    },
    {
      "id": "msg_68be7ef1b4808195a4e48bd24e078cea08b4776c58f6eeef"

## Responses API - Vision Support Test

In [55]:
# Responses API Vision Support Performance Comparison
# Note: Requires the deployed model to support vision and your API version to allow image input.
image_url = os.getenv("TEST_IMAGE_URL", "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Fronalpstock_big.jpg/640px-Fronalpstock_big.jpg")

inputs = [
    {"role": "user", "content": [
        {"type": "input_text", "text": "Summarize the main features of this image in three lines in English."},
        {"type": "input_image", "image_url": image_url},
    ]}
]

def test_responses_vision_gpt5():
    response = client.responses.create(
        model=azure_openai_gpt5_deployment_name, 
        input=inputs
    )
    print("--- GPT-5 Responses API Vision ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

def test_responses_vision_gpt41():
    response = client.responses.create(
        model=azure_openai_gpt41_deployment_name, 
        input=inputs
    )
    print("--- GPT-4.1 Responses API Vision ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

# Performance comparison
results = measure_and_compare("Responses API Vision Support", test_responses_vision_gpt5, test_responses_vision_gpt41)


=== Responses API Vision Support Performance Comparison ===

--- gpt-5 Results ---


--- GPT-5 Responses API Vision ---
{
  "id": "resp_68be7ef77bec81958ca597b6aa45acbb05b0a0d8fd8a6c2b",
  "created_at": 1757314807.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5",
  "object": "response",
  "output": [
    {
      "id": "rs_68be7ef9ad408195a313635f2050be1105b0a0d8fd8a6c2b",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "msg_68be7efdac04819584088932975b2e5905b0a0d8fd8a6c2b",
      "content": [
        {
          "annotations": [],
          "text": "- Panoramic view of rugged, green mountains with steep cliffs and forested slopes.  \n- Rolling meadows and small settlements scattered across the valleys below.  \n- Clear blue sky with fluffy clouds above an alpine landscape.",
          "type": "output_text",
          "logprobs": null
        }
      ],
      "role": "assistant",
      "status": "completed",
