# GPT-5 Basic Test Notebook (Azure OpenAI)

This notebook provides examples of the Chat Completions API and the Responses API to quickly validate the basic functionality of GPT-5 in Azure OpenAI. It uses the Azure OpenAI endpoint, key, and deployment name configured in the environment variables.

## Requirements

- Python 3.10+
- `pip install -r GPT-5/requirements.txt`
- `GPT-5/.env` 
  - AZURE_OPENAI_ENDPOINT
  - AZURE_OPENAI_API_KEY
  - AZURE_OPENAI_API_VERSION
  - AZURE_OPENAI_DEPLOYMENT_NAME

## GPT-5 series model comparision 
Reference: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions 

| Model           | Reasoning Support       | Chat Completions API | Responses API | Structured Outputs | Function/Tool Calling     | Context Window (tokens)     | Price per 1M tokens (input/output) |
|-----------------|-------------------------|----------------------|---------------|--------------------|---------------------------|-----------------------------|------------------------------------|
| **GPT-5**       | ✅​                     | ✅​                  | ✅​           | ✅​                | ✅​                        | 400,000 (272k in + 128k out) | $1.25 / $10                        |
| **GPT-5-mini**  | ✅​                     | ✅​                  | ✅​           | ✅​                | ✅​                       | 400,000 (272k in + 128k out) | $0.25 / $2.00                      |
| **GPT-5-nano**  | ✅​                     | ✅​                  | ✅​           | ✅​                | ✅​                       | 400,000 (272k in + 128k out) | $0.05 / $0.40                      |
| **GPT-5-chat**  | 〰️ (undocumented)      | ✅​                  | ✅​           | 〰️ (undocumented)   | 〰️ (undocumented)          | 128,000 (16k max output)     | $1.25 / $10                        |
| **GPT-4.1**     | ✅​                     | ✅​                  | ✅​           | ✅​                | ✅​                       | 1,000,000                   | $2.00 / $8.00                      |
| **GPT-4.1-mini**| ✅​                     | ✅​                  | ✅​           | ✅​                | ✅​                       | 1,000,000                   | $0.40 / $1.60                      |
| **GPT-4.1-nano**| ✅​                     | ✅​                  | ✅​           | ✅​                | ✅​                       | 1,000,000                   | $0.10 / $0.40                      |
| **GPT-4o**      | ✅​                     | ✅​                  | 〰️ (undocumented) | ✅​           | ✅​                        | 128,000 (+16k out)          | $5.00 / $15.00                     |


> - gpt-5 series models support Reasoning, Chat Completions API, Responses API, Structured Outputs API, and Function/Tool Calling (including parallel calls).
> - As of September 8, 2025, gpt-5-chat (Preview) supports Chat Completions API and Responses API, but does not support Reasoning, Structured Outputs or Function/Tool Calling.


## Chat Completions API Test

In [1]:
import os
import json
import openai
import base64
from openai import AzureOpenAI
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, EnvironmentCredential
from azure.keyvault.secrets import SecretClient
from io import BytesIO
import gradio as gr
import time

import json
load_dotenv(override=True)

azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY", "") if len(os.getenv("AZURE_OPENAI_API_KEY", "")) > 0 else None
azure_openai_gpt5_deployment_name = "gpt-5-mini" # can be "gpt-5" or "gpt-5-chat" or "gpt-5-mini"
azure_openai_gpt41_deployment_name = "gpt-4.1"  # can be "gpt-4.1" or "gpt-4.1-mini" or "gpt-4o"
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-ada-002")
aoai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "") if len(os.getenv("AZURE_OPENAI_API_VERSION", "")) > 0 else None

try:
    client = AzureOpenAI(
        azure_endpoint=azure_openai_endpoint,
        api_key=azure_openai_key,
        api_version=aoai_api_version
    )
except (ValueError, TypeError) as e:
    print(e)

# Performance measurement utilities
class PerformanceTimer:
    def __init__(self, test_name):
        self.test_name = test_name
        self.start_time = None
        
    def __enter__(self):
        self.start_time = time.time()
        return self
        
    def __exit__(self, exc_type, exc_val, exc_tb):
        end_time = time.time()
        self.execution_time = end_time - self.start_time
        
    def get_time(self):
        return getattr(self, 'execution_time', None)

def measure_and_compare(test_name, gpt5_func, gpt41_func):
    """Generic performance comparison for any test function"""
    print(f"\n=== {test_name} Performance Comparison ===")
    
    results = {}
    
    # Test GPT-5
    print(f"\n--- {azure_openai_gpt5_deployment_name} Results ---")
    try:
        with PerformanceTimer("GPT-5") as timer:
            gpt5_result = gpt5_func()
        results["GPT-5"] = {
            'time': timer.get_time(),
            'result': gpt5_result,
            'success': True
        }
        print(f"Execution Time: {timer.get_time():.2f}s")
    except Exception as e:
        results["GPT-5"] = {'time': None, 'result': None, 'success': False, 'error': str(e)}
        print(f"Error: {e}")
    
    # Test GPT-4.1
    print(f"\n--- {azure_openai_gpt41_deployment_name} Results ---")
    try:
        with PerformanceTimer("GPT-4.1") as timer:
            gpt41_result = gpt41_func()
        results["GPT-4.1"] = {
            'time': timer.get_time(),
            'result': gpt41_result,
            'success': True
        }
        print(f"Execution Time: {timer.get_time():.2f}s")
    except Exception as e:
        results["GPT-4.1"] = {'time': None, 'result': None, 'success': False, 'error': str(e)}
        print(f"Error: {e}")
    
    # Comparison summary
    if results["GPT-5"]['success'] and results["GPT-4.1"]['success']:
        time_diff = results["GPT-5"]['time'] - results["GPT-4.1"]['time']
        faster_model = "GPT-5" if time_diff < 0 else "GPT-4.1"
        print("\n--- Performance Summary ---")
        print(f"{azure_openai_gpt5_deployment_name}: {results['GPT-5']['time']:.2f}s")
        print(f"{azure_openai_gpt41_deployment_name}: {results['GPT-4.1']['time']:.2f}s")
        print(f"Time Difference: {abs(time_diff):.2f}s ({faster_model} faster)")
    
    return results

# Print environment variables (excluding sensitive info)
print("=== Azure OpenAI Configuration ===")
print(f"Endpoint: {azure_openai_endpoint}")
print(f"API Version: {aoai_api_version}")
print(f"GPT-5 Deployment: {azure_openai_gpt5_deployment_name}")
print(f"GPT-4.1 Deployment: {azure_openai_gpt41_deployment_name}")
print(f"Embedding Deployment: {azure_openai_embedding_deployment}")
print(f"API Key set: {azure_openai_key is not None}")
print(f"Client initialized: {client is not None}")

  from .autonotebook import tqdm as notebook_tqdm


=== Azure OpenAI Configuration ===
Endpoint: https://hyo-ai-foundry-pjt1-resource.openai.azure.com/
API Version: 2025-03-01-preview
GPT-5 Deployment: gpt-5-mini
GPT-4.1 Deployment: gpt-4.1
Embedding Deployment: text-embedding-ada-002
API Key set: True
Client initialized: True


## Basic Chat Test

In [2]:
# 1) Basic Chat Test with Performance Comparison
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Tell me about Microsoft"}
]

def test_basic_chat_gpt5():
    response = client.chat.completions.create(
        model=azure_openai_gpt5_deployment_name,
        messages=messages,
        reasoning_effort = "low"  # low, medium, or high
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

def test_basic_chat_gpt41():
    response = client.chat.completions.create(
        model=azure_openai_gpt41_deployment_name,
        messages=messages
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

# Performance comparison
results = measure_and_compare("Basic Chat", test_basic_chat_gpt5, test_basic_chat_gpt41)


=== Basic Chat Performance Comparison ===

--- gpt-5-mini Results ---
Microsoft Corporation is a major U.S. technology company founded in 1975 by Bill Gates and Paul Allen and headquartered in Redmond, Washington. Over nearly five decades it has grown from a software-startup to one of the world’s largest and most influential tech firms.

Key areas and products
- Operating systems and PC software: Windows (desktop and server) and Microsoft 365 (Office apps like Word, Excel, Outlook).
- Cloud computing and enterprise services: Azure (cloud platform and services), Windows Server, SQL Server, and enterprise management tools.
- Productivity and business services: Microsoft 365, Teams (collaboration/communications), LinkedIn (professional social network).
- Developer and platform tools: Visual Studio, GitHub (acquired 2018), developer frameworks and APIs.
- Devices and consumer hardware: Surface PCs and tablets, Xbox gaming consoles and services (including Xbox Game Pass).
- AI and emerging

## Function Calling Test

In [3]:
# 2) Function Calling Performance Comparison
def get_weather(city: str):
    """Mock weather function"""
    weather_data = {
        "Seoul": "Clear, 22°C",
        "Busan": "Cloudy, 19°C"
    }
    return weather_data.get(city, f"{city}: Weather information not available")

# function schema
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get weather for a city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"}
            },
            "required": ["city"]
        }
    }
}]

# query include function calling
messages = [
    {"role": "user", "content": "Tell me the weather in Seoul"}
]

def test_function_calling_gpt5():
    response = client.chat.completions.create(
        model=azure_openai_gpt5_deployment_name,
        messages=messages,
        tools=tools,
        tool_choice="auto"
    )
    
    # function call check and execution
    if response.choices[0].message.tool_calls:
        tool_call = response.choices[0].message.tool_calls[0]
        function_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)
        
        print(f"function calling: {function_name}({arguments})")
        
        if function_name == "get_weather":
            result = get_weather(arguments["city"])
            print(f"function result: {result}")

            # generate final response with function result
            final_messages = messages.copy()
            final_messages.append(response.choices[0].message)
            final_messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })
            
            final_response = client.chat.completions.create(
                model=azure_openai_gpt5_deployment_name,
                messages=final_messages,
                reasoning_effort = "low"  # low, medium, or high
            )

            print(f"final response: {final_response.choices[0].message.content}")
            return final_response.choices[0].message.content
    else:
        print(f"direct response: {response.choices[0].message.content}")
        return response.choices[0].message.content

def test_function_calling_gpt41():
    response = client.chat.completions.create(
        model=azure_openai_gpt41_deployment_name,
        messages=messages,
        tools=tools,
        tool_choice="auto"
    )
    
    # function call check and execution
    if response.choices[0].message.tool_calls:
        tool_call = response.choices[0].message.tool_calls[0]
        function_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)
        
        print(f"function calling: {function_name}({arguments})")
        
        if function_name == "get_weather":
            result = get_weather(arguments["city"])
            print(f"function result: {result}")

            # generate final response with function result
            final_messages = messages.copy()
            final_messages.append(response.choices[0].message)
            final_messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })
            
            final_response = client.chat.completions.create(
                model=azure_openai_gpt41_deployment_name,
                messages=final_messages
            )

            print(f"final response: {final_response.choices[0].message.content}")
            return final_response.choices[0].message.content
    else:
        print(f"direct response: {response.choices[0].message.content}")
        return response.choices[0].message.content

# Performance comparison
results = measure_and_compare("Function Calling", test_function_calling_gpt5, test_function_calling_gpt41)


=== Function Calling Performance Comparison ===

--- gpt-5-mini Results ---


function calling: get_weather({'city': 'Seoul'})
function result: Clear, 22°C
final response: Right now in Seoul it's clear and 22°C. Light jacket or long-sleeve should be fine. Would you like an hourly forecast or the next few days?
Execution Time: 3.34s

--- gpt-4.1 Results ---
function calling: get_weather({'city': 'Seoul'})
function result: Clear, 22°C
final response: The current weather in Seoul is clear with a temperature of 22°C.
Execution Time: 1.30s

--- Performance Summary ---
gpt-5-mini: 3.34s
gpt-4.1: 1.30s
Time Difference: 2.04s (GPT-4.1 faster)


## Structured Output Test with Json Schema

In [4]:
# 3) Structured Output Performance Comparison (Chat Completions API - JSON Schema)
json_schema = {
    "name": "product_summary",
    "schema": {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "category": {"type": "string"},
            "price_krw": {"type": "number"},
            "pros": {"type": "array", "items": {"type": "string"}},
            "cons": {"type": "array", "items": {"type": "string"}},
        },
        "required": ["title", "category", "price_krw", "pros", "cons"], 
        "additionalProperties": False,
    },
    "strict": True,
}

messages = [
    {"role": "user", "content": "Pick a gaming laptop model randomly and generate a summary JSON. Include price in Korean Won with approximate values."}
]

def test_structured_output_gpt5():
    response = client.chat.completions.create(
        model=azure_openai_gpt5_deployment_name,
        messages=messages,
        response_format={"type": "json_schema", "json_schema": json_schema},
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

def test_structured_output_gpt41():
    response = client.chat.completions.create(
        model=azure_openai_gpt41_deployment_name,
        messages=messages,
        response_format={"type": "json_schema", "json_schema": json_schema},
    )
    print(response.choices[0].message.content)
    return response.choices[0].message.content

# Performance comparison
results = measure_and_compare("Structured Output (JSON Schema)", test_structured_output_gpt5, test_structured_output_gpt41)


=== Structured Output (JSON Schema) Performance Comparison ===

--- gpt-5-mini Results ---


{"title":"Lenovo Legion 5 Pro (16-inch)","category":"Gaming Laptop","price_krw":1950000,"pros":["High-performance AMD CPU options and NVIDIA GPU configurations for strong gaming and content creation performance","16:10 QHD (2560×1600) high-refresh display option (120–165Hz+) with good color and detail","Robust cooling system that sustains boost clocks under load","Comfortable, responsive keyboard and solid build quality for the price","Generally good value compared to competing gaming laptops in the same class"],"cons":["Relatively heavy and not very thin — less portable than ultrabooks","Battery life is limited under gaming or heavy workloads","Fans can get loud under sustained load","Some configurations use a mostly plastic chassis and limited upgrade options on certain SKUs"]}
Execution Time: 5.27s

--- gpt-4.1 Results ---
{"title":"ASUS ROG Strix G16 (2023)","category":"Gaming Laptop","price_krw":2000000,"pros":["Powerful Intel Core i7-13650HX processor","RTX 4060 GPU for high-perf

## Responses API (stateful chat for 30days)

In [5]:
response = client.responses.create(   
  model=azure_openai_gpt5_deployment_name, 
  input="This is a test.",
)

print(response.model_dump_json(indent=2)) 

{
  "id": "resp_68be9808e27c8195b87f4a524b6706bd08bd28640864d135",
  "created_at": 1757321224.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5-mini",
  "object": "response",
  "output": [
    {
      "id": "rs_68be98092b18819588204d8e5b1c2cb708bd28640864d135",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "msg_68be9809aa588195b6986c763239378c08bd28640864d135",
      "content": [
        {
          "annotations": [],
          "text": "Got it — test received. How can I help you today?",
          "type": "output_text",
          "logprobs": null
        }
      ],
      "role": "assistant",
      "status": "completed",
      "type": "message"
    }
  ],
  "parallel_tool_calls": true,
  "temperature": 1.0,
  "tool_choice": "auto",
  "tools": [],
  "top_p": 1.0,
  "background": false,
  "max_output_tokens": null,
  "max_tool_

## Responses API - List previous chats

In [6]:
response = client.responses.input_items.list("resp_68be60657b208194a9c1549d18e9223a0d97914cd52a20b6")

print(response.model_dump_json(indent=2))

{
  "data": [
    {
      "id": "msg_68be60657c2c8194a339915244faadfe0d97914cd52a20b6",
      "content": [
        {
          "text": "This is a test.",
          "type": "input_text"
        }
      ],
      "role": "user",
      "status": "completed",
      "type": "message"
    }
  ],
  "has_more": false,
  "object": "list",
  "first_id": "msg_68be60657c2c8194a339915244faadfe0d97914cd52a20b6",
  "last_id": "msg_68be60657c2c8194a339915244faadfe0d97914cd52a20b6"
}


## Responses API - Upload PDF and analyze

In [7]:
# Responses API PDF Upload and Analysis Performance Comparison
# Upload a file with a purpose of "assistants"
try:
    file = client.files.create(
      file=open("AI_Brief_Aug2025.pdf", "rb"), # This assumes a .pdf file in the same directory as the executing script
      purpose="assistants"
    )
    print("File uploaded successfully:")
    print(file.model_dump_json(indent=2))
    file_id = file.id
    
    def test_responses_pdf_analysis_gpt5():
        response = client.responses.create(
            model=azure_openai_gpt5_deployment_name,
            input=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_file",
                            "file_id": file_id
                        },
                        {
                            "type": "input_text",
                            "text": "Summarize this PDF in English in three sentences.",
                        },
                    ],
                },
            ]
        )
        print("--- GPT-5 Responses API PDF Analysis ---")
        print(response.model_dump_json(indent=2))
        return response.output_text if hasattr(response, 'output_text') else str(response.output)

    def test_responses_pdf_analysis_gpt41():
        response = client.responses.create(
            model=azure_openai_gpt41_deployment_name,
            input=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_file",
                            "file_id": file_id
                        },
                        {
                            "type": "input_text",
                            "text": "Summarize this PDF in English in three sentences.",
                        },
                    ],
                },
            ]
        )
        print("--- GPT-4.1 Responses API PDF Analysis ---")
        print(response.model_dump_json(indent=2))
        return response.output_text if hasattr(response, 'output_text') else str(response.output)

    # Performance comparison
    results = measure_and_compare("Responses API PDF Analysis", test_responses_pdf_analysis_gpt5, test_responses_pdf_analysis_gpt41)
    
except FileNotFoundError:
    print("PDF file 'AI_Brief_Aug2025.pdf' not found. Please ensure the file exists in the current directory.")
except Exception as e:
    print(f"Error uploading or processing PDF: {e}")

File uploaded successfully:
{
  "id": "assistant-4araX5d3oTVQaMCJYfJ4W4",
  "bytes": 1165436,
  "created_at": 1757321228,
  "filename": "AI_Brief_Aug2025.pdf",
  "object": "file",
  "purpose": "assistants",
  "status": "processed",
  "expires_at": null,
  "status_details": null
}

=== Responses API PDF Analysis Performance Comparison ===

--- gpt-5-mini Results ---
--- GPT-5 Responses API PDF Analysis ---
{
  "id": "resp_68be980d2b14819698f271eb47c9d08a089243cd418c7c7c",
  "created_at": 1757321231.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5-mini",
  "object": "response",
  "output": [
    {
      "id": "rs_68be9810537481968d2a23d22a4526dd089243cd418c7c7c",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "msg_68be98175c388196a367182f58497ea2089243cd418c7c7c",
      "content": [
        {
          "annotations": [],
     

## Responses API - Background Task (async)

In [8]:
response = client.responses.create(
    model = azure_openai_gpt5_deployment_name,
    input = "Write me a bedtime story (1min)",
    background = True
)

print(response.model_dump_json(indent=2))

{
  "id": "resp_68be982180dc81948fc220192b268fce0e576ae11a88bb1c",
  "created_at": 1757321249.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5-mini",
  "object": "response",
  "output": [],
  "parallel_tool_calls": true,
  "temperature": 1.0,
  "tool_choice": "auto",
  "tools": [],
  "top_p": 1.0,
  "background": true,
  "max_output_tokens": null,
  "max_tool_calls": null,
  "previous_response_id": null,
  "prompt": null,
  "prompt_cache_key": null,
  "reasoning": {
    "effort": "medium",
    "generate_summary": null,
    "summary": null
  },
  "safety_identifier": null,
  "service_tier": "auto",
  "status": "queued",
  "text": {
    "format": {
      "type": "text"
    },
    "verbosity": null
  },
  "top_logprobs": null,
  "truncation": "disabled",
  "usage": null,
  "user": null,
  "content_filters": null,
  "store": true
}


In [9]:
from time import sleep

while response.status in {"queued", "in_progress"}:
    print(f"Current status: {response.status}")
    sleep(2)
    response = client.responses.retrieve(response.id)

print(f"Final status: {response.status}\nOutput:\n{response.output_text}")

Current status: queued


Current status: queued
Current status: queued
Final status: completed
Output:
Once upon a time, in a tiny cottage at the edge of a sleepy forest, a little fox named Mira couldn’t fall asleep. The stars were twinkling like tiny lanterns, and the moon hung low and round, watching quietly.

Mira wrapped herself in a warm blanket of soft moss and listened. The trees whispered lullabies, and a slow breeze carried the scent of lavender and honey. A firefly came by and drew a silver path through the air, inviting Mira to follow with her eyes. Each blink of light felt like a gentle promise: you are safe, you are calm, you are loved.

Soon Mira’s breathing matched the steady rhythm of the night. Her eyes grew heavy, and the moon tucked a silvery curl around her cheek. In the hush, she slipped into a dream where the forest hummed a soft, happy tune.

Sleep now, little one. The night keeps watch, and morning will come when it’s time. Good night.


## Responses API - Function Calling Test

In [10]:
# Responses API Function Calling Performance Comparison
tools_responses = [  
    {  
        "type": "function",  
        "name": "get_weather",  
        "description": "Get the weather for a location",  
        "parameters": {  
            "type": "object",  
            "properties": {  
                "location": {"type": "string"},  
            },  
            "required": ["location"],  
        },  
    }  
]

def test_responses_function_calling_gpt5():
    response = client.responses.create(  
        model=azure_openai_gpt5_deployment_name, 
        tools=tools_responses,
        input=[{"role": "user", "content": "What's the weather in San Francisco?"}],  
    )  
    
    print("--- GPT-5 Responses API Function Calling ---")
    print(response.model_dump_json(indent=2))
    
    # To provide output to tools, add a response for each tool call to an array passed  
    # to the next response as `input`  
    input_data = []  
    for output in response.output:  
        if output.type == "function_call":  
            match output.name:  
                case "get_weather":  
                    input_data.append(  
                        {  
                            "type": "function_call_output",  
                            "call_id": output.call_id,  
                            "output": '{"temperature": "70 degrees"}',  
                        }  
                    )  
                case _:  
                    raise ValueError(f"Unknown function call: {output.name}")  
      
    second_response = client.responses.create(  
        model=azure_openai_gpt5_deployment_name,  
        previous_response_id=response.id,  
        input=input_data  
    )  
    
    print("--- GPT-5 Second Response ---")
    print(second_response.model_dump_json(indent=2))
    return second_response.output_text if hasattr(second_response, 'output_text') else str(second_response.output)

def test_responses_function_calling_gpt41():
    response = client.responses.create(  
        model=azure_openai_gpt41_deployment_name, 
        tools=tools_responses,
        input=[{"role": "user", "content": "What's the weather in San Francisco?"}],  
    )  
    
    print("--- GPT-4.1 Responses API Function Calling ---")
    print(response.model_dump_json(indent=2))
    
    # To provide output to tools, add a response for each tool call to an array passed  
    # to the next response as `input`  
    input_data = []  
    for output in response.output:  
        if output.type == "function_call":  
            match output.name:  
                case "get_weather":  
                    input_data.append(  
                        {  
                            "type": "function_call_output",  
                            "call_id": output.call_id,  
                            "output": '{"temperature": "70 degrees"}',  
                        }  
                    )  
                case _:  
                    raise ValueError(f"Unknown function call: {output.name}")  
      
    second_response = client.responses.create(  
        model=azure_openai_gpt41_deployment_name,  
        previous_response_id=response.id,  
        input=input_data  
    )  
    
    print("--- GPT-4.1 Second Response ---")
    print(second_response.model_dump_json(indent=2))
    return second_response.output_text if hasattr(second_response, 'output_text') else str(second_response.output)

# Performance comparison
results = measure_and_compare("Responses API Function Calling", test_responses_function_calling_gpt5, test_responses_function_calling_gpt41) 


=== Responses API Function Calling Performance Comparison ===

--- gpt-5-mini Results ---


--- GPT-5 Responses API Function Calling ---
{
  "id": "resp_68be98289ffc8197ab156eb3417821e70d0407089a45ab97",
  "created_at": 1757321256.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5-mini",
  "object": "response",
  "output": [
    {
      "id": "rs_68be98290a188197a3768578d25163f20d0407089a45ab97",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "arguments": "{\"location\":\"San Francisco\"}",
      "call_id": "call_eN0GmsyWJaby19erNp4VS7sO",
      "name": "get_weather",
      "type": "function_call",
      "id": "fc_68be982a294c8197afa573c7d607008f0d0407089a45ab97",
      "status": "completed"
    }
  ],
  "parallel_tool_calls": true,
  "temperature": 1.0,
  "tool_choice": "auto",
  "tools": [
    {
      "name": "get_weather",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
  

## Responses API - Structured Output Test with Json Schema

In [11]:
# 5) Structured Output (Responses API - JSON Schema)
# json_schema = {
#     "name": "product_summary",
#     "schema": {
#         "type": "object",
#         "properties": {
#             "title": {"type": "string"},
#             "category": {"type": "string"},
#             "price_krw": {"type": "number"},
#             "pros": {"type": "array", "items": {"type": "string"}},
#             "cons": {"type": "array", "items": {"type": "string"}},
#         },
#         "required": ["title", "category", "price_krw"],
#         "additionalProperties": False,
#     },
#     "strict": True,
# }

# inputs = [
#     {"role": "user", "content": "Pick a gaming laptop model randomly and generate a summary JSON. Include price in Korean Won with approximate values."}
# ]

# response = client.responses.create(
#     model=azure_openai_gpt5_deployment_name,
#     input=inputs,
#     response_format={"type": "json_schema", "json_schema": json_schema},
# )

# print(response.model_dump_json(indent=2)) 

print("Responses API doen't support response_format.")

Responses API doen't support response_format.


## Responses API - Streaming output Test

In [12]:
# 4) Streaming Output Performance Comparison
def test_streaming_gpt5():
    try:
        print("--- GPT-5 Streaming ---")
        stream = client.chat.completions.create(
            model=azure_openai_gpt5_deployment_name,
            messages=[{"role": "user", "content": "Tell me a five-line joke."}],
            stream=True,
        )
        
        content = ""
        for chunk in stream:
            if chunk.choices:
                delta = chunk.choices[0].delta.content
                if delta:
                    print(delta, end="", flush=True)
                    content += delta
                    time.sleep(0.05)
        print()
        return content
        
    except Exception as e:
        print(f"Streaming error: {e}")
        return None

def test_streaming_gpt41():
    try:
        print("--- GPT-4.1 Streaming ---")
        stream = client.chat.completions.create(
            model=azure_openai_gpt41_deployment_name,
            messages=[{"role": "user", "content": "Tell me a five-line joke."}],
            stream=True,
        )
        
        content = ""
        for chunk in stream:
            if chunk.choices:
                delta = chunk.choices[0].delta.content
                if delta:
                    print(delta, end="", flush=True)
                    content += delta
                    time.sleep(0.05)
        print()
        return content
        
    except Exception as e:
        print(f"Streaming error: {e}")
        return None

# Performance comparison
results = measure_and_compare("Streaming Output", test_streaming_gpt5, test_streaming_gpt41)


=== Streaming Output Performance Comparison ===

--- gpt-5-mini Results ---
--- GPT-5 Streaming ---


I'm reading a book about anti-gravity.
It's impossible to put down.
I tried to lend it to a friend,
but he said he couldn't hold onto it either.
Now we're both floating through literature.
Execution Time: 6.80s

--- gpt-4.1 Results ---
--- GPT-4.1 Streaming ---
Why did the scarecrow win an award?  
Because he was outstanding in his field.  
But the corn was a little husky about it.  
The potatoes felt they’d been peeled off from the team.  
And the tomatoes just couldn’t ketchup!
Execution Time: 3.38s

--- Performance Summary ---
gpt-5-mini: 6.80s
gpt-4.1: 3.38s
Time Difference: 3.42s (GPT-4.1 faster)


## Responses API - Code Interpreter Test

In [13]:
# Responses API Code Interpreter Performance Comparison
instructions = "You are a personal math tutor. When asked a math question, write and run code using the python tool to answer the question."

tools_code_interpreter = [
    {
        "type": "code_interpreter",
        "container": {"type": "auto"}
    }
]

math_question = "I need to solve the equation 3x + 11 = 14. Can you help me?"

def test_responses_code_interpreter_gpt5():
    response = client.responses.create(
        model=azure_openai_gpt5_deployment_name,
        tools=tools_code_interpreter,
        instructions=instructions,
        input=math_question,
    )
    print("--- GPT-5 Responses API Code Interpreter ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

def test_responses_code_interpreter_gpt41():
    response = client.responses.create(
        model=azure_openai_gpt41_deployment_name,
        tools=tools_code_interpreter,
        instructions=instructions,
        input=math_question,
    )
    print("--- GPT-4.1 Responses API Code Interpreter ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

# Performance comparison
results = measure_and_compare("Responses API Code Interpreter", test_responses_code_interpreter_gpt5, test_responses_code_interpreter_gpt41) 


=== Responses API Code Interpreter Performance Comparison ===

--- gpt-5-mini Results ---


--- GPT-5 Responses API Code Interpreter ---
{
  "id": "resp_68be983896f881949c3337b4d855abd108a67e224c76594f",
  "created_at": 1757321272.0,
  "error": null,
  "incomplete_details": null,
  "instructions": "You are a personal math tutor. When asked a math question, write and run code using the python tool to answer the question.",
  "metadata": {},
  "model": "gpt-5-mini",
  "object": "response",
  "output": [
    {
      "id": "rs_68be983a5fc48194b24df42a9c8d675d08a67e224c76594f",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "ci_68be983ce32881949a1530af8158a3b708a67e224c76594f",
      "code": "# Solve the equation 3x + 11 = 14\r\nx = (14 - 11) / 3\r\nsolution = x\r\ncheck = 3 * x + 11\r\nsolution, check\n",
      "container_id": "cntr_68be98397d708190bc4fbcebced4331d093b85ca32480b8c",
      "outputs": null,
      "status": "completed",
      "type": "code_interpreter_call"
    },


## Responses API - Vision Support Test

In [None]:
# Responses API Vision Support Performance Comparison
# Note: Requires the deployed model to support vision and your API version to allow image input.
image_url = os.getenv("TEST_IMAGE_URL", "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Fronalpstock_big.jpg/640px-Fronalpstock_big.jpg")

inputs = [
    {"role": "user", "content": [
        {"type": "input_text", "text": "Summarize the main features of this image in three lines in English."},
        {"type": "input_image", "image_url": image_url},
    ]}
]

def test_responses_vision_gpt5():
    response = client.responses.create(
        model=azure_openai_gpt5_deployment_name, 
        input=inputs
    )
    print("--- GPT-5 Responses API Vision ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

def test_responses_vision_gpt41():
    response = client.responses.create(
        model=azure_openai_gpt41_deployment_name, 
        input=inputs
    )
    print("--- GPT-4.1 Responses API Vision ---")
    print(response.model_dump_json(indent=2))
    return response.output_text if hasattr(response, 'output_text') else str(response.output)

# Performance comparison
results = measure_and_compare("Responses API Vision Support", test_responses_vision_gpt5, test_responses_vision_gpt41)


=== Responses API Vision Support Performance Comparison ===

--- gpt-5-mini Results ---


--- GPT-5 Responses API Vision ---
{
  "id": "resp_68be9844c0b48194a86d57297e8270ad03ddd670ee72823a",
  "created_at": 1757321284.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": {},
  "model": "gpt-5-mini",
  "object": "response",
  "output": [
    {
      "id": "rs_68be9845b3c0819499202e6ac6e0137a03ddd670ee72823a",
      "summary": [],
      "type": "reasoning",
      "content": null,
      "encrypted_content": null,
      "status": null
    },
    {
      "id": "msg_68be9847b8cc819488073838dce7274f03ddd670ee72823a",
      "content": [
        {
          "annotations": [],
          "text": "A wide alpine mountain range with steep rocky cliffs and densely forested slopes.  \nGreen meadows and patchwork valleys with scattered clearings at the base.  \nBright blue sky with scattered white clouds and distant snow-capped peaks.",
          "type": "output_text",
          "logprobs": null
        }
      ],
      "role": "assistant",
      "status":