# OpenAI Responses API Examples

In [10]:
from openai import OpenAI

In [11]:
# Build the SDK client against a server listening on localhost:8000 rather than
# the SDK's default endpoint.
# NOTE(review): "no-need" looks like a placeholder, i.e. the local server
# presumably ignores the key — confirm before pointing this at a real service.
client = OpenAI(
    api_key="no-need",
    base_url="http://localhost:8000/v1",
)

## Text input

### Non-streaming response

In [12]:
# Plain-text prompt, no streaming: create() returns one Response object, and
# printing it shows the whole object (ids, output items, settings), not just
# the generated text.
response = client.responses.create(
    model="mlx-local-model",
    input="Tell me a three sentence bedtime story about a unicorn.",
)

print(response)


Response(id='resp_000e7d933b6e41379381f7970b316b56', created_at=1771748881.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[ResponseReasoningItem(id='rs_000e7d933b6e41379381f7970b316b56', summary=[Summary(text='Okay, user wants a three-sentence bedtime story about a unicorn. That\'s a sweet request - probably for a child or someone needing lulling content. \n\nHmm, bedtime stories need to be soothing but magical. Should avoid scary elements since it\'s for sleep. Unicorn tropes are safe: sparkle, gentle nature, dreams. \n\nUser didn\'t specify audience age, so I\'ll aim for universal charm - simple words but vivid imagery. Key is making it feel complete in exactly three sentences. First sentence sets scene, second adds wonder, third resolves peacefully. \n\n*checks structure* \nSentence 1: Establishing calm (moonlight, meadow) \nSentence 2: Unicorn\'s magical action (shimmering horn) \nSentence 3: Emotional pa

### Streaming response

In [14]:
# Same prompt as the non-streaming cell, but with stream=True the return value
# is iterated instead of printed directly. Each item is an event object
# (e.g. ResponseCreatedEvent, ResponseInProgressEvent) and is printed as-is.
response = client.responses.create(
    model="mlx-local-model",
    input="Tell me a three sentence bedtime story about a unicorn.",
    stream=True,
)

for event in response:
    print(event)


ResponseCreatedEvent(response=Response(id='resp_05565b3160e54a60897dd6d2daa653c5', created_at=1771748906.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, background=False, conversation=None, max_output_tokens=100000, max_tool_calls=None, previous_response_id=None, prompt=None, prompt_cache_key=None, prompt_cache_retention=None, reasoning=None, safety_identifier=None, service_tier='auto', status='in_progress', text=None, top_logprobs=None, truncation='disabled', usage=None, user=None), sequence_number=0, type='response.created')
ResponseInProgressEvent(response=Response(id='resp_05565b3160e54a60897dd6d2daa653c5', created_at=1771748906.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', to

## Image input

### Non-streaming response

In [17]:
# Image question, no streaming: the single user message carries a list of
# content parts, one text part followed by one image part.
response = client.responses.create(
    model="mlx-local-model",
    input=[
        {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": "what is in this image?",
                },
                {
                    "type": "input_image",
                    # NOTE(review): a relative file path, not an http(s)/data
                    # URL — presumably resolved by the local server; confirm
                    # which working directory it is relative to.
                    "image_url": "examples/images/password.jpg",
                    "detail": "low",
                },
            ],
        },
    ],
)

print(response)


Response(id='resp_e729e4a81d644db18e01413731637ae3', created_at=1771749057.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[ResponseReasoningItem(id='rs_e729e4a81d644db18e01413731637ae3', summary=[Summary(text='So, let\'s look at the image. It\'s a printed receipt or a Wi-Fi information slip. The top says "Ashley Hotel West Coast" with a line. Then "WiFi Internet Service". Next, details: Username: fppp, Password: aeeu, Traffic: 1 GB, Price: 0.00, Validity: 1w (wait, maybe "Validity" is a typo, should be "Validity: 1w"? Wait, the text says "Validity: 1w"? Wait, no, looking again: "Validity: 1w"? Wait, the original text has "Validity: 1w"? Wait, let\'s check the user\'s image description.\n\nWait, the user provided the image description: "Ashley Hotel West Coast\\nWiFi Internet Service\\nUsername: fppp\\nPassword: aeeu\\nTraffic: 1 GB\\nPrice: 0.00\\nValidity: 1w\\nStarts: Now\\nShared Users: 4\\n27/02/2019 11:0

### Streaming response

In [19]:
# Image question with stream=True: the payload matches the non-streaming image
# cell, but the result is consumed as a sequence of event objects, each of
# which is printed as it arrives.
response = client.responses.create(
    model="mlx-local-model",
    input=[
        {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": "what is in this image?",
                },
                {
                    "type": "input_image",
                    # NOTE(review): a relative file path, not an http(s)/data
                    # URL — presumably resolved by the local server; confirm
                    # which working directory it is relative to.
                    "image_url": "examples/images/password.jpg",
                    "detail": "low",
                },
            ],
        },
    ],
    stream=True,
)

for event in response:
    print(event)


ResponseCreatedEvent(response=Response(id='resp_ea847c2767c142b6b87baa54c855d2fd', created_at=1771749070.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, background=False, conversation=None, max_output_tokens=100000, max_tool_calls=None, previous_response_id=None, prompt=None, prompt_cache_key=None, prompt_cache_retention=None, reasoning=None, safety_identifier=None, service_tier='auto', status='in_progress', text=None, top_logprobs=None, truncation='disabled', usage=None, user=None), sequence_number=0, type='response.created')
ResponseInProgressEvent(response=Response(id='resp_ea847c2767c142b6b87baa54c855d2fd', created_at=1771749070.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', to

## Functions

In [None]:
# Tool catalogue used by the function-calling cells: a single function tool
# whose arguments are described with a JSON-Schema-style "parameters" object.
tools = [
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                },
            },
            # Both arguments are listed as required, so a call must supply a
            # unit even when the prompt does not mention one.
            "required": ["location", "unit"],
        },
    },
]

### Non-streaming response

In [21]:
# Function-calling request, no streaming: the `tools` list from the cell above
# is offered to the model together with the question, and the full Response
# object is printed.
response = client.responses.create(
    model="mlx-local-model",
    input="What is the weather like in Boston today?",
    tools=tools,
    tool_choice="auto",
)

print(response)

Response(id='resp_d603c021b0b34922b428c496496ba285', created_at=1771749176.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[ResponseReasoningItem(id='rs_d603c021b0b34922b428c496496ba285', summary=[Summary(text='Okay, the user is asking about the weather in Boston today. Let me check the tools available. There\'s a function called get_current_weather that takes location and unit as parameters. The location needs to be a city and state, like "San Francisco, CA". The user mentioned Boston, but didn\'t specify the state. Wait, Boston is in Massachusetts, so the location should be "Boston, MA". The unit parameter is required and has to be either celsius or fahrenheit. Since the user didn\'t specify, maybe I should default to a common one. In the US, people usually use Fahrenheit, but sometimes people might expect Celsius. Hmm, but the question is in English, so probably Fahrenheit. Let me check the tool\'s required

### Streaming response

In [22]:
# Function-calling request with stream=True: same question and `tools` list as
# the non-streaming cell, but the result is iterated and every event object is
# printed as it arrives.
response = client.responses.create(
    model="mlx-local-model",
    input="What is the weather like in Boston today?",
    tools=tools,
    tool_choice="auto",
    stream=True,
)

for event in response:
    print(event)


ResponseCreatedEvent(response=Response(id='resp_4670d32e8fef4e33af3b2e6732428865', created_at=1771749186.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='mlx-local-model', object='response', output=[], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[FunctionTool(name='get_current_weather', parameters={'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location', 'unit']}, strict=None, type='function', description='Get the current weather in a given location')], top_p=1.0, background=False, conversation=None, max_output_tokens=100000, max_tool_calls=None, previous_response_id=None, prompt=None, prompt_cache_key=None, prompt_cache_retention=None, reasoning=None, safety_identifier=None, service_tier='auto', status='in_progress', text=None, top_logprobs=None, truncation='disabled', usag

## Structured Outputs

In [23]:
from pydantic import BaseModel

In [37]:
# Target shape for the structured-output request below: it is passed as
# `text_format`, and the parsed reply is printed as an instance of this model.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated JSON schema, which would change what is sent to the server.
class Address(BaseModel):
    street: str  # e.g. '1 Hacker Wy' in the recorded output
    city: str  # e.g. 'Menlo Park'
    state: str  # e.g. 'CA'
    zip: str  # kept as text ('94025'), not an integer

# Structured output: text_format=Address requests a reply that fits the Address
# model, and the parsed instance is then read from `response.output_parsed`.
response = client.responses.parse(
    model="mlx-local-model",
    input=[
        {
            "role": "system",
            "content": "You are an assistant that can answer questions and help with tasks.",
        },
        {
            "role": "user",
            "content": "Format: 1 Hacker Wy Menlo Park CA 94025",
        },
    ],
    text_format=Address,
)

address = response.output_parsed

print(address)

street='1 Hacker Wy' city='Menlo Park' state='CA' zip='94025'
