In [None]:
!pip install -q openai

In [None]:
from pprint import pprint
from pydantic import BaseModel

def print_response(response):
  """Print a human-readable summary of a Responses API result.

  Prints, in order: the response id, one token-usage line, the raw
  ``output`` items (pretty-printed) and the aggregated ``output_text``.

  Args:
    response: Object exposing ``id``, ``usage``, ``output`` and
      ``output_text``. ``usage`` and its ``input_tokens_details`` /
      ``output_tokens_details`` sub-objects may be missing or ``None``;
      the affected counts are then shown as ``n/a`` instead of raising
      ``AttributeError``.
  """
  print(f"Response id: {response.id}")

  usage = getattr(response, "usage", None)
  if usage is None:
    # No usage block was reported at all.
    print("Input tokens: n/a; Output tokens: n/a")
  else:
    # The detail sub-objects are optional; fall back to "n/a" when absent.
    cached = getattr(getattr(usage, "input_tokens_details", None), "cached_tokens", "n/a")
    reasoning = getattr(getattr(usage, "output_tokens_details", None), "reasoning_tokens", "n/a")
    # NOTE: If `:free` models are used, the output tokens may be set to zero.
    print(f"Input tokens: {usage.input_tokens} ({cached} cached); Output tokens: {usage.output_tokens} ({reasoning} reasoning)")
  pprint(response.output)

  print()
  print(f"{'-' * 20} [Text] {'-' * 20}")
  print(response.output_text)

In [None]:
# Reusable prompt texts, looked up by a short descriptive key in the cells below.
prompts = dict(
    business_naming="Give me top 3 ideas for naming my business. It's a software company that innovates in audio recording.",
    describe_ai="Describe AI as if it were an animal.",
    essay_about_humanity="Write a short essay (up to 5 sentences) about the history of mankind.",
    generate_character="Generate a fictional character of an adorable but nerdy teenager.",
)

## Basic usage

In [None]:
from google.colab import userdata
from openai import OpenAI

# Read the OpenRouter key from Colab's `userdata` store instead of hard-coding it.
api_key = userdata.get('OPENROUTER_API_KEY')
# Reuse the OpenAI SDK client, but point it at OpenRouter's endpoint.
client = OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1")

In [None]:
# Ask the configured endpoint for its model listing.
available_models = client.models.list()

In [None]:
# Pretty-print every entry of the listing (its `.data` attribute).
# NOTE(review): this prints the whole listing; consider slicing if the output gets long.
pprint(available_models.data)

In [None]:
# Minimal request: only a model name and a plain-string input.
simple_response = client.responses.create(
    model="google/gemma-3-4b-it:free",
    input=prompts["essay_about_humanity"]
)

In [None]:
# Show the id, token usage, raw output items and text of the basic request.
print_response(simple_response)

## Streaming

In [None]:
# Same model and prompt as the basic request, but consumed as a stream of events.
with client.responses.stream(
    model="google/gemma-3-4b-it:free",
    input=prompts["essay_about_humanity"],
) as stream:
    # Dump each event object as it is yielded by the stream.
    for event in stream:
        pprint(event)

    # Once the loop has consumed all events, ask the stream for the final response.
    streamed_response = stream.get_final_response()

In [None]:
# Summarise the final response obtained from the stream, in the same format as the basic request.
print_response(streamed_response)

## Reasoning

For this section, pick models that support the `reasoning` parameter.

In [None]:
# Request with the `reasoning` parameter set to low effort.
low_reasoning_response = client.responses.create(
    model="minimax/minimax-m2.5:floor",
    input=prompts["business_naming"],
    reasoning={"effort": "low"}
)

In [None]:
# The usage line includes the reasoning-token count; compare it with the high-effort run.
print_response(low_reasoning_response)

In [None]:
# Same model and prompt as the low-effort request; only the reasoning effort differs.
high_reasoning_response = client.responses.create(
    model="minimax/minimax-m2.5:floor",
    input=prompts["business_naming"],
    reasoning={"effort": "high"}
)

In [None]:
# The usage line includes the reasoning-token count; compare it with the low-effort run.
print_response(high_reasoning_response)

## Temperature

For this section, pick models that support the `temperature` parameter.

In [None]:
# Request with a low sampling temperature (0.1).
# NOTE(review): despite the variable name, a low temperature presumably does not
# guarantee identical output across runs — confirm for the chosen model.
deterministic_response = client.responses.create(
    model="z-ai/glm-5:floor",
    input=prompts["describe_ai"],
    temperature=0.1
)

In [None]:
# Show the low-temperature result.
print_response(deterministic_response)

In [None]:
# Same model and prompt as the low-temperature request, with a high temperature (0.9).
creative_response = client.responses.create(
    model="z-ai/glm-5:floor",
    input=prompts["describe_ai"],
    temperature=0.9
)

In [None]:
# Show the high-temperature result for comparison with the low-temperature one.
print_response(creative_response)

## Multi-turn conversations

This example includes `instructions` giving the model high-level guidance on how it should behave while generating a response.

> **OpenAI:**
> _"Note that the `instructions` parameter only applies to the current response generation request. If you are managing conversation state with the `previous_response_id` parameter, the `instructions` used on previous turns will not be present in the context."_

Although the SDK still accepts the `previous_response_id` parameter, OpenRouter ignores it.

> **OpenRouter:**
> _"This API [the Responses API] is stateless - each request is independent and no conversation state is persisted between requests. You must include the full conversation history in each request."_


In [None]:
# The conversation history is kept client-side as a list; it starts with one user turn.
multi_turn_conversation = [{ "role": "user", "content": "Generate a short role-play in no more than 5 lines: manager and employee discussing performance issues." }]
first_turn_response = client.responses.create(
    model="openai/gpt-oss-120b",
    input=multi_turn_conversation,
    # High-level behavioural guidance, sent separately from the message list.
    instructions="The manager should talk like a pirate.",
    # `extra_body` adds fields to the request body that the SDK has no named parameter for;
    # here a `provider` preference that sorts by throughput.
    extra_body={
        "provider": { "sort": { "by": "throughput" } }
    }
)

In [None]:
# Show the first turn of the role-play.
print_response(first_turn_response)

In [None]:
# Rebuild the history as: opening user turn + everything the model returned
# for it. Rebuilding (instead of an in-place `.extend(...)`) keeps this cell
# idempotent: running it again does not add the model output a second time.
multi_turn_conversation = multi_turn_conversation[:1] + list(first_turn_response.output)
pprint(multi_turn_conversation)

In [None]:
# The follow-up user turn that continues the role-play.
follow_up_message = {
    "role": "user",
    "content": "Keep the role-play going for another 5 lines, but introduce a new development: the employee's wife enters the room dramatically and tells him that she's pregnant, leaving him genuinely surprised but at the same time desperately hoping to save his job because his family depends on it.",
}
# Append it only once, so re-running this cell does not duplicate the user turn.
if follow_up_message not in multi_turn_conversation:
    multi_turn_conversation.append(follow_up_message)

# The full history is sent again as `input`. `instructions` is not passed on
# this turn, so the pirate guidance is only present through the first-turn
# output contained in the history.
second_turn_response = client.responses.create(
    model="openai/gpt-oss-120b",
    input=multi_turn_conversation,
    extra_body={
        "provider": { "sort": { "by": "throughput" } }
    }
)

In [None]:
# Show the continuation of the role-play.
print_response(second_turn_response)

## Structured output

In [None]:
# Baseline for this section: no output format is requested.
no_structure_response = client.responses.create(
    model="nvidia/nemotron-nano-12b-v2-vl:free",
    input=prompts["generate_character"]
)

In [None]:
# Show the unconstrained result for comparison with the structured variants.
print_response(no_structure_response)

`Structured output` evolved from `JSON mode`; however, using the latter is no longer recommended, for a number of reasons.

> **OpenAI:**
> _When using JSON mode, you must always instruct the model to produce JSON via some message in the conversation, for example via your system message. If you don't include an explicit instruction to generate JSON, the model may generate an unending stream of whitespace and the request may run continually until it reaches the token limit._

> **OpenAI:**
> _JSON mode will not guarantee the output matches any specific schema, only that it is valid and parses without errors._

In [None]:
# Hand-written JSON Schema for the character: three required fields and no
# additional properties.
character_json_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "description": "The character's full name"},
        "age": {"type": "integer", "description": "The character's age"},
        "hobby": {"type": "string", "description": "The character's hobby"},
    },
    "required": ["name", "age", "hobby"],
    "additionalProperties": False,
}

# Same prompt as the unstructured request, now with the schema attached
# through the `text.format` option.
diy_structured_response = client.responses.create(
    model="nvidia/nemotron-nano-12b-v2-vl:free",
    input=prompts["generate_character"],
    text={
        "format": {
            "type": "json_schema",
            "name": "character",
            "schema": character_json_schema,
            "strict": True,
        }
    },
)

In [None]:
# Show the result of the request that used the hand-written schema.
print_response(diy_structured_response)

In [None]:
# Pydantic model with the same three fields as the hand-written schema above.
# NOTE(review): documented with comments rather than a docstring — pydantic may
# copy a class docstring into the generated schema; confirm before adding one.
class Character(BaseModel):
    name: str   # the character's name
    age: int    # the character's age
    hobby: str  # the character's hobby

# `responses.parse` takes the model class via `text_format` instead of a
# hand-written schema dict.
auto_structured_response = client.responses.parse(
    model="nvidia/nemotron-nano-12b-v2-vl:free",
    input=prompts["generate_character"],
    text_format=Character
)

In [None]:
# Show the usual summary first ...
print_response(auto_structured_response)

# ... then the `output_parsed` attribute of the parsed response.
print()
print(f"Parsed output: {auto_structured_response.output_parsed}")

## Vision

<img src="https://freerangestock.com/sample/88947/painter-working-in-studio.jpg" />


In [None]:
# Image to analyse, passed to the model by URL.
analysis_image_url = "https://freerangestock.com/sample/88947/painter-working-in-studio.jpg"

# Two-message input: a system message that sets the analyst role, then a user
# message that combines a text part with an image part.
image_analysis_input = [
    {
        "role": "system",
        "content": "You are an expert image analyst. Keep your answer concise and structured.",
    },
    {
        "role": "user",
        "content": [
            {
                "type": "input_text",
                "text": "Analyze this image and return a one-sentence summary followed by 5 key visible objects.",
            },
            {
                "type": "input_image",
                "image_url": analysis_image_url,
            },
        ],
    },
]

image_analysis_response = client.responses.create(
    model="nvidia/nemotron-nano-12b-v2-vl:free",
    input=image_analysis_input,
)

In [None]:
# Show the model's analysis of the image.
print_response(image_analysis_response)