In [1]:
import os
from openai import OpenAI
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment before the client is constructed.
# NOTE(review): presumably this is where the API key the SDK reads comes from — confirm.
load_dotenv()

# All requests made through this client go to the local proxy instead of the
# SDK's default endpoint.
client = OpenAI(base_url="http://localhost:4243/")

# Plain (non-streaming) Chat Completions request.
# `extra_body` adds the "cache" field to the JSON payload that is sent;
# presumably it tells the proxy to bypass its cache — verify against the proxy.
response = client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        dict(role="system", content="You are a helpful assistant. Be concise."),
        dict(role="user", content="Say 'Hello from LLMProxy!' and nothing else."),
    ],
    extra_body=dict(cache={"no-cache": True}),
)

# Show the complete response object as indented JSON.
print(response.model_dump_json(indent=2))

{
  "id": "chatcmpl-Bfofdlfkp6mp1PNpSGOkvsiHqWT40",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "Hello from LLMProxy!",
        "refusal": null,
        "role": "assistant",
        "annotations": [],
        "audio": null,
        "function_call": null,
        "tool_calls": null
      },
      "content_filter_results": {
        "hate": {
          "filtered": false,
          "severity": "safe"
        },
        "protected_material_code": {
          "detected": false,
          "filtered": false
        },
        "protected_material_text": {
          "detected": false,
          "filtered": false
        },
        "self_harm": {
          "filtered": false,
          "severity": "safe"
        },
        "sexual": {
          "filtered": false,
          "severity": "safe"
        },
        "violence": {
          "filtered": false,
          "severity": "safe"
        }
      }
    }
  ],


In [2]:
# Chat Completions API streaming (through the local proxy)
client = OpenAI(
    base_url="http://localhost:4243/",
)
stream = client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        {
            "role": "user",
            "content": "Write a one-sentence bedtime story about a unicorn.",
        },
    ],
    stream=True,
)

# Print each text delta on its own line so the chunk boundaries stay visible.
for chunk in stream:
    # A chunk may arrive with an empty `choices` list (e.g. usage-only chunks,
    # or filter-result chunks on some backends); indexing [0] would raise.
    if not chunk.choices:
        continue
    text = chunk.choices[0].delta.content
    # The terminal chunk only carries a finish_reason, so its delta.content is
    # None; skip it instead of printing the literal string "None".
    if text is not None:
        print(text)


B
ene
ath
 a
 sil
very
 moon
,
 a
 gentle
 unicorn
 tip
to
ed
 through
 a
 field
 of
 dreams
,
 leaving
 tw
ink
ling
 stars
 wherever
 her
 hoof
s
 touched
 the
 grass
.
None


In [6]:
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()
proxy_client = AsyncOpenAI(base_url="http://localhost:4243/")


async def main():
    stream = await openai_client.responses.create(
        model="gpt-4.1",
        input="Write a one-sentence bedtime story about a unicorn. Come up with a very creative story.",
        stream=True,
        temperature=1.0,
    )

    print("OpenAI streaming:")
    async for event in stream:
        if event.type == "response.output_text.delta":
            print(event.delta)

    print("Proxy streaming:")
    stream = await proxy_client.responses.create(
        model="gpt-4.1",
        input="Write a one-sentence bedtime story about a unicorn. Come up with a very creative story.",
        stream=True,
        temperature=1.0,
    )

    async for event in stream:
        if event.type == "response.output_text.delta":
            print(event.delta)


await main()


OpenAI streaming:
Under
 a
 sky
 dust
ed
 with
 diamond
 stars
,
 Luna
 the
 unicorn
 soared
 above
 the
 clouds
,
 bra
iding
 moon
be
ams
 into
 her
 mane
 to
 scatter
 gentle
 dreams
 for
 every
 child
 asleep
 below
.
Proxy streaming:
In
 a
 hidden
 valley
 of
 floating
 moon
flowers
,
 a
 st
arl
it
 unicorn
 composed
 lull
abies
 with
 her
 shimmering
 horn
,
 weaving
 dreams
 so
 magical
 that
 even
 the
 Mil
ky
 Way
 paused
 to
 listen
.


In [5]:
# response = client.chat.completions.create(
#     model="gpt-4.1",
#     messages=[
#         {"role": "system", "content": "You are a helpful assistant. Be concise."},
#         {"role": "user", "content": "Say 'Hello from LLMProxy!' and nothing else."},
#     ],
#     extra_body={"cache": {"no-cache": True}},
# )

# Non-streaming Responses API call, reusing the synchronous `client` created in
# an earlier cell.
response = client.responses.create(
    model="gpt-4.1",
    input=[
        dict(
            role="user",
            content="Write a one-sentence bedtime story about a unicorn.",
        ),
    ],
    # Cache-control override left disabled for this request:
    # extra_body={"cache": {"no-cache": True}},
)

# Dump the whole response object as indented JSON for inspection.
print(response.model_dump_json(indent=2))

{
  "id": "msg_38e1becaf51943d9bec9fc163d635a4b",
  "created_at": null,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": null,
  "model": "gpt-4.1-2025-04-14",
  "object": "response",
  "output": [
    {
      "id": "msg_38e1becaf51943d9bec9fc163d635a4b",
      "content": [
        {
          "annotations": [],
          "text": "Under a velvet moon, a gentle unicorn tiptoed through a field of glowing flowers, leaving sparkling dreams for all the children asleep nearby.",
          "type": "output_text",
          "logprobs": null
        }
      ],
      "role": "assistant",
      "status": null,
      "type": "message"
    }
  ],
  "parallel_tool_calls": null,
  "temperature": null,
  "tool_choice": null,
  "tools": null,
  "top_p": null,
  "background": null,
  "max_output_tokens": null,
  "previous_response_id": null,
  "reasoning": null,
  "service_tier": null,
  "status": null,
  "text": null,
  "truncation": null,
  "usage": {
    "input_toke