In [7]:
import os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

client = OpenAI(
    base_url="http://localhost:4243/",
)

response = client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        {"role": "system", "content": "You are a helpful assistant. Be concise."},
        {"role": "user", "content": "Say 'Hello from LLMProxy!' and nothing else."},
    ],
    extra_body={"cache": {"no-cache": True}},
)

print(response.model_dump_json(indent=2))

{
  "id": "chatcmpl-BgDiTUmOtnGcOo5szCT9TqpWvh0pu",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "Hello from LLMProxy!",
        "refusal": null,
        "role": "assistant",
        "annotations": [],
        "audio": null,
        "function_call": null,
        "tool_calls": null
      },
      "content_filter_results": {
        "hate": {
          "filtered": false,
          "severity": "safe"
        },
        "protected_material_code": {
          "detected": false,
          "filtered": false
        },
        "protected_material_text": {
          "detected": false,
          "filtered": false
        },
        "self_harm": {
          "filtered": false,
          "severity": "safe"
        },
        "sexual": {
          "filtered": false,
          "severity": "safe"
        },
        "violence": {
          "filtered": false,
          "severity": "safe"
        }
      }
    }
  ],


In [8]:
# Completions API Streaming
client = OpenAI(
    base_url="http://localhost:4243/",
)
stream = client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        {
            "role": "user",
            "content": "Write a one-sentence bedtime story about a unicorn.",
        },
    ],
    stream=True,
)

for chunk in stream:
    print(chunk.choices[0].delta.content)


Under
 a
 shimmering
 silver
 moon
,
 a
 gentle
 unicorn
 named
 Luna
 tip
to
ed
 through
 a
 field
 of
 glowing
 flowers
,
 spreading
 sweet
 dreams
 wherever
 her
 sparkling
 horn
 touched
 the
 night
.
None


In [9]:
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()
proxy_client = AsyncOpenAI(base_url="http://localhost:4243/")


async def main():
    # stream = await openai_client.responses.create(
    #     model="gpt-4.1",
    #     input="Write a one-sentence bedtime story about a unicorn. Come up with a very creative story.",
    #     stream=True,
    #     temperature=1.0,
    # )

    # print("OpenAI streaming:")
    # async for event in stream:
    #     if event.type == "response.output_text.delta":
    #         print(event.delta)

    print("Proxy streaming:")
    stream = await proxy_client.responses.create(
        model="gpt-4.1",
        input="Write a one-sentence bedtime story about a unicorn. Come up with a very creative story.",
        stream=True,
        temperature=1.0,
    )

    async for event in stream:
        if event.type == "response.output_text.delta":
            print(event.delta)


await main()


Proxy streaming:
As
 the
 silver
 clock
 struck
 midnight
,
 a
 sky
-blue
 unicorn
 tip
to
ed
 across
 the
 clouds
,
 painting
 dreams
 into
 the
 stars
 with
 her
 shimmering
,
 rainbow
-s
wir
led
 tail
.


In [10]:
client = OpenAI(
    base_url="http://localhost:4243/",
)

stream = client.responses.create(
    model="o4-mini",
    instructions="You are a helpful assistant.",
    input="Count from 1 to 5, one number at a time.",
    stream=True,
    extra_body={"cache": {"no-cache": True}},
)
for event in stream:
    if event.type == "response.output_text.delta":
        print(event.delta)

1


In [11]:
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()
proxy_client = AsyncOpenAI(base_url="http://localhost:4243/")


async def main():
    print("Proxy streaming:")
    stream = await proxy_client.responses.create(
        model="gpt-4.1",
        input="Count from 1 to 20, one number at a time.",
        stream=True,
        temperature=1.0,
        extra_body={"cache": {"no-cache": True}},
    )

    async for event in stream:
        if event.type == "response.output_text.delta":
            print(event.delta)


await main()


Proxy streaming:
1
  

2
  

3
  

4
  

5
  

6
  

7
  

8
  

9
  

10
  

11
  

12
  

13
  

14
  

15
  

16
  

17
  

18
  

19
  

20
