In [None]:
import os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

client = OpenAI(
    base_url="http://localhost:4000/",
)

response = client.chat.completions.create(
    model="gpt-5",
    messages=[
        {"role": "system", "content": "You are a helpful assistant. Be concise."},
        {"role": "user", "content": "Say 'Hello from LLMProxy!' and nothing else."},
    ],
    # extra_body={"cache": {"no-cache": True}},
)

print(response.model_dump_json(indent=2))

In [27]:
# Completions API Streaming
client = OpenAI(
    base_url="http://localhost:4243/",
)
stream = client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        {
            "role": "user",
            "content": "Write a one-sentence bedtime story about a unicorn.",
        },
    ],
    stream=True,
)

for chunk in stream:
    print(chunk.choices[0].delta.content)


Under
 a
 sil
very
 moon
,
 a
 gentle
 unicorn
 tip
to
ed
 through
 a
 dreamy
 forest
,
 sprink
ling
 st
ard
ust
 that
 made
 every
 child
â€™s
 sweetest
 wish
 come
 true
 as
 they
 slept
.
None


In [6]:
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()
proxy_client = AsyncOpenAI(base_url="http://localhost:4243/")


async def main():
    stream = await proxy_client.responses.create(
        model="gpt-5",
        input="Say 'Hello from LLMProxy!' and nothing else.",
        stream=False,
        temperature=1.0,
        extra_body={"cache": {"no-cache": True}},
        reasoning={"effort": "minimal"},
        timeout=10,
    )

    return stream.output_text


await main()

'Hello from LLMProxy!'

In [31]:
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()
proxy_client = AsyncOpenAI(base_url="http://localhost:4243/")


async def main():
    response = await proxy_client.responses.create(
        model="gpt-4.1",
        input="Write a one-sentence bedtime story about a unicorn. Come up with a very creative story.",
        temperature=0.0,
    )

    print(response.output_text)


await main()

Under a sky painted with shimmering constellations, a unicorn named Lira tiptoed across the clouds, planting seeds of laughter that blossomed into dreams for every sleeping child below.


In [35]:
client = OpenAI(
    base_url="http://localhost:4243/",
)

stream = client.responses.create(
    model="o4-mini",
    instructions="You are a helpful assistant.",
    input="Count from 1 to 5, one number at a time.",
    stream=True,
    # extra_body={"cache": {"no-cache": True}},
)
for event in stream:
    if event.type == "response.output_text.delta":
        print(event.delta)

1



2



3



4



5


In [10]:
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()
proxy_client = AsyncOpenAI(base_url="http://localhost:4243/")


async def main():
    print("Proxy streaming:")
    stream = await proxy_client.responses.create(
        model="gpt-5-mini",
        input="Count from 1 to 20, one number at a time.",
        stream=True,
        temperature=1.0,
        extra_body={"cache": {"no-cache": False}},
        reasoning={"effort": "medium"},
    )

    async for event in stream:
        if event.type == "response.output_text.delta":
            print(event.delta)


await main()

Proxy streaming:
1


2


3


4


5


6


7


8


9


10


11


12


13


14


15


16


17


18


19


20
