In [1]:
import os
from pathlib import Path
from typing import Annotated

import nest_asyncio
import uvicorn
from fastapi import FastAPI, Request, WebSocket
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

from autogen.agentchat.realtime_agent import FunctionObserver, RealtimeAgent, WebsocketAudioAdapter



In [2]:
# Configuration: the API key and the server port are read from environment variables.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PORT = int(os.environ.get("PORT", 5050))  # falls back to 5050 when PORT is unset

# Fail fast if the key is absent or empty, before any server or agent is created.
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("Missing the OpenAI API key. Please set it in the .env file.")

# Settings handed to the RealtimeAgent created in the websocket handler.
llm_config = dict(
    timeout=600,
    cache_seed=45,  # change the seed for different trials
    config_list=[
        dict(
            model="gpt-4o-realtime-preview-2024-10-01",
            api_key=OPENAI_API_KEY,
        )
    ],
    temperature=0.8,
)

In [3]:
# Patch asyncio to permit nested event loops. Presumably required so that the
# uvicorn.run() call later in this notebook can run inside the kernel's
# already-running loop — confirm if the server is ever started outside Jupyter.
nest_asyncio.apply()

In [None]:
# Web application exposing the chat page, static assets and the websocket endpoint.
app = FastAPI()

# Asset directories are resolved against the current working directory, so the
# notebook must be run from the folder that contains "agentchat_realtime_websocket".
notebook_path = os.getcwd()

# Static files served under the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory=Path(notebook_path, "agentchat_realtime_websocket", "static")),
    name="static",
)

# Templates for HTML responses
templates = Jinja2Templates(directory=Path(notebook_path, "agentchat_realtime_websocket", "templates"))


@app.get("/", response_class=JSONResponse)
async def index_page():
    """Root endpoint: return a small JSON payload saying the server is running."""
    status = {"message": "Websocket Audio Stream Server is running!"}
    return status


@app.get("/start-chat/", response_class=HTMLResponse)
async def start_chat(request: Request):
    """Render the "chat.html" page used for the audio chat.

    The template context carries the incoming request and, under "port", the
    server's configured PORT (not a port taken from the client).
    """
    context = {"request": request, "port": PORT}
    return templates.TemplateResponse("chat.html", context)


@app.websocket("/media-stream")
async def handle_media_stream(websocket: WebSocket):
    """Accept a websocket client and run a realtime agent over that connection.

    A new RealtimeAgent (with its own WebsocketAudioAdapter wrapping this
    websocket) is built for every connection, a demo ``get_weather`` function
    is registered on it, and the handler then awaits the agent's ``run()``.
    """
    await websocket.accept()

    # Same prompt text as a single literal; adjacent literals are concatenated.
    system_prompt = (
        "Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. "
        "You can ask me about weather, jokes, or anything you can imagine. "
        "Start by saying How can I help you?"
    )

    agent = RealtimeAgent(
        name="Weather Bot",
        system_message=system_prompt,
        llm_config=llm_config,
        audio_adapter=WebsocketAudioAdapter(websocket),
    )

    @agent.register_realtime_function(name="get_weather", description="Get the current weather")
    def get_weather(location: Annotated[str, "city"]) -> str:
        # Canned demo answer: only the exact string "Seattle" is reported as cloudy.
        if location == "Seattle":
            return "The weather is cloudy."
        return "The weather is sunny."

    await agent.run()


# Serve the app on every network interface ("0.0.0.0") at the configured PORT.
# NOTE(review): this call appears to block until the server is stopped, so the
# cell stays busy while the server is up; binding to all interfaces also makes
# the server reachable from other machines — confirm that is intended.
uvicorn.run(app, host="0.0.0.0", port=PORT)

INFO:     Started server process [2915440]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:5050 (Press CTRL+C to quit)


INFO:     127.0.0.1:60148 - "GET /start-chat/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:60148 - "GET /static/wavtools.js HTTP/1.1" 404 Not Found


INFO:     ('127.0.0.1', 60162) - "WebSocket /media-stream" [accepted]


OpenAIRealtimeClient.run(): started


INFO:     connection open


OpenAIRealtimeClient.run(): connected
initialize_session()
session_update: session_options={'turn_detection': {'type': 'server_vad'}, 'voice': 'alloy', 'instructions': 'Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying How can I help you?', 'modalities': ['audio', 'text'], 'temperature': 0.8}
send_text: role=user, text=Hi!
OpenAIRealtimeClient.run(): session initialized
OpenAIRealtimeClient.run(): create_task_group() finished.
Received event: SessionCreatedEvent(event_id='event_Aj5b9zsmOvHhI3z78k0ps', session=Session(id='sess_Aj5b8n5QGCOHFg2QwfA2W', input_audio_format='pcm16', input_audio_transcription=None, instructions="Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a l