In [None]:
import asyncio, base64, json, struct, uuid
import numpy as np
import sounddevice as sd
import websockets
import modal

# Audio capture parameters.
SAMPLE_RATE = 16_000  # passed to the input stream as its sample rate
CHUNK_MS = 100  # length of one audio chunk in milliseconds (0.1 s)
CHUNK_SAMPLES = (SAMPLE_RATE * CHUNK_MS) // 1000  # samples in one chunk

# Random identifier generated once per run for this client.
CLIENT_ID = f"{uuid.uuid4()}"

# 1. Ask Modal for your websocket URL
# NOTE(review): the app name passed to lookup() is an empty string —
# presumably a placeholder; fill in the name of the deployed app.
app = modal.App.lookup("")
# NOTE(review): assumes the looked-up app exposes its classes via
# `.cls[...]` — confirm against the installed Modal client version.
Transcriber = app.cls["Transcriber"]
# Dict handed to register_client and then read/written by resolve_ws_url
# (keys "url" and CLIENT_ID).
client_dict = modal.Dict.new()

def _to_websocket_url(url: str) -> str:
    """Return *url* with an ``http``/``https`` scheme swapped for ``ws``/``wss``.

    Only the scheme is rewritten; any other occurrence of ``"http"`` in the
    host, path or query is left alone. URLs with any other scheme (including
    ones that are already ``ws``/``wss``) are returned unchanged.
    """
    ws_schemes = {"http": "ws", "https": "wss"}
    scheme, sep, rest = url.partition("://")
    ws_scheme = ws_schemes.get(scheme.lower()) if sep else None
    if ws_scheme is None:
        return url
    return ws_scheme + sep + rest


async def resolve_ws_url(timeout: float = 300.0, poll_interval: float = 0.5) -> str:
    """Obtain the websocket URL for this client from the Transcriber.

    Launches ``Transcriber.register_client`` with the shared dict and this
    client's id, waits for a ``"url"`` entry to appear in the dict, then sets
    the ``CLIENT_ID`` entry to ``False`` (the signal that lets
    ``register_client`` finish) and awaits the launched call.

    Args:
        timeout: Maximum number of seconds to wait for the ``"url"`` entry.
        poll_interval: Seconds to sleep between lookups of the entry.

    Returns:
        The URL with an ``http``/``https`` scheme converted to ``ws``/``wss``.

    Raises:
        TimeoutError: If no ``"url"`` entry appears within *timeout* seconds.
    """
    # register_client runs until you mark the dict entry false
    future = Transcriber.register_client.async_call(client_dict, CLIENT_ID)

    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    try:
        # The entry is written by the remote side, so it may not exist yet
        # right after launching the call; retry instead of failing at once.
        while True:
            try:
                url = client_dict["url"]
            except KeyError:
                if loop.time() >= deadline:
                    raise TimeoutError(
                        f"no websocket URL published within {timeout} seconds"
                    ) from None
                await asyncio.sleep(poll_interval)
            else:
                break
    finally:
        # Always release the remote call, even when the URL never showed up.
        client_dict[CLIENT_ID] = False

    await future
    return _to_websocket_url(url)  # safety net

async def send_microphone(url: str) -> None:
    """Stream microphone audio to the websocket at *url* and print replies.

    Opens the websocket, sends the ``start_client_session`` and ``set_vad``
    control messages, then runs two coroutines concurrently: one prints every
    message received from the server, the other reads ``CHUNK_SAMPLES``
    frames at a time from the microphone and sends each chunk as a
    base64-encoded ``audio`` message.

    Args:
        url: Websocket URL to connect to.

    Raises:
        Whatever the websocket or audio stream raises; the first failure of
        either coroutine is propagated after the other has been cancelled.
    """
    # Imported locally so this function does not depend on edits to the
    # file's import block.
    from concurrent.futures import ThreadPoolExecutor

    loop = asyncio.get_running_loop()

    async with websockets.connect(url, ping_interval=20) as ws:
        await ws.send(json.dumps({"type": "start_client_session"}))
        await ws.send(json.dumps({"type": "set_vad", "vad": False}))

        mic = sd.InputStream(samplerate=SAMPLE_RATE, channels=1, dtype="int16")
        # Entering the stream context starts it; leaving stops and closes it.
        # Context managers exit right-to-left, so the pool first waits for an
        # in-flight read to finish and only then is the stream torn down.
        with mic as in_stream, ThreadPoolExecutor(max_workers=1) as pool:

            async def reader():
                async for message in ws:
                    print("Transcript:", message)

            async def writer():
                while True:
                    # read() blocks until a full chunk is captured; run it in
                    # a worker thread so the event loop keeps servicing the
                    # reader and the websocket keepalive pings meanwhile.
                    frames, _ = await loop.run_in_executor(
                        pool, in_stream.read, CHUNK_SAMPLES
                    )
                    payload = base64.b64encode(frames.tobytes()).decode()
                    await ws.send(json.dumps({"type": "audio", "audio": payload}))

            tasks = [
                asyncio.ensure_future(reader()),
                asyncio.ensure_future(writer()),
            ]
            try:
                await asyncio.gather(*tasks)
            finally:
                # Do not leave the surviving coroutine running against a
                # socket/stream that is about to be closed.
                for task in tasks:
                    task.cancel()
                await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == "__main__":
    # Step 1: look up the websocket endpoint (runs in its own event loop).
    url = asyncio.run(resolve_ws_url())
    # Step 2: stream the microphone to that endpoint in a second event loop.
    asyncio.run(send_microphone(url=url))