In [None]:
!pip install faster-whisper websockets sounddevice numpy pycloudflared==0.0.12

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# --- CONFIG ---
# The ngrok auth token is a secret and must not live in the notebook source.
# It is read from the NGROK_AUTHTOKEN environment variable; if that is unset,
# the user is prompted for it (input is hidden and not echoed to the output).
# NOTE: a token was previously committed in this cell; treat it as leaked and
# revoke it in the ngrok dashboard.
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")

# Must be the port the WebSocket server listens on (WS_PORT = 8765 in the
# server cell); otherwise the URL printed below leads to a port with no server.
PORT = 8765

# --- LOGIN ---
ngrok.kill()  # Clean all old tunnels
ngrok.set_auth_token(NGROK_AUTHTOKEN)

# Start ngrok tunnel
tunnel = ngrok.connect(PORT, "http")
public_http = tunnel.public_url

# Convert the scheme (https -> wss, http -> ws) and append the "/ws" path
public_ws = public_http.replace("https://", "wss://").replace("http://", "ws://") + "/ws"

print("────────────────────────────────────────")
print("Your WebSocket URL (copy this into client.py):")
print(public_ws)
print("────────────────────────────────────────")


In [None]:
import asyncio
import json
import numpy as np
from collections import defaultdict, deque

import websockets
from faster_whisper import WhisperModel
from pyngrok import ngrok

# ----------------- CONFIG -----------------
# Samples per second used to size the rolling buffers.
# NOTE(review): presumably clients stream 16 kHz mono PCM — confirm against the client.
SAMPLE_RATE = 16000
# Length, in seconds, of the audio window retained for each call.
BUFFER_SECONDS = 10

# Port the WebSocket server binds to inside the Colab VM.
WS_PORT = 8765

# ----------------- MODEL ------------------
print("Loading Faster-Whisper model...")
model = WhisperModel(
    "small",
    device="cuda",
    compute_type="float16",
)
print("Model loaded on GPU.")


def _new_call_buffer():
    """Return an empty rolling buffer capped at BUFFER_SECONDS worth of samples."""
    return deque(maxlen=BUFFER_SECONDS * SAMPLE_RATE)


# call_id -> rolling deque of samples; a buffer is created on first access.
session_buffers = defaultdict(_new_call_buffer)

def _run_whisper(audio_np):
    """Blocking helper: transcribe ``audio_np`` and return the joined segment texts."""
    segments, _ = model.transcribe(
        audio_np,
        language="en",       # or None to auto-detect
        vad_filter=True,
        beam_size=1,
        without_timestamps=True
    )
    # Per the faster-whisper docs `segments` is a lazy generator, so the actual
    # decoding happens while it is consumed here — keep this inside the worker.
    return " ".join(seg.text.strip() for seg in segments)


async def transcribe_call(call_id: str) -> str:
    """Transcribe the audio currently buffered for ``call_id``.

    Args:
        call_id: Key into ``session_buffers`` identifying the call.

    Returns:
        The stripped text of all segments joined with single spaces, or ``""``
        when no audio is buffered for the call (including unknown call ids).
    """
    # .get() avoids creating an empty buffer as a side effect of asking about
    # a call id that has never sent audio.
    buf = session_buffers.get(call_id)
    if not buf:
        return ""

    # Snapshot the deque on the event-loop thread so later appends from the
    # handler cannot change the data while the worker is transcribing it.
    audio_np = np.array(buf, dtype=np.float32)

    # Whisper inference is blocking; run it in the default thread pool so the
    # event loop keeps serving other connections and keepalive pings.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _run_whisper, audio_np)


def _decode_message(message):
    """Parse one client message into ``(call_id, float32 samples)``.

    The message is a JSON object with a ``"call_id"`` entry and an
    ``"audio_hex"`` entry holding hex-encoded 16-bit integer samples.

    Raises:
        ValueError: invalid JSON, invalid hex, or a byte count that is not a
            multiple of two.
        KeyError: a required key is missing.
        TypeError: the payload or one of its fields has the wrong type.
    """
    data = json.loads(message)
    call_id = data["call_id"]
    audio_bytes = bytes.fromhex(data["audio_hex"])
    audio_int16 = np.frombuffer(audio_bytes, dtype=np.int16)
    # Scale int16 samples into [-1.0, 1.0)
    return call_id, audio_int16.astype(np.float32) / 32768.0


async def handler(websocket, path=None):
    """Serve one WebSocket client: buffer incoming audio and reply with transcripts.

    For every message, the decoded samples are appended to the call's rolling
    buffer and a JSON reply ``{"call_id", "partial_transcript"}`` is sent back.
    A malformed message gets an ``{"error": ...}`` reply and the connection
    stays open.

    Args:
        websocket: The connection object supplied by ``websockets``.
        path: Unused. Optional so the handler works both with ``websockets``
            releases that call ``handler(websocket, path)`` and with newer
            ones that pass only the connection.
    """
    print("Client connected:", websocket.remote_address)
    try:
        async for message in websocket:
            try:
                call_id, audio_float32 = _decode_message(message)
                # Inside the try: an unhashable call_id raises TypeError here.
                buf = session_buffers[call_id]
            except (ValueError, KeyError, TypeError) as exc:
                # json.JSONDecodeError is a ValueError subclass.
                print("Rejected malformed message from", websocket.remote_address, "-", repr(exc))
                await websocket.send(json.dumps({"error": f"invalid message: {exc!r}"}))
                continue

            buf.extend(audio_float32.tolist())

            # For demo, transcribe on every chunk
            transcript = await transcribe_call(call_id)

            response = {
                "call_id": call_id,
                "partial_transcript": transcript,
            }
            await websocket.send(json.dumps(response))
    except websockets.exceptions.ConnectionClosed:
        print("Client disconnected:", websocket.remote_address)
    # NOTE(review): nothing visible here ever removes entries from
    # session_buffers, so buffers of finished calls stay in memory. Whether a
    # call may span several connections is not visible — confirm before
    # dropping buffers on disconnect.


async def main():
    # Start local WebSocket server in Colab VM
    print(f"Starting WebSocket server on 0.0.0.0:{WS_PORT} ...")
    server = await websockets.serve(handler, "0.0.0.0", WS_PORT)

    # Expose this port via ngrok so your laptop can reach it
    public_url = ngrok.connect(WS_PORT, "http").public_url
    print("Ngrok public URL:", public_url)
    print("Use this URL from your client (convert http -> ws)")

    await asyncio.Future()  # run forever

await main()