In [None]:
!pip install "fastapi[standard]" nest-asyncio pyngrok uvicorn openai transformers -qq

# Must Do:
1. Sign up at [`ngrok`](https://dashboard.ngrok.com/signup) (a one-time step).
2. After logging in, go to `https://dashboard.ngrok.com/get-started/setup/linux`; you'll see a command like the one in the code cell below. Run it with your own token.
3. Just run every other cell as it is.
4. Once the app is running, it will print a random URL such as `Public URL: https://2127-35-186-148-235.ngrok-free.app` (it changes every time you run the code).
5. Paste that URL into your local `config.json -> transcription -> API_ENDPOINT`. If that field is left empty, your local app will run the model on your own system.
6. Run your local app as usual, as described in its directory.

In [None]:

# Stores your ngrok auth token using the ngrok CLI.
# Replace <YOUR_NGROK_TOKEN> (angle brackets included) with your own token before running this cell.
!ngrok config add-authtoken <YOUR_NGROK_TOKEN>

# Load Whisper and Expose `/transcribe` Endpoint

In [None]:
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import asyncio, re, torch
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from typing import List
import asyncio
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import logging
from pydantic import BaseModel
import json
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np
import base64
from typing import Optional
import asyncio
from transformers import pipeline
import logging


# Whisper settings
WHISPER_LANGUAGE = "english"
TRANSCRIPTION_MODEL_NAME = "openai/whisper-large-v3-turbo"
# Passed to generation as `max_new_tokens` by `process_transcription`.
MAX_SENTENCE_CHARACTERS = 128

# Diarization settings
NUM_SPEAKERS = 2

# Pick the compute backend: CUDA first, then Apple MPS, then CPU.
# `torch.backends.mps.is_available()` is the documented MPS availability check;
# the `torch.mps.is_available()` spelling is not present on every PyTorch release
# and would raise AttributeError precisely on hosts that have no CUDA device.
device_name = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
device = torch.device(device_name)
# Dtype used both for loading the model weights and for the pipeline.
torch_dtype = torch.bfloat16

# ------ Transcription Helpers ------
# The processor bundles the tokenizer and feature extractor used by the pipeline.
processor = AutoProcessor.from_pretrained(TRANSCRIPTION_MODEL_NAME)

# Load the speech-to-text weights in the configured dtype, then move them to the
# selected device.
transcription_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    TRANSCRIPTION_MODEL_NAME,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
).to(device)

# ASR pipeline built from the objects above; audio is processed in 30-second chunks.
transcription_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=transcription_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    chunk_length_s=30,  # min(LENGTH_IN_SEC, 30)
    torch_dtype=torch_dtype,
    device=device,
)


# Root logging is configured at WARNING level; this module logs through `logger`.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


# FastAPI application with a fully permissive CORS policy (any origin, method
# and header, credentials allowed).
# NOTE(review): wildcard origins combined with allow_credentials=True is worth
# re-checking against the FastAPI CORS guidance before exposing this publicly.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)

class AudioRequest(BaseModel):
    """Request body accepted by the `/transcribe` endpoint."""

    # Base64-encoded audio; the endpoint decodes it and reads the bytes as
    # 16-bit PCM samples.
    audio_data: str  # Base64 encoded audio data

class TranscriptionResponse(BaseModel):
    """Response body returned by the `/transcribe` endpoint."""

    # Transcribed text taken from the pipeline result's "text" entry.
    text: str
    # Optional timestamp data; the `/transcribe` handler shown in this notebook
    # leaves it at its default.
    timestamps: Optional[list] = None
    # Optional error message; also left at its default by the handler shown here.
    error: Optional[str] = None

async def process_transcription(audio_array: np.ndarray, sample_rate: int = 16000, language: str = "english", return_timestamps: bool = False):
    """
    Run the transcription pipeline on an audio array in a worker thread.

    The pipeline call is dispatched with `asyncio.to_thread`, so the event loop
    is not blocked while the model runs.

    Args:
        audio_array: Audio samples, handed to the pipeline as the "array" input.
        sample_rate: Sampling rate reported to the pipeline as "sampling_rate".
        language: Language forwarded to generation via `generate_kwargs`.
        return_timestamps: Forwarded both as a pipeline argument and inside
            `generate_kwargs`.

    Returns:
        The pipeline's result object; the caller reads its "text" entry.

    Raises:
        HTTPException: With status 500 when the pipeline call raises; the
            original exception is attached as the cause.
    """
    try:
        return await asyncio.to_thread(
            transcription_pipeline,
            {"array": audio_array, "sampling_rate": sample_rate},
            return_timestamps=return_timestamps,
            generate_kwargs={
                "language": language,
                "return_timestamps": return_timestamps,
                "max_new_tokens": MAX_SENTENCE_CHARACTERS
            }
        )
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback, which
        # is otherwise lost once the error is converted to an HTTP response.
        logger.exception("Transcription processing error: %s", e)
        # Chain the original error so the root cause stays visible to debuggers.
        raise HTTPException(status_code=500, detail=f"Transcription processing failed: {str(e)}") from e


@app.post("/transcribe", response_model=TranscriptionResponse)
async def transcribe_audio(request: AudioRequest):
    """
    Transcribe the base64-encoded audio carried in the request body.

    The payload in `request.audio_data` is base64-decoded, interpreted as
    16-bit PCM samples, scaled to float32 and passed to
    `process_transcription`. Only the `text` field of the response is set.

    Raises:
        HTTPException: 400 when the payload cannot be base64-decoded, cannot be
            read as int16 samples, or contains no samples; 500 for any other
            unexpected failure. HTTPExceptions raised further down are passed
            through unchanged.
    """
    try:
        # Step 1: base64 text -> raw bytes.
        try:
            pcm_bytes = base64.b64decode(request.audio_data)
        except Exception:
            raise HTTPException(status_code=400, detail="Invalid base64 audio data")

        # Step 2: raw bytes -> int16 samples -> float32 scaled by 1/32768.
        try:
            int16_samples = np.frombuffer(pcm_bytes, dtype=np.int16)
            audio_array = int16_samples.astype(np.float32) / 32768.0
        except Exception:
            raise HTTPException(status_code=400, detail="Failed to process audio data")

        # Step 3: refuse payloads that decode to zero samples.
        if audio_array.size == 0:
            raise HTTPException(status_code=400, detail="Empty audio data")

        # Step 4: run the model and wrap its text in the response schema.
        transcription = await process_transcription(audio_array)
        return TranscriptionResponse(text=transcription["text"])

    except HTTPException:
        # Already carries the intended status code; let it propagate untouched.
        raise
    except Exception as exc:
        logger.error(f"Unexpected error in transcribe_audio: {exc}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(exc)}")

# Optional: health check endpoint
@app.get("/health")
async def health_check():
    """Return a fixed payload reporting the service status as healthy."""
    status_payload = {"status": "healthy"}
    return status_payload

# Run Code


Copy the `Public URL` into your local `/config`. Every time you run the code below it will be a different URL, so you need to update the config file too.

In [None]:
import torch, nest_asyncio, uvicorn
from pyngrok import ngrok

# Port shared by the ngrok tunnel and the uvicorn server; defined once so the
# two can never drift apart.
PORT = 8000

# Open a public ngrok tunnel to the local port and show its URL.
ngrok_tunnel = ngrok.connect(PORT)
print('Public URL:', ngrok_tunnel.public_url)
# Patch asyncio to allow nested event loops before starting uvicorn from
# inside the notebook.
nest_asyncio.apply()
# Blocks this cell while the server is running.
uvicorn.run(app, port=PORT)