In [None]:
# WhisperX Colab Notebook (Stable, No Diarization)

# ✅ Step 1: Install dependencies (diarization is not used in this notebook)
# Installs WhisperX directly from its GitHub repository. No tag, commit, or
# torch version is pinned on this line, so the installed versions are whatever
# pip resolves at install time.
!pip install git+https://github.com/m-bain/whisperx.git

# ✅ Step 2: Upload an audio file
from google.colab import files
# `uploaded` is a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Nothing came back (e.g. the upload dialog was dismissed); fail with an
    # actionable message instead of an opaque IndexError.
    raise ValueError(
        "No audio file was uploaded. Re-run this cell and choose a file."
    )
# Only the first uploaded file is transcribed; any others are ignored.
audio_file = next(iter(uploaded))

# ✅ Step 3: Select language
import ipywidgets as widgets
from IPython.display import display

# Language picker. Each option is a (display label, language code) pair; the
# code of the current selection is exposed as `language_dropdown.value`.
language_dropdown = widgets.Dropdown(
    description='Language:',
    options=[
        ('Hindi', 'hi'),
        ('Tamil', 'ta'),
        ('Kannada', 'kn'),
        ('Marathi', 'mr'),
        ('English', 'en'),
    ],
    value='ta',  # Tamil is preselected
    disabled=False,
)

# Render the widget in the notebook output.
display(language_dropdown)

# ✅ Step 4: Run transcription (no diarization)
import whisperx
import torch
import json

# Prefer the larger model on GPU; use the smaller model on CPU.
# Only the CUDA attempt is guarded: if the CPU load itself fails there is
# nothing left to fall back to, so that error is allowed to propagate instead
# of being reported as a CUDA problem and retried identically.
model = None
try:
    if torch.cuda.is_available():
        model = whisperx.load_model("medium", device="cuda", compute_type="float16")
except Exception as e:
    # Deliberate best-effort: any failure on the GPU path degrades to CPU.
    print("⚠️ CUDA unavailable or incompatible. Falling back to CPU.")
    print("Error:", e)

if model is None:
    model = whisperx.load_model("small", device="cpu", compute_type="float32")

# NOTE(review): `language_dropdown.value` is read at the moment this line runs.
# If the dropdown is displayed in the same cell, the preselected default is
# presumably what gets used — confirm the dropdown lives in an earlier cell.
transcription = model.transcribe(audio_file, language=language_dropdown.value)

# ✅ Step 5: Export structured transcript as JSON
json_output = {
    # Fall back to the selected language *code* (not the dropdown's display
    # label) so the field has the same form as the language passed to
    # model.transcribe().
    "language": transcription.get("language", language_dropdown.value),
    "segments": transcription["segments"],
}

# ensure_ascii=False writes non-ASCII text (e.g. Hindi/Tamil script) as-is, so
# pin the file encoding to UTF-8 rather than relying on the platform default.
with open("transcript.json", "w", encoding="utf-8") as f:
    json.dump(json_output, f, indent=2, ensure_ascii=False)

# Trigger a browser download of the file that was just written.
files.download("transcript.json")
