# IndicF5 Gradio Demo

**Prerequisites:**
1. Enable a GPU runtime: Runtime → Change runtime type → T4 GPU
2. Request access to the model: https://huggingface.co/ai4bharat/IndicF5
3. Get your HuggingFace token: https://huggingface.co/settings/tokens

In [None]:
# Check GPU
import torch
# Fail fast: the rest of the notebook is meant to run on a CUDA device,
# so report the attached GPU or stop right here.
if torch.cuda.is_available():
    print(f'✅ GPU: {torch.cuda.get_device_name(0)}')
else:
    raise RuntimeError('❌ GPU not available!')

In [None]:
# Install dependencies (RESTART RUNTIME AFTER THIS CELL)
# Remove the numpy/scipy builds currently installed, then reinstall them with
# numpy pinned to 1.26.4.
# NOTE(review): the pin presumably avoids an incompatibility between newer
# numpy releases and one of the packages installed below — confirm before changing.
!pip uninstall -y numpy scipy -q
!pip install numpy==1.26.4 -q
!pip install scipy -q
# NOTE(review): the transformers upper bound is presumably needed by the
# IndicF5 model code loaded with trust_remote_code — confirm before relaxing it.
!pip install 'transformers<4.50' accelerate -q
# IndicF5 itself is installed straight from its GitHub repository.
!pip install git+https://github.com/ai4bharat/IndicF5.git -q
# gradio provides the demo UI and soundfile writes the reference WAV in the demo cell.
# NOTE(review): torchcodec is never imported directly in this notebook —
# presumably an audio backend needed by a dependency; confirm.
!pip install gradio torchcodec soundfile -q
# NOTE(review): the restart is presumably needed so the reinstalled numpy/scipy
# replace any copies already imported in this session — confirm.
print('\n⚠️ RESTART RUNTIME NOW! Then skip this cell and run the next ones.')

In [None]:
# Login to HuggingFace (required for gated model)
# notebook_login() opens the huggingface_hub login prompt inside the notebook;
# enter the access token created at https://huggingface.co/settings/tokens.
# Access to ai4bharat/IndicF5 must already have been requested for this account.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
import torch
import gradio as gr
import tempfile
import soundfile as sf
import numpy as np
from transformers import AutoModel

# Pick the execution device up front: CUDA when present, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fetch the gated IndicF5 checkpoint and move it onto the chosen device.
# trust_remote_code=True executes Python code shipped with the model repository,
# so only use it with repositories you trust.
print('Loading IndicF5 model...')
model = AutoModel.from_pretrained(
    'ai4bharat/IndicF5',
    trust_remote_code=True,
).to(device)
print(f'✅ Model loaded on {device}')

# Sample rate reported for the generated audio (the value this demo has always returned).
OUTPUT_SAMPLE_RATE = 24000


def synthesize(text, ref_audio, ref_text):
    """Generate speech for ``text`` using a reference recording as the voice prompt.

    Args:
        text: Text to synthesize.
        ref_audio: ``(sample_rate, samples)`` tuple as produced by the
            ``gr.Audio(type='numpy')`` input, or ``None`` when nothing was supplied.
        ref_text: Transcript of the reference recording.

    Returns:
        A ``(OUTPUT_SAMPLE_RATE, samples)`` tuple for the audio output component.
        int16 model output is rescaled to float32 in [-1, 1).

    Raises:
        gr.Error: If any of the three inputs is missing or blank.
    """
    import os  # local import keeps this cell self-contained

    has_text = bool(text and text.strip())
    has_ref_text = bool(ref_text and ref_text.strip())
    if ref_audio is None or not has_text or not has_ref_text:
        # The click handler feeds an audio output, which would treat a returned
        # string as a file path; gr.Error surfaces the message in the UI instead.
        raise gr.Error('Error: Provide all inputs.')

    sr, data = ref_audio

    # The model takes the reference as a file path, so write it to a scratch
    # directory that is removed as soon as inference finishes (no leaked WAVs).
    with tempfile.TemporaryDirectory() as tmp_dir:
        ref_path = os.path.join(tmp_dir, 'reference.wav')
        sf.write(ref_path, data, sr)
        out = model(text, ref_audio_path=ref_path, ref_text=ref_text)

    print(f'[DEBUG] dtype={out.dtype}, min={out.min()}, max={out.max()}')
    if out.dtype == np.int16:
        # Rescale 16-bit PCM to floating point.
        out = out.astype(np.float32) / 32768.0
    return OUTPUT_SAMPLE_RATE, out

# Build the demo page: inputs stacked in a column on the left, generated audio beside it.
with gr.Blocks(title='IndicF5') as app:
    gr.Markdown(value='# IndicF5 TTS')

    with gr.Row():
        # Input column: target text, reference recording, and its transcript.
        with gr.Column():
            txt = gr.Textbox(label='Text', lines=3)
            ref = gr.Audio(label='Reference Audio', type='numpy')
            ref_txt = gr.Textbox(label='Reference Text')
            btn = gr.Button(value='Generate', variant='primary')

        # Player for the synthesized result.
        out = gr.Audio(label='Output')

    # Run synthesize on click and route its return value to the output player.
    btn.click(
        fn=synthesize,
        inputs=[txt, ref, ref_txt],
        outputs=[out],
    )

# share=True requests a public link; debug=True keeps the cell attached to the server.
app.launch(share=True, debug=True)