# IndicF5 Gradio Demo

**Prerequisites:**
1. Select a GPU runtime: Runtime → Change runtime type → T4 GPU
2. Request access to the model: https://huggingface.co/ai4bharat/IndicF5
3. Create a Hugging Face access token (needed for the login cell): https://huggingface.co/settings/tokens

In [None]:
import torch
# Fail fast when no CUDA device is visible; otherwise report which GPU
# was found (device index 0).
if torch.cuda.is_available():
    print(f'✅ GPU: {torch.cuda.get_device_name(0)}')
else:
    raise RuntimeError('❌ GPU not available!')

In [None]:
# Install dependencies (RESTART RUNTIME AFTER THIS)
# Remove any preinstalled numpy/scipy first, then reinstall them with numpy
# pinned to 1.26.4.
!pip uninstall -y numpy scipy -q
!pip install numpy==1.26.4 scipy -q
# transformers is capped below 4.50.
# NOTE(review): presumably required by the IndicF5 remote code — confirm.
!pip install 'transformers<4.50' accelerate -q
# IndicF5 itself is installed straight from its GitHub repository.
!pip install git+https://github.com/ai4bharat/IndicF5.git -q
# gradio, soundfile and requests are imported by the demo cell; torchcodec is
# not imported directly anywhere in this notebook.
!pip install gradio torchcodec soundfile requests -q
# Reminder shown to the user once the installs have finished.
print('\n⚠️ RESTART RUNTIME! Then skip this cell.')

In [None]:
# Log in to Hugging Face from inside the notebook. The prerequisites list
# access to ai4bharat/IndicF5 and an HF token as requirements for this demo.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
import torch, gradio as gr, tempfile, soundfile as sf, numpy as np, requests, io
from transformers import AutoModel

# Load audio from URL
def load_audio_url(url):
    """Download an audio file and decode it into samples.

    Args:
        url: HTTP(S) location of an audio file that ``soundfile`` can read.

    Returns:
        A ``(sample_rate, samples)`` tuple. Samples decoded as ``float64``
        are converted to ``np.int16``; any other dtype is returned as
        decoded.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    # Bound the wait and fail on HTTP errors so that an error page is never
    # handed to the audio decoder as if it were a WAV file.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    samples, sample_rate = sf.read(io.BytesIO(response.content))
    if samples.dtype == np.float64:
        # A full-scale +1.0 sample scales to 32768, which does not fit in
        # int16, so clip to the representable range before casting.
        samples = np.clip(samples * 32768, -32768, 32767).astype(np.int16)
    return sample_rate, samples

# Example prompts
# Bundled demo prompts. Each entry holds:
#   name     - label shown in the example dropdown
#   url      - location of the reference recording
#   ref_text - transcript of the reference recording
#   synth    - sample sentence to synthesize (written in a different script
#              than the reference transcript)
EXAMPLES = [
    dict(
        name='PAN_F (Happy)',
        url='https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/PAN_F_HAPPY_00002.wav',
        ref_text='ਇੱਕ ਗ੍ਰਾਹਕ ਨੇ ਸਾਡੀ ਬੇਮਿਸਾਲ ਸੇਵਾ ਬਾਰੇ ਦਿਲੋਂਗਵਾਹੀ ਦਿੱਤੀ ਜਿਸ ਨਾਲ ਸਾਨੂੰ ਅਨੰਦ ਮਹਿਸੂਸ ਹੋਇਆ।',
        synth='मैं बिना किसी चिंता के अपने दोस्तों को अपने ऑटोमोबाइल एक्सपर्ट के पास भेज देता हूँ।',
    ),
    dict(
        name='TAM_F (Happy)',
        url='https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/TAM_F_HAPPY_00001.wav',
        ref_text='நான் நெனச்ச மாதிரியே அமேசான்ல பெரிய தள்ளுபடி வந்திருக்கு.',
        synth='ഭക്ഷണത്തിന് ശേഷം തൈര് സാദം കഴിച്ചാൽ ഒരു ഉഷാറാണ്!',
    ),
    dict(
        name='KAN_F (Happy)',
        url='https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/KAN_F_HAPPY_00001.wav',
        ref_text='ನಮ್‌ ಫ್ರಿಜ್ಜಲ್ಲಿ  ಕೂಲಿಂಗ್‌ ಸಮಸ್ಯೆ ಆಗಿ ನಾನ್‌ ಭಾಳ ದಿನದಿಂದ ಒದ್ದಾಡ್ತಿದ್ದೆ.',
        synth='চেন্নাইয়ের শেয়ারের অটোর যাত্রীদের মধ্যে খাবার ভাগ করে খাওয়াটা আমার কাছে মন খুব ভালো করে দেওয়া একটা বিষয়।',
    ),
]

# Download every reference clip once up front and cache its sample rate
# ('sr') and samples ('data') on the example entry itself.
print('Loading examples...')
for ex in EXAMPLES:
    ex.update(zip(('sr', 'data'), load_audio_url(ex['url'])))
print('✅ Examples loaded')

# Load the IndicF5 model (its custom code comes from the model repository,
# hence trust_remote_code=True) and move it onto the GPU.
print('Loading IndicF5...')
model = AutoModel.from_pretrained('ai4bharat/IndicF5', trust_remote_code=True).to('cuda')
print('✅ Model loaded')

def synthesize(text, ref_audio, ref_text):
    """Generate speech for ``text`` using a reference clip and its transcript.

    Args:
        text: Text to synthesize.
        ref_audio: ``(sample_rate, samples)`` tuple for the reference clip,
            or ``None`` when no clip was provided.
        ref_text: Transcript of the reference clip.

    Returns:
        ``(24000, samples)`` for the output audio component, or ``None``
        when the reference clip is missing or either text is empty or
        whitespace-only.
    """
    import os  # local import: only needed to build the temp-file path

    if ref_audio is None:
        return None
    if not (text and text.strip()) or not (ref_text and ref_text.strip()):
        return None
    sr, data = ref_audio
    # The model takes the reference as a file path, so write it to disk. A
    # temporary directory removes the file once the model has run, so
    # requests no longer leave a stray WAV behind each time.
    with tempfile.TemporaryDirectory() as tmp_dir:
        ref_path = os.path.join(tmp_dir, 'reference.wav')
        sf.write(ref_path, data, sr)
        out = model(text, ref_audio_path=ref_path, ref_text=ref_text)
    print(f'[DEBUG] min={out.min()}, max={out.max()}')
    # Convert 16-bit integer output to float32 in [-1.0, 1.0).
    if out.dtype == np.int16:
        out = out.astype(np.float32) / 32768.0
    # NOTE(review): 24000 is presumably the model's output sample rate — confirm.
    return 24000, out

def load_example(name):
    """Look up a bundled example by its display name.

    Args:
        name: Value of the ``'name'`` key of the wanted ``EXAMPLES`` entry.

    Returns:
        ``((sample_rate, samples), ref_text, synth)`` for the first entry
        whose name matches, or ``(None, '', '')`` when nothing matches.
    """
    for entry in EXAMPLES:
        if entry['name'] != name:
            continue
        reference_clip = (entry['sr'], entry['data'])
        return reference_clip, entry['ref_text'], entry['synth']
    # No match (e.g. the selection was cleared): return empty values.
    return None, '', ''

# Build the demo UI. Components are created in display order: a heading, an
# example picker, then a row with the inputs on the left and the generated
# audio on the right.
with gr.Blocks(title='IndicF5') as app:
    gr.Markdown('# IndicF5 Text-to-Speech')
    
    # Choices are the 'name' values of the bundled EXAMPLES.
    example_dropdown = gr.Dropdown([e['name'] for e in EXAMPLES], label='Load Example')
    
    with gr.Row():
        with gr.Column():
            txt = gr.Textbox(label='Text to synthesize', lines=3)
            # type='numpy': synthesize() unpacks this value as (sr, data).
            ref = gr.Audio(label='Reference Audio', type='numpy')
            ref_txt = gr.Textbox(label='Reference Text')
            btn = gr.Button('Generate', variant='primary')
        out = gr.Audio(label='Output')
    
    # Output order matches load_example's return value:
    # (reference audio, reference text, text to synthesize).
    example_dropdown.change(load_example, [example_dropdown], [ref, ref_txt, txt])
    # Input order matches synthesize(text, ref_audio, ref_text).
    btn.click(synthesize, [txt, ref, ref_txt], [out])

# NOTE(review): share=True presumably requests a public share link and
# debug=True keeps the cell attached for error output — confirm against the
# Gradio launch() documentation.
app.launch(share=True, debug=True)