In [None]:
# Authenticate this session with the Hugging Face Hub. login() is called with
# no arguments, so no access token is embedded in the notebook source.
# NOTE(review): presumably this prompts for the token interactively — confirm
# against the installed huggingface_hub version.
from huggingface_hub import login
login()


In [None]:
import subprocess
import sys

# Invoke pip through the interpreter that is running this kernel so the
# packages are installed for that same interpreter.
_pip_install = [sys.executable, "-m", "pip", "install", "-q"]

# Two separate pip runs, in order: Parler-TTS from its GitHub repository,
# then the audio / web-serving / tunnelling packages. check_call raises on a
# non-zero exit status, so a failed first run stops the second from starting.
for requirements in (
    ["git+https://github.com/huggingface/parler-tts.git"],
    ["soundfile", "fastapi", "uvicorn", "nest-asyncio", "pyngrok"],
):
    subprocess.check_call(_pip_install + requirements)

print("Installation complete")


In [None]:
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained checkpoint, then move it onto the selected device.
model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts")
model = model.to(device)

# Two tokenizers are needed: one loaded from the checkpoint itself (used for
# the text to be spoken) and one loaded from the name recorded on the model's
# text-encoder config (used for the voice description).
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)

print("Model loaded successfully")
print(f"Using device: {device}")


In [None]:
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel
import nest_asyncio
import uvicorn
from pyngrok import ngrok
from threading import Thread
import soundfile as sf
import numpy as np
import io

# Apply the nest_asyncio patch once, before any server code runs.
# NOTE(review): presumably needed because the notebook kernel already owns a
# running asyncio event loop — confirm it is still required given that uvicorn
# is started on its own thread further down.
nest_asyncio.apply()


In [None]:
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel
import nest_asyncio
import uvicorn
from pyngrok import ngrok
from threading import Thread
import soundfile as sf
import numpy as np
import io

# NOTE(review): the imports above and this call repeat the previous cell
# verbatim. Presumably kept so this cell can be run on its own; consider
# removing one of the two copies.
nest_asyncio.apply()

# The ASGI application object; every route below is registered on it and it is
# the object handed to uvicorn when the server is started.
app = FastAPI(
    version="1.0",
    title="Indic TTS API",
    description="Text-to-speech endpoint for LLM orchestration",
)

# Request body accepted by POST /generate-speech.
class TextInput(BaseModel):
    # Text to synthesize; required. It is fed to the prompt tokenizer.
    text: str
    # Free-form description of the desired voice/speaking style, fed to the
    # description tokenizer. Optional: callers that omit it get the default
    # "Rani" formal voice below.
    description: str = "Rani speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls"

@app.get("/")
def root():
    # Static identification payload: lets a caller confirm which service and
    # model checkpoint sit behind this URL. Nothing here is computed per request.
    service_info = {
        "status": "online",
        "service": "Indic TTS API",
        "model": "ai4bharat/indic-parler-tts",
    }
    return service_info

# Speaker names that the debug log reports as male voices.
_MALE_SPEAKER_NAMES = frozenset({"rohit", "aman", "jatin", "dinesh", "thomas"})


def _mentions_male_speaker(description: str) -> bool:
    """Return True if ``description`` contains a known male speaker name as a whole word."""
    # Split on every non-letter so "Rohit's" still yields the word "rohit",
    # while a name that is merely embedded in a longer word (e.g. "aman"
    # inside "Amanda") no longer counts as a match.
    words = "".join(ch if ch.isalpha() else " " for ch in description.lower()).split()
    return not _MALE_SPEAKER_NAMES.isdisjoint(words)


@app.post("/generate-speech")
def generate_speech(input: TextInput):
    """Receives text from LLM and returns audio file

    The request's ``description`` is tokenized with the description tokenizer
    and its ``text`` with the prompt tokenizer; both are passed to
    ``model.generate`` and the result is encoded as WAV at the model's
    configured sampling rate.

    Returns:
        A ``audio/wav`` response sent as the attachment ``speech.wav``.

    Raises:
        HTTPException: 400 if ``text`` is empty or only whitespace;
            500 if tokenization, generation, or WAV encoding fails.
    """
    # Validate before entering the try block: the blanket handler below would
    # otherwise turn this client error into a 500.
    if not input.text.strip():
        raise HTTPException(status_code=400, detail="Text must not be empty")

    try:
        description = input.description

        # Debug logging only — the description is passed to the model
        # unchanged regardless of which branch is taken.
        if _mentions_male_speaker(description):
            print(f" Using MALE voice: {description[:60]}...")
        else:
            print(f" Using FEMALE voice: {description[:60]}...")

        description_input_ids = description_tokenizer(
            description,
            return_tensors="pt"
        ).to(device)

        prompt_input_ids = tokenizer(
            input.text,
            return_tensors="pt"
        ).to(device)

        # Inference only, so skip gradient tracking.
        with torch.no_grad():
            generation = model.generate(
                input_ids=description_input_ids.input_ids,
                attention_mask=description_input_ids.attention_mask,
                prompt_input_ids=prompt_input_ids.input_ids,
                prompt_attention_mask=prompt_input_ids.attention_mask,
            )

        # Bring the result back to host memory and drop size-1 dimensions
        # before handing it to soundfile.
        audio_arr = generation.cpu().numpy().squeeze()
        sample_rate = model.config.sampling_rate

        # Encode to WAV entirely in memory; nothing is written to disk.
        buffer = io.BytesIO()
        sf.write(buffer, audio_arr, sample_rate, format="WAV")
        wav_bytes = buffer.getvalue()

    except Exception as e:
        # Chain the original exception so the server-side traceback keeps the
        # root cause while the client receives a 500 with a short message.
        raise HTTPException(status_code=500, detail=f"Speech generation failed: {str(e)}") from e

    return Response(
        content=wav_bytes,
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=speech.wav"}
    )

@app.get("/health")
def health_check():
    # CUDA availability is queried on every call; "device" is the value chosen
    # once when the model was loaded.
    cuda_ready = torch.cuda.is_available()
    return {"status": "healthy", "gpu_available": cuda_ready, "device": device}

# Confirmation that the cell ran: the app and its routes are now defined.
# Nothing is listening yet — this cell does not start a server.
print(" FastAPI application created - using original voice descriptions")


In [None]:
import getpass

# Read the ngrok auth token interactively so it is never stored in the notebook.
ngrok_token = getpass.getpass("Token")
ngrok.set_auth_token(ngrok_token)

# Open a tunnel to local port 8000, the port the uvicorn server binds to.
# pyngrok 5+ returns an NgrokTunnel object rather than a string; formatting
# that object directly would put its repr into the links printed below, so
# take its `public_url` attribute. The getattr fallback keeps older pyngrok
# releases, which returned the URL string itself, working.
tunnel = ngrok.connect(8000)
public_url = getattr(tunnel, "public_url", tunnel)

# The tunnel is open at this point, but requests only succeed once the
# server cell further down has been run.
print("TTS API is now live")
print(f"Public URL: {public_url}")
print(f"API Documentation: {public_url}/docs")
print(f"Health Check: {public_url}/health")
print("\nEndpoint for LLM:")
print(f"POST {public_url}/generate-speech")
print('Request body: {"text": "your text here"}')



In [None]:
def run_server():
    """Serve ``app`` with uvicorn on 0.0.0.0:8000 at "info" log level."""
    server_options = {"host": "0.0.0.0", "port": 8000, "log_level": "info"}
    uvicorn.run(app, **server_options)

# Run the server on a background thread so this cell returns immediately.
# The thread is a daemon, so it does not keep the process alive on its own.
server_thread = Thread(target=run_server)
server_thread.daemon = True
server_thread.start()

for message in (
    "Server is running",
    "API is accessible via the ngrok URL above",
):
    print(message)
