In [None]:
%%bash
# Install dependencies and libraries.
# NOTE: the %%bash cell magic must be the very first line of the cell, so this
# comment lives below it.
pip install TTS
pip install onnx
pip install onnxruntime
# -y answers apt's confirmation prompt; the cell has no interactive stdin.
sudo apt-get install -y espeak-ng
pip install fastapi uvicorn pydantic pyngrok nest_asyncio
pip install python-multipart

In [1]:
import os
import tempfile

import nest_asyncio
import numpy as np
import uvicorn
from fastapi import BackgroundTasks, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from google.colab import userdata
from pydantic import BaseModel
from pyngrok import ngrok
from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import Vits
from TTS.utils.audio.numpy_transforms import save_wav

# Patch asyncio so an event loop can be started from inside the notebook's
# already-running loop; presumably needed for the uvicorn.run() call in the
# last cell — confirm against the uvicorn version in use.
nest_asyncio.apply()



In [None]:
# Download the model and config file from Google Drive into /content/.
# NOTE(review): the loading cell expects /content/config.json and
# /content/coqui_vits.onnx; the saved filenames come from Drive, so confirm
# they match after the download.
!gdown 'https://drive.google.com/uc?id=1HmvdYfUYOKybF7B9_hjnvSKgVPH-LJFz' -O '/content/'
!gdown "https://drive.google.com/uc?id=1dCt9uGXPgOj41HcpnaSbwOT6b-KaOOOA" -O '/content/'

In [None]:
# Paths of the VITS config and ONNX model; both files are expected to already
# exist under /content/ (see the download cell).
config_path="/content/config.json"
model_path="/content/coqui_vits.onnx"
# Populate a VitsConfig from the JSON file and build the model object from it.
config = VitsConfig()
config.load_json(config_path)
vits = Vits.init_from_config(config)
# Load the ONNX model file; `vits` is the module-level object that tts_call()
# uses for tokenizing and inference.
vits.load_onnx(model_path)

In [24]:
def tts_call(text: str, language: str = "en", sample_rate: int = 22050) -> str:
  """Synthesize `text` with the loaded VITS ONNX model and save it as a WAV file.

  Args:
    text: Text to synthesize.
    language: Language code passed to the tokenizer. Defaults to "en", the
      value that was previously hard-coded.
    sample_rate: Sample rate passed to `save_wav`. Defaults to 22050, the
      value that was previously hard-coded.

  Returns:
    Path of a newly created temporary ".wav" file. The file is NOT removed
    automatically; the caller owns it and should delete it when done.

  Raises:
    Whatever the tokenizer, ONNX inference or `save_wav` raise. If writing the
    WAV fails, the temporary file is removed before the error propagates.
  """
  # Token ids as an int64 array; [None, :] adds a leading axis.
  text_inputs = np.asarray(
      vits.tokenizer.text_to_ids(text, language=language),
      dtype=np.int64,
  )[None, :]
  audio = vits.inference_onnx(text_inputs)
  # Only used to reserve a unique path: delete=False keeps the (empty) file on
  # disk after the handle is closed so save_wav can write to it by name.
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
    out_path = temp_file.name
  try:
    save_wav(wav=audio[0], path=out_path, sample_rate=sample_rate)
  except Exception:
    # Don't leave an orphaned temp file behind when writing fails.
    if os.path.exists(out_path):
      os.remove(out_path)
    raise
  return out_path

In [25]:
app = FastAPI()
# Open CORS policy: any origin, method and header may call this API.
# Credentials are disabled because a wildcard origin must not be combined with
# allow_credentials=True (see the FastAPI CORS documentation); no endpoint in
# this notebook relies on cookies or Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=False,
    allow_methods=['*'],
    allow_headers=['*'],
)

In [26]:
# Request body for /synthesize_audios: carries the text to be synthesized.
class TextRequest(BaseModel):
    text: str

# Response schema of the root endpoint: a single string field.
class OutputText(BaseModel):
    answer: str

In [27]:
@app.post("/synthesize_audios")
@app.get("/synthesize_audios")
def synthesize_audio(payload: TextRequest):
    """Synthesize speech for `payload.text` and return it as a WAV response.

    The route is registered for both GET and POST. GET is kept so existing
    callers continue to work; POST is added because the request carries a JSON
    body, which browser clients cannot send with GET.

    Args:
        payload: Request body containing the text to synthesize.

    Returns:
        FileResponse streaming the generated WAV with media type "audio/wav".
    """
    # Generate the audio file for the requested text.
    audio_file_path = tts_call(payload.text)
    # tts_call leaves the temp file on disk; remove it once the response has
    # been sent so repeated requests do not fill up the disk.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, audio_file_path)
    return FileResponse(audio_file_path, media_type="audio/wav", background=cleanup)

@app.get("/", response_model=OutputText)
async def root():
    """Respond on the root path with a fixed greeting payload."""
    greeting = {"answer": "Hello from TTS"}
    return greeting

In [None]:
# Read the ngrok auth token from Colab's secret store (never hard-code it).
NGROK_TOKEN = userdata.get('NGROK_TOKEN')
# Register the token through pyngrok's API instead of interpolating the secret
# into a shell command line.
ngrok.set_auth_token(NGROK_TOKEN)

In [None]:
# Single source of truth for the port: the tunnel target and the server's
# listening port must always match.
PORT = 8000

ngrok_tunnel = ngrok.connect(PORT)
print("Public URL:", ngrok_tunnel.public_url)
try:
    # Blocks until the server is stopped (e.g. by interrupting the cell).
    uvicorn.run(app, port=PORT)
finally:
    # Close the tunnel so re-running this cell does not accumulate open tunnels.
    ngrok.disconnect(ngrok_tunnel.public_url)