Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge fastapi server repo #3497

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 0 additions & 94 deletions .github/workflows/pypi-release.yml

This file was deleted.

46 changes: 0 additions & 46 deletions .github/workflows/style_check.yml

This file was deleted.

3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,5 @@ wandb
depot/*
coqui_recipes/*
local_scripts/*
coqui_demos/*
coqui_demos/*
fastapi-server/demo_outputs/*
9 changes: 9 additions & 0 deletions fastapi-server/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
FROM python:3.10

# Install system and Python dependencies BEFORE copying the sources so these
# (slow) layers stay cached when only the demo code or test data changes.
# The apt package lists and the pip download cache are dropped in the same
# layer to keep the image small.
RUN apt-get -y update \
    && apt-get -y install --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir gradio requests

# demo.py reads test/default_speaker.json relative to the working directory.
RUN mkdir -p /code/test
WORKDIR /code
COPY demo.py test/default_speaker.json /code/
COPY test/ /code/test/
CMD ["python", "demo.py"]
121 changes: 121 additions & 0 deletions fastapi-server/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import gradio as gr
import requests
import base64
import tempfile
import json
import os


# Base URL of the TTS server; override it with the TTS_API_ENDPOINT env variable.
SERVER_URL = os.environ.get("TTS_API_ENDPOINT", 'http://localhost:8888')
# Root folder for everything the demo writes to disk.
OUTPUT = "./demo_outputs"


def prepare_output_dirs(output_dir):
    """Create the demo's output folders and load previously cloned speakers.

    Ensures ``<output_dir>/cloned_speakers`` and
    ``<output_dir>/generated_audios`` exist (also when ``output_dir`` itself
    already exists but one of the sub-folders is missing), then reads every
    ``*.json`` file found in ``cloned_speakers``.

    Args:
        output_dir: Root folder for the demo's files.

    Returns:
        A dict mapping speaker name (file name without ``.json``) to the
        parsed JSON content of that file. Empty when nothing was stored yet.
    """
    speakers_dir = os.path.join(output_dir, "cloned_speakers")
    had_speakers_dir = os.path.isdir(speakers_dir)

    # exist_ok=True makes this safe for a fresh, partial or complete layout.
    os.makedirs(speakers_dir, exist_ok=True)
    os.makedirs(os.path.join(output_dir, "generated_audios"), exist_ok=True)

    speakers = {}
    if had_speakers_dir:
        print("Loading existing cloned speakers...")
        # Sorted so the speaker order is stable across runs and platforms.
        for filename in sorted(os.listdir(speakers_dir)):
            if filename.endswith(".json"):
                with open(os.path.join(speakers_dir, filename), "r") as fp:
                    speakers[filename[:-5]] = json.load(fp)
        print("Available cloned speakers:", ", ".join(speakers.keys()))
    return speakers


print("Preparing file structure...")
# Speaker name -> embeddings dict; shared with (and mutated by) clone_speaker.
cloned_speakers = prepare_output_dirs(OUTPUT)

# Fetch the supported languages and the built-in ("studio") speakers once at
# start-up. NOTE: the constant name LANUGAGES is misspelled, but it is
# referenced elsewhere in this file, so the name is kept as-is.
try:
    print("Getting metadata from server ...")
    languages_response = requests.get(SERVER_URL + "/languages")
    # Fail on HTTP errors instead of treating an error body as metadata.
    languages_response.raise_for_status()
    LANUGAGES = languages_response.json()
    print("Available languages:", ", ".join(LANUGAGES))

    speakers_response = requests.get(SERVER_URL + "/studio_speakers")
    speakers_response.raise_for_status()
    STUDIO_SPEAKERS = speakers_response.json()
    print("Available studio speakers:", ", ".join(STUDIO_SPEAKERS.keys()))
except (requests.RequestException, ValueError) as err:
    # Only connection/HTTP/JSON failures are translated; KeyboardInterrupt and
    # SystemExit are no longer swallowed, and the root cause stays chained.
    raise Exception("Please make sure the server is running first.") from err


def clone_speaker(upload_file, clone_speaker_name, cloned_speaker_names):
    """Clone a speaker from a reference recording and register it in the demo.

    Uploads the audio file to the server's ``/clone_speaker`` endpoint, stores
    the returned embeddings as ``<OUTPUT>/cloned_speakers/<name>.json`` and in
    the in-memory ``cloned_speakers`` dict, and adds the name to the list of
    selectable cloned speakers.

    Args:
        upload_file: Path of the uploaded reference audio file.
        clone_speaker_name: Name under which the speaker is stored.
        cloned_speaker_names: List of known cloned speaker names (updated in
            place).

    Returns:
        A tuple ``(upload_file, clone_speaker_name, cloned_speaker_names,
        dropdown)`` where ``dropdown`` is a ``gr.Dropdown`` carrying the
        refreshed choices.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Close the upload handle as soon as the request is done.
    with open(upload_file, "rb") as wav_fp:
        response = requests.post(
            SERVER_URL + "/clone_speaker",
            files={"wav_file": ("reference.wav", wav_fp)},
        )
    # Do not persist an error payload as if it were speaker embeddings.
    response.raise_for_status()
    embeddings = response.json()

    # NOTE(review): clone_speaker_name comes straight from the UI and is used
    # as a file name; consider rejecting path separators.
    with open(os.path.join(OUTPUT, "cloned_speakers", clone_speaker_name + ".json"), "w") as fp:
        json.dump(embeddings, fp)
    cloned_speakers[clone_speaker_name] = embeddings

    # Re-cloning an existing name overwrites it; avoid a duplicate choice.
    if clone_speaker_name not in cloned_speaker_names:
        cloned_speaker_names.append(clone_speaker_name)
    return upload_file, clone_speaker_name, cloned_speaker_names, gr.Dropdown(choices=cloned_speaker_names)

def tts(text, speaker_type, speaker_name_studio, speaker_name_custom, lang):
    """Synthesize ``text`` through the server and save the audio to disk.

    Args:
        text: Text to synthesize.
        speaker_type: ``'Studio'`` to use a studio speaker; any other value
            selects a cloned speaker.
        speaker_name_studio: Key into ``STUDIO_SPEAKERS`` (used for 'Studio').
        speaker_name_custom: Key into ``cloned_speakers`` (used otherwise).
        lang: Language code forwarded to the server.

    Returns:
        Path of the generated ``.wav`` file inside
        ``<OUTPUT>/generated_audios``.

    Raises:
        KeyError: If the selected speaker name is unknown.
        requests.HTTPError: If the server answers with an error status.
    """
    if speaker_type == 'Studio':
        embeddings = STUDIO_SPEAKERS[speaker_name_studio]
    else:
        embeddings = cloned_speakers[speaker_name_custom]

    response = requests.post(
        SERVER_URL + "/tts",
        json={
            "text": text,
            "language": lang,
            "speaker_embedding": embeddings["speaker_embedding"],
            "gpt_cond_latent": embeddings["gpt_cond_latent"],
        },
    )
    # An error body is not base64 audio; fail loudly instead of writing junk.
    response.raise_for_status()
    # The response body is decoded from base64 before being written out.
    audio_bytes = base64.b64decode(response.content)

    audio_dir = os.path.join(OUTPUT, "generated_audios")
    os.makedirs(audio_dir, exist_ok=True)
    # Public tempfile API for a unique file name; delete=False keeps the file
    # on disk after the handle is closed so the returned path stays valid.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", dir=audio_dir, delete=False) as fp:
        fp.write(audio_bytes)
    return fp.name

# Gradio UI: one tab for synthesis, one tab for cloning a new speaker.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a whitespace-stripped source — confirm against the intended layout.
with gr.Blocks() as demo:
    # Per-session list of cloned speaker names, seeded from what is on disk.
    cloned_speaker_names = gr.State(list(cloned_speakers.keys()))

    with gr.Tab("TTS"):
        with gr.Column() as row4:
            with gr.Row() as col4:
                # Pre-select "Asya Anara" only when the server offers it.
                default_studio_speaker = None
                if "Asya Anara" in STUDIO_SPEAKERS.keys():
                    default_studio_speaker = "Asya Anara"
                speaker_name_studio = gr.Dropdown(
                    label="Studio speaker",
                    choices=STUDIO_SPEAKERS.keys(),
                    value=default_studio_speaker,
                )

                # Pre-select the first cloned speaker, if there is one.
                known_clones = cloned_speaker_names.value
                default_cloned_speaker = known_clones[0] if known_clones else None
                speaker_name_custom = gr.Dropdown(
                    label="Cloned speaker",
                    choices=known_clones,
                    value=default_cloned_speaker,
                )

                speaker_type = gr.Dropdown(
                    label="Speaker type",
                    choices=["Studio", "Cloned"],
                    value="Studio",
                )
            with gr.Column() as col2:
                lang = gr.Dropdown(
                    label="Language",
                    choices=LANUGAGES,
                    value="en",
                )
                text = gr.Textbox(
                    label="text",
                    value="A quick brown fox jumps over the lazy dog.",
                )
                tts_button = gr.Button(value="TTS")
            with gr.Column() as col3:
                generated_audio = gr.Audio(
                    label="Generated audio",
                    autoplay=True,
                )

    with gr.Tab("Clone a new speaker"):
        with gr.Column() as col1:
            upload_file = gr.Audio(
                label="Upload reference audio",
                type="filepath",
            )
            clone_speaker_name = gr.Textbox(
                label="Speaker name",
                value="default_speaker",
            )
            clone_button = gr.Button(value="Clone speaker")

    # Cloning echoes its inputs back and refreshes the cloned-speaker dropdown.
    clone_inputs = [upload_file, clone_speaker_name, cloned_speaker_names]
    clone_outputs = [upload_file, clone_speaker_name, cloned_speaker_names, speaker_name_custom]
    clone_button.click(fn=clone_speaker, inputs=clone_inputs, outputs=clone_outputs)

    # Synthesis writes the generated file path into the audio player.
    tts_inputs = [text, speaker_type, speaker_name_studio, speaker_name_custom, lang]
    tts_button.click(fn=tts, inputs=tts_inputs, outputs=[generated_audio])

if __name__ == "__main__":
    # Send one warm-up synthesis request with the bundled default speaker and
    # abort start-up if the server rejects it.
    print("Warming up server...")
    with open("test/default_speaker.json", "r") as fp:
        warmup_speaker = json.load(fp)

    warmup_payload = {
        "text": "This is a warmup request.",
        "language": "en",
        "speaker_embedding": warmup_speaker["speaker_embedding"],
        "gpt_cond_latent": warmup_speaker["gpt_cond_latent"],
    }
    resp = requests.post(SERVER_URL + "/tts", json=warmup_payload)
    resp.raise_for_status()

    # Serve the UI on all interfaces, port 3009, without a public share link.
    print("Starting the demo...")
    launch_options = {
        "share": False,
        "debug": True,
        "server_port": 3009,
        "server_name": "0.0.0.0",
    }
    demo.launch(**launch_options)
Loading
Loading