# SDialog dependencies

In [None]:
import os
import json
from tqdm import tqdm

In [None]:
import sdialog
from sdialog import Dialog

# Load dialog

In [None]:
# Load the demo dialog from its JSON file and display it in the cell output.
original_dialog = Dialog.from_file("dialog_demo.json")
original_dialog.print()

# Audio Generation

### Instantiate the voices database

If the download of the voices database times out, fetch it manually with the Hugging Face CLI command `huggingface-cli download --repo-type dataset sdialog/voices-libritts`.

In [None]:
from sdialog.audio.voice_database import HuggingfaceVoiceDatabase
# Build the voice database from the "sdialog/voices-libritts" Hugging Face dataset.
voices_libritts = HuggingfaceVoiceDatabase("sdialog/voices-libritts")
# Sanity check: request one voice for a 20-year-old male speaker; as the last
# expression of the cell, the returned value is shown in the cell output.
# NOTE(review): the keyword is spelled `genre` here while the surrounding text
# refers to the persona attribute `gender` -- confirm against the installed
# sdialog version.
voices_libritts.get_voice(genre="male", age=20)

### Instantiate the TTS model

In [None]:
from sdialog.audio.tts_engine import IndexTTS
# Create the IndexTTS engine, requesting the CPU as its device.
tts_engine = IndexTTS(device="cpu")

## Setup stage: Audio Dialog and Audio Pipeline

In [None]:
from sdialog.audio.audio_dialog import AudioDialog
from sdialog.audio.audio_pipeline import AudioPipeline

Convert the original dialog into an audio-enhanced dialog.

In [None]:
# Convert the text-only dialog into an AudioDialog, the object later passed to
# the audio pipeline.
dialog: AudioDialog = AudioDialog.from_dialog(original_dialog)

## Step 1: Concatenated utterances

Instantiate the audio pipeline so that it uses `IndexTTS` (`tts_engine`) as the TTS model and saves the outputs of all dialogs into the directory `./outputs`.

The voices are sampled from `voices_libritts` based on the persona attributes `age` and `gender`, as assigned in the original textual dialog.

In [None]:
# Single location for everything the pipeline writes; create it up front so
# the cell can be re-run safely (exist_ok=True makes this a no-op if present).
output_dir = "./outputs"
os.makedirs(output_dir, exist_ok=True)

# Wire the pipeline to the TTS engine and the voice database created earlier.
# Note: `AudioPipeline()` with no arguments can also be used and relies on
# default values.
audio_pipeline = AudioPipeline(
    dir_audio=output_dir,
    tts_pipeline=tts_engine,
    voice_database=voices_libritts,
)

Run the audio pipeline on the previously converted dialog. In this case we focus on generating the "unprocessed" audio, which consists of the aggregation of all utterances from the dialog. Rather than using the dialog identifier as the name of the directory, we use a custom directory name, `demo_dialog_indextts`, so the outputs are saved at `./outputs/demo_dialog_indextts/`.

In [None]:
# Run the pipeline on the dialog with only step 1 enabled (steps 2 and 3 are
# switched off), storing the results under the custom directory name instead
# of the dialog identifier.
dialog: AudioDialog = audio_pipeline.inference(
    dialog,
    dialog_dir_name="demo_dialog_indextts",
    do_step_1=True,
    do_step_2=False,
    do_step_3=False,
)

# Report where the audio produced by the first pipeline stage was written.
step_1_audio_path = dialog.audio_step_1_filepath
print("Audio generated successfully at:", step_1_audio_path)