# SDialog dependencies

In [1]:
# Setup the environment depending on weather we are running in Google Colab or Jupyter Notebook
from IPython import get_ipython


if "google.colab" in str(get_ipython()):
    print("Running on CoLab")

    # Installing Ollama (if you are not planning to use Ollama, you can just comment these lines to speed up the installation)
    !curl -fsSL https://ollama.com/install.sh | sh

    # Installing sdialog
    !git clone https://github.com/qanastek/sdialog.git
    %cd sdialog
    %pip install -e .
    %cd ..
else:
    print("Running in Jupyter Notebook")
    # Little hack to avoid the "OSError: Background processes not supported." error in Jupyter notebooks"
    import os
    get_ipython().system = os.system

Running in Jupyter Notebook


## Locally

Run the following commands and then `Restart` your environment.

In [2]:
%%script false --no-raise-error
# This cell is disabled by default: `%%script false` hands the cell body to the `false`
# command instead of running it, and `--no-raise-error` keeps the non-zero exit status
# from raising. Remove the first line to actually install sdialog from the parent directory.
%pip install -e ..
%pip show sdialog

To run the next steps quickly, we will start from an existing dialog generated in the previous tutorials. If you haven't downloaded `customer_support_dialogue.json` locally yet, please download it from our GitHub repository using the following command:

In [3]:
# If customer_support_dialogue.json is not present, download it
if os.path.exists("customer_support_dialogue.json"):
    print("customer_support_dialogue.json already exists")
else:
    !wget https://raw.githubusercontent.com/qanastek/sdialog/refs/heads/main/tests/data/customer_support_dialogue.json

customer_support_dialogue.json already exists


In [4]:
import os
import json
from tqdm import tqdm

In [5]:
import sdialog
from sdialog import Dialog

  from .autonotebook import tqdm as notebook_tqdm


# Load dialog

In [6]:
# Load the text dialog from the JSON file obtained above.
original_dialog = Dialog.from_file("customer_support_dialogue.json")
# Print the dialog's metadata followed by its turns.
original_dialog.print()

[1m[95m[dialog_id] [35md9cb91a7-c0bc-4113-9a82-7e48fb3f6e29[0m
[1m[95m[complete] [35mTrue[0m
[1m[95m[model] [35m{'name': 'amazon:anthropic.claude-3-5-sonnet-20240620-v1:0', 'temperature': 0.7, 'max_tokens': 512, 'region_name': 'us-east-1'}[0m
[1m[95m[seed] [35m42[0m
[1m[35m--- Dialogue Begins ---[0m
[31m[John] [0mHello, this is John Smith. I'm calling for the third time this week about my defective product and no one is helping me! This is unacceptable![0m
[94m[Sarah] [0mGood afternoon, Mr. Smith. This is Sarah Johnson from customer support. I sincerely apologize for the frustration you've experienced with your product and the lack of resolution so far. I completely understand how disappointing this must be for you. I'm here to help and I assure you that we'll get to the bottom of this issue today. Could you please provide me with your order number so I can pull up your information and get started on resolving this for you right away?[0m
[31m[John] [0mOrder n

# Tutorial 8: Audio Generation

### Instantiate the voice database

In [7]:
from sdialog.audio.voice_database import HuggingfaceVoiceDatabase
# Build the voice database from the "sdialog/voices-kokoro" identifier
# (presumably a Hugging Face Hub repository — confirm against the sdialog docs).
dummy_voice_database = HuggingfaceVoiceDatabase("sdialog/voices-kokoro")

[2025-10-13 01:17:33] INFO:root:Voice database populated with 2850 voices


Now let's fetch a voice for a `20`-year-old `male`:

In [8]:
# Ask the database for a voice matching the given gender and age; the result is shown as the cell output.
dummy_voice_database.get_voice(gender="male", age=20)

{'identifier': 'am_echo', 'voice': 'am_echo'}

### Instantiate the TTS model

In [9]:
!pip install -q kokoro>=0.9.4 soundfile
!apt-get -qq -y install espeak-ng > /dev/null 2>&1

32512

In [10]:
from sdialog.audio.tts_engine import KokoroTTS
# Create the Kokoro TTS engine with its default arguments.
tts_engine = KokoroTTS()



## Setup stage: Audio Dialog and Audio Pipeline

In [11]:
from sdialog.audio.audio_dialog import AudioDialog
from sdialog.audio.audio_pipeline import AudioPipeline

Convert the original dialog into an audio-enhanced dialog:

In [12]:
# Build an AudioDialog from the text dialog loaded earlier.
dialog: AudioDialog = AudioDialog.from_dialog(original_dialog)

Identify the speakers' names:

In [13]:
# Show the names stored under the `speaker_1` and `speaker_2` keys of the audio dialog.
print("Speaker 1:", dialog.speakers_names["speaker_1"])
print("Speaker 2:", dialog.speakers_names["speaker_2"])

Speaker 1: John
Speaker 2: Sarah


## Step 1 : Concatenated utterances

Instantiate the audio pipeline in order to use `Kokoro` (`tts_engine`) as the TTS model and save the outputs of all the dialogs into the directory `./audio_outputs_customer_support`.

The voices are sampled from the `dummy_voice_database` based on the persona attributes `age` and `gender`, as assigned during the original textual dialog.

In [14]:
# Make sure the output directory exists before handing it to the pipeline.
os.makedirs("./audio_outputs_customer_support", exist_ok=True)
# Build the pipeline from the voice database and TTS engine created above;
# `dir_audio` is the directory given to the pipeline for its audio outputs.
audio_pipeline = AudioPipeline(
    voice_database=dummy_voice_database,
    tts_pipeline=tts_engine,
    dir_audio="./audio_outputs_customer_support",
)
# audio_pipeline = AudioPipeline() # Can also be used with default values

Run the audio pipeline on the previously converted dialog. In this case we will focus on generating the "unprocessed" audio, which consists of the aggregation of all utterances from the dialog. Rather than using the dialog identifier as the name of the directory, we use a custom directory name, `demo_dialog_kokoro`, so the outputs are saved under `./audio_outputs_customer_support/demo_dialog_kokoro/`.

In [15]:
# Generate the audio for the dialog
# Only step 1 is enabled here; steps 2 and 3 are switched off.
# `dialog` is rebound to the object returned by the pipeline, which carries the step-1 file path.
dialog: AudioDialog = audio_pipeline.inference(
    dialog,
    do_step_1=True,
    do_step_2=False,
    do_step_3=False,
    dialog_dir_name="demo_dialog_kokoro",  # custom output sub-directory name instead of the dialog identifier
)

# Path to the audio of the first stage of the audio pipeline
print("Audio generated successfully at:", dialog.audio_step_1_filepath)

[2025-10-13 01:17:40] INFO:root:Dialog audio dir path: ./audio_outputs_customer_support
[2025-10-13 01:17:40] INFO:root:Generating utterances audios from dialogue d9cb91a7-c0bc-4113-9a82-7e48fb3f6e29
Generating utterances audios:   0%|          | 0/12 [00:00<?, ?it/s][W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.
Generating utterances audios: 100%|██████████| 12/12 [01:12<00:00,  6.04s/it]
[2025-10-13 01:18:53] INFO:root:Step 1 audio saved to ./audio_outputs_customer_support/demo_dialog_kokoro/exported_audios/audio_pipeline_step1.wav
[2025-10-13 01:18:53] INFO:root:Audio dialog saved to the existing file (d9cb91a7-c0bc-4113-9a82-7e48fb3f6e29) successfully at the end of the pipeline!


Audio generated successfully at: ./audio_outputs_customer_support/demo_dialog_kokoro/exported_audios/audio_pipeline_step1.wav


In [16]:
from IPython.display import Audio, display

# Embed an audio player for the step-1 file in the cell output;
# `autoplay=True` asks the player to start playback as soon as it is rendered.
display(Audio(dialog.audio_step_1_filepath, autoplay=True))