# SDialog dependencies

In [None]:
# If dialog_demo.json is not present, download it
!wget https://raw.githubusercontent.com/qanastek/sdialog/refs/heads/main/misc/audio/dialog_demo.json

In [None]:
# Link the sdialog module to the local directory if not done yet
!ln -s ../src/sdialog ./sdialog

In [None]:
import os
import json
from tqdm import tqdm

In [None]:
import sdialog
from sdialog import Dialog

# Load dialog

In [None]:
# Read the demo dialog from the JSON file fetched earlier and print it.
# `original_dialog` is later converted into an AudioDialog.
original_dialog = Dialog.from_file("dialog_demo.json")
original_dialog.print()

# Audio Generation

### Instantiate the voice database

In [None]:
# Build the voice database that is later handed to AudioPipeline as `voice_database`.
from sdialog.audio.voice_database import DummyKokoroVoiceDatabase
dummy_voice_database = DummyKokoroVoiceDatabase()
# Sanity check: request a voice for genre="male", age=20. Being the last expression
# of the cell, the returned value is displayed as the cell output.
dummy_voice_database.get_voice(genre="male", age=20)

### Instantiate the TTS model

In [None]:
# Create the Kokoro TTS engine with its default arguments; it is later passed to
# AudioPipeline as `tts_pipeline`.
from sdialog.audio.tts_engine import KokoroTTS
tts_engine = KokoroTTS()

## Setup stage: Audio Dialog and Audio Pipeline

In [None]:
from sdialog.audio.audio_dialog import AudioDialog
from sdialog.audio.audio_pipeline import AudioPipeline

Convert the original dialog into an audio-enhanced dialog.

In [None]:
# Wrap the textual dialog in an AudioDialog; `dialog` is the object later fed to
# the audio pipeline's inference call.
dialog: AudioDialog = AudioDialog.from_dialog(original_dialog)

Instantiate the audio pipeline so that it uses `Kokoro` (`tts_engine`) as the TTS model and saves the audio outputs of all dialogs into the directory `./audio_outputs`.

The voices are sampled from the `dummy_voice_database` based on the persona attributes `age` and `gender`, as assigned in the original textual dialog.

In [None]:
%%script false --no-raise-error
# This cell is intentionally disabled: `%%script false` pipes the cell body to the
# `false` command (which ignores it), and `--no-raise-error` silences the resulting
# non-zero exit status. Remove the first line to actually clone dscaper and
# install its requirements into the kernel's environment.
!git clone https://github.com/cyrta/dscaper.git
%pip install -r ../../../requirements-dscaper.txt

In [None]:
# `scaper` must expose `Dscaper` (used right after this cell) — presumably the
# dscaper package from the repository referenced in the install cell; TODO confirm.
import scaper
DATA_PATH = "./dscaper_data" # Path where the sound events, utterances and timelines database will be saved
# Create the data directory up front; exist_ok=True keeps re-runs from failing.
os.makedirs(DATA_PATH, exist_ok=True)

In [None]:
# Create the Dscaper instance rooted at DATA_PATH; it is later passed to
# AudioPipeline as `dscaper`.
dsc = scaper.Dscaper(dscaper_base_path=DATA_PATH)

In [None]:
# Make sure the output directory exists before handing it to the pipeline.
os.makedirs("./audio_outputs", exist_ok=True)

# Wire together the output directory, the TTS engine, the voice database and
# the dscaper instance created in the previous cells.
audio_pipeline = AudioPipeline(
    dir_audio="./audio_outputs",
    tts_pipeline=tts_engine,
    voice_database=dummy_voice_database,
    dscaper=dsc,
)
# audio_pipeline = AudioPipeline() # Can also be used with default values

In [None]:
# Populate the sound events database from the two listed sources.
# NOTE(review): the strings look like identifiers of background / foreground sound
# collections — confirm what `populate_dscaper` expects and where it fetches them from.
audio_pipeline.populate_dscaper(["sdialog/background","sdialog/foreground"])

In [None]:
from sdialog.audio.room_generator import RoomGenerator, RoomRole

In [None]:
# Generate a room for the CONSULTATION role with room_size=8.0 and print it.
# NOTE(review): the unit/meaning of `room_size` is not visible here — confirm.
room = RoomGenerator().generate(RoomRole.CONSULTATION, room_size=8.0)
print(room)

Perform the inference of the audio pipeline on the previously converted dialog. In addition to the "unprocessed" audio, which consists of the aggregation of all utterances from the dialog, all three pipeline steps are enabled here, and a room is provided to trigger the third step. Rather than using the dialog identifier as the name of the directory, we use a custom directory name, `demo_dialog_room_accoustic`, so the outputs will be saved at `./audio_outputs/demo_dialog_room_accoustic/`.

In [None]:
from sdialog.audio.room import MicrophonePosition

In [None]:
# Run the pipeline on the audio dialog with every step switched on and a custom
# output directory name.
dialog: AudioDialog = audio_pipeline.inference(
    dialog,
    do_step_1=True,
    do_step_2=True,
    do_step_3=True,
    room=room,  # A room object is required to trigger the 3rd step of the audio pipeline
    room_name="my_room_config_1",
    microphone_position=MicrophonePosition.CEILING_CENTERED,  # Default is MicrophonePosition.MONITOR
    dialog_dir_name="demo_dialog_room_accoustic",
)

# Show the file path(s) recorded on the dialog for each step, one per line.
print(
    dialog.audio_step_1_filepath,
    dialog.audio_step_2_filepath,
    dialog.audio_step_3_filepaths,
    sep="\n",
)