# SDialog dependencies

In [1]:
import os
import json
from tqdm import tqdm

In [2]:
import sdialog
from sdialog import Dialog

  from .autonotebook import tqdm as notebook_tqdm


# Load dialog

In [3]:
# Load the demo dialog from its JSON file and print its metadata and turns.
original_dialog = Dialog.from_file("dialog_demo.json")
original_dialog.print()

[1m[95m[dialog_id] [35m1752861530588[0m
[1m[95m[model] [35mclient=<botocore.client.BedrockRuntime object at 0x7fc319ec91c0> model_id='anthropic.claude-3-5-sonnet-20240620-v1:0' region_name='us-east-1' provider='anthropic' supports_tool_choice_values=('auto', 'any', 'tool')[0m
[1m[95m[seed] [35m226296126[0m
[1m[35m--- Dialogue Begins ---[0m
[31m[DOCTOR] [0mHello there. I'm Dr. Smith. Welcome to my office. What brings you in to see me today?[0m
[94m[PATIENT] [37mGood morning, Dr. Smith. I'm John Doe. I've been dealing with a persistent cough and feeling pretty tired for about three weeks now. It's starting to wear me down, and I thought I should get it checked out.[0m
[31m[DOCTOR] [0mI'm sorry to hear you've been feeling unwell, Mr. Doe. A persistent cough and fatigue can certainly be troublesome. Let's get some more details. Can you describe your cough? Is it dry or productive?[0m
[94m[PATIENT] [37mWell, Dr. Smith, it's mostly a dry cough. It's particularly bot

# Audio Generation

### Instantiate the voice database

In [4]:
from sdialog.audio.voice_database import DummyKokoroVoiceDatabase
# Voice database that is later handed to the audio pipeline as `voice_database`.
dummy_voice_database = DummyKokoroVoiceDatabase()
# Sanity check: query a voice for a 20-year-old male speaker; the cell output
# shows the selected entry (a dict with 'identifier' and 'voice' keys).
dummy_voice_database.get_voice(genre="male", age=20)

{'identifier': 'am_fenrir', 'voice': 'am_fenrir'}

### Instantiate the TTS model

In [5]:
from sdialog.audio.tts_engine import KokoroTTS
# Kokoro text-to-speech engine, created with its default settings.
tts_engine = KokoroTTS()



## Setup stage: Audio Dialog and Audio Pipeline

In [6]:
from sdialog.audio.audio_dialog import AudioDialog
from sdialog.audio.audio_pipeline import AudioPipeline

Convert the original dialog into an audio-enhanced dialog.

In [7]:
# Build an AudioDialog from the text-only `original_dialog` so the audio pipeline can process it.
dialog: AudioDialog = AudioDialog.from_dialog(original_dialog)

Instantiate the audio pipeline in order to use `Kokoro` (`tts_engine`) as the TTS model and save the outputs of all the dialogs into the directory `./outputs`.

The voices are sampled from the `dummy_voice_database` based on the persona attributes `age` and `gender`, as assigned during the original textual dialog.

In [8]:
%%script false --no-raise-error
# One-time setup, intentionally skipped: `%%script false` hands this cell to the
# `false` command, so nothing below is executed, and `--no-raise-error` keeps the
# non-zero exit status from failing the notebook. Remove the first line to run it.
!git clone https://github.com/cyrta/dscaper.git
%pip install -r ../../../requirements-dscaper.txt

In [9]:
import scaper
# Path where the sound events, utterances and timelines database will be saved.
DATA_PATH = "./dscaper_data"
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(DATA_PATH, exist_ok=True)

In [10]:
# dSCAPER instance rooted at DATA_PATH; it is later passed to AudioPipeline as `dscaper`.
dsc = scaper.Dscaper(dscaper_base_path=DATA_PATH)

In [11]:
# Single source of truth for the directory that receives the generated audio,
# so the directory we create and the one the pipeline writes to cannot drift apart.
OUTPUT_PATH = "./outputs"
os.makedirs(OUTPUT_PATH, exist_ok=True)  # idempotent: safe to re-run this cell

# Wire the voice database, the TTS engine and dSCAPER into one audio pipeline.
audio_pipeline = AudioPipeline(
    voice_database=dummy_voice_database,
    tts_pipeline=tts_engine,
    dscaper=dsc,
    dir_audio=OUTPUT_PATH,
)
# Note: `AudioPipeline()` can also be used with default values.

In [12]:
# Populate the sound events database with the "sdialog/background" and
# "sdialog/foreground" datasets. The returned summary (shown as the cell output)
# reports counts of already-existing, errored and successfully processed audio files.
audio_pipeline.populate_dscaper(["sdialog/background","sdialog/foreground"])

Populating dSCAPER with sdialog/background dataset...: 100%|██████████| 4/4 [00:01<00:00,  2.42it/s]
Populating dSCAPER with sdialog/foreground dataset...: 100%|██████████| 8/8 [00:00<00:00, 20.87it/s]


{'count_existing_audio_files': 12,
 'count_error_audio_files': 0,
 'count_success_audio_files': 0}

In [13]:
from sdialog.audio.room_generator import RoomGenerator, RoomRole

In [14]:
# Generate a room with the CONSULTATION role and print its description.
# NOTE(review): room_size=8.0 looks like a floor area — the first two printed
# dimensions multiply to 8.0; confirm the meaning and units against RoomGenerator.
room = RoomGenerator().generate(RoomRole.CONSULTATION, room_size=8.0)
print(room)

1036:  consultation_room, desc: consultation room (dimentions: dim: [3.577708763999664, 2.23606797749979, 3.0], rt60: 0.5) role: consultation)  


Run the audio pipeline on the previously converted dialog. Here all three steps are enabled (`do_step_1`, `do_step_2`, `do_step_3`): the first step generates the "unprocessed" audio, which consists of the aggregation of all utterances from the dialog, and passing a `room` object is what triggers the third step, which generates the room acoustics. Rather than using the dialog identifier as the name of the directory, we use a custom directory name, `demo_dialog_room_accoustic`, so the outputs are saved under `./outputs/demo_dialog_room_accoustic/`.

In [15]:
from sdialog.audio.room import MicrophonePosition

In [None]:
# Run the audio pipeline on the dialog with all three steps enabled; the returned
# AudioDialog carries the file paths produced by each step.
# NOTE(review): this cell rebinds `dialog` to the pipeline's result, so re-running
# it feeds the previous result back in as input — re-run the conversion cell first
# if a fresh AudioDialog is needed.
dialog: AudioDialog = audio_pipeline.inference(
    dialog,
    room=room, # Need to provide a room object to trigger the 3rd step of the audio pipeline
    microphone_position=MicrophonePosition.CEILING_CENTERED, # Default is MicrophonePosition.MONITOR
    do_step_1=True,
    do_step_2=True,
    do_step_3=True,
    dialog_dir_name="demo_dialog_room_accoustic", # Custom sub-directory name used for this dialog's outputs
    room_name="my_room_config_1"
)
# Show where the audio of each step was written (step 3 exposes several paths).
print(dialog.audio_step_1_filepath)
print(dialog.audio_step_2_filepath)
print(dialog.audio_step_3_filepaths)

[2025-09-29 23:25:56] INFO:root:Dialog audio dir path: ./outputs
[2025-09-29 23:25:56] INFO:root:Audio dialog loaded from the existing file (1752861530588) successfully!
[2025-09-29 23:25:56] INFO:root:Audio sources from dSCAPER loaded in the dialog (1752861530588) successfully!
[2025-09-29 23:25:56] INFO:root:Generating room accoustic for dialogue 1752861530588
[2025-09-29 23:25:56] INFO:root:  Microphone set to position [0.9944271909999159, 0.33541019662496846, 1.2]
[2025-09-29 23:25:56] INFO:root:  Microphone set to position [1.788854381999832, 1.118033988749895, 2.9]
[2025-09-29 23:25:56] INFO:root:✓ Loaded audio file './dscaper_data/timelines/demo_dialog_room_accoustic/generate/b426e1ad-50d5-49a4-96b2-d9f06d3d1732/soundscape_positions/no_type.wav' for 'no_type' with 1918350 samples
[2025-09-29 23:25:56] INFO:root:✓ Loaded audio file './dscaper_data/timelines/demo_dialog_room_accoustic/generate/b426e1ad-50d5-49a4-96b2-d9f06d3d1732/soundscape_positions/doctor-at_desk_sitting.wav' fo

./outputs/demo_dialog_room_accoustic/exported_audios/audio_pipeline_step1.wav
./outputs/demo_dialog_room_accoustic/exported_audios/audio_pipeline_step2.wav


NameError: name 'jdialog' is not defined