# [THIS TUTORIAL IS UNDER CONSTRUCTION]

# SDialog dependencies

In [None]:
# Set up the environment depending on whether we are running in Google Colab or a Jupyter Notebook
import os
from IPython import get_ipython

# Colab is detected by looking for "google.colab" in the string form of the active IPython shell object.
if "google.colab" in str(get_ipython()):
    print("Running on CoLab")

    # Installing Ollama (if you are not planning to use Ollama, you can comment out these lines to speed up the installation)
    !curl -fsSL https://ollama.com/install.sh | sh

    # Installing sdialog: clone the repository, install it in editable mode, then return to the parent directory
    !git clone https://github.com/qanastek/sdialog.git
    %cd sdialog
    %pip install -e .
    %cd ..
else:
    print("Running in Jupyter Notebook")
    # Little hack to avoid the "OSError: Background processes not supported." error in Jupyter notebooks:
    # the shell's `system` attribute (presumably what `!command` lines call - confirm) is replaced by `os.system`.
    get_ipython().system = os.system

## Local installation

Create a `.venv` using the root `requirements.txt` file and Python `3.11.14`.

In [None]:
from sdialog import Dialog

# Load an existing dialogue

In order to run the next steps in a fast manner, we will start from an existing dialog generated using previous tutorials:

In [None]:
# Location of the demo doctor/patient dialogue, relative to this notebook
demo_dialog_path = "../tests/data/demo_dialog_doctor_patient.json"

# Load the dialogue from disk and print it
original_dialog = Dialog.from_file(demo_dialog_path)
original_dialog.print()

# Tutorial 11: Advanced room acoustics

### Instantiate the voice database

In [None]:
from sdialog.audio.voice_database import HuggingfaceVoiceDatabase
# Build the voice database from the "sdialog/voices-kokoro" identifier
# (presumably a Hugging Face dataset name, judging by the class name - confirm).
dummy_voice_database = HuggingfaceVoiceDatabase("sdialog/voices-kokoro")
# Query a voice for a 20-year-old male; as the last expression of the cell, the result is shown as the cell output.
dummy_voice_database.get_voice(gender="male", age=20)

### Instantiate the TTS model

In [None]:
!pip install -q kokoro>=0.9.4 soundfile
!apt-get -qq -y install espeak-ng > /dev/null 2>&1

In [None]:
from sdialog.audio.tts_engine import KokoroTTS
# Create the Kokoro TTS engine with its default arguments; it is handed to the audio pipeline further down.
tts_engine = KokoroTTS()

## Setup stage: Audio Dialog and Audio Pipeline

In [None]:
from sdialog.audio.audio_dialog import AudioDialog
from sdialog.audio.audio_pipeline import AudioPipeline

Convert the original dialog into an audio-enhanced dialog:

In [None]:
# Wrap the textual dialog loaded earlier into an AudioDialog; this is the object the audio pipeline receives.
audio_dialog: AudioDialog = AudioDialog.from_dialog(original_dialog)

Instantiate the audio pipeline to use `Kokoro` (`tts_engine`) as the TTS model and save the audio outputs of all dialogs into the directory `./audio_outputs_multiple_rooms`.

The voices are sampled from the `dummy_voice_database` based on the persona attributes `age` and `gender`, as assigned during the original textual dialog.

In [None]:
# Fetch the dscaper sources into ./dscaper (installed in editable mode by the next cell).
# NOTE(review): `git clone` fails if ./dscaper already exists, so re-running this cell is not idempotent.
!git clone https://github.com/cyrta/dscaper.git

In [None]:
!pip install -e ./dscaper/
!pip install scaper

In [None]:
import scaper
DATA_PATH = "./dscaper_data_multiple_rooms" # Path where the sound events, utterances and timelines database will be saved
# Create the directory up front; exist_ok=True makes re-running this cell harmless.
# (`os` comes from the environment setup cell at the top of the notebook.)
os.makedirs(DATA_PATH, exist_ok=True)

In [None]:
# Create the Dscaper instance rooted at DATA_PATH; it is passed to the audio pipeline below.
dsc = scaper.Dscaper(dscaper_base_path=DATA_PATH)

In [None]:
# Directory receiving the audio outputs; created if it does not exist yet
audio_output_dir = "./audio_outputs_multiple_rooms"
os.makedirs(audio_output_dir, exist_ok=True)

# Wire together the voice database, the TTS engine and the dscaper instance created above
audio_pipeline = AudioPipeline(
    dir_audio=audio_output_dir,
    dscaper=dsc,
    tts_pipeline=tts_engine,
    voice_database=dummy_voice_database,
)
# audio_pipeline = AudioPipeline() # Can also be used with default values

In [None]:
# Populate the sound events database from the two listed sources
# (presumably dataset identifiers for background and foreground sounds - confirm).
audio_pipeline.populate_dscaper(["sdialog/background","sdialog/foreground"])

In [None]:
import time
import random
from IPython.display import Audio, display
from sdialog.audio.room import Room, Dimensions3D

Run the audio pipeline on the previously converted dialog. In this case we focus on generating the "unprocessed" audio, which consists of the aggregation of all utterances from the dialog. Rather than using the dialog identifier as the directory name, we use a custom name made of the prefix `demo_dialog_advanced_room_accoustic_` followed by a timestamp; the outputs are saved under `./audio_outputs_multiple_rooms/<DIR_NAME>/`.

In [None]:
# Presumably candidate reverberation settings; not referenced by the other cells visible in this notebook.
reverbs = [0.3, 0.5]

# The current timestamp is appended so the directory name differs from one run to the next.
run_timestamp = time.time()
DIR_NAME = f"demo_dialog_advanced_room_accoustic_{run_timestamp}"
print(DIR_NAME)

In [None]:
# Generate stage 1 and 2 only; stage 3 is run separately, once per room, further down
stage_flags = {"do_step_1": True, "do_step_2": True, "do_step_3": False}

dialog: AudioDialog = audio_pipeline.inference(
    audio_dialog,
    dialog_dir_name=DIR_NAME,
    **stage_flags,
)
print("Generating audio of stage 1 and 2 done!")

In [None]:
# Show the audio sources, a separator line, then one player per generated stage
print(dialog.get_audio_sources())
print("#" * 25)

for stage_audio_path in (dialog.audio_step_1_filepath, dialog.audio_step_2_filepath):
    stage_player = Audio(stage_audio_path, autoplay=False, rate=24000)
    display(stage_player)

In [None]:
from sdialog.audio.jsalt import MedicalRoomGenerator

In [None]:
# Number of rooms to generate; the stage 3 loop below runs one pipeline inference per room.
NUMBER_OF_ROOMS = 20

In [None]:
# Generate audio of stage 3 for each setup
# The loop counter has a real name because it is printed. The original used `_`, which
# conventionally marks an unused value and, in IPython, is the variable holding the last
# cell output; assigning to it shadows that.
for room_index in range(NUMBER_OF_ROOMS):

    print("Generating room n°", room_index)

    # Ask the generator for a room of random type
    room = MedicalRoomGenerator().generate(args={"room_type": "random"})
    # A nanosecond timestamp in the name keeps rooms from different iterations distinct
    room.name = f"room_{time.time_ns()}"

    # Only step 3 runs here (steps 1 and 2 were done earlier). The result is assigned back
    # to `dialog`, so each iteration receives the dialog returned by the previous one.
    dialog: AudioDialog = audio_pipeline.inference(
        dialog,
        environment={
            "room": room,
        },
        do_step_1=False,
        do_step_2=False,
        do_step_3=True,
        dialog_dir_name=DIR_NAME,
        room_name=room.name
    )

In [None]:
# Banner announcing the list of room configurations
banner_rule = "-" * 25
print(banner_rule)
print("- Room Configurations")
print(banner_rule)

# One labelled audio player per room configuration stored on the dialog
step_3_outputs = dialog.audio_step_3_filepaths
for config_name in step_3_outputs:
    print(f"> Room Configuration: {config_name}")
    room_audio_path = step_3_outputs[config_name]["audio_path"]
    display(Audio(room_audio_path, autoplay=False, rate=24000))