# SDialog dependencies

In [None]:
# Set up the environment depending on whether we are running in Google Colab or a Jupyter Notebook
import os
from IPython import get_ipython

if "google.colab" in str(get_ipython()):
    print("Running on CoLab")

    # Install sdialog from source: clone the repository, then install it in editable mode
    !git clone https://github.com/qanastek/sdialog.git
    %cd sdialog
    %pip install -e .
    %cd ..
else:
    print("Running in Jupyter Notebook")
    # Little hack to avoid the "OSError: Background processes not supported." error in Jupyter notebooks:
    # IPython's shell-command hook is replaced with os.system
    get_ipython().system = os.system

## Local installation

Create a `.venv` using the root `requirement.txt` file and Python `3.11.14`

In [None]:
from sdialog import Dialog
from IPython.display import display

# Load an existing dialogue

To run the next steps quickly, we will start from an existing dialog generated in previous tutorials:

In [None]:
path_dialog = "../tests/data/demo_dialog_doctor_patient.json"

if not os.path.exists(path_dialog) and not os.path.exists("./demo_dialog_doctor_patient.json"):
    !wget https://raw.githubusercontent.com/qanastek/sdialog/refs/heads/main/tests/data/demo_dialog_doctor_patient.json
    path_dialog = "./demo_dialog_doctor_patient.json"

original_dialog = Dialog.from_file(path_dialog)
original_dialog.print()

# Tutorial 17: Impulse response and recording devices simulation

The key objective of this tutorial is to apply different microphone impulse responses to the audio obtained after the acoustic simulation of the room, allowing you to hear how the dialogue would sound as if recorded on various real-world devices.

In [None]:
from sdialog.audio.dialog import AudioDialog
from sdialog.audio.pipeline import AudioPipeline

Convert the original dialog into an audio-enhanced dialog:

In [None]:
# Build an AudioDialog from the text dialogue loaded earlier in the notebook
dialog: AudioDialog = AudioDialog.from_dialog(original_dialog)

Load dSCAPER:

In [None]:
import scaper
# Base directory handed to dSCAPER; created below if it does not exist yet
DATA_PATH = "./dscaper_data_impulse_response"
os.makedirs(DATA_PATH, exist_ok=True)
dsc = scaper.Dscaper(dscaper_base_path=DATA_PATH)

Load the database of impulse response files available on our HuggingFace. You can also create your own local database of IR files by using `LocalImpulseResponseDatabase`:

In [None]:
from sdialog.audio.impulse_response_database import HuggingFaceImpulseResponseDatabase
# Impulse response database identified by "sdialog/impulse-responses"
# (presumably a HuggingFace repository id, per the class name — confirm against the sdialog docs)
impulse_response_database = HuggingFaceImpulseResponseDatabase("sdialog/impulse-responses")

As we did in the previous tutorials, we instantiate an `AudioPipeline` with `dscaper` since we are running all 3 steps of the pipeline, while also adding the new parameter `impulse_response_database` for the microphone simulation:

In [None]:
# Make sure the output folder exists, then build the pipeline on top of it.
audio_output_dir = "./audio_outputs_impulse_response"
os.makedirs(audio_output_dir, exist_ok=True)

audio_pipeline = AudioPipeline(
    dir_audio=audio_output_dir,
    dscaper=dsc,
    # Database used for the microphone (recording device) simulation
    impulse_response_database=impulse_response_database,
)

In [None]:
# Identifiers of the background and foreground sound sets handed to populate_dscaper
dscaper_sound_sets = ["sdialog/background", "sdialog/foreground"]
audio_pipeline.populate_dscaper(dscaper_sound_sets)

Now let's generate a simple examination room:

In [None]:
from sdialog.audio.room import DirectivityType
from sdialog.audio.utils import SourceVolume, SourceType
from sdialog.audio.jsalt import MedicalRoomGenerator, RoomRole

In [None]:
# Generate an examination room from the medical room generator
room_generator = MedicalRoomGenerator()
room = room_generator.generate(args={"room_type": RoomRole.EXAMINATION})

# Render the room once; the same image is shown inline and written to room.png
img = room.to_image()
display(img)
img.save("room.png")

And place speakers around the desk:

In [None]:
from sdialog.audio.room import SpeakerSide, Role, RoomPosition

In [None]:
# Speaker 1 goes in front of the desk and speaker 2 behind it, each at most 1.0 away from it
speaker_sides = (
    (Role.SPEAKER_1, SpeakerSide.FRONT),
    (Role.SPEAKER_2, SpeakerSide.BACK),
)
for speaker, desk_side in speaker_sides:
    room.place_speaker_around_furniture(
        speaker_name=speaker,
        furniture_name="desk",
        max_distance=1.0,
        side=desk_side,
    )

You can visualize the new positions of the speakers:

In [None]:
# Render the room again now that the speakers have been placed
img = room.to_image()
display(img)

And finally, we generate the audio with room acoustics and the impulse response from a `SHURE_SM57` microphone:

In [None]:
from sdialog.audio.processing import RecordingDevice

In [None]:
# Run all three pipeline steps in the generated room, then simulate a
# SHURE_SM57 recording device on the result.
dialog: AudioDialog = audio_pipeline.inference(
    dialog,
    environment={
        "room": room,
        "background_effect": "white_noise",
        "foreground_effect": "ac_noise_minimal",
        # Spelled "foreground_..." so it matches the sibling "foreground_effect" key;
        # NOTE(review): confirm the exact key name against AudioPipeline.inference
        "foreground_effect_position": RoomPosition.TOP_RIGHT,
        "source_volumes": {
            SourceType.ROOM: SourceVolume.HIGH,
            SourceType.BACKGROUND: SourceVolume.VERY_LOW
        },
        # Options under "kwargs_pyroom" (presumably forwarded to the room acoustics simulator — confirm)
        "kwargs_pyroom": {
            "ray_tracing": True,
            "air_absorption": True
        }
    },
    do_step_1=True,
    do_step_2=True,
    do_step_3=True,
    dialog_dir_name="demo_impulse_response",
    room_name="my_room_demo_shure",
    re_sampling_rate=16000,
    recording_devices=[RecordingDevice.SHURE_SM57]
)

The recording devices are simulated after the 3rd step and re-sampling on the `room_name` you specified. All audios are saved within the step 3 metadata as paths (`audio_paths_post_processing`) pointing directly to them:

In [None]:
# Show the audio dialog returned by the pipeline
dialog.display()

### You can also perform microphone simulation by using the `to_audio` function from the `Dialog` class:

The first snippet generates audio for the same room, with the same settings, but using two different microphone configurations: `SENNHEISER_E906` and `SHURE_SM57`

In [None]:
# One call to to_audio: output location first, then the steps to run, then the two
# recording devices to simulate.
my_audio_dialog = original_dialog.to_audio(
    dir_audio="./audio_outputs_impulse_response",
    dialog_dir_name="demo_impulse_response_to_audio",
    room_name="my_room_demo_shure_to_audio_3",
    do_step_1=True,
    do_step_2=True,
    do_step_3=True,
    recording_devices=[
        RecordingDevice.SENNHEISER_E906,
        RecordingDevice.SHURE_SM57,
    ],
)

This second snippet is generating another room, with only one microphone (`SHURE_SM57`):

In [None]:
# Same output folder and dialog directory as before, with a different room name and
# a single recording device.
my_audio_dialog = original_dialog.to_audio(
    dir_audio="./audio_outputs_impulse_response",
    dialog_dir_name="demo_impulse_response_to_audio",
    room_name="my_room_demo_shure_to_audio_4",
    do_step_1=True,
    do_step_2=True,
    do_step_3=True,
    recording_devices=[RecordingDevice.SHURE_SM57],
)

In [None]:
# Show the audio dialog produced by the most recent to_audio call
my_audio_dialog.display()

### Local impulse response database

An impulse response database can be loaded from remote HF storage or locally. To load it locally, you need to provide a `metadata_file` (in CSV, TSV, or JSON format) which contains all the information about the data and where they are stored. The required columns are `identifier,file_name,cab,speaker,microphone`.

The paths need to be relative to `directory`, like so `./audio/my_ir.wav`.

In [None]:
from sdialog.audio.impulse_response_database import LocalImpulseResponseDatabase

You can start by downloading and extracting the archive that contains an example impulse response, like so:

In [None]:
import os

# If directory my_custom_voices is not present, download it
if os.path.exists("my_custom_ir"):
    print("my_custom_ir already exists")
else:
    !wget https://raw.githubusercontent.com/qanastek/sdialog/refs/heads/main/tests/data/my_custom_ir.zip -d my_custom_ir
    !unzip my_custom_ir.zip
    !rm my_custom_ir.zip

Once done, you can use `LocalImpulseResponseDatabase` to load those data:

In [None]:
# Point the local database at the extracted example archive instead of an absolute,
# machine-specific path, so the cell runs on any checkout.
# NOTE(review): assumes the archive contains `metadata.json` at its root — adjust the
# file name (json, csv or tsv) if the extracted layout differs.
ir_directory = "./my_custom_ir"
local_ir_database = LocalImpulseResponseDatabase(
    metadata_file=os.path.join(ir_directory, "metadata.json"),  # Can be a json, csv, tsv file
    directory=ir_directory
)

In [None]:
# Report how many entries get_data() returns for the local database
print("Number of impulse responses in the database:", len(local_ir_database.get_data()))

In [None]:
# Same to_audio call shape as above, this time passing the local impulse response
# database explicitly.
my_local_audio_dialog = original_dialog.to_audio(
    dir_audio="./audio_outputs_impulse_response",
    dialog_dir_name="demo_impulse_response_to_audio",
    room_name="my_room_demo_shure_to_audio_5",
    do_step_1=True,
    do_step_2=True,
    do_step_3=True,
    impulse_response_database=local_ir_database,
    recording_devices=[RecordingDevice.SHURE_SM57],
)

In [None]:
# Show the audio dialog generated with the local impulse response database
my_local_audio_dialog.display()