# SDialog dependencies

In [1]:
import os
import json
from tqdm import tqdm

In [None]:
import sdialog
from sdialog import Dialog
from sdialog.generators import PersonaGenerator
from sdialog.personas import Persona, PersonaAgent, Doctor, Patient, Agent

# Generate personas

In [None]:
# Configure the LLM that sdialog uses; model id and region are passed through unchanged.
sdialog.config.set_llm(
    "aws:anthropic.claude-3-5-sonnet-20240620-v1:0",
    region_name="us-east-1",
)

# Folder that receives the generated persona cards.
persona_card_folder = "./personas"

# --- Doctor ---
# Seed attributes for the doctor; generate(n=1) requests a single result,
# which is then written to the persona card folder.
doctor_seed = {
    "name": "Dr. Smith",
    "gender": "male",
    "age": 52,
    "specialty": "Family Medicine",
}
doctor_persona = Doctor(**doctor_seed)
generator_doctor = PersonaGenerator(doctor_persona)
persona_cards = generator_doctor.generate(n=1)
persona_cards.to_file(persona_card_folder + "/persona_doctor.json")

# --- Patient ---
# Same procedure for the patient; `persona_cards` is deliberately reused.
patient_seed = {
    "name": "John Doe",
    "gender": "male",
    "age": 62,
}
patient_persona = Patient(**patient_seed)
generator_patient = PersonaGenerator(patient_persona)
persona_cards = generator_patient.generate(n=1)
persona_cards.to_file(persona_card_folder + "/persona_patient.json")



# Load personas

In [3]:
# Read the doctor and patient persona cards back from their JSON files
# (doctor first, then patient).
persona_doctor, persona_patient = (
    Persona.from_file("./personas/persona_doctor.json"),
    Persona.from_file("./personas/persona_patient.json"),
)

In [None]:
# Scenario description handed to both agents as their dialogue details.
context = "Generate me a 50 turn medical dialogue between patient and doctor, for a primary care visit"

# Settings shared by the two agents; only the persona and the name differ.
shared_agent_settings = dict(
    dialogue_details=context,
    response_details="make short turn answers when needed",
)

# Create agents
agent1 = PersonaAgent(persona=persona_doctor, name="DOCTOR", **shared_agent_settings)
agent2 = PersonaAgent(persona=persona_patient, name="PATIENT", **shared_agent_settings)

In [4]:
# Notebook-wide switches. FORCE_DIALOG_GENERATION is read by the dialog cell;
# save_all and GENERATE_PERSONA are not read by any cell visible in this notebook.
save_all = GENERATE_PERSONA = True
FORCE_DIALOG_GENERATION = False

# Make sure the output folder exists; exist_ok avoids an error on re-runs.
os.makedirs(name="./outputs", exist_ok=True)

In [None]:
# Location of the cached demo dialog on disk.
dialog_path = "dialog_demo.json"

# Generate a new dialog when explicitly forced, and also when no cached file
# exists yet; previously a missing file made Dialog.from_file fail whenever
# FORCE_DIALOG_GENERATION was False (e.g. on a fresh checkout).
if FORCE_DIALOG_GENERATION or not os.path.exists(dialog_path):
    # Let the two agents converse, then cache the result for later runs.
    dialog = agent1.talk_with(agent2, max_turns=3)
    dialog.to_file(dialog_path)
else:
    # Reuse the cached dialog.
    dialog = Dialog.from_file(dialog_path)

dialog.print()

# Audio dependencies

# Instantiating the voice database

In [6]:
from sdialog.audio.voice_database import DummyVoiceDatabase

In [None]:
# Announce the step before running it, so the message works as a progress
# indicator instead of appearing only after the work is already done.
print("Instantiating voice database...")
dummy_voice_database = DummyVoiceDatabase()

# Instantiating TTS pipeline

In [8]:
from sdialog.audio.tts_engine import KokoroTTS # ChatterboxTTS, XttsTTS

In [None]:
# Announce the step before running it, so the message works as a progress
# indicator instead of appearing only after the engine has been constructed.
print("Instantiating TTS pipeline...")

# Kokoro is the active engine. The alternatives below are kept for quick
# switching; each one also needs its (currently commented-out) import.
tts_pipeline = KokoroTTS()
# tts_pipeline = ChatterboxTTS()
# tts_pipeline = XttsTTS()

In [10]:
from sdialog.audio.audio_dialog import AudioDialog

In [11]:
# Extend the turns with audio attributes: build an AudioDialog from the
# existing dialog and rebind `dialog` to it, so the following cells operate
# on the AudioDialog object.
dialog: AudioDialog = AudioDialog.from_dialog(dialog)

In [12]:
from sdialog.audio import audio_pipeline

In [None]:
!pip install git+https://github.com/cyrta/dscaper.git

In [None]:
import scaper
# Base directory handed to dscaper.
DATA_PATH = "./dscaper_data"
# Dscaper handle; it is passed to AudioPipeline through its `dscaper` argument.
dsc = scaper.Dscaper(dscaper_base_path=DATA_PATH)

In [None]:
from sdialog.audio.audio_pipeline import AudioPipeline

In [None]:
from sdialog.audio.audio_events_enricher import AudioEventsEnricher

In [None]:
# Collect the constructor arguments first, then build the pipeline in one call.
pipeline_settings = {
    "voice_database": dummy_voice_database,
    "tts_pipeline": tts_pipeline,
    "dir_audio": "./outputs",
    "dscaper": dsc,
    # "enricher": AudioEventsEnricher(),  # optional, currently disabled
}

# NOTE: this rebinds `audio_pipeline`, a name an earlier cell imported as a
# module from sdialog.audio; from here on it refers to the pipeline instance.
audio_pipeline = AudioPipeline(**pipeline_settings)

In [None]:
# Run the pipeline's inference step on the audio dialog and keep the returned object.
# NOTE(review): presumably this is where the per-turn audio is synthesized — confirm.
dialog: AudioDialog = audio_pipeline.inference(dialog)

In [None]:
# Enrich the dialog with audio events and generate the timeline of audio
# events and utterances.
dialog = audio_pipeline.enrich(dialog)

# Print the timeline, then draw it to a PNG in the outputs folder.
timeline = dialog.timeline
timeline.print()
timeline.draw("./outputs/timeline.png")

## Add SNR and room location

In [None]:
# TODO: Add SNR and room position to the dialog

# Audio Evaluation

In [None]:
from sdialog.audio.evaluation import compute_evaluation_utterances, compute_evaluation_audio

In [None]:
def _print_metrics(metrics):
    """Print one ``key: value`` line for every entry of *metrics*."""
    for metric_name, metric_value in metrics.items():
        print(f"{metric_name}: {metric_value}")


# Utterance-level evaluation
metrics_utterances_level = compute_evaluation_utterances(dialog)
_print_metrics(metrics_utterances_level)

# Audio-level evaluation
metrics_audio_level = compute_evaluation_audio(dialog)
_print_metrics(metrics_audio_level)