In [1]:
import sys
import os
import re
import datetime
import json

# Put the current working directory on the import path so the `backend`
# package imported below can be resolved.
# NOTE(review): assumes the kernel is started from the project root — confirm.
sys.path.append(os.getcwd())

from backend import config
from backend.modelClasses import PromptHandler, LLMWrapper, TeacherAgent
from backend.tts import TTSProcessor
from backend.pipeline import load_scenario, run_pipeline
from backend.utils import reformat_transcript_to_list, upload_to_s3, download_from_s3
from backend.prompts.scenarios import *


  from .autonotebook import tqdm as notebook_tqdm


### Scenario Prep

In [20]:
# Load the scenario description from its UTF-8 text file; this text is later
# passed as `scenario` to the prompt formatter and the teacher agent.
scenario_file_path = "./backend/prompts/scenarios/cafe.txt"
with open(scenario_file_path, encoding="utf-8") as file:  # default mode is text read
    scenario = file.read()
 

### Transcript Generation

In [12]:
# Build the two helpers for this step: the handler that formats the system
# prompt from its template file, and the LLM wrapper that generates the dialogue.
prompt_handler = PromptHandler(config.SYSTEM_PROMPT_TEMPLATE_PATH)
transcript_generator = LLMWrapper(config.MODEL_NAME, temperature=0.0)

# The language/country pair selects the two speakers from the character config.
language, country_name = "French", "France"
characters = config.CHARACTER_CONFIG[language][country_name]
character_1, character_2 = characters["speaker_1"], characters["speaker_2"]

# Format the prompt and show it before the model is called.
prompt = prompt_handler.format_prompt(
    scenario=scenario,
    country_name=country_name,
    language=language,
    character_1_name=character_1["name"],
    character_2_name=character_2["name"],
)
print("----- Prompt -----", prompt, sep="\n")

# Generate the dialogue and show the model output as returned.
raw_transcript = transcript_generator.generate(prompt)
print("----- Raw Transcript -----", raw_transcript, sep="\n")


----- Prompt -----
You are an expert content creator for French language learners. Your role is to produce a **natural-sounding, French-only conversation** between two characters, strictly following the formatting rules.

## **Scenario:**  
The conversation must be based on the following scenario: **A tourist enters a busy cafe and goes up to the counter to order. 
They don't know exactly what to order and want some advice from the server. 
After ordering and paying with card they take a seat and wait for their order to arrive. 
Once it arrives they interact with the server and finsih their order and then leave.**.

## **Requirements:**
1. **French Only**
   - **Do not include any English text** except for character names.
   - Reflect the **local dialect** and **culture** of France in your French.
   - Add some disfluencies and repetitions to make it sound more human. Use commas (,) and disfluencies like (uh) to set the pace. **This is very important**.

2. **Fixed Characters**
   - Y

In [13]:
# Set up the teacher agent from its prompt template, model name and temperature.
teacher_agent = TeacherAgent(
    template_path=config.TEACHER_PROMPT_TEMPLATE_PATH,
    model_name=config.MODEL_NAME,
    temperature=0.0,
)

# Extra keyword context handed to the agent alongside the raw transcript.
explain_context = {
    "scenario": scenario,
    "country_name": country_name,
    "language": language,
    "character_1_name": character_1["name"],
    "character_2_name": character_2["name"],
}

# Produce the explained transcript and show it.
explained_transcript = teacher_agent.explain(raw_transcript, **explain_context)
print("----- Explained Transcript -----", explained_transcript, sep="\n")


----- Explained Transcript -----
Charles: "Bonjour, madame, je suis un touriste et je suis un peu perdu ici. Peut-être que vous pouvez me donner quelques conseils pour mon repas?"

Maestro: Charles is greeting Sophie politely and asking for dining recommendations as a lost tourist. Note the use of the formal "vous" instead of "tu" when addressing someone of unknown age or social status.

Sophie: "Bien sûr, monsieur. Nous avons une variété de plats traditionnels. Vous aimez le fromage ou les poissons?"

Maestro: Sophie is offering her assistance and mentioning the traditional dishes available. She is asking if Charles prefers cheese or fish dishes.

Charles: "Uh, je suis un peu indécis. J'aime les plats simples, mais je suis aussi curieux. Quel est votre recommandation?"

Maestro: Charles is expressing his indecisiveness and curiosity. Note the use of the informal "je suis" instead of "je suis un peu" to emphasize his indecision.

Sophie: "Je recommande le quiche Lorraine. C'est un plat

In [14]:
# Name this run's output folder after the current local time (one-second
# resolution), create it, and derive the path of the combined audio file.
unique_id = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
conversation_dir = os.path.join(config.GENERATED_OUTPUT_DIR, unique_id)
os.makedirs(conversation_dir, exist_ok=True)  # no error if the folder already exists
final_audio_file = os.path.join(conversation_dir, "final_conversation.wav")


In [15]:
# Display the path that is later passed to the TTS step as the combined audio file.
final_audio_file

'./data/conversations/20250316_123207/final_conversation.wav'

In [16]:
# Convert the explained transcript into a list of lines (the form passed to the
# TTS step below) and display the list for inspection.
transcript_lines = reformat_transcript_to_list(explained_transcript)
transcript_lines

['Charles: "Bonjour, madame, je suis un touriste et je suis un peu perdu ici. Peut-être que vous pouvez me donner quelques conseils pour mon repas?"',
 'Maestro: Charles is greeting Sophie politely and asking for dining recommendations as a lost tourist. Note the use of the formal "vous" instead of "tu" when addressing someone of unknown age or social status.',
 'Sophie: "Bien sûr, monsieur. Nous avons une variété de plats traditionnels. Vous aimez le fromage ou les poissons?"',
 'Maestro: Sophie is offering her assistance and mentioning the traditional dishes available. She is asking if Charles prefers cheese or fish dishes.',
 'Charles: "Uh, je suis un peu indécis. J\'aime les plats simples, mais je suis aussi curieux. Quel est votre recommandation?"',
 'Maestro: Charles is expressing his indecisiveness and curiosity. Note the use of the informal "je suis" instead of "je suis un peu" to emphasize his indecision.',
 'Sophie: "Je recommande le quiche Lorraine. C\'est un plat traditionn

In [17]:
# Create the text-to-speech processor; the API key is read from the project
# config rather than written into the notebook.
tts_processor = TTSProcessor(api_key=config.ELEVEN_API_KEY)

In [18]:
# Run text-to-speech over the transcript lines, writing into this run's output
# folder and producing the combined audio file.
# NOTE(review): local_storage=True presumably keeps the results on local disk
# instead of uploading them — confirm against TTSProcessor.
tts_processor.process_transcript(
    transcript_lines=transcript_lines,
    output_dir=conversation_dir,
    final_audio_file=final_audio_file,
    language=language,
    country=country_name,
    characters=characters,
    local_storage=True,
)

print(
    "Audio generated for each line, and combined audio saved at:",
    final_audio_file,
    sep="\n",
)

Synthesizing line 1 for 'Charles' using voice 'hv6gVog5LgtIUX88Nmq8'...
Synthesizing line 2 for 'Maestro' using voice 'JBFqnCBsd6RMkjVDRZzb'...
Synthesizing line 3 for 'Sophie' using voice 'pVsdIxxCbRrbiwOhiRwg'...
Synthesizing line 4 for 'Maestro' using voice 'JBFqnCBsd6RMkjVDRZzb'...
Synthesizing line 5 for 'Charles' using voice 'hv6gVog5LgtIUX88Nmq8'...
Synthesizing line 6 for 'Maestro' using voice 'JBFqnCBsd6RMkjVDRZzb'...
Synthesizing line 7 for 'Sophie' using voice 'pVsdIxxCbRrbiwOhiRwg'...
Synthesizing line 8 for 'Maestro' using voice 'JBFqnCBsd6RMkjVDRZzb'...
Synthesizing line 9 for 'Charles' using voice 'hv6gVog5LgtIUX88Nmq8'...
Synthesizing line 10 for 'Maestro' using voice 'JBFqnCBsd6RMkjVDRZzb'...
Synthesizing line 11 for 'Sophie' using voice 'pVsdIxxCbRrbiwOhiRwg'...
Synthesizing line 12 for 'Maestro' using voice 'JBFqnCBsd6RMkjVDRZzb'...
Line 13 is not in expected format: (After paying)
Synthesizing line 14 for 'Sophie' using voice 'pVsdIxxCbRrbiwOhiRwg'...
Synthesizing l