In [21]:
from dotenv import load_dotenv
load_dotenv()
import getpass
import os
import glob
import uuid
import requests
import ffmpeg
import json
import time
from elevenlabs.client import ElevenLabs
from pydub import AudioSegment

In [22]:
# ElevenLabs API client shared by the cells below. The key is read from the
# ELEVENLABS_API_KEY environment variable (load_dotenv() runs in the import cell).
elevenlabs_client = ElevenLabs(api_key=os.environ.get("ELEVENLABS_API_KEY"))

In [35]:
# Speaker name (as written in the script's upper-case speaker headings) mapped
# to the ElevenLabs voice ID used to synthesize that character's lines.
voice_mapping = {
    # Young and nervous
    "ETHAN": "29vD33N1CtxCmqQRPOHJ",
    # Booming and imposing
    "CHARLES MONTAGUE": "N2lVS1w4EtoT3dr4eOWO",
    # Elegant and slightly teasing
    "SOPHIA": "21m00Tcm4TlvDq8ikWAM",
}

In [24]:
def generate_dialogue(dialogue_text: str, voice_id: str, output_dir: str = "raw_audio") -> str:
    """Synthesize one block of dialogue with ElevenLabs and save it as an MP3.

    Args:
        dialogue_text: The text to be spoken.
        voice_id: ElevenLabs voice ID to speak it with.
        output_dir: Directory the MP3 is written to; created if it does not
            exist. Defaults to ``"raw_audio"``.

    Returns:
        Path of the newly written file: a random UUID name with an ``.mp3``
        extension inside ``output_dir``.

    Raises:
        Any error raised by the ElevenLabs client or the file system is
        propagated unchanged. If the error happens while the audio is being
        written, the partially written file is removed first.
    """
    response = elevenlabs_client.text_to_speech.convert(
        voice_id=voice_id,
        optimize_streaming_latency="0",
        output_format="mp3_44100_128",
        text=dialogue_text,
        model_id="eleven_turbo_v2",
    )

    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, f"{uuid.uuid4()}.mp3")

    try:
        with open(file_path, "wb") as f:
            # The response is consumed chunk by chunk; empty chunks are skipped.
            for chunk in response:
                if chunk:
                    f.write(chunk)
    except BaseException:
        # A failure mid-stream would otherwise leave a truncated MP3 behind
        # that a later merge step could pick up. Remove it, then re-raise.
        if os.path.exists(file_path):
            os.remove(file_path)
        raise

    return file_path

In [32]:
def _speaker_key(heading: str) -> str:
    """Return the speaker name from a heading, without a trailing parenthetical."""
    # "SOPHIA (CONT'D)" -> "SOPHIA"; a heading that is only a parenthetical is kept as-is.
    name = heading.split("(", 1)[0].strip()
    return name or heading


def _synthesize_block(speaker, dialogue_lines):
    """Generate audio for one speaker block; return its path, or None if skipped."""
    if not speaker or not dialogue_lines:
        return None
    voice_id = voice_mapping.get(speaker)
    if voice_id is None:
        # Make the skip visible instead of silently dropping the dialogue.
        print(f"No voice mapped for speaker {speaker!r}; skipping their dialogue.")
        return None
    return generate_dialogue(" ".join(dialogue_lines), voice_id)


def process_script(script: str):
    """Turn a screenplay-style script into one audio file per speaker block.

    A line counts as a speaker heading when ``str.isupper()`` is true for it.
    The non-empty lines that follow, up to the next heading, are joined with
    spaces and synthesized with the voice that ``voice_mapping`` assigns to
    that speaker. A parenthetical after the name (e.g. ``(CONT'D)``) is
    ignored when looking up the voice. Blocks whose speaker has no entry in
    ``voice_mapping`` are skipped with a printed notice. Lines before the
    first heading are ignored.

    Args:
        script: The script text, one heading or dialogue line per line.

    Returns:
        List of generated audio file paths, in script order.
    """
    audio_files = []
    current_speaker = None
    dialogue = []

    for raw_line in script.strip().split("\n"):
        line = raw_line.strip()
        if line.isupper():
            # A new heading closes the previous speaker's block.
            audio_file = _synthesize_block(current_speaker, dialogue)
            if audio_file is not None:
                audio_files.append(audio_file)
            current_speaker = _speaker_key(line)
            dialogue = []
        elif current_speaker and line:
            dialogue.append(line)

    # The last block has no following heading to trigger it, so flush it here.
    audio_file = _synthesize_block(current_speaker, dialogue)
    if audio_file is not None:
        audio_files.append(audio_file)

    return audio_files


In [33]:
def combine_audio_files(audio_files, output_file="raw_audio/merged_output.mp3", delete_sources=True):
    """Concatenate MP3 files in order and export the result as a single MP3.

    Args:
        audio_files: Paths of the MP3 files to join, in playback order.
        output_file: Destination of the merged MP3. When it is a path, its
            parent directory is created if missing.
        delete_sources: When true (the default), the input files are removed
            after a successful export. The merged file itself is never
            removed, even if it also appears in ``audio_files``.

    Returns:
        ``output_file``, or ``None`` when ``audio_files`` is empty.
    """
    if not audio_files:
        print("No audio files to combine.")
        return None

    combined = AudioSegment.empty()
    for file in audio_files:
        combined += AudioSegment.from_mp3(file)

    # Only path-like destinations have a directory to prepare; anything else
    # is handed to export() untouched.
    is_path = isinstance(output_file, (str, os.PathLike))
    if is_path:
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    combined.export(output_file, format="mp3")
    print(f"Final script audio saved at {output_file}")

    if delete_sources:
        merged_path = os.path.abspath(output_file) if is_path else None
        for file in audio_files:
            # Skip the merged output, and files already removed (duplicates).
            if os.path.abspath(file) != merged_path and os.path.exists(file):
                os.remove(file)

    return output_file

In [36]:
# Sample scene used to exercise the pipeline. Upper-case lines are speaker
# headings; the lines beneath each heading are that speaker's dialogue, which
# is how process_script() splits the text.
script_text = """
ETHAN
Well, I’m Ethan—Ethan Montague,
your great-grandson.

CHARLES MONTAGUE
Great-grandson? I don’t have any
great-grandchildren. How old do you
think I am, anyway?

CHARLES MONTAGUE
Sophia, come here! I want you to
meet your great-grandson.

SOPHIA
Charles, how much have you been
drinking?

CHARLES MONTAGUE
Why don’t you show our youngest
family member around?
"""

# Generate one clip per speaker block, then merge the clips into a single MP3.
# `audio_files` stays defined at module level after this cell runs.
if __name__ == "__main__":
    audio_files = process_script(script_text)
    combine_audio_files(audio_files)


Final script audio saved at raw_audio/merged_output.mp3


In [None]:
# Argument schema passed as `args_schema` to the `dialogue_gen_tool` tool in the
# next cell.
# NOTE(review): `BaseModel` and `Field` are not imported in any cell visible here
# (presumably from pydantic) — confirm the import exists before running this cell.
# NOTE(review): the class docstring and the Field descriptions are likely exposed
# as the tool's schema text, so treat them as runtime strings — confirm before rewording.
class dialogue_schema(BaseModel):
    """Returns all of the dialogue in a scene to be parsed and choose the voices for the characters in a string format"""

    # Required: the scene's dialogue as a single string.
    dialogue_script: str = Field(..., description="The parts of the dialogue from each person in the scene")
    # Required. NOTE(review): the description lists voice traits rather than
    # saying what value is expected — presumably a voice choice; confirm.
    character_male: str = Field(..., description="American, Young, Male")
    # Required. NOTE(review): same pattern as character_male — confirm intent.
    character_female: str = Field(..., description="Middle-aged, Female, Elegant, Expressive")

In [None]:
@tool("dialogue_gen_tool",args_schema=dialogue_schema)
def generate_audio(dialogue_script: str, character_male: str, character_female: str):
    