In [12]:
constraints = "Write a story in 2024"
language = "German"

In [13]:
import os
import time
import uuid
from typing import List, Type, TypeVar, Literal, TypedDict, Optional, Annotated
import annotated_types
import json
import logging
import random
from concurrent.futures import ThreadPoolExecutor

import replicate
import requests
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from lumaai import LumaAI
from openai import NOT_GIVEN, OpenAI
from pydantic import BaseModel
import boto3
from dataclasses import dataclass, field

load_dotenv()

True

In [14]:
logger = logging.getLogger("file_logger")
file_handler = logging.FileHandler("generate.log")
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatter)
logger.handlers.clear()
logger.addHandler(file_handler)
logger.setLevel(logging.INFO)

In [15]:
def get_temp_filename(extension: str) -> str:
    return f"./content/{uuid.uuid4()}.{extension}"


openai = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

elevenlabs = ElevenLabs(
    api_key=os.environ.get("ELEVENLABS_API_KEY"),
)

luma = LumaAI(
    auth_token=os.environ.get("LUMAAI_API_KEY"),
)


def generate_text(system_prompt: str, user_prompt: str, json_mode: bool = False, temperature: float = 1) -> str:
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"} if json_mode else NOT_GIVEN,
        temperature=temperature,
    )
    if not response.choices[0].message.content:
        raise Exception("No response from OpenAI")
    logger.info(f"Generated text with system prompt '{system_prompt}' and user prompt '{user_prompt}' with temperature {temperature}: {response.choices[0].message.content}")
    return response.choices[0].message.content


T = TypeVar("T", bound=BaseModel)


def generate_json(system_prompt: str, user_prompt: str, model: Type[T], temperature: float = 1) -> T:
    data = generate_text(system_prompt, user_prompt, json_mode=True, temperature=temperature)
    return model.model_validate_json(data)

def upload_to_r2(file_path: str) -> str:
    session = boto3.session.Session()
    endpoint_url = f"https://{os.environ.get("R2_ACCOUNT_ID")}.r2.cloudflarestorage.com"

    s3_client = session.client(
        "s3",
        region_name="auto",
        endpoint_url=endpoint_url,
        aws_access_key_id=os.environ.get("R2_ACCESS_KEY_ID"),
        aws_secret_access_key=os.environ.get("R2_SECRET_ACCESS_KEY"),
    )

    filename = os.path.basename(file_path)

    s3_client.upload_file(file_path, "test", filename, ExtraArgs={"ACL": "public-read"})

    return f"https://pub-{os.environ.get('R2_BUCKET_PUB_ID')}.r2.dev/{filename}"

def generate_image(prompt: str, character_reference_url: str = None) -> str:
    input = {"prompt": prompt, "aspect_ratio": "16:9"}

    if character_reference_url:
        input["character_reference_url"] = character_reference_url

    output = replicate.run(
        "luma/photon",
        input=input,
    )

    filename = get_temp_filename("jpg")
    with open(filename, "wb") as file:
        file.write(output.read())

    logger.info(
        f"Generated image with prompt '{prompt}' and character reference URL: {character_reference_url}: {filename}"
    )

    return filename


def generate_video(prompt: str, extend_n_times: int = 0) -> str:
    generation = luma.generations.create(
        prompt=prompt,
    )

    completed = False
    while not completed:
        if not generation.id:
            raise RuntimeError("Generation ID is None")
        generation = luma.generations.get(id=generation.id)
        if generation.state == "completed":
            completed = True
        elif generation.state == "failed":
            raise RuntimeError(f"Generation failed: {generation.failure_reason}")
        print("Generating video..")
        time.sleep(3)

    for _ in range(extend_n_times):
        extend_generation = luma.generations.create(
            prompt=prompt,
            keyframes={
                "frame0": {
                    "type": "generation",
                    "id": generation.id
                }
            }
        )
        completed = False
        while not completed:
            if not extend_generation.id:
                raise RuntimeError("Generation ID is None")
            extend_generation = luma.generations.get(id=extend_generation.id)
            if extend_generation.state == "completed":
                completed = True
            elif extend_generation.state == "failed":
                raise RuntimeError(f"Generation failed: {extend_generation.failure_reason}")
            print("Extending video..")
            time.sleep(3)
        generation = extend_generation

    if not generation.assets:
        raise RuntimeError("Generation has no video")

    video_url = generation.assets.video

    if not video_url:
        raise RuntimeError("Generation has no video URL")

    filename = get_temp_filename("mp4")
    response = requests.get(video_url, stream=True)
    with open(filename, "wb") as file:
        file.write(response.content)

    logger.info(f"Generated video for prompt '{prompt}': {filename}")

    return filename


def generate_audio(text: str, voice: str = "Brian") -> str:
    audio = elevenlabs.generate(
        text=text,
        voice=voice,
        model="eleven_multilingual_v2",
    )
    filename = get_temp_filename("mp3")
    with open(filename, "wb") as file:
        for chunk in audio:
            file.write(chunk)

    logger.info(f"Generated audio for text '{text}' and voice '{voice}': {filename}")
    
    return filename


In [16]:
x_faktor_prompt = """Stories in Beyond Belief: Fact or Fiction are characterized by their blend of mystery and the supernatural, often involving unexplained phenomena, eerie coincidences, or paranormal elements. They feature ordinary people encountering extraordinary circumstances, set in relatable, everyday environments. These narratives frequently explore themes of moral or poetic justice, where actions lead to karmic outcomes, and they often conclude with surprising or ironic twist endings. The tone is atmospheric, using suspenseful pacing, eerie music, and dim lighting to enhance intrigue. Stories delve into human nature, touching on themes like trust, fear, belief, and love, and are presented as short, self-contained episodes that vary in genre, from horror to emotional drama to lighthearted mystery. The stories have a fulfilling ending, but the mysteries normally remain mysterious and only partial explanations are given to leaving the audience to draw their own conclusions.

In a typical story the main character or characters (ordinary people, living normal lives) encounter an extraordinary situation."""

idea = generate_text(
    system_prompt=f"""You are a writer for the TV show Beyond Belief: Fact or Fiction, or in German X-Faktor das Unfassbare.

{x_faktor_prompt}

Here are some typical plots:
	1.	“The Apparition” - A young couple sees a ghostly figure warning them about an unsafe bridge, saving their lives.
	2.	“The Motorcycle” - A man dreams of a biker who warns him about an accident, only to find the same biker saved him in real life.
	3.	“The Portrait” - An artist paints a woman’s portrait and later discovers she died on the same night he painted her.
	4.	“The Chess Game” - A man playing chess with a spirit wins, preventing his own death.
	5.	“The Hitchhiker” - A woman picks up a mysterious hitchhiker who vanishes but later finds he saved her from an accident.
(don't copy them, just use them as inspiration)

Generate an idea for a short story for that particular TV show.

The story need to follow the following structure:
1. The main character should be introduced and some background information should be given.
2. The main character should encounter an extraordinary situation.
3. The main character should deal with the consequences of the extraordinary situation.

The story needs to be short, it only has around 2 minutes of screen time.

If not otherwise specified, the main character should be a {random.choice(["male", "female"])} in the age of {random.randint(5, 100)}.

Don't write acts, scenes or anything else. Just return the story idea.""",
    user_prompt=f"Return the idea as short text in {language}. {constraints}",
    temperature=1.2,
)

print(idea)

Titel: "Das musikalische Echo"

1. Einführung: Anna, eine passionierte Cellistin mit einer tiefen Liebe zur klassischen Musik, lebt zurückgezogen in einer kleinen Stadt in Deutschland. Seit dem Verlust ihres Vaters, der ebenfalls Musiker war, hat sie sich in ihre Leidenschaft für Musik geflüchtet. Ihr Cello, ein Erbstück ihres Vaters, ist ihr unbezahlbarer Schatz.

2. Begegnung mit dem Außergewöhnlichen: Eines Abends, nachdem sie eine besonders emotionale Melodie geprobt hat, hört Anna ein leises Echo, das wie eine Gegenstimme zu ihrem Spiel klingt. Zunächst denkt sie, es sei ein Nachklang in ihrem Kopf oder der Wind, der durch das alte Fenster weht, doch dann hört sie die vertraute Melodie klar und präzise gespielt. Die geheimnisvolle Musik scheint aus dem Inneren ihres Cellos zu kommen, ohne dass ihre Finger die Saiten berühren.

3. Die Konsequenzen: Beeindruckt und beunruhigt, folgt sie der Quelle der mystischen Klänge, die sie überallhin begleiten. Sie beginnt, aufgerechnet zusamme

In [17]:
class Character(BaseModel):
    id: str
    name: str
    description: str
    appearance: str
    voice: Literal["Aria", "Roger", "Sarah", "Laura", "Charlie", "George", "Callum", "River", "Liam", "Charlotte", "Alice", "Matilda", "Will", "Jessica", "Eric", "Chris", "Brian", "Daniel", "Lily", "Bill"]

class Characters(BaseModel):
    characters: List[Character]


characters = generate_json(
    system_prompt=f"""Generate the characters for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare.

{x_faktor_prompt}

All characters need to be people. If there are something more abstract, make it a person.

Return the characters as a JSON:
{{
    "characters": [
        {{
            "id": a unique identifier for the character as a string, e.g the name in lower case (must be unique)
            "name": the name of the character
            "description": a short description of the character
            "appearance": a short description of the character's appearance in style of prompt for an image generator, add country of origin, age, gender, hair color, eye color, ethnicity, etc. Also describe the clothing in detail Must be in English, start with A man/women/boy/girl..,
            "voice": chose of voice for the character, you find a list below. Any voice can only be used once.
        }}
    ]
}}
in {language} (except for the voice and appearance).
Here the list of voices: (they can all speak in {language}):
1.	Aria: Expressive and middle-aged American female voice, ideal for social media content.
2.	Roger: Confident middle-aged American male voice, suited for social media applications.
3.	Sarah: Soft, young American female voice, perfect for news delivery.
4.	Laura: Upbeat young American female voice, commonly used in social media settings.
5.	Charlie: Natural middle-aged Australian male voice, excellent for conversational purposes.
6.	George: Warm middle-aged British male voice, great for narration.
7.	Callum: Intense middle-aged Transatlantic male voice, tailored for character-based uses.
8.	River: Confident middle-aged American non-binary voice, designed for social media.
9.	Liam: Articulate young American male voice, often used for narration.
10.	Charlotte: Seductive young Swedish female voice, crafted for character work.
11.	Alice: Confident middle-aged British female voice, suitable for news contexts.
12.	Matilda: Friendly middle-aged American female voice, fitting for narration.
13.	Will: Friendly young American male voice, focused on social media.
14.	Jessica: Expressive young American female voice, ideal for conversational projects.
15.	Eric: Friendly middle-aged American male voice, used for conversational settings.
16.	Chris: Casual middle-aged American male voice, great for conversational uses.
17.	Brian: Deep middle-aged American male voice, primarily used for narration.
18.	Daniel: Authoritative middle-aged British male voice, perfect for news delivery.
19.	Lily: Warm middle-aged British female voice, crafted for narration.
20.	Bill: Trustworthy older American male voice, tailored for narration.""",
    user_prompt=f"The story is about a {idea}. Use common names in that language and region.",
    model=Characters,
    temperature=0.5,
).characters

character_descriptions = ""
for character in characters:
    character_descriptions += (
        f"ID: {character.id}\nName: {character.name}\nDescription: {character.description}\n\n"
    )

print(character_descriptions)

ID: anna
Name: Anna
Description: Anna ist eine leidenschaftliche Cellistin, die nach dem Tod ihres Vaters Trost in der Musik gefunden hat. Sie lebt zurückgezogen und widmet sich ganz ihrem Cello, einem Erbstück ihres Vaters.

ID: vater
Name: Vater (Anna's Father)
Description: Ann's Vater war ein talentierter Musiker, der eine tiefe Bindung zu seiner Tochter hatte. Er vererbte ihr sein geliebtes Cello und seine Liebe zur Musik.




In [18]:
class FileMapping(TypedDict):
    filename: str
    url: str

character_images: dict[str, FileMapping] = {}
for n, character in enumerate(characters):
    print(f"Generating character image for {character.name} ({n + 1} of {len(characters)})")
    file_path = generate_image(f"A portrait of {character.appearance} in front of a white background")
    character_images[character.id] = FileMapping(
        filename=file_path,
        url=upload_to_r2(file_path),
    )

print(character_images)

Generating character image for Anna (1 of 2)
Generating character image for Vater (Anna's Father) (2 of 2)
{'anna': {'filename': './content/f59a78ea-5cb4-4c64-9304-eb5ea459743c.jpg', 'url': 'https://pub-7fdd542f3dda44bcaef4c36bf4bb48e1.r2.dev/f59a78ea-5cb4-4c64-9304-eb5ea459743c.jpg'}, 'vater': {'filename': './content/7b485cb3-d851-4725-96bc-ffcd2360106e.jpg', 'url': 'https://pub-7fdd542f3dda44bcaef4c36bf4bb48e1.r2.dev/7b485cb3-d851-4725-96bc-ffcd2360106e.jpg'}}


In [19]:
class Act(BaseModel):
    description: str


class Acts(BaseModel):
    acts: List[Act]


acts = generate_json(
    system_prompt=f"""Generate the acts for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare.

{x_faktor_prompt}
    
Make it a 3 act play. Make the story suspenseful and gripping.

The first act should establish the main character and set the scene. It should give some background information about the character and the setting.
In the second act the main character should encounter the extraordinary situation.
In the third act the main character should deal with the consequences of the extraordinary situation.

Return the acts as a JSON:
{{
    "acts": [
        {{
            "description": a short description of the act, the act need to be short around 1 minute of screen time. Describe exactly what happens in the act and how it ends. Don't describe the characters.
        }}
    ]
}}
in {language}.""",
    user_prompt=f"The story is about a {idea}. The only characters are {character_descriptions}.",
    model=Acts,
).acts

for n, act in enumerate(acts):
    print(f"Act {n + 1}:")
    print(act.description)


Act 1:
Anna, eine leidenschaftliche Cellistin, lebt in einer kleinen deutschen Stadt. Seit dem Tod ihres Vaters, einem talentierten Musiker, hat sie sich zunehmend in ihre Musik vertieft. Ihr Erbstück, das Cello ihres Vaters, ist ihr wertvollster Besitz. Anna verbringt Stunden damit, emotionale Melodien zu spielen, während sie in Gedanken bei ihrem Vater ist. Die Dunkelheit ihres gemütlichen Wohnzimmers bietet die perfekte Kulisse für ihr Seelenspiel auf dem Cello, das ihr Trost und Nähe zu ihrem verstorbenen Vater gibt.
Act 2:
Eines Abends, nachdem Anna intensiv gespielt hat, hört sie ein seltsames Echo. Erst glaubt sie an einen Nachklang in ihrem Kopf oder den Wind, der durchs Fenster weht. Doch die Melodie, klar und unverkennbar, ertönt wie eine Gegenstimme zu ihrem eigenen Spiel. Erschrocken und fasziniert bemerkt sie, dass die mysteriöse Musik aus ihrem Cello zu kommen scheint, ohne dass sie die Saiten berührt. Die vertrauten Töne spiegeln Melodien wider, die sie einst mit ihrem V

In [20]:
class Scene(BaseModel):
    type: Literal["narration", "dialogue"]
    characters: Annotated[list[str], annotated_types.Len(min_length=1)]
    description: str

class Scenes(BaseModel):
    scenes: List[Scene]

act_scenes: List[List[Scene]] = []

all_acts = ""
for act_n, act in enumerate(acts):
    all_acts += f"Act {act_n + 1}: {act.description}\n"


for act_n, act in enumerate(acts):
    print(f"Generating scenes for act {act_n + 1} of {len(acts)}")
    scenes = generate_json(
        system_prompt=f"""Generate the scenes for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare.

{x_faktor_prompt}

Write 3 scenes for an act. Make it a mix of narration and dialogue. Return the scenes as a JSON:
{{
    "scenes": [
        {{
            "type": "narration", // A narration is kind of a scenic view or an establishing shot with a video in the background and an off-text read by the main character.
            "characters": A list of exactly one character ID that speaks the narration.
            "description": a short description of the scene. It should have around 10 seconds of screen time and can only be in a single location. Describe exactly what in the scene happens and how the scene ends. Don't include characters in the visual description of the scene. Don't write dialogue or off-text. 
        }} | {{
            "type": "dialogue", // A dialogue is a dialogue between two or more people.,
            "characters": a list of the character IDs that are in the scene (at least two).
            "description": a short description of the scene. It should have around 10 seconds of screen time and can only be in a single location. Describe exactly what in the scene happens and how the scene ends. Don't write dialogue or off-text.
        }}
    ]
}}
    in {language}.""",
        user_prompt=f"The story is about a {idea}. The only characters are {character_descriptions}. The acts are {all_acts}. The act you should generate the scenes for is act {act_n + 1} of {len(acts)}, {act.description}. Only generate the scenes for this act and nothing else.",
        model=Scenes,
    ).scenes
    act_scenes.append(scenes)

for n, act in enumerate(act_scenes):
    for m, scene in enumerate(act):
        print(f"Act {n + 1}, Scene {m + 1}:")
        print(scene.type)
        print(scene.description)
        print(scene.characters)


Generating scenes for act 1 of 3
Generating scenes for act 2 of 3
Generating scenes for act 3 of 3
Act 1, Scene 1:
narration
Das gemütliche Wohnzimmer ist in sanftes Kerzenlicht gehüllt. Die Schatten der Flammen tanzen an den Wänden. Die Fenster sind halb geöffnet, der abendliche Wind flüstert leise herein und trägt den Klang der fernen Natur mit sich. Der Ort strahlt eine ruhige, stille Atmosphäre aus, die von der Abenddämmerung verstärkt wird.
['anna']
Act 1, Scene 2:
dialogue
Anna sitzt auf einem gepolsterten Stuhl, ihre Finger ruhen sanft auf den Saiten ihres Cellos. Vor ihr liegt ein Notenblatt, das leicht im Licht flackert. Sie spricht leise zu sich selbst, als würde sie mit einer unsichtbaren Person im Raum kommunizieren, während sie sich auf das Spiel einer melancholischen Melodie vorbereitet.
['anna']
Act 1, Scene 3:
narration
Anna schließt die Augen, als sie die ersten Töne anstimmt. Der Raum wird von den sanften, tragenden Klängen des Cellos ausgefüllt. Die Musik spiegelt An

In [21]:
episode_title = generate_text(
    system_prompt=f"Generate a title for a short story of the series Beyond Belief or X-Faktor das Unfassbare. Return the title in {language}. It must be short and intriguing. Return it without any quotes.",
    user_prompt=f"The story is about a {idea}.",
    temperature=0.5,
).strip('"').strip("'")

print(episode_title)

Das Musikalische Echo


In [22]:
@dataclass
class Audio:
    filename: str


@dataclass
class Content:
    filename: str
    audio: Optional[Audio] = None


@dataclass
class Scene:
    type: str  # "video" or "image"
    content: Content
    fade_out: bool = False


@dataclass
class Appearance:
    start: float  # seconds
    end: float  # seconds


@dataclass
class TextOverlay:
    text: str
    appearance: Appearance


@dataclass
class SoundEffect:
    filename: str
    start: float  # seconds


@dataclass
class Episode:
    backgroundMusic: Audio
    scenes: List[Scene]
    textOverlays: List[TextOverlay] = field(default_factory=list)
    soundEffects: List[SoundEffect] = field(default_factory=list)

    def to_json(self) -> str:
        return json.dumps(self, default=lambda o: o.__dict__, indent=4)

class NarrationResult(BaseModel):
    video_description: str
    monologue: str

class DialogueLine(BaseModel):
    character: str
    text: str

class DialogueResult(BaseModel):
    scene_background_description: str
    lines: List[DialogueLine]

episode = Episode(
    backgroundMusic=Audio(filename="background.mp3"),
    scenes=[],
    textOverlays=[
        TextOverlay(text=episode_title, appearance=Appearance(start=3, end=8)),
        TextOverlay(text="Fabian Neundorf", appearance=Appearance(start=13, end=18)),
        TextOverlay(text="Florian Kaiser", appearance=Appearance(start=23, end=28)),
    ]
)

entire_story = f"""{idea}

Characters:
{character_descriptions}

# Acts
"""

for act_n, act in enumerate(acts):
    entire_story += f"## Act {act_n + 1}\n{act.description}\n\n"
    for scene_n, scene in enumerate(act_scenes[act_n]):
        entire_story += f"### Scene {scene_n + 1}\n{scene.description}\n"
    entire_story += "\n"

def handle_narration_scene(act_n, scene_n, scene):      
    print(f"Generating narration for scene {scene_n + 1}, act {act_n + 1}")
    narration_result = generate_json(
        system_prompt=f"""Generate a off-test spoken by a character for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare. 
        
{x_faktor_prompt}

Return the text as a JSON:
{{
    "video_description": a prompt for a video generator describing the scene. Must be in English. Must be a scene without characters or people in it.,
    "monologue": a short monologue of the narrator. It should be short 10-20 seconds. It's narrated by the character in 1st person and not by a narrator. It should be in the past tense. In the end it should introduce the next scene,
}}
in {language}.""",
        user_prompt=f"The story: {entire_story}. The only characters are {character_descriptions}. The act you is act number {act_n + 1} of {len(act_scenes)}, {acts[act_n].description}. The scene you are generating the narration for is scene number {scene_n + 1} of {len(act)}, {scene.description}. Only write the text for this scene and nothing else. {'Since this is the first scene, the character should make a short introduction of him/herself.' if act_n == 0 and scene_n == 0 else ''}",
        model=NarrationResult,
        temperature=0.5,
    )
    video = generate_video(
        f"A hyper-realistic video of {narration_result.video_description}",
        extend_n_times=min(narration_result.monologue.count(" ") // 20 - 1, 2),
    )
    print(f"Generating audio for scene {scene_n + 1}, act {act_n + 1}")
    audio = generate_audio(
        narration_result.monologue,
        next((item for item in characters if item.id == scene.characters[0]), None).voice,
    )
    return Scene(
        type="video", content=Content(filename=video, audio=Audio(filename=audio))
    )
def handle_dialogue_scene(act_n, scene_n, scene):
    print(f"Generating dialogue for scene {scene_n + 1}, act {act_n + 1}")
    dialogue_result = generate_json(
        system_prompt=f"""Generate a dialogue for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare.

{x_faktor_prompt}

Return the dialogue as a JSON:
{{
    "scene_background_description": a short description of the background of the scene for a photo generator. Only describe the scene and background, don't describe any characters. Must be in English..",
    "lines": {{
        "character": the character ID, cannot be empty or null, can only be one of the character IDs in the scene. Every line must change the character,
        "text": the text of the dialogue line. This is the exact text the actor will say, it cannot contain any directions,
    }}
}}
in {language}.""",
        user_prompt=f"The story: {entire_story}. The only characters are {character_descriptions}. The act you generate the dialogue for is act number {act_n + 1} of {len(act_scenes)}, {acts[act_n].description}. The scene you are generating the dialogue for is scene number {scene_n + 1} of {len(act)}, {scene.description}. The characters in this scene are {', '.join(character for character in scene.characters)}. Only write the dialogue for this scene and nothing else. If a character make the first appearance in this scene, let them make a short introduction. Make the introduction natural, built it into the dialogue.",
        model=DialogueResult,
        temperature=0.5,
    )
    character_images_for_scene = {}
    for n, character in enumerate(scene.characters):
        print(
            f"Generating character image for {character} ({n + 1} of {len(scene.characters)}) for scene {scene_n + 1}, act {act_n + 1}"
        )
        character_images_for_scene[character] = generate_image(f"A portrait of the character in front of {dialogue_result.scene_background_description}", character_images[character]["url"])
    result = []
    for n, line in enumerate(dialogue_result.lines):
        print(
            f"Generating audio for {line.character} ({n + 1} of {len(dialogue_result.lines)}) for scene {scene_n + 1}, act {act_n + 1}"
        )
        audio = generate_audio(
            line.text,
            next((item for item in characters if item.id == line.character), None).voice,
        )
        result.append(
            Scene(
                type="image",
                content=Content(
                    filename=character_images_for_scene[line.character],
                    audio=Audio(filename=audio),
                ),
            )
        )
    return result

futures = []
with ThreadPoolExecutor(max_workers=5) as executor:
    for act_n, act in enumerate(act_scenes):
        for scene_n, scene in enumerate(act):
            if scene.type == "narration":
                futures.append(
                    executor.submit(handle_narration_scene, act_n, scene_n, scene)
                )
            elif scene.type == "dialogue":
                futures.append(
                    executor.submit(handle_dialogue_scene, act_n, scene_n, scene)
                )

for result in [future.result() for future in futures]:
    if isinstance(result, list):
        for r in result:
            episode.scenes.append(r)
    else:
        episode.scenes.append(result)

episode.scenes[-1].fade_out = True

episode.scenes.append(Scene(type="video", content=Content(filename="false.mp4", audio=Audio(filename="false.mp3"))))

filename = f"episode-{uuid.uuid4()}.json"
with open(filename, "w") as file:
    file.write(episode.to_json())

print(f"Episode saved to {filename}, rendering video...")

os.system(f"poetry run python renderer.py {filename}")

print("Done")

Generating narration for scene 1, act 1
Generating dialogue for scene 2, act 1
Generating narration for scene 3, act 1
Generating narration for scene 1, act 2
Generating dialogue for scene 2, act 2
Generating character image for anna (1 of 1) for scene 2, act 1
Generating video..
Generating video..
Generating video..
Generating character image for anna (1 of 2) for scene 2, act 2
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating character image for vater (2 of 2) for scene 2, act 2
Generating audio for anna (1 of 5) for scene 2, act 1
Generating video..
Generating video..
Generating video..
Generating audio for anna (2 of 5) for scene 2, act 1
Generating video..
Generating video..
Generating video..
Generating audio for anna (3 of 5) for scene 2, act 1
Generating video..
Generating video..
Generating

KeyError: 'vater'