In [1]:
constraints = "Lass die Geschichte in Bad Schwartau in 2024 spielen. Die Charakter sollen Fabian, Florian und Mandy sein."
language = "German"

In [2]:
import os
import time
import uuid
from typing import List, Type, TypeVar, Literal, TypedDict, Optional, Annotated
import annotated_types
import json
import logging

import replicate
import requests
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from lumaai import LumaAI
from openai import NOT_GIVEN, OpenAI
from pydantic import BaseModel
import boto3
from dataclasses import dataclass, field

load_dotenv()

True

In [3]:
logger = logging.getLogger("file_logger")
file_handler = logging.FileHandler("generate.log")
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatter)
logger.handlers.clear()
logger.addHandler(file_handler)
logger.setLevel(logging.INFO)

In [4]:
def get_temp_filename(extension: str) -> str:
    return f"./content/{uuid.uuid4()}.{extension}"


openai = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

elevenlabs = ElevenLabs(
    api_key=os.environ.get("ELEVENLABS_API_KEY"),
)

luma = LumaAI(
    auth_token=os.environ.get("LUMAAI_API_KEY"),
)


def generate_text(system_prompt: str, user_prompt: str, json_mode: bool = False) -> str:
    logger.info(f"Generating text with system prompt: {system_prompt} and user prompt: {user_prompt}")
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"} if json_mode else NOT_GIVEN,
    )
    if not response.choices[0].message.content:
        raise Exception("No response from OpenAI")
    logger.info(f"Response from OpenAI: {response.choices[0].message.content}")
    return response.choices[0].message.content


T = TypeVar("T", bound=BaseModel)


def generate_json(system_prompt: str, user_prompt: str, model: Type[T]) -> T:
    data = generate_text(system_prompt, user_prompt, json_mode=True)
    return model.model_validate_json(data)

def upload_to_r2(file_path: str) -> str:
    session = boto3.session.Session()
    endpoint_url = f"https://{os.environ.get("R2_ACCOUNT_ID")}.r2.cloudflarestorage.com"

    s3_client = session.client(
        "s3",
        region_name="auto",
        endpoint_url=endpoint_url,
        aws_access_key_id=os.environ.get("R2_ACCESS_KEY_ID"),
        aws_secret_access_key=os.environ.get("R2_SECRET_ACCESS_KEY"),
    )

    filename = os.path.basename(file_path)

    s3_client.upload_file(file_path, "test", filename, ExtraArgs={"ACL": "public-read"})

    return f"https://pub-{os.environ.get('R2_BUCKET_PUB_ID')}.r2.dev/{filename}"

def generate_image(prompt: str, character_reference_url: str = None) -> str:
    logger.info(f"Generating image with prompt: {prompt} and character reference URL: {character_reference_url}")

    input = {"prompt": prompt, "aspect_ratio": "16:9"}

    if character_reference_url:
        input["character_reference_url"] = character_reference_url

    output = replicate.run(
        "luma/photon",
        input=input,
    )

    filename = get_temp_filename("jpg")
    with open(filename, "wb") as file:
        file.write(output.read())

    logger.info(f"Generated image: {filename}")

    return filename


def generate_video(prompt: str, extend_n_times: int = 0) -> str:
    logger.info(f"Generating video with prompt: {prompt}")

    generation = luma.generations.create(
        prompt=prompt,
    )

    completed = False
    while not completed:
        if not generation.id:
            raise RuntimeError("Generation ID is None")
        generation = luma.generations.get(id=generation.id)
        if generation.state == "completed":
            completed = True
        elif generation.state == "failed":
            raise RuntimeError(f"Generation failed: {generation.failure_reason}")
        print("Generating video..")
        time.sleep(3)

    for _ in range(extend_n_times):
        extend_generation = luma.generations.create(
            prompt=prompt,
            keyframes={
                "frame0": {
                    "type": "generation",
                    "id": generation.id
                }
            }
        )
        completed = False
        while not completed:
            if not extend_generation.id:
                raise RuntimeError("Generation ID is None")
            extend_generation = luma.generations.get(id=extend_generation.id)
            if extend_generation.state == "completed":
                completed = True
            elif extend_generation.state == "failed":
                raise RuntimeError(f"Generation failed: {extend_generation.failure_reason}")
            print("Extending video..")
            time.sleep(3)
        generation = extend_generation

    if not generation.assets:
        raise RuntimeError("Generation has no video")

    video_url = generation.assets.video

    if not video_url:
        raise RuntimeError("Generation has no video URL")

    filename = get_temp_filename("mp4")
    response = requests.get(video_url, stream=True)
    with open(filename, "wb") as file:
        file.write(response.content)

    logger.info(f"Generated video: {filename}")

    return filename


def generate_audio(text: str, voice: str = "Brian") -> str:
    logger.info(f"Generating audio with text: {text} and voice: {voice}")

    audio = elevenlabs.generate(
        text=text,
        voice=voice,
        model="eleven_multilingual_v2",
    )
    filename = get_temp_filename("mp3")
    with open(filename, "wb") as file:
        for chunk in audio:
            file.write(chunk)

    logger.info(f"Generated audio: {filename}")
    
    return filename


In [5]:
idea = generate_text(
    system_prompt="If you are a screenwriter for the TV show Beyond All Belief, or in German X-Faktor das Unfassbare, generate an idea for a short story for that particular TV show. Don't write acts, scenes or anything else. Just a quick idea. Include a twist.",
    user_prompt=f"Return the idea as short text in {language}. The story should take place in {language}. {constraints}",
)

print(idea)

In dem kleinen Städtchen Bad Schwartau im Jahr 2024 leben die besten Freunde Fabian und Florian, die beide von dem mysteriösen Verschwinden von Mandy, einer Mitschülerin, gehört haben. Die Legende besagt, dass sie vor Jahren in den Wäldern um Bad Schwartau spurlos verschwunden ist, während sie nach einer sagenumwobenen Waldlichtung suchte, die angeblich einen uralten Wunschbaum beherbergte.

Von Abenteuerlust gepackt, machen sich Fabian und Florian auf die Suche nach der Lichtung und dem Baum, überzeugt davon, dass Mandy eines ihrer wenigen verbliebenen Lebenszeichen hinterlassen hat. Im Wald stoßen sie auf rätselhafte Spuren und fühlen sich von unsichtbaren Augen beobachtet. Als die Nacht hereinbricht, erblicken sie im fahlen Mondschein plötzlich die Lichtung mit dem majestätischen Wunschbaum in der Mitte.

Neugierig nähern sie sich, als sie plötzlich eine vertraute Stimme hören – es ist Mandy, unversehrt und lächelnd, umgeben von einer mystischen Aura. Sie erklärt, dass der Baum ihr 

In [6]:
class Character(BaseModel):
    id: str
    name: str
    description: str
    appearance: str
    voice: Literal["Aria", "Roger", "Sarah", "Laura", "Charlie", "George", "Callum", "River", "Liam", "Charlotte", "Alice", "Matilda", "Will", "Jessica", "Eric", "Chris", "Brian", "Daniel", "Lily", "Bill"]

class Characters(BaseModel):
    characters: List[Character]


characters = generate_json(
    system_prompt=f"""Generate the characters for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare.
Return the characters as a JSON:
{{
    "characters": [
        {{
            "id": a unique identifier for the character as a string, e.g the name in lower case (must be unique)
            "name": the name of the character
            "description": a short description of the character
            "appearance": a short description of the character's appearance in style of prompt for an image generator in English, add country of origin, age, gender, hair color, eye color, ethnicity, etc. Also describe the clothing in detail,
            "voice": chose of voice for the character, you find a list below. Any voice can only be used once.
        }}
    ]
}}
in {language}.
Here the list of voices: (they can all speak in {language}):
1.	Aria: Expressive and middle-aged American female voice, ideal for social media content.
2.	Roger: Confident middle-aged American male voice, suited for social media applications.
3.	Sarah: Soft, young American female voice, perfect for news delivery.
4.	Laura: Upbeat young American female voice, commonly used in social media settings.
5.	Charlie: Natural middle-aged Australian male voice, excellent for conversational purposes.
6.	George: Warm middle-aged British male voice, great for narration.
7.	Callum: Intense middle-aged Transatlantic male voice, tailored for character-based uses.
8.	River: Confident middle-aged American non-binary voice, designed for social media.
9.	Liam: Articulate young American male voice, often used for narration.
10.	Charlotte: Seductive young Swedish female voice, crafted for character work.
11.	Alice: Confident middle-aged British female voice, suitable for news contexts.
12.	Matilda: Friendly middle-aged American female voice, fitting for narration.
13.	Will: Friendly young American male voice, focused on social media.
14.	Jessica: Expressive young American female voice, ideal for conversational projects.
15.	Eric: Friendly middle-aged American male voice, used for conversational settings.
16.	Chris: Casual middle-aged American male voice, great for conversational uses.
17.	Brian: Deep middle-aged American male voice, primarily used for narration.
18.	Daniel: Authoritative middle-aged British male voice, perfect for news delivery.
19.	Lily: Warm middle-aged British female voice, crafted for narration.
20.	Bill: Trustworthy older American male voice, tailored for narration.""",
    user_prompt=f"The story should take place in {language}. Use common names in that language and region. The story is about a {idea}.",
    model=Characters,
).characters

character_descriptions = ""
for character in characters:
    character_descriptions += (
        f"ID: {character.id}, Name: {character.name}, Description: {character.description}\n"
    )

print(character_descriptions)

ID: fabian, Name: Fabian, Description: Ein abenteuerlustiger und neugieriger Jugendlicher, der sich von Geschichten über das Übernatürliche angezogen fühlt. Trotz seiner Furchtlosigkeit, hat er einen scharfen Verstand und einen starken Freundschaftssinn.
ID: florian, Name: Florian, Description: Fabians bester Freund, zurückhaltender und praktisch veranlagt, aber stets bereit, Fabian in seine Abenteuer zu folgen. Er hat ein gutes Gespür für Gefahren und bringt often Vernunft in die Unternehmungen der beiden.
ID: mandy, Name: Mandy, Description: Ein einst vermisstes, mysteriöses Mädchen, das für immer jung und glücklich in der Nähe des Wunschbaums geblieben ist. Sie strahlt Ruhe und eine geheimnisvolle Aura aus.



In [7]:
class FileMapping(TypedDict):
    filename: str
    url: str

character_images: dict[str, FileMapping] = {}
for n, character in enumerate(characters):
    print(f"Generating character image for {character.name} ({n + 1} of {len(characters)})")
    file_path = generate_image(f"A neutral portrait of {character.appearance}")
    character_images[character.id] = FileMapping(
        filename=file_path,
        url=upload_to_r2(file_path),
    )

print(character_images)

Generating character image for Fabian (1 of 3)
Generating character image for Florian (2 of 3)
Generating character image for Mandy (3 of 3)
{'fabian': {'filename': './content/f73a1f16-3b76-4efe-9dcc-2c5f61f51079.jpg', 'url': 'https://pub-7fdd542f3dda44bcaef4c36bf4bb48e1.r2.dev/f73a1f16-3b76-4efe-9dcc-2c5f61f51079.jpg'}, 'florian': {'filename': './content/85ccba81-caec-4108-9dff-725312257a29.jpg', 'url': 'https://pub-7fdd542f3dda44bcaef4c36bf4bb48e1.r2.dev/85ccba81-caec-4108-9dff-725312257a29.jpg'}, 'mandy': {'filename': './content/2452971f-e97e-4381-bb3e-0b9f40a09cc8.jpg', 'url': 'https://pub-7fdd542f3dda44bcaef4c36bf4bb48e1.r2.dev/2452971f-e97e-4381-bb3e-0b9f40a09cc8.jpg'}}


In [8]:
class Act(BaseModel):
    description: str


class Acts(BaseModel):
    acts: List[Act]


acts = generate_json(
    system_prompt=f"""Generate the acts for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare. Make it a 3 act play. Return the acts as a JSON:
{{
    "acts": [
        {{
            "description": a short description of the act, the act need to be short around 2-3 minutes of screen time. Describe exactly what in the scene happens and how the scene ends. Don't describe the characters.
        }}
    ]
}}
in {language}.""",
    user_prompt=f"The story should take place in {language}. Use common names in that language and region. The story is about a {idea}. The only characters are {character_descriptions}.",
    model=Acts,
).acts

for n, act in enumerate(acts):
    print(f"Act {n + 1}:")
    print(act.description)


Act 1:
Fabian und Florian befinden sich im dichten Wald von Bad Schwartau, der von geheimnisvollen Geschichten durchzogen ist. Ausgestattet mit Taschenlampen und einer alten Karte, die angeblich zu der sagenumwobenen Lichtung führt, folgen sie einem schmalen Pfad. Die Bäume werfen gespenstische Schatten und das Rascheln der Blätter verstärkt ihre Anspannung. Schließlich erreichen sie eine Lichtung, auf deren Mitte ein imposanter Baum steht, umhüllt von mystischem Leuchten. Fasziniert und zugleich nervös nähern die beiden Freunde sich dem Baum, als plötzlich eine Stimme die Stille durchbricht...
Act 2:
Mandy tritt ins Mondlicht und begrüßt Fabian und Florian. Ihre Erscheinung ist unerwartet ruhig und sie erzählt ihnen von ihrem Wunsch, für immer an diesem glücklichen Ort zu bleiben, der ihr vom Baum erfüllt wurde. Fabian und Florian lauschen gebannt, als Mandy ihnen versichert, dass sie glücklich und unvergessen sei. Doch während sie spricht, beginnen sich die Ränder ihrer Gestalt aufzu

In [9]:
class Scene(BaseModel):
    type: Literal["narration", "dialogue"]
    characters: Annotated[list[str], annotated_types.Len(min_length=1)]
    description: str

class Scenes(BaseModel):
    scenes: List[Scene]

act_scenes: List[List[Scene]] = []

all_acts = ""
for act_n, act in enumerate(acts):
    all_acts += f"Act {act_n + 1}: {act.description}\n"


for act_n, act in enumerate(acts):
    print(f"Generating scenes for act {act_n + 1} of {len(acts)}")
    scenes = generate_json(
        system_prompt=f"""Generate the scenes for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare. Make it 3 scenes. Make it a mix of narration and dialogue. Return the scenes as a JSON:
{{
    "scenes": [
        {{
            "type": "narration", // A narration is kind of a scenic view or an establishing shot with a video in the background and an off-text.
            "characters": A list of exactly one character ID that speaks the narration.
            "description": a short description of the scene. It should have around 10 seconds of screen time and can only be in a single location. Describe exactly what in the scene happens and how the scene ends. Don't include characters in the description. Don't write dialogue or off-text. 
        }} | {{
            "type": "dialogue", // A dialogue is a dialogue between two or more people.,
            "characters": a list of the character IDs that are in the scene (at least two).
            "description": a short description of the scene. It should have around 10 seconds of screen time and can only be in a single location. Describe exactly what in the scene happens and how the scene ends. Don't write dialogue or off-text.
        }}
    ]
}}
    in {language}.""",
        user_prompt=f"The story is about a {idea}. The only characters are {character_descriptions}. The acts are {all_acts}. The act you should generate the scenes for is act {act_n + 1} of {len(acts)}, {act.description}. Only generate the scenes for this act and nothing else.",
        model=Scenes,
    ).scenes
    act_scenes.append(scenes)

for n, act in enumerate(act_scenes):
    for m, scene in enumerate(act):
        print(f"Act {n + 1}, Scene {m + 1}:")
        print(scene.type)
        print(scene.description)
        print(scene.characters)


Generating scenes for act 1 of 3
Generating scenes for act 2 of 3
Generating scenes for act 3 of 3
Act 1, Scene 1:
narration
Die Kamera zeigt Fabian und Florian, wie sie mit Taschenlampen bewaffnet durch den dichten Wald von Bad Schwartau laufen. Die Umgebung wirkt geheimnisvoll, fast gespenstisch, und die Schatten der Bäume tanzen im schwachen Licht der Lampen. Blätter rascheln sanft im Wind und die Spannung der beiden Freunde ist spürbar. Die Szene endet, als sie an einem schmalen Pfad stehen bleiben, der in das Herz des Waldes führt.
['fabian']
Act 1, Scene 2:
dialogue
Fabian und Florian stehen zusammen auf dem schmalen Pfad, die alten Karte in den Händen, während sie sich leise über die nächsten Schritte austauschen. Die Nervosität steht ihnen ins Gesicht geschrieben, doch die Neugier treibt sie weiter. Schließlich nicken sie entschlossen und bewegen sich weiter, stets darauf bedacht, den Zeichen auf der Karte zu folgen, um die sagenumwobene Lichtung zu finden.
['fabian', 'florian'

In [10]:
episode_title = generate_text(
    system_prompt=f"Generate a title for a short story of the series Beyond Belief or X-Faktor das Unfassbare. Return the title in {language}. It must be short. Return it without any quotes.",
    user_prompt=f"The story is about a {idea}.",
)

print(episode_title)

"Der Preis der Wunschlichtung"


In [14]:
@dataclass
class Audio:
    filename: str


@dataclass
class Content:
    filename: str
    audio: Optional[Audio] = None


@dataclass
class Scene:
    type: str  # "video" or "image"
    content: Content


@dataclass
class Appearance:
    start: float  # seconds
    end: float  # seconds


@dataclass
class TextOverlay:
    text: str
    appearance: Appearance


@dataclass
class SoundEffect:
    filename: str
    start: float  # seconds


@dataclass
class Episode:
    backgroundMusic: Audio
    scenes: List[Scene]
    textOverlays: List[TextOverlay] = field(default_factory=list)
    soundEffects: List[SoundEffect] = field(default_factory=list)

    def to_json(self) -> str:
        return json.dumps(self, default=lambda o: o.__dict__, indent=4)

class NarrationResult(BaseModel):
    video_description: str
    monologue: str

class DialogueLine(BaseModel):
    character: str
    text: str

class DialogueResult(BaseModel):
    scene_background_description: str
    lines: List[DialogueLine]

episode = Episode(
    backgroundMusic=Audio(filename="background.mp3"),
    scenes=[],
    textOverlays=[
        TextOverlay(text=episode_title, appearance=Appearance(start=3, end=8)),
        TextOverlay(text="Fabian Neundorf", appearance=Appearance(start=13, end=18)),
        TextOverlay(text="Florian Kaiser", appearance=Appearance(start=23, end=28)),
    ]
)

entire_story = f"""{idea}

Characters:
"""
{character_descriptions}

for act_n, act in enumerate(acts):
    entire_story += f"Act {act_n + 1}: {act.description}\n"
    for scene_n, scene in enumerate(act_scenes[act_n]):
        entire_story += f"Scene {scene_n + 1}: {scene.description}\n"

for act_n, act in enumerate(act_scenes):
    for scene_n, scene in enumerate(act):
        if scene.type == "narration":
            print(f"Generating narration for scene {scene_n + 1}, act {act_n + 1}")
            narration_result = generate_json(
                system_prompt=f"""Generate a narration for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare. Return the narration as a JSON:
{{
    "video_description": a prompt for a video generator describing the scene. Must be in English. Must be a scene without characters or people in it.,
    "monologue": a short monologue of the narrator. It should be short 10-20 seconds. It's narrated by the character,
}}
in {language}.""",
                user_prompt=f"The story: {entire_story}. The only characters are {character_descriptions}. The act you is act number {act_n + 1} of {len(act_scenes)}, {acts[act_n].description}. The scene you are generating the narration for is scene number {scene_n + 1} of {len(act)}, {scene.description}. Only write the narration for this scene and nothing else. {'Since this is the first scene, the narrator should make a short introduction.' if act_n == 0 and scene_n == 0 else ''}",
                model=NarrationResult,
            )
            video = generate_video(f"A hyper-realistic video of {narration_result.video_description}", extend_n_times=1)
            print("Generating audio ..")
            audio = generate_audio(
                narration_result.monologue,
                next((item for item in characters if item.id == scene.characters[0]), None).voice,
            )
            episode.scenes.append(Scene(type="video", content=Content(filename=video, audio=Audio(filename=audio))))
        elif scene.type == "dialogue":
            print(f"Generating dialogue for scene {scene_n + 1}, act {act_n + 1}")
            dialogue_result = generate_json(
                system_prompt=f"""Generate a dialogue for a short story in style of the TV show Beyond Belief or X-Faktor das Unfassbare. Return the dialogue as a JSON:
{{
    "scene_background_description": a short description of the scene for a photo generator. Only describe the scene and background, don't describe the characters. Must be in English..",
    "lines": {{
        "character": the character ID, cannot be empty or null, can only be one of the character IDs in the scene,
        "text": the text of the dialogue line,
    }}
}}
in {language}.""",
                user_prompt=f"The story: {entire_story}. The only characters are {character_descriptions}. The act you generate the dialogue for is act number {act_n + 1} of {len(act_scenes)}, {acts[act_n].description}. The scene you are generating the dialogue for is scene number {scene_n + 1} of {len(act)}, {scene.description}. The characters in this scene are {', '.join(character for character in scene.characters)}. Only write the dialogue for this scene and nothing else. If a character make the first appearance in this scene, let them make a short introduction. Make the introduction natural, built it into the dialogue.",
                model=DialogueResult,
            )
            character_images_for_scene = {}
            for n, character in enumerate(scene.characters):
                print(f"Generating character image for {character} ({n + 1} of {len(scene.characters)})")
                character_images_for_scene[character] = generate_image(f"A portrait of the character in {dialogue_result.scene_background_description}", character_images[character]["url"])
            for n, line in enumerate(dialogue_result.lines):
                print(f"Generating audio for {line.character} ({n + 1} of {len(dialogue_result.lines)})")
                audio = generate_audio(
                    line.text,
                    next((item for item in characters if item.id == line.character), None).voice,
                )
                episode.scenes.append(
                    Scene(
                        type="image",
                        content=Content(
                            filename=character_images_for_scene[line.character],
                            audio=Audio(filename=audio),
                        ),
                    )
                )

episode.scenes.append(Scene(type="video", content=Content(filename="false.mp4", audio=Audio(filename="false.mp3"))))

with open("episode.json", "w") as file:
    file.write(episode.to_json())


Generating narration for scene 1, act 1
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Generating video..
Extending video..
Extending video..
Extending video..
Extending video..
Extending video..
Extending video..
Extending video..
Extending video..
Extending video..
Extending video..
Extending video..
Generating audio ..
Generating dialogue for scene 2, act 1
Generating character image for fabian (1 of 2)
Generating character image for florian (2 of 2)
Generating audio for fabian (1 of 8)
Generating audio for florian (2 of 8)
Generating audio for fabian (3 of 8)
Generating audio for florian (4 of 8)
Generating audio for fabian (5 of 8)
Generating audio for florian (6 of 8)
Generating audio for fabian (7 of 8)
Generating audio for florian (8 of 8)
Generating narration for scene 3, act 1
Generating video..
Generating video..
Ge

AttributeError: 'NoneType' object has no attribute 'voice'