In [1]:
import site
site.addsitedir('Lib/site-packages')
from dotenv import load_dotenv
load_dotenv()
import getpass
import os
import requests
import ffmpeg
import json
import time
import math
import uuid
#import asyncio
from pydantic import BaseModel, Field
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
from lumaai import LumaAI
from typing import Literal, TypedDict
from elevenlabs.client import ElevenLabs

In [2]:
# Luma client (video / image generation); the token is read from the environment.
client = LumaAI(auth_token=os.getenv("LUMAAI_API_KEY"))

# OpenAI: prompt for the key interactively when the environment has none.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# ElevenLabs client (text-to-speech); the key is read from the environment.
elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

from langchain.chat_models import init_chat_model

# One independent gpt-4o chat model per agent role, created in the order
# supervisor -> video -> storyboard -> audio.
(supervisor_llm, video_gen_llm, storyboard_llm, audio_gen_llm) = (
    init_chat_model("gpt-4o", model_provider="openai") for _ in range(4)
)
# An editor model (gpt-4o-mini) was sketched here but is not created.

# Module-level state shared with the tools defined in later cells.
generatingVid = False    # True while generate_vid has a Luma generation in flight
fullDialogueString = ""  # overwritten by generate_storyboard with the extracted dialogue

## TOOLS

### Video Worker

In [3]:
#SCHEMAS
# Argument schema passed to the `video_gen_tool` decorator through `args_schema`.
# NOTE(review): the class docstring and the Field descriptions are presumably what
# the LLM is shown as the tool / parameter descriptions, so treat them as prompt
# text -- confirm before rewording them.
class video_gen_schema(BaseModel):
    """Generates a video using text input and returns a filepath to the video"""
    # Free-text prompt; generate_vid forwards it unchanged to the Luma generation request.
    vid_prompt: str = Field(..., description="The textual prompt used in generating the video")
    # generate_vid maps True to a "9s" duration and False to "5s".
    use_9s: bool = Field(..., description="Whether to generate a 9 second long video. If set to FALSE, this will generate a 5 second long video")


@tool("video_gen_tool",args_schema=video_gen_schema)
def generate_vid(vid_prompt: str, use_9s: bool) -> str:
    # Tool body for `video_gen_tool`: request a Luma "ray-2" 720p clip, poll until
    # it finishes, download it into staticVid1/ and return a message with the path.
    #
    # Documented with comments instead of a docstring on purpose: the tool's
    # description presumably comes from video_gen_schema's docstring, and a function
    # docstring here could replace it -- TODO confirm against the LangChain version in use.
    #
    # Returns a "Failed ..." message when another generation is already running.
    # Raises RuntimeError when Luma reports the generation as failed.
    global generatingVid
    if generatingVid:
        return "Failed to generate video, there is currently another video being generated."
    dur = "9s" if use_9s else "5s"
    print(f"VIDEOWORKER PROMPT: {dur}\n------------------------------\n" + vid_prompt)
    generatingVid = True
    try:
        generation = client.generations.create(
            prompt=vid_prompt,
            model="ray-2",
            resolution="720p",
            duration=dur
        )
        print("generating vid")
        while True:
            generation = client.generations.get(id=generation.id)
            if generation.state == "failed":
                raise RuntimeError(f"Generation failed: {generation.failure_reason}")
            print("Dreaming, state:" + generation.state)
            if generation.state == "completed":
                break  # do not sleep once the clip is ready
            time.sleep(5)
        video_url = generation.assets.video

        # Download the video. The target directory is created on demand (as
        # generate_dialogue does for its audio directory) so a fresh checkout works.
        out_dir = "staticVid1"
        os.makedirs(out_dir, exist_ok=True)
        video_path = f"{out_dir}/{generation.id}.mp4"
        with requests.get(video_url, stream=True, timeout=60) as response:
            # Fail loudly instead of saving an HTTP error page as an .mp4.
            response.raise_for_status()
            with open(video_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    if chunk:
                        file.write(chunk)
    finally:
        # Always release the "busy" flag; otherwise a single failure would make
        # every later call return the "another video being generated" message.
        generatingVid = False
    result_message = f"Video generated in {video_path}"
    print(result_message)
    return result_message

#class extend_video_schema(BaseModel):
# Tool list, the graph node that executes it, and the LLM bound to it.
video_tools = [generate_vid]
video_tool_node = ToolNode(video_tools)
video_worker = video_gen_llm.bind_tools(video_tools)

### Storyboard Worker

In [4]:
#SCHEMA
# Argument schema passed to the `storyboard_tool` decorator through `args_schema`.
# NOTE(review): the docstring and Field descriptions are presumably surfaced to the
# LLM as the tool / parameter descriptions -- treat them as prompt text.
class step_by_step_output_schema(BaseModel):
    """Prints a storyboard for the user to view in string format"""
    # The five text fields below are concatenated verbatim into the storyboard string.
    story_description: str = Field(..., description="A 200-250 word description of the story")
    character_details: str = Field(..., description="A 200-300 word description of each character in the scene")
    background_details: str = Field(..., description="A 50-100 description of the scene")
    auditory_details: str = Field(..., description="A 50-100 description of the voice profile of each character")
    dialogue_details: str = Field(..., description="The parts of the dialogue that require enunciation and emotion")
    # generate_storyboard copies this into the global charProfileQueue.
    num_characters: int = Field(..., description="The number of unique characters present in this scene")
    # generate_storyboard copies this into the global fullDialogueString.
    pure_dialogue: str = Field(..., description="The extracted dialogue from the input scene with character name appended before. Within the dialogue, indicate dialogue details that require enunciation and emotion with descriptors in parentheses Format: {CharacterName}: {Dialogue (Enunciation) Dialogue Detail}")
    # generate_storyboard copies this into the global dialogueQueue.
    dialogue_instances: int = Field(..., description="The number of distinct non-continuous dialogue instances present in this scene")
# Work counters published by the storyboard tool and consumed by the profile /
# dialogue tools, which refuse to run once their counter reaches zero.
charProfileQueue = 0
dialogueQueue = 0

@tool("storyboard_tool",args_schema=step_by_step_output_schema)
def generate_storyboard(story_description: str,character_details: str,background_details: str,auditory_details: str,dialogue_details: str, num_characters: int,pure_dialogue: str, dialogue_instances: int) -> str:
    # Tool body for `storyboard_tool`: lay the analysis out as titled sections,
    # print it, record the dialogue and the two counters in module globals, and
    # return the formatted text. (Comments rather than a docstring, so the tool
    # description is left exactly as it was.)
    global fullDialogueString, charProfileQueue, dialogueQueue
    print("storyboarding")

    rule = "------------------------------"
    sections = (
        ("STORY ANALYSIS", story_description),
        ("CHARACTERS", character_details),
        ("BACKGROUND", background_details),
        ("AUDIO", auditory_details),
        ("DIALOGUE", dialogue_details),
        ("Number of characters", str(num_characters)),
        ("Pure Dialogue", pure_dialogue),
        ("Unique Dialogue Instances", str(dialogue_instances)),
    )
    # Each section is "<title>\n<rule>\n<body>"; sections are separated by one newline.
    storyboard_text = "\n".join(
        heading + "\n" + rule + "\n" + body for heading, body in sections
    )
    print(storyboard_text)

    fullDialogueString = pure_dialogue
    charProfileQueue, dialogueQueue = num_characters, dialogue_instances
    return storyboard_text

# Argument schema passed to the `character_profile_tool` decorator through `args_schema`.
# NOTE(review): docstring and Field descriptions are presumably shown to the LLM as
# the tool / parameter descriptions -- treat them as prompt text.
class character_profile_gen_schema(BaseModel):
    """Generates a character profile for a character and returns the url"""
    # Both fields are appended to the fixed portrait prompt in generate_character_profile.
    character_name: str = Field(..., description="The name of the character, taken from the script")
    character_details: str = Field(..., description="A 200-300 word description of the character in the scene, taken from the script")

@tool("character_profile_tool",args_schema=character_profile_gen_schema)
def generate_character_profile(character_name: str, character_details: str)->str:
    # Tool body for `character_profile_tool`: ask Luma for a reference portrait of
    # one character, poll until it is ready, save it under charRef/ and return a
    # message containing the image URL.
    #
    # Comments are used instead of a docstring so the tool description (presumably
    # taken from character_profile_gen_schema) is left untouched -- TODO confirm.
    #
    # Returns "Already generated character profiles!" once charProfileQueue is used up.
    # Raises RuntimeError when Luma reports the generation as failed.
    global charProfileQueue
    if charProfileQueue <= 0:
        return "Already generated character profiles!"
    generation = client.generations.image.create(
      prompt="Generate a hyperrealistic, front-facing portrait \
      The image should feature perfectly even, diffused lighting that completely\
      eliminates any shadows on the face. Use a direct, center-camera angle against a neutral,\
      unobtrusive background to ensure absolute consistency. Focus on lifelike details with natural \
      skin textures and realistic, balanced color tones, making the portrait suitable as a reference\
      for video character consistency.: "+character_name+", "+character_details,
    )
    print("generating char profile for: " + character_name)
    while True:
        generation = client.generations.get(id=generation.id)
        if generation.state == "failed":
            raise RuntimeError(f"Generation failed: {generation.failure_reason}")
        print("Dreaming, state:" + generation.state)
        if generation.state == "completed":
            break  # do not sleep once the image is ready
        time.sleep(2)
    image_url = generation.assets.image
    print("image_url: " +image_url)

    # Create the target directory on demand (as generate_dialogue does for audio).
    out_dir = "charRef"
    os.makedirs(out_dir, exist_ok=True)
    image_path = f"{out_dir}/{generation.id}.jpg"
    with requests.get(image_url, stream=True, timeout=60) as response:
        # Fail loudly instead of saving an HTTP error page as a .jpg.
        response.raise_for_status()
        with open(image_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1 << 16):
                if chunk:
                    file.write(chunk)
    print(f"Image generated in {image_path}")
    # Only count the profile once it has actually been saved.
    charProfileQueue -= 1
    return f"Profile generated: {image_url}"

# Tool list, the graph node that executes it, and the LLM bound to it.
storyboard_tools = [generate_storyboard,generate_character_profile]
storyboard_tool_node = ToolNode(storyboard_tools)
storyboard_worker = storyboard_llm.bind_tools(storyboard_tools)

### Audio Worker

In [5]:
# Argument schema passed to the `dialogue_gen_tool` decorator through `args_schema`.
# NOTE(review): docstring and Field descriptions are presumably shown to the LLM as
# the tool / parameter descriptions -- treat them as prompt text.
class dialogue_gen_schema(BaseModel):
    """Generates a dialogue mp3 clip and returns the filepath to the audio"""
    # Recorded in chrono_character by generate_dialogue.
    character_name: str = Field(..., description="The name of the character talking")
    # Recorded in character_profiles_chrono by generate_dialogue; not fetched there.
    character_profile: str = Field(..., description="The link to the generated character profile of the character who is talking. Format: {https://storage.cdn-luma.com/dream_machine/../.._result.jpg}")
    # Sent as the text of the ElevenLabs text-to-speech request.
    dialogue_text: str = Field(..., description="The dialogue text of a SINGLE character for a particular scene.")
    # Index into the `voice_ids` list inside generate_dialogue; the ids listed in
    # the description appear in the same order as that list.
    voice_idx: int = Field(..., description="Out of the following voice ids, choose the index of the one that best suits the character who is speaking:\
    [{29vD33N1CtxCmqQRPOHJ: Male young and nervous},{N2lVS1w4EtoT3dr4eOWO: Male booming and imposing},\
    {21m00Tcm4TlvDq8ikWAM: Female elegant and slightly teasing}]")
# Per-clip bookkeeping, appended to in generation order by generate_dialogue:
# clip length in whole seconds, speaking character, and that character's profile link.
chronological_dialogue_duration = []
chrono_character=[]
character_profiles_chrono=[]
@tool("dialogue_gen_tool",args_schema=dialogue_gen_schema)
def generate_dialogue(character_name: str,character_profile: str,dialogue_text: str, voice_idx: int) -> str:
    # Tool body for `dialogue_gen_tool`: synthesise one line of dialogue with
    # ElevenLabs, save it as an mp3 under staticAudio1/, record its rounded-up
    # duration plus speaker metadata, and return the file path.
    #
    # Comments are used instead of a docstring so the tool description (presumably
    # taken from dialogue_gen_schema) is left untouched -- TODO confirm.
    #
    # Returns a plain message (no audio produced) when dialogueQueue is used up or
    # when voice_idx is out of range. Re-raises ffmpeg.Error if the clip cannot be probed.
    global dialogueQueue  # the three lists are only appended to, so they need no `global`
    print("attempted dialogue: " +dialogue_text+"\nattempted idx: " + str(voice_idx))
    if dialogueQueue <= 0:
        return "Already generated all dialogue!"
    voice_ids = ["29vD33N1CtxCmqQRPOHJ","N2lVS1w4EtoT3dr4eOWO","21m00Tcm4TlvDq8ikWAM"]
    # voice_idx is chosen by the LLM: reject anything outside the list instead of
    # raising IndexError (or silently wrapping around on a negative index) so the
    # model gets a message it can act on.
    if not 0 <= voice_idx < len(voice_ids):
        return f"Invalid voice_idx {voice_idx}: choose an index between 0 and {len(voice_ids) - 1}."
    eleven_response = elevenlabs_client.text_to_speech.convert(
        voice_id=voice_ids[voice_idx],
        optimize_streaming_latency="0",
        output_format="mp3_44100_128",
        text=dialogue_text,
        model_id="eleven_turbo_v2",
    )

    # Save the clip under a random file name in the audio directory.
    raw_audio_dir = "staticAudio1"
    os.makedirs(raw_audio_dir, exist_ok=True)
    file_path = os.path.join(raw_audio_dir, f"{uuid.uuid4()}.mp3")
    with open(file_path, "wb") as f:
        for chunk in eleven_response:
            if chunk:
                f.write(chunk)

    try:
        probe = ffmpeg.probe(file_path)
    except ffmpeg.Error as e:
        print("Error probing file:", e.stderr)
        raise
    # Duration is rounded up to whole seconds.
    duration = math.ceil(float(probe['format']['duration']))
    chronological_dialogue_duration.append(duration)
    chrono_character.append(character_name)
    character_profiles_chrono.append(character_profile)
    print("duration: " + str(duration) + "\ncharacterSpeaking: " + character_name +"\ncharProfile: " + character_profile)
    dialogueQueue -= 1
    return file_path

# Tool list, the graph node that executes it, and the LLM bound to it.
audio_tools = [generate_dialogue]
audio_tool_node = ToolNode(audio_tools)
audio_worker = audio_gen_llm.bind_tools(audio_tools)

### Editor Worker

## RUNNING MODEL

In [6]:
# Worker node names the supervisor may route to.
members = ["video_worker","storyboard_worker","audio_worker"] #"editor"
# Every legal routing target: the workers plus LangGraph's END sentinel.
supervisor_options = members + [END]
# Flipped to True by go_next the first time it redirects a video_worker route
# to audio_worker.
visitedAudioWorker = False
# Structured-output schema passed to `supervisor_llm.with_structured_output`.
# NOTE(review): the docstring is presumably sent to the model as part of the
# schema description -- treat it as prompt text.
class Supervisor_Router(TypedDict):
    """Worker to route to next to fulfill the user's request. If no workers are needed, route to END."""

    # Restricted to the values in supervisor_options. Star-unpacking inside a
    # subscript requires Python 3.11 or newer.
    next: Literal[*supervisor_options]

# Define the function that determines whether to continue or not
def go_next(state: MessagesState) -> Literal[*supervisor_options]:
    global visitedAudioWorker
    print(f"traveling")
    if(state["next"]=="video_worker" and not visitedAudioWorker):
        visitedAudioWorker = True
        return "audio_worker"
    return state["next"]


# Define the function that calls the model
def call_supervisor(state: MessagesState):
    """Supervisor node: ask the supervisor LLM which worker should act next.

    The system prompt is prepended to the conversation (the same way the worker
    nodes do it) and the model is asked for a ``Supervisor_Router`` reply.

    Returns:
        A state update with a short assistant message ``"Routing to <name>"`` and
        the chosen route under ``"next"``. If the model's reply has no usable
        ``next`` value, the route falls back to ``END`` and a notice is printed
        rather than raising ``KeyError``.
    """
    print("supervisor")
    messages = state["messages"]
    context_message = {
        "role": "system",
        # Adjacent literals are concatenated as written, so each fragment carries
        # its own trailing space / newline.
        "content": f"You are a supervisor of a short film project and am in charge of a team of 4. You can delegate relevant tasks to any of these members: {members}. "
        "Bob the video_worker is capable of generating high fidelity videos, but requires clear contextual information. Ryan the audio_worker is able to generate character "
        "dialogue audio clips. Steve the storyboard_worker is able to analyze a given input script and break it down into fine details and generate character profiles."
        "\nYou should ALWAYS ensure Steve has generated a STORYBOARD and character profiles for ALL characters analyzed in the storyboard tool response before anything else."
        "\nIMPORTANT!!! MAKE SURE Ryan generates audio clips BEFORE Bob generates video clips. Generate a step-by-step plan from the following prompt and act on it."
        "\nEXAMPLE PLAN: STEVE STORYBOARD -> STEVE CHARACTER PROFILES -> RYAN AUDIO CLIPS -> BOB VIDEOS\n"
        "When responding, please output a JSON object that follows this schema:\n"
        '{ "next": one of the allowed values: ' + ", ".join(supervisor_options) + " }\n"
        # Name the real END value so the instruction matches the allowed values above.
        "If no further workers are needed, output '" + END + "' as the next step."
    }
    # The system prompt must actually be sent; previously only `messages` was passed.
    response = supervisor_llm.with_structured_output(Supervisor_Router).invoke([context_message] + messages)

    # A TypedDict schema is not validated, so the reply may be None or lack "next".
    next_step = response.get("next") if isinstance(response, dict) else None
    if next_step not in supervisor_options:
        print(f"supervisor returned no valid route ({response!r}); routing to END")
        next_step = END

    # Wrap the decision in a valid message format
    structured_message = {
        "role": "assistant",
        "content": f"Routing to {next_step}"
    }
    return {"messages": [structured_message], "next": next_step}

def call_video_worker(state: MessagesState):
    """Video-worker node: run the tool-bound video LLM over the conversation.

    Bob's system instructions are placed in front of the existing messages; the
    model's reply is returned as a one-element list so it is appended to the
    conversation history.
    """
    print("video worker")
    bob_instructions = "You are Bob, a video worker. Process the given request accordingly. Note that you can only generate 5 second or 9 second videos.\
        If you need to generate further content, ensure the characters and the scene in the video are consistent by providing similar prompts.\
        Generate only ONE video at a time. Wait until you have received the filepath of the current video being generated before generating another. [Use Unique Prompts]"
    system_turn = {"role": "system", "content": bob_instructions}
    reply = video_worker.invoke([system_turn, *state['messages']])
    return {"messages": [reply]}

def call_storyboard_worker(state: MessagesState):
    """Storyboard-worker node: run the tool-bound storyboard LLM over the conversation.

    Steve's system instructions are placed in front of the existing messages; the
    model's reply is returned as a one-element list so it is appended to the
    conversation history.
    """
    print("storyboard worker")
    steve_instructions = "You are Steve, a storyboarder. Process the given request accordingly. You have access to two tools: a storyboard generator and a character profile\
        generator. Generate only ONE character profile at a time. You should pass in all character results from the storyboard generator into the character profile generator."
    system_turn = {"role": "system", "content": steve_instructions}
    reply = storyboard_worker.invoke([system_turn, *state['messages']])
    return {"messages": [reply]}

def call_audio_worker(state: MessagesState):
    """Audio-worker node: run the tool-bound audio LLM over the conversation.

    Ryan's system instructions, followed by the dialogue string recorded by the
    storyboard tool (``fullDialogueString``), are placed in front of the existing
    messages. The model's reply is returned as a one-element list so it is
    appended to the conversation history.
    """
    # fullDialogueString is only read here, so no `global` declaration is needed.
    print("audio worker")
    messages = state['messages']
    context_message = {
        "role": "system",
        # Adjacent literals are concatenated as written; every fragment ends with a
        # space so sentences are no longer glued together ("one toolwhich", "STEVE.Check").
        "content": "You are Ryan, a character dialogue generator. Process the given request accordingly. You have access to one tool "
        "which generates a dialogue audio clip for a SINGLE character talking ONLY AS PER THE CHRONOLOGICAL ORDER of the storyboard generated by STEVE. "
        "Check if you have already generated a piece of dialogue. If you have, don't generate it again. "
        "Parse this string once for each instance with a character talking generate audio: " + fullDialogueString
    }
    response = audio_worker.invoke([context_message]+messages)
    # We return a list, because this will get added to the existing list
    return {"messages": [response]}


def check_video_tool_calls(state: MessagesState) -> Literal["video_tools","supervisor"]:
    """Route to the video tool node if the newest message requests a tool call, else back to the supervisor."""
    print("check video tool call")
    newest = state['messages'][-1]
    return "video_tools" if newest.tool_calls else "supervisor"

def check_storyboard_tool_calls(state: MessagesState) -> Literal["storyboard_tools","supervisor"]:
    """Route to the storyboard tool node if the newest message requests a tool call, else back to the supervisor."""
    print("check storyboard tool call")
    newest = state['messages'][-1]
    return "storyboard_tools" if newest.tool_calls else "supervisor"
    
def check_audio_tool_calls(state: MessagesState) -> Literal["audio_tools","supervisor"]:
    """Route to the audio tool node if the newest message requests a tool call, else back to the supervisor."""
    print("check audio tool call")
    newest = state['messages'][-1]
    return "audio_tools" if newest.tool_calls else "supervisor"
# Build the graph. The state schema is MessagesState.
# NOTE(review): call_supervisor also returns a "next" key and go_next reads
# state["next"], but this schema does not declare "next"; the recorded run of this
# cell ends with `KeyError: 'next'`. Consider a state schema that declares it -- confirm.
workflow = StateGraph(MessagesState)

# Register the supervisor plus, for each of the three workers, its LLM node and
# its tool-execution node.
workflow.add_node("supervisor", call_supervisor)
workflow.add_node("video_worker", call_video_worker)
workflow.add_node("video_tools", video_tool_node)
workflow.add_node("storyboard_worker", call_storyboard_worker)
workflow.add_node("storyboard_tools", storyboard_tool_node)
workflow.add_node("audio_worker", call_audio_worker)
workflow.add_node("audio_tools", audio_tool_node)

# The supervisor is the entry point: it is the first node called.
workflow.add_edge(START, "supervisor")
# After the supervisor, go_next decides which worker (or END) runs next.
workflow.add_conditional_edges(
    "supervisor",
    go_next,
)
# After each worker, its check_* function sends the run either to that worker's
# tool node (when the reply contains tool calls) or back to the supervisor.
workflow.add_conditional_edges(
    "video_worker",
    check_video_tool_calls,
)
workflow.add_conditional_edges(
    "storyboard_worker",
    check_storyboard_tool_calls,
)
workflow.add_conditional_edges(
    "audio_worker",
    check_audio_tool_calls,
)

# Unconditional edges from each tool node back to the worker that called it,
# so the worker sees the tool result next.
workflow.add_edge("video_tools", 'video_worker')
workflow.add_edge("storyboard_tools","storyboard_worker");
workflow.add_edge("audio_tools","audio_worker");
# In-memory checkpointer; it is recreated every time this cell runs.
checkpointer = MemorySaver()

# Compile the graph into a runnable app, attaching the checkpointer.
app = workflow.compile(checkpointer=checkpointer)

# NOTE(review): generatingImg is not read anywhere in the visible notebook.
generatingImg = False
# Clear the "video generation in progress" flag used by generate_vid before the run.
generatingVid = False
# Run the whole pipeline on one scene; thread_id identifies the checkpointed conversation.
final_state = app.invoke(
    {"messages": [{"role": "user", "content": "Generate a STORYBOARD, AUDIO, AND VIDEO about the following scene: INT. MONTAGUE MUSEUM – MODERN DAYThe once-grand family home, now a tour-driven Hearst Castle-like museum.ELIZABETH MONTAGUE, intense and angry, argues with Ethan.ELIZABETHYou know you can’t tell a soul,right?ETHANAre you kidding? I’m going to telleveryone. You can’t hide this."}]},
    config={"configurable": {"thread_id": 42}}
)
# Display the content of the last message as the cell output.
final_state["messages"][-1].content

supervisor
traveling
storyboard worker
check storyboard tool call
storyboarding
STORY ANALYSIS
------------------------------
In the grand halls of Montague Museum, Elizabeth Montague and Ethan stand amidst vintage furniture, arguing heatedly. The air is thick with tension, echoing the house's storied past. Once a noble family estate, the Montague home has now transformed into a public museum that draws tourists eager to glimpse into the opulence of yesteryears. The elegant decor and preserved artifacts of the past juxtapose with the fiery confrontation unfolding between Elizabeth and Ethan, set against the backdrop of family secrets threatened to be revealed.

Elizabeth Montague, the last of the Montague lineage, is visibly distraught. Her sharp features mirror her stern demeanor as she holds the weight of her family's legacy on her shoulders. Desperation wraps around her like a cloak as she demands Ethan's silence about a secret scandal that could potentially ruin her family name onc

KeyError: 'next'