# Writers Room AI
## Deep Learning
### Arjun Bedi, Jose Xavier Fuentes, Maxwell Guevarra

In [6]:
from typing import Dict, List, Optional, Tuple
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
from dotenv import load_dotenv
import glob
import pandas as pd

load_dotenv()

True

#### Here is a function which parses through our data of movie scripts and structures a given file. Here is an example of a file

In [7]:
dragon_script_path = os.path.join('data', 'manual_annotations', 'manual_annotations', 'How to Train Your Dragon_0892769_manual_anno.txt')
if os.path.exists(dragon_script_path):
    with open(dragon_script_path, 'r', encoding='utf-8') as f:
        dragon_script_content = f.read()
print("\n".join(dragon_script_content.split('\n')))


scene_heading:           EXT. NORTH SEA/VILLAGE - NIGHT

tex:   We skim above a dark, wild ocean. The camera turns toward a
          lone island, Berk. It is a gigantic shard of rock jutting
          straight out of the water.

speaker_heading:           HICCUP (V.O.)

dialog: This, is Berk. It's twelve days
          North of hopeless, and a few
          degrees South of freezing to death.
          It's located solidly on the
          meridian of misery.

text: The camera drifts over rolling hills to reveal a small
          village nestled on an outcropping of sea mounts.

speaker_heading:           HICCUP (V.O.)

dialog:  My village. In a word, sturdy.
          And it's been here for seven
          generations, but every single
          building is new.

text:   The camera drifts closer, circling.

speaker_heading:           HICCUP (V.O.) (CONT'D)

dialog: We have fishing, hunting, and a
          charming view of the sunsets. The
          only problems are the pests. You
 

In [8]:
def parse_annotated_script(file_path):
        """
        Parse an annotated script file into structured format
        Returns a list of dictionaries for each scene in the script
        """
        # Creates structure of output
        scenes = []
        current_scene = {
            "scene_heading": "",
            "description": "",
            "dialog": [],
            "speakers": [],
            "script_id": ""
        }
        script_id = file_path.split('_')[-2]
        # Read file
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        # For each line
        for line in lines:
            line = line.strip()
            if not line:
                continue
            # Split into label and actual text
            if ': ' in line:
                label, content = line.split(': ', 1)
                # If label is a new scene heading, add previous to scene
                if label == 'scene_heading':
                    if current_scene["scene_heading"]:
                        current_scene["script_id"] = script_id
                        scenes.append(current_scene)
                        current_scene = {
                            "scene_heading": content,
                            "description": "",
                            "dialog": [],
                            "speakers": [],
                            "script_id": script_id
                        }
                    else:
                        current_scene["scene_heading"] = content
                        current_scene["script_id"] = script_id
                elif label == 'text':
                    current_scene["description"] += content + " "
                elif label == 'dialog':
                    current_scene["dialog"].append(content)
                elif label == 'speaker_heading':
                    current_scene["speakers"].append(content)
        # Add final scene         
        if current_scene["scene_heading"]:
            current_scene["script_id"] = script_id
            scenes.append(current_scene)
            
        return scenes

#### Here is where the initialization of our agentic crew starts. We start with a RAG agent, which both parses through all scripts in our data, initializes the vector store, and is able to retrieve relevant content based on a user prompt

In [9]:
class RAGAgent:
    def __init__(self, embeddings_model=None, vector_store=None):
        self.embeddings = embeddings_model or OpenAIEmbeddings()
        self.vector_store = vector_store

    def parse_data(self, data_dir: str) -> List[Dict]:
        """
        Load script data from the annotations directory  
        Args:
            data_dir: Base directory (data) containing manual_annotations
        """
        structured_data = []
        # Determine which annotations to use
        anno_path = os.path.join(data_dir, 'manual_annotations', 'manual_annotations', '*.txt')    
        # Load all annotation files
        for anno_file in glob.glob(anno_path):
            # A list of dictionaries explaining the scene
            structured_data.extend(parse_annotated_script(anno_file))
        return structured_data
        
    def initialize_vector_store(self, data_dir: str):
        """Initialize the vector store with script examples from the annotations
        
        Args:
            data_dir: Base directory containing manual_annotations
        """
        structured_data = self.parse_data(data_dir)
        
        # Load metadata
        metadata_path = os.path.join(data_dir, 'movie_meta_data.csv')
        metadata_df = pd.read_csv(metadata_path)
        # Convert imdbid to string with leading zeros to match file naming
        metadata_df['imdbid'] = metadata_df['imdbid'].astype(str).str.zfill(7)
        
        # Prepare documents for indexing
        documents = []
        
        # Convert structured scenes to searchable text
        for scene in structured_data:
            # Extract script filename to get imdbid
            script_id = scene.get('script_id', '')  # You'll need to add this in parse_annotated_script
            
            # Get metadata for this script
            try:
                script_meta = metadata_df[metadata_df['imdbid'] == script_id].iloc[0] if not metadata_df[metadata_df['imdbid'] == script_id].empty else None
            except IndexError:
                print(f"Warning: No metadata found for script ID {script_id}")
                script_meta = None
            
            # Format dialog with speakers for better context
            dialog_with_speakers = []
            for speaker, line in zip(scene['speakers'], scene['dialog']):
                if speaker and line:
                    dialog_with_speakers.append(f"{speaker}: {line}")
            
            # Build metadata section
            metadata_text = ""
            if script_meta is not None:
                metadata_text = f"""
                Title: {script_meta['title']}
                Genre: {script_meta['genres']}
                Plot: {script_meta['plot']}
                Keywords: {script_meta['keywords']}
                Director: {script_meta['directors']}
                Cast: {script_meta['cast']}
                """
            
            scene_text = f"""
            {metadata_text}
            Scene: {scene['scene_heading']}
            Description: {scene['description']}
            Dialog:
            {chr(10).join(dialog_with_speakers)}
            """
            documents.append(scene_text)
        
        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        
        texts = text_splitter.create_documents(documents)
        self.vector_store = FAISS.from_documents(texts, self.embeddings)
    
    def retrieve_relevant_content(self, query: str, k: int = 3) -> List[str]:
        """Retrieve relevant script examples based on the query"""
        results = self.vector_store.similarity_search(query, k=k)
        return [doc.page_content for doc in results]

#### Next, we have our writer agent, which takes in the input of the user, grabs inspiration from examples through our RAG Agent, and writes an initial draft of a scene to fit the given criteria. We use a query based off the user's desired scene, such as idea and setting, while taking in the genre, director style, and length of sccene into account.

In [10]:
class WriterAgent:
    def __init__(self, temperature: float = 0.7):
        self.llm = ChatOpenAI(temperature=temperature)
        self.prompt = PromptTemplate(
            input_variables=["genre", "setting", "idea", "director_style", "length", "examples"],
            template="""You are an experienced screenwriter. Write a compelling scene based on the following criteria:
            Genre: {genre}
            Setting: {setting}
            Core Idea: {idea}
            Director Style: {director_style}
            Length: {length}
            
            Here are some example scenes for reference:
            {examples}
            
            Write a scene that follows proper screenplay format:
            1. Start with a scene heading (INT/EXT, location, time)
            2. Include clear scene descriptions
            3. Format dialog with speaker names in caps
            4. Add parentheticals for important acting cues
            
            Make the scene original and engaging while maintaining professional formatting.
            """
        )
        self.chain = LLMChain(llm=self.llm, prompt=self.prompt)
    
    def write_scene(self, genre: str, setting: str, idea: str, director_style: str, length: str, examples: List[str]) -> str:
        """Generate a scene based on the given criteria and examples"""
        examples_text = "\n\n".join(examples) if examples else "No examples provided."
        return self.chain.run(
            genre=genre,
            setting=setting,
            idea=idea,
            director_style=director_style,
            length=length,
            examples=examples_text
        )

#### Finally, we have our editor agent. Their main goal is to make sure this initial, rough draft of the scene is ready for the big stage. They take into account professional screenplay format, dialogue authenticity, storytelling, consistency, and other factors to make the scene as well-written as possible.

In [11]:
class EditorAgent:
    def __init__(self, temperature: float = 0.3):
        self.llm = ChatOpenAI(temperature=temperature)
        self.prompt = PromptTemplate(
            input_variables=["scene", "genre", "director_style", "length"],
            template="""You are an experienced script editor. Review and improve the following scene:

            Scene:
            {scene}

            Genre: {genre}
            
            Director Style: {director_style}

            Length: {length}

            Improve the scene while maintaining proper screenplay format:
            1. Scene headings (INT/EXT, location, time)
            2. Action descriptions (present tense, visual)
            3. Character names in caps when first introduced
            4. Dialog formatting and parentheticals
            5. Proper spacing and structure

            Focus on:
            1. Dialogue authenticity
            2. Scene pacing
            3. Character development
            4. Visual storytelling
            5. Genre consistency

            Return the improved scene in proper screenplay format.
            """
        )
        self.chain = LLMChain(llm=self.llm, prompt=self.prompt)
    
    def edit_scene(self, scene: str, genre: str, director_style: str, length: str) -> str:
        """Edit and improve the given scene"""
        return self.chain.run(
            scene=scene,
            genre=genre,
            director_style=director_style,
            length=length
        )

#### Now, we initialize our crew

In [12]:
class ScriptGenerationCrew:
    def __init__(self):
        self.rag_agent = RAGAgent()
        self.writer_agent = WriterAgent()
        self.editor_agent = EditorAgent()
        
    def initialize_with_data(self, data_dir: str):
        """Initialize the RAG agent with annotated scripts"""
        self.rag_agent.initialize_vector_store(data_dir)
    
    def generate_scene(self, 
                      genre: str, 
                      setting: str, 
                      idea: str,
                      director_style: str,
                      length: str) -> Dict[str, str]:
        """Generate a complete scene using all agents"""
        
        # 1. Retrieve relevant examples
        relevant_examples = self.rag_agent.retrieve_relevant_content(
            f"{genre} {setting} {idea}"
        )
        
        # 2. Generate initial scene
        initial_scene = self.writer_agent.write_scene(
            genre=genre,
            setting=setting,
            idea=idea,
            director_style=director_style,
            length=length,
            examples=relevant_examples
        )
            
        final_scene = self.editor_agent.edit_scene(
            scene=initial_scene,
            genre=genre,
            director_style=director_style,
            length=length
        )
        
        return {
            "initial_scene": initial_scene,
            "final_scene": final_scene,
            "examples_used": relevant_examples
        }

#### Let's try it out!

In [13]:
crew = ScriptGenerationCrew()
    
# Initialize with annotated data
crew.initialize_with_data(
        data_dir="data"
    )

prompt_idea = "A genius graduate student has done an at home, not supervised lab experiment. He used aritifical intelligence and genetics to bring back a dinosour from extinction. One day he returns from work, and his pet is gone",

result = crew.generate_scene(
        genre="Horrorn",
        setting="",
        idea=prompt_idea,
        director_style="Steven Spielberg",
        length="medium"
    )

  self.chain = LLMChain(llm=self.llm, prompt=self.prompt)
  return self.chain.run(


#### First, lets take a look at the scenes our RAG agent found most similar to this idea

In [32]:
print(result['examples_used'])

['Description:  The mysterious SOUTH AMERICAN JUNGLE. A massive waterfall A painted portrait of a dashing young adventurer.           A massive DIRIGIBLE descends on an airfield. Of everyone watching in the modest, small town theater, no  Young Carl stares, mouth agape, wearing leather flight helmet   NEWSREEL FOOTAGE: the dashing Muntz descends down the           An opulent dining room.   One dog runs suffers through mechanized bath time, while a Cameras flash as Muntz stands heroic, striking his signature In the theater, Young Carl returns the thumbs up.  Muntz speaks to a crowded auditorium, on stage beside a    He pulls away the drape to reveal a GIANT BIRD SKELETON. Young Carl leans forward, eyes bulging. Skeptical scientists analyze the bones. Muntz\'s portrait is removed from a wall of paintings of other    Muntz\'s "Explorer\'s Society" badge is ceremoniously RIPPED In the theater, young Carl smiles. The crowd CHEERS. Carl looks like he just witnessed a miracle.', 'Scene: INT. 

#### Now, lets take a look at the initial scene, and the improvised version

In [24]:
print(result['initial_scene'].replace("\n\n", "\n"))

INT. GENIUS GRADUATE STUDENT'S APARTMENT - NIGHT
The apartment is cluttered with scientific equipment and papers strewn across the floor. The walls are lined with shelves filled with books on genetics and artificial intelligence. A faint glow emanates from the makeshift lab in the corner of the room.
The GENIUS GRADUATE STUDENT enters, looking exhausted from a long day at work. He calls out for his pet dinosaur, but there is no response.
GENIUS GRADUATE STUDENT
(panicked)
Where are you, boy?
He searches the apartment frantically, his heart pounding in his chest. Suddenly, he notices the door to the lab is ajar. He rushes over and pushes it open, only to find the lab empty.
GENIUS GRADUATE STUDENT
(shouting)
No, no, no!
He stumbles back, knocking over a beaker of chemicals. The room fills with smoke as alarms blare in the background.
GENIUS GRADUATE STUDENT
(tearfully)
What have I done?
As the smoke clears, a low growl can be heard coming from behind him. The genius graduate student tur

In [27]:
print(result['final_scene'].replace("\n\n", "\n"))

INT. GENIUS GRADUATE STUDENT'S APARTMENT - NIGHT
The cluttered apartment is filled with scientific equipment and papers scattered across the floor. Shelves line the walls, packed with books on genetics and artificial intelligence. A faint glow emanates from the makeshift lab in the corner.
GENIUS GRADUATE STUDENT, a tired and disheveled man, enters the apartment. He calls out for his pet dinosaur, but there is no response.
GENIUS GRADUATE STUDENT
(panicked)
Where are you, boy?
Frantically searching the apartment, his heart races. He notices the lab door ajar and rushes over, pushing it open to find the lab empty.
GENIUS GRADUATE STUDENT
(shouting)
No, no, no!
He stumbles back, knocking over a beaker of chemicals. Smoke fills the room as alarms blare in the background.
GENIUS GRADUATE STUDENT
(tearfully)
What have I done?
As the smoke clears, a low growl is heard behind him. The genius graduate student turns slowly, dread filling his eyes as he faces his pet dinosaur. But it's not the s