# Setup

In [10]:
%pip install -r requirements.txt

Collecting openai (from -r requirements.txt (line 2))
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/15/64/db3462b358072387b8e93e6e6a38d3c741a17b4a84171ef01d6c85c63f25/openai-1.63.2-py3-none-any.whl.metadata
  Downloading openai-1.63.2-py3-none-any.whl.metadata (27 kB)
Collecting python-dotenv (from -r requirements.txt (line 3))
  Obtaining dependency information for python-dotenv from https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl.metadata
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting anyio<5,>=3.5.0 (from openai->-r requirements.txt (line 2))
  Obtaining dependency information for anyio<5,>=3.5.0 from https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl.metadata
  Using cached anyio-4.8.0-py3-none-any.whl.metadata (4.6 kB)
Collecti

In [19]:
import requests
import os
import re
from openai import OpenAI
from dotenv import load_dotenv

In [15]:
# Load variables from a local .env file (when present) into the environment,
# then build the shared OpenAI client from the configured key.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Gather Story Summary

## Wikipedia

In [5]:
def get_wikipedia_summary(title, intro_only=False):
    """
    Fetch the plain-text extract of an English Wikipedia article.

    Uses the public MediaWiki API (``action=query`` with ``prop=extracts``).
    By default the extract covers the whole article rather than only the
    lead section, so the returned text can be long.

    Args:
        title: Title of the article to look up.
        intro_only: When True, request only the content before the first
            section heading. Defaults to False (full article text).

    Returns:
        The extract text, or an empty string when the title is blank, no
        page carries an extract, or the request fails.
    """
    # Nothing to look up: skip the network round trip entirely.
    if not title or (isinstance(title, str) and not title.strip()):
        return ""

    base_url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "extracts",
        "explaintext": True,
        "format": "json",
        "titles": title,
        # Resolve redirect titles to their target article instead of
        # getting back a page without an extract.
        "redirects": True,
    }
    # MediaWiki treats a boolean parameter as true whenever it is present,
    # so the flag must be omitted (not sent as False) for the full text.
    if intro_only:
        params["exintro"] = True

    try:
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        pages = response.json().get("query", {}).get("pages", {})
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"[Wikipedia] Error fetching summary: {e}")
        return ""

    for page_content in pages.values():
        if "extract" in page_content:
            # This is a raw textual extract from Wikipedia
            return page_content["extract"]

    return ""

In [8]:
# Smoke test with a favourite novel (biopunk, per its Wikipedia article)
book = "The Windup Girl"
summary = get_wikipedia_summary(title=book)
print(summary)

The Windup Girl is a biopunk science fiction novel by American writer Paolo Bacigalupi. It was his debut novel and was published by Night Shade Books on September 1, 2009. The novel is set in a future Thailand and covers a number of contemporary issues such as global warming and biotechnology.
The Windup Girl was named as the ninth best fiction book of 2009 by Time magazine. It won the 2010 Nebula Award and the 2010 Hugo Award (tied with The City & the City by China Miéville), both for best novel. The book also won the 2010 Campbell Memorial Award, the 2010 Compton Crook Award and the 2010 Locus Award for best first novel.


== Setting ==
The Windup Girl is set in 23rd-century Thailand. Global warming has raised the levels of world's oceans, carbon fuel sources have become depleted, and manually wound springs are used as energy storage devices. Biotechnology is dominant and megacorporations (called calorie companies) like AgriGen, PurCal and RedStar control food production through "gen

# Pass Book Summary through the First Agent

In [32]:
def query_openai(system_prompt, user_prompt, model="gpt-4o"):
    """
    Send a system/user prompt pair to the OpenAI chat completions API.

    Args:
        system_prompt: Text sent as the "system" message.
        user_prompt: Text sent as the "user" message.
        model: Name of the chat model to query. Defaults to "gpt-4o".

    Returns:
        The text content of the first returned choice, or an empty string
        when that choice carries no text content.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    # The API declares message.content as optional; normalise a missing
    # value to "" so callers always receive a string.
    return completion.choices[0].message.content or ""

In [33]:
# Load the system prompt from system_prompts/story_analysis_prompt.md.
# Read it explicitly as UTF-8 so the prompt text does not depend on the
# platform's default encoding.
with open("system_prompts/story_analysis_prompt.md", "r", encoding="utf-8") as file:
    system_prompt = file.read()

# Generate the trailer description from the book summary using the OpenAI API
trailer_description = query_openai(system_prompt, summary)


# Create Specific Clip/Audio Prompts from the Long Description

In [24]:
# Display the raw trailer script returned by the model
print(trailer_description)

**[Trailer: The Windup Girl - A 90 Second Teaser]**

**Clip 1: Establishing the World**
- **Visual (0:00-0:10):** A sweeping aerial shot over the flooded, labyrinthine streets of 23rd-century Bangkok. Vibrant yet dystopian, the city is encased by towering levees as faint sunlight pierces through a thick haze.
- **Audio:** Background chatter in Thai and the sound of flowing water, underscored by a haunting synth swell.

**Clip 2: Anderson Lake's Ambition**
- **Visual (0:11-0:21):** Close-up on Anderson Lake's intense eyes as he examines a torn blueprint of a kink-spring. His factory looms in the background, casting long shadows.
- **Audio:** Anderson whispers, "We need the seeds, the real ones... the key to power."

**Clip 3: Emiko's Vulnerability**
- **Visual (0:22-0:32):** Emiko, the windup girl, dances mechanically under dim, flickering lights in a seedy club—her movements graceful yet unsettling. 
- **Audio:** A sultry yet haunting voiceover from Emiko, "I am a ghost, trapped betwee

In [26]:
import re

def time_to_seconds(timestr: str) -> int:
    """
    Turn an "M:SS" style timestamp into a whole number of seconds.

    Examples: "0:10" -> 10, "1:05" -> 65.

    Raises:
        ValueError: if the string does not consist of exactly two
            colon-separated integer fields.
    """
    mins, secs = map(int, timestr.split(":"))
    return 60 * mins + secs

# Layout of one clip in the generated script:
#
#   **Clip 1: Establishing the World**
#   - **Visual (0:00-0:10):** <visual description>
#   - **Audio:** <audio description>
#
# Groups: 1 = clip number, 2 = title, 3 = start time, 4 = end time,
#         5 = visual description, 6 = audio description.
#
# DOTALL lets the two description groups span several lines.
_TRAILER_CLIP_PATTERN = re.compile(
    # Header line. The title is confined to a single line so that a clip
    # without a well-formed Visual line cannot absorb the next clip's header.
    r"\*\*Clip (\d+): ([^\n]*?)\*\*\s*"
    # Visual bullet with its time range; a hyphen, en dash or em dash is
    # accepted between the two timestamps. The description may not run past
    # the next "**Clip" marker, so a clip without an Audio line is skipped
    # instead of swallowing its successor.
    r"-\s*\*\*Visual \((\d+:\d+)\s*[-\u2013\u2014]\s*(\d+:\d+)\):\*\*\s*((?:(?!\*\*Clip).)*?)\s*"
    # Audio bullet: everything up to the next "**Clip" or the end of text.
    r"-\s*\*\*Audio:\*\*\s*(.*?)\s*(?=\*\*Clip|\Z)",
    re.DOTALL,
)


def parse_trailer_script(script_text: str):
    """
    Parse the LLM-generated trailer script text into a structured list of
    dictionaries, one per clip, in order of appearance.

    Each dict contains the keys:
      clip_number (int), clip_title, start_time, end_time,
      visual_description, audio_description, length_seconds (int,
      end time minus start time).

    Clips that do not follow the expected header / Visual / Audio layout
    are skipped. Empty or missing input yields an empty list.
    """
    if not script_text:
        return []

    clips = []
    for match in _TRAILER_CLIP_PATTERN.finditer(script_text):
        clip_num_str, clip_title, start_str, end_str, visual_desc, audio_desc = match.groups()

        start_seconds = time_to_seconds(start_str)
        end_seconds = time_to_seconds(end_str)

        clips.append({
            "clip_number": int(clip_num_str),
            "clip_title": clip_title.strip(),
            "start_time": start_str,
            "end_time": end_str,
            "visual_description": visual_desc.strip(),
            "audio_description": audio_desc.strip(),
            "length_seconds": end_seconds - start_seconds,
        })

    return clips

# Parse the generated trailer script into a list of per-clip dictionaries
trailer_clips = parse_trailer_script(trailer_description)

In [34]:
# One header line and one audio line per clip, followed by a blank line
for clip in trailer_clips:
    header = f"Clip {clip['clip_number']}: {clip['clip_title']} - {clip['length_seconds']} seconds"
    audio_line = f"  Audio: {clip['audio_description']}"
    print(header, audio_line, "", sep="\n")

Clip 1: Establishing the World - 10 seconds
  Audio: Background chatter in Thai and the sound of flowing water, underscored by a haunting synth swell.

Clip 2: Anderson Lake's Ambition - 10 seconds
  Audio: Anderson whispers, "We need the seeds, the real ones... the key to power."

Clip 3: Emiko's Vulnerability - 10 seconds
  Audio: A sultry yet haunting voiceover from Emiko, "I am a ghost, trapped between two worlds. But freedom... it whispers my name."

Clip 4: Rising Conflict - 10 seconds
  Audio: Sounds of shouting, drum beats echoing like a heart's pulse and a voiceover, "The city's on the brink... of something powerful."

Clip 5: The Diving Plague - 10 seconds
  Audio: A whispered voiceover from Kanya, "The air carries death... and secrets unveil themselves."

Clip 6: Emiko's Liberation - 10 seconds
  Audio: A visceral mix of gasps, a blade slicing the air, and Emiko's cry, "Enough!" crescendos with a brutal snap.

Clip 7: The War for Control - 10 seconds
  Audio: Distant gunfire