In [1]:
# Install all required libraries

print("Installing dependencies...")
!pip install -q nltk google-generativeai \
                diffusers transformers accelerate safetensors \
                ipywidgets

print(" All libraries installed.")

Installing dependencies...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25h All libraries installed.


In [2]:
import nltk
import google.generativeai as genai
import torch
from diffusers import AutoPipelineForText2Image
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import base64
from io import BytesIO
import getpass

print("Downloading NLTK resources...")
# Tokenizer data packages fetched for nltk.sent_tokenize (used by segment_narrative).
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)
print("NLTK resources downloaded.")


def _load_google_api_key() -> str:
    """
    Return the Google AI Studio API key.

    Lookup order:
      1. The Colab secret named ``GOOGLE_API_KEY`` (only when running in Colab).
      2. An interactive, non-echoing ``getpass`` prompt.

    The prompt is used whenever the secret lookup yields nothing usable,
    including when the lookup itself raises (for example because the secret
    does not exist or the notebook was not granted access to it), so a missing
    secret never aborts the cell.
    """
    api_key = None

    try:
        # Only importable inside Google Colab.
        from google.colab import userdata
    except ImportError:
        userdata = None

    if userdata is not None:
        try:
            api_key = userdata.get('GOOGLE_API_KEY')
        except Exception as exc:
            # Best-effort lookup: report the reason and fall through to the prompt.
            print(f"Could not read GOOGLE_API_KEY from Colab secrets ({type(exc).__name__}).")

    if not api_key:
        # getpass keeps the key out of the saved cell output.
        api_key = getpass.getpass("Please paste your Google AI Studio API key: ")

    return api_key


GOOGLE_API_KEY = _load_google_api_key()
genai.configure(api_key=GOOGLE_API_KEY)
print(" Gemini API Key configured.")

Downloading NLTK resources...


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


NLTK resources downloaded.
 Gemini API Key configured.


In [3]:
print("Loading models... This may take a few minutes.")

# 1. Gemini model used by engineer_prompt to turn sentences into image prompts.
prompt_refinement_model = genai.GenerativeModel('gemini-1.5-flash')
print("Gemini model loaded.")

# 2. Text-to-image pipeline (Stable Diffusion XL base), loaded in float16 and
#    moved to the GPU.
# Fail fast with an actionable message: the pipeline below is moved to "cuda",
# which cannot succeed without a GPU, and the check avoids downloading the
# multi-gigabyte weights first only to fail afterwards.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. This notebook loads Stable Diffusion XL onto 'cuda'; "
        "switch to a GPU runtime and re-run this cell."
    )

# NOTE(review): the recorded output of this cell shows a "`torch_dtype` is
# deprecated" warning; confirm which library emits it before renaming the argument.
image_gen_pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16"
).to("cuda")

print("Stable Diffusion model loaded and moved to GPU.")


Loading models... This may take a few minutes.
Gemini model loaded.


model_index.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Fetching 19 files:   0%|          | 0/19 [00:00<?, ?it/s]

text_encoder_2/model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

text_encoder/model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/575 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.fp16.safete(…):   0%|          | 0.00/5.14G [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vae_1_0/diffusion_pytorch_model.fp16.saf(…):   0%|          | 0.00/167M [00:00<?, ?B/s]

vae/diffusion_pytorch_model.fp16.safeten(…):   0%|          | 0.00/167M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!


Stable Diffusion model loaded and moved to GPU.


In [4]:
def segment_narrative(text: str) -> list[str]:
    """
    (Req #2) Split a narrative into scenes, one scene per sentence.

    Args:
        text: The full narrative entered by the user.

    Returns:
        The sentences produced by ``nltk.sent_tokenize`` for ``text``,
        in their original order.
    """
    scenes = nltk.sent_tokenize(text)
    return scenes

def engineer_prompt(sentence: str, style: str) -> str:
    """
    (Req #3 & Bonus) Uses an LLM to refine the sentence into a
    visually descriptive prompt.

    Args:
        sentence: One narrative sentence (a single storyboard scene).
        style: Comma-separated style keywords to append to the prompt.

    Returns:
        The stripped text returned by the Gemini model. If the request raises,
        or the model returns no usable text, the plain ``"<sentence>, <style>"``
        prompt is returned instead so image generation never receives an
        empty prompt.
    """
    # Used whenever Gemini cannot supply a usable prompt.
    fallback_prompt = f"{sentence}, {style}"

    # This is the prompt for Gemini. It's a "meta-prompt".
    base_prompt = f"""
    You are an expert prompt engineer for a text-to-image AI.
    Your task is to convert a simple narrative sentence into a highly detailed,
    visually descriptive prompt. Focus on the scene, characters, lighting,
    and emotion. The output should be a comma-separated list of keywords.

    Append the following artistic style to the end: {style}

    Sentence: "{sentence}"

    Detailed Visual Prompt:
    """

    try:
        response = prompt_refinement_model.generate_content(base_prompt)
        # Accessing .text can itself raise; keep it inside the try block.
        refined_prompt = (response.text or "").strip()
    except Exception as e:
        # Deliberate best-effort: a failed refinement must not abort the storyboard.
        print(f"Error engineering prompt: {e}")
        return fallback_prompt

    if not refined_prompt:
        print("Error engineering prompt: Gemini returned an empty response.")
        return fallback_prompt

    return refined_prompt

def generate_image(prompt: str, num_inference_steps: int = 25):
    """
    (Req #4) Generates a single image from a prompt.

    Args:
        prompt: Text prompt passed to the text-to-image pipeline.
        num_inference_steps: Number of denoising steps requested from the
            pipeline. Defaults to 25, the value this notebook previously
            hard-coded as a balance of speed and quality.

    Returns:
        The first image in the pipeline result's ``images`` list.
    """
    result = image_gen_pipeline(prompt, num_inference_steps=num_inference_steps)
    return result.images[0]

def display_storyboard(story_panels: list[dict]):
    """
    (Req #5) Renders the final storyboard as an HTML sequence.

    Args:
        story_panels: One dict per panel with the keys
            ``'image'`` (an object with a PIL-style ``save(buffer, format=...)``
            method) and ``'original_text'`` (the caption shown under the image).

    Each image is embedded as a base64 PNG data URI. Captions are HTML-escaped
    so characters such as ``<``, ``>`` and ``&`` in the narrative are shown
    literally instead of being interpreted as markup.
    """
    from html import escape  # stdlib; imported locally so the cell stays self-contained

    panel_markup = []
    for panel in story_panels:
        # Convert the image to a base64 string so it can be embedded in HTML
        buffered = BytesIO()
        panel['image'].save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        caption = escape(str(panel['original_text']))

        # HTML for each panel
        panel_markup.append(f"""
        <div style='border: 1px solid #ddd; border-radius: 8px; padding: 10px; width: 300px; box-shadow: 2px 2px 5px #ccc;'>
            <img src='data:image/png;base64,{img_str}' style='width: 100%; height: auto; border-radius: 4px;' />
            <p style='font-family: sans-serif; font-size: 14px; margin-top: 10px;'>
                <b>Original:</b> {caption}
            </p>
        </div>
        """)

    # Join once instead of growing a string inside the loop.
    html_output = (
        "<div style='display: flex; flex-wrap: wrap; justify-content: center; gap: 16px;'>"
        + "".join(panel_markup)
        + "</div>"
    )
    display(HTML(html_output))

print("Core functions defined.")

Core functions defined.


In [5]:
# --- Create UI Components ---

# 1. Text Input
# Sample narrative pre-filled in the text box so the demo works out of the box.
_DEFAULT_NARRATIVE = 'A young developer sits in a dark room, illuminated only by her laptop screen. She types furiously, a complex network diagram reflected in her glasses. Suddenly, an error message flashes, and she slumps in defeat. After a moment, her eyes light up with an idea, and she begins to code again with renewed energy.'

# (label shown in the dropdown, style keywords appended to each image prompt)
_STYLE_PRESETS = [
    ('Digital Art', 'digital art, vibrant colors, cinematic lighting, concept art'),
    ('Photorealistic', 'photorealistic, 8k, sharp focus, f/1.8, high detail'),
    ('Anime', 'anime style, key visual, beautiful detailed, cinematic'),
    ('Pixel Art', '16-bit pixel art, detailed, retro video game'),
    ('Comic Book', 'comic book style, cel-shaded, bold outlines, dynamic'),
    ('Watercolor', 'watercolor painting, soft edges, blended colors, beautiful'),
]

# 1. Narrative text box
text_input = widgets.Textarea(
    value=_DEFAULT_NARRATIVE,
    placeholder='Enter your 3-5 sentence narrative here...',
    description='Narrative:',
    layout=widgets.Layout(width='80%', height='150px'),
)

# 2. Art-style dropdown; the first preset is selected initially
style_input = widgets.Dropdown(
    options=_STYLE_PRESETS,
    value=_STYLE_PRESETS[0][1],
    description='Art Style:',
    layout=widgets.Layout(width='80%'),
)

# 3. Button that starts generation
generate_button = widgets.Button(
    description='Generate Storyboard',
    button_style='success',  # one of 'success', 'info', 'warning', 'danger' or ''
    icon='magic',
)

# 4. Output area that receives the progress messages and the finished storyboard
output_area = widgets.Output()

# --- Define Button Click Logic ---

def on_generate_click(b):
    """
    Click handler for the "Generate Storyboard" button.

    Reads the narrative and style from the input widgets, splits the narrative
    into scenes, builds one panel (refined prompt -> image) per scene, and
    finally replaces the progress messages in ``output_area`` with the
    rendered storyboard. Validation failures are reported inside
    ``output_area`` and end the handler early.

    Args:
        b: Argument supplied by the button callback; not used.
    """
    # Start from a clean output area; everything below is shown inside it.
    output_area.clear_output()
    with output_area:
        print("Starting storyboard generation... Please wait.")

        # Snapshot the current widget values
        narrative, style = text_input.value, style_input.value

        if narrative.strip() == "":
            print("Error: Please enter a narrative.")
            return

        # Split the narrative into scenes
        print("Step 1/3: Segmenting narrative...")
        scenes = segment_narrative(narrative)
        if len(scenes) == 0:
            print("Error: Could not find any sentences.")
            return

        panel_total = len(scenes)
        print(f"Step 2/3: Generating {panel_total} panels...")

        # One panel per scene: refine the prompt, then render the image
        storyboard_panels = []
        for panel_number, scene in enumerate(scenes, start=1):
            preview = scene[:30]
            print(f"  - Panel {panel_number}: Refining prompt for '{preview}...'")
            enhanced_prompt = engineer_prompt(scene, style)

            print(f"  - Panel {panel_number}: Generating image...")
            storyboard_panels.append(
                dict(original_text=scene, image=generate_image(enhanced_prompt))
            )

        print("Step 3/3: Rendering final storyboard...")
        # Drop the progress messages before showing the result
        output_area.clear_output()
        display_storyboard(storyboard_panels)

# Register on_generate_click as the click handler of generate_button, so a
# click runs the full segment -> refine -> generate -> render sequence.
generate_button.on_click(on_generate_click)


print("UI components created and click event defined.")

UI components created and click event defined.


In [6]:
# Show the interface: narrative box, style dropdown, button and output area,
# stacked vertically in that order.
print("--- The Pitch Visualizer ---")
pitch_visualizer_ui = widgets.VBox(
    children=[text_input, style_input, generate_button, output_area]
)
display(pitch_visualizer_ui)

--- The Pitch Visualizer ---


VBox(children=(Textarea(value='A young developer sits in a dark room, illuminated only by her laptop screen. S…

In [12]:
# # Install Gradio
# print("Installing Gradio...")
# !pip install -q gradio
# print("Gradio installed.")

In [13]:
# import gradio as gr

# def storyboard_interface(narrative, style_value):
#     """
#     Function to be used by Gradio. It takes the narrative and style
#     from the Gradio interface and returns the storyboard display.
#     """

#     style_map = dict(style_input.options)
#     style = style_map.get(style_value, 'digital art, vibrant colors, cinematic lighting, concept art') # Default to digital art if not found


#     print("Starting storyboard generation... Please wait.")


#     if not narrative.strip():
#         print("Error: Please enter a narrative.")
#         return "Please enter a narrative." # Return an error message for Gradio

#     # 2. Segment narrative
#     print(f"Step 1/3: Segmenting narrative...")
#     scenes = segment_narrative(narrative)
#     if len(scenes) < 1:
#          print("Error: Could not find any sentences.")
#          return "Could not segment the narrative into sentences." # Return an error message for Gradio

#     storyboard_panels = []
#     print(f"Step 2/3: Generating {len(scenes)} panels...")

#     # 3. Loop and generate
#     for i, scene in enumerate(scenes):
#         print(f"  - Panel {i+1}: Refining prompt for '{scene[:30]}...'")
#         enhanced_prompt = engineer_prompt(scene, style)

#         print(f"  - Panel {i+1}: Generating image...")
#         image = generate_image(enhanced_prompt)

#         storyboard_panels.append({
#             'original_text': scene,
#             'image': image
#         })

#     print("Step 3/3: Rendering final storyboard...")
#     # 4. Display the result using display_storyboard (which uses HTML)

#     html_output = "<div style='display: flex; flex-wrap: wrap; justify-content: center; gap: 16px;'>"

#     for panel in storyboard_panels:
#         # Convert PIL Image to a base64 string to embed in HTML
#         buffered = BytesIO()
#         panel['image'].save(buffered, format="PNG")
#         img_str = base64.b64encode(buffered.getvalue()).decode()

#         # HTML for each panel
#         html_output += f"""
#         <div style='border: 1px solid #ddd; border-radius: 8px; padding: 10px; width: 300px; box-shadow: 2px 2px 5px #ccc;'>
#             <img src='data:image/png;base64,{img_str}' style='width: 100%; height: auto; border-radius: 4px;' />
#             <p style='font-family: sans-serif; font-size: 14px; margin-top: 10px;'>
#                 <b>Original:</b> {panel['original_text']}
#             </p>
#         </div>
#         """

#     html_output += "</div>"
#     return html_output


# # --- Create Gradio Interface ---

# # Get the options for the style dropdown from the existing widget
# style_options = [option[0] for option in style_input.options]
# # Get the initial value for the style dropdown
# initial_style_value = [option[0] for option in style_input.options if option[1] == style_input.value][0]


# iface = gr.Interface(
#     fn=storyboard_interface,
#     inputs=[
#         gr.Textbox(lines=5, label="Narrative", placeholder="Enter your 3-5 sentence narrative here..."),
#         gr.Dropdown(choices=style_options, label="Art Style", value=initial_style_value)
#     ],
#     outputs=gr.HTML(label="Storyboard"), # Use gr.HTML to display the generated HTML
#     title="The Pitch Visualizer",
#     description="Enter a narrative and select an art style to generate a visual storyboard."
# )

# print("Gradio interface created.")

In [14]:

# print("Launching Gradio app...")
# iface.launch(debug=True, share=True)

In [15]:
import base64
from io import BytesIO
import gradio as gr

def storyboard_interface(narrative, style_value):
    """
    Gradio handler that streams the storyboard as it is generated.

    This is a generator: every yielded string replaces the content of the
    HTML output component.

    Args:
        narrative: Narrative text from the Gradio textbox.
        style_value: Label chosen in the style dropdown. It is mapped to the
            style keywords through ``style_input.options``; an unknown label
            falls back to the digital-art keywords.

    Yields:
        * a single error message when the narrative is empty or cannot be
          split into sentences (generation then stops), otherwise
        * a loading message, followed by the storyboard HTML after each
          finished panel (each update contains all panels produced so far).
    """
    from html import escape  # stdlib; imported locally so the cell stays self-contained

    style_map = dict(style_input.options)
    style = style_map.get(style_value, 'digital art, vibrant colors, cinematic lighting, concept art')

    # The `return` after each error is essential: without it the generator
    # keeps running and the error is overwritten by later yields.
    if not narrative or not narrative.strip():
        yield "Error: Please enter a narrative."
        return

    scenes = segment_narrative(narrative)
    if not scenes:
        yield "Could not segment the narrative into sentences."
        return

    # Start with a "loading" message
    yield "<p>Generating storyboard... Please wait.</p>"

    container_open = "<div style='display: flex; flex-wrap: wrap; justify-content: center; gap: 16px;'>"
    # Rendered HTML of the finished panels; each image is encoded exactly once
    # instead of being re-encoded for every partial update.
    panel_markup = []

    for scene in scenes:
        enhanced_prompt = engineer_prompt(scene, style)
        image = generate_image(enhanced_prompt)

        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # The narrative is user-supplied; escape it so it cannot inject markup.
        caption = escape(str(scene))

        panel_markup.append(f"""
            <div style='border: 1px solid #ddd; border-radius: 8px; padding: 10px; width: 400px; box-shadow: 2px 2px 5px #ccc;'>
                <img src='data:image/png;base64,{img_str}' style='width: 100%; height: auto; border-radius: 4px;' />
                <p style='font-family: sans-serif; font-size: 14px; margin-top: 10px;'>
                    <b>Original:</b> {caption}
                </p>
            </div>
            """)

        yield container_open + "".join(panel_markup) + "</div>"  # update Gradio with partial output


# Reuse the (label, keywords) pairs of the ipywidgets dropdown so both UIs
# offer the same styles. Gradio shows the labels only.
style_options = [label for label, _keywords in style_input.options]

# Label whose keywords match the dropdown's currently selected value
initial_style_value = [
    label for label, keywords in style_input.options if keywords == style_input.value
][0]

narrative_box = gr.Textbox(
    lines=5,
    label="Narrative",
    placeholder="Enter your 3-5 sentence narrative here...",
)
style_dropdown = gr.Dropdown(
    choices=style_options,
    label="Art Style",
    value=initial_style_value,
)

iface = gr.Interface(
    fn=storyboard_interface,
    inputs=[narrative_box, style_dropdown],
    outputs=gr.HTML(label="Storyboard"),  # storyboard_interface yields HTML strings
    title="The Pitch Visualizer",
    description="Enter a narrative and select an art style to generate a visual storyboard.",
)

print(" Gradio interface created.")

 Gradio interface created.


In [None]:

# Start the Gradio server for `iface`.
# share=True publishes the app on a public *.gradio.live URL (see the recorded
# output of this cell).
# NOTE(review): presumably anyone holding that link can trigger generation on
# this runtime with the configured Gemini key - confirm whether access should
# be restricted before sharing the URL.
# debug=True keeps this cell running so errors and logs stay visible; per the
# recorded output, set debug=False in launch() to turn that off.
print("Launching Gradio app...")
iface.launch(debug=True, share=True)

Launching Gradio app...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://498c3d365246c38cb3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
