In [1]:
from tools import get_image_dimensions

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
get_image_dimensions('img_p0_1.png')

(722, 406, 722, 406)

In [13]:
# outline_generator.py
import os
import json
from typing import List
from pydantic import BaseModel, ValidationError
from openai import OpenAI

# Initialize OpenAI client
client = OpenAI()

# Pydantic models for validation
class SlideDistribution(BaseModel):
    sub_slide: int
    sub_slide_title: str | None = None
    sub_slide_subtitle: str | None = None
    key_points: List[str]

class SectionOutline(BaseModel):
    heading: str
    num_content_slides: int
    slide_distribution: List[SlideDistribution]

class DocumentOutline(BaseModel):
    title: str
    subtitle: str | None = None  # Make subtitle optional with default None
    sections: List[SectionOutline]

# Prompt template
PROMPT_TEMPLATE = """
You are a slide-outline assistant.  
Given the document title and a list of section summaries, produce a JSON object matching this schema:

{{
  "title": <string>,           // main deck title
  "subtitle": <string>,        // optional subtitle or author
  "sections": [
    {{
      "heading": <string>,     // section heading
      "num_content_slides": <integer >= 1>,  
      "slide_distribution": [
        {{
          "sub_slide": <integer>,  // Sub slide number
          "sub_slide_title": <string>,  // Sub slide title
          "sub_slide_subtitle": <string>,  // Sub slide subtitle (optional)
          "key_points": [<string>, ...]  // 1-3 bullet points for this slide. If more than 3, move to next sub-slide
        }},
        ...
      ]
    }},
    …
  ]
}}

Use no extra keys. Constrain total slides (sum of num_content_slides) to be at most {max_slides}.
Here are the inputs:

Document Title:
{title}

Section Summaries:
{summaries}
"""

def generate_outline(title: str, summaries: List[dict], max_slides: int = 15) -> DocumentOutline:
    # Fill prompt
    prompt = PROMPT_TEMPLATE.format(
        title=title,
        summaries="\n".join(f"- {s['title']}: {s['summary']}" for s in summaries),  # Include section titles
        max_slides=max_slides
    )

    # Call the LLM
    resp = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=1200,  # Increased token limit for longer responses
    )

    # Parse JSON
    content = resp.choices[0].message.content.strip()
    try:
        outline_dict = json.loads(content)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"LLM returned invalid JSON:\n{content}") from e

    # Validate with Pydantic
    try:
        outline = DocumentOutline.model_validate(outline_dict)
    except ValidationError as e:
        raise RuntimeError(f"Outline validation failed:\n{e}") from e

    return outline



if __name__ == "__main__":
    # Example usage; replace with your real summaries
    doc_title = "Q2 Marketing Strategy Review"
    section_summaries = [{'title': 'Brief Assessment:', 'summary': 'Yue Sai, a once prestigious Chinese cosmetics brand, lost relevance due to failed repositionings, leading to low awareness among younger consumers. L’Oréal faces the challenge of reviving the brand in a competitive market shaped by rising local players and digital consumption. Younger Chinese consumers prefer skin care rooted in tradition. Competitors like Herborist succeed with modern branding rooted in traditional Chinese medicine. Digital platforms and experiential retail offer better engagement than TV ads. L’Oréal must align its city-tier strategy with a clear brand identity to meet evolving consumer expectations.'}, {'title': 'Decision Problem:', 'summary': "To reposition Yue Sai in China's cosmetics market, L'Oréal must conduct thorough market research to understand current trends and consumer preferences. By leveraging Yue Sai's heritage and reputation, L'Oréal can revamp the brand's image to appeal to modern Chinese consumers. Implementing innovative marketing strategies and product offerings tailored to the local market will be crucial in regaining relevance and increasing market share."}, {'title': 'Criteria:', 'summary': "This slide examines Yue Sai's sales performance and product-market fit. Sales data is analyzed to project revenue, while market research assesses how well products align with target segment needs. Factors like local relevance and demographic responsiveness are considered to ensure products meet customer preferences."}, {'title': '3. Strategic Fit', 'summary': "This slide evaluates the alignment of alternatives with L'Oréal's brand vision and mission. Each alternative is scored based on its compatibility with the company's brand values."}, {'title': '4. Risk of Attrition', 'summary': "Learn how to measure customer loyalty and growth with a focus on retaining existing customers and attracting new ones. Evaluate customer retention metrics, purchase behavior, and changes in demographics to ensure the product's success in the market."}, {'title': '5. Execution Complexity', 'summary': 'This slide evaluates the implementation difficulty of the proposed alternative by assessing costs related to resources, budget constraints, production, and logistics. It highlights the importance of considering these factors when making decisions.'}, {'title': 'Moderate Risk', 'summary': 'In a highly competitive digital space, the brand risks alienating older loyalists while facing strong local rivals like Herborist and Chando.'}, {'title': 'Attrition Risk', 'summary': 'Appealing to a broader audience can lead to increased retention rates, but may also dilute brand prestige.'}, {'title': 'Execution', 'summary': 'To achieve moderate growth, implement targeted digital campaigns, invest in TCM research and development, and expand offerings in premium tiers.'}, {'title': 'Very High', 'summary': 'Managing multiple channels can significantly raise complexity and costs.'}, {'title': 'Recommendation: Alternative 1 - Yue Sai for Modern Young Women', 'summary': "Alternative 1 offers a clear strategy for Yue Sai to rebuild its brand with luxury appeal and cultural relevance through TCM, targeting modern, health-conscious young women in China. This unique position within L'Oreal's portfolio distinguishes Yue Sai from other luxury brands, ensuring long-term growth and market relevance."}]
    outline = generate_outline(doc_title, section_summaries, max_slides=12)

In [14]:
def save_outline(outline: DocumentOutline, file_path: str):
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(outline.model_dump(), f, indent=2, ensure_ascii=False)


In [15]:
save_outline(outline, "outline.json")

In [16]:
import json

In [17]:
def load_outline(file_path: str) -> DocumentOutline:
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return DocumentOutline.model_validate(data)


In [18]:
outline = load_outline("outline.json")

In [19]:
with open("outline.json", "r", encoding="utf-8") as f:
    outline = json.load(f)

In [20]:
import json
from typing import List, Literal, Optional
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field, ValidationError

# ------------------------------
# 🎯 Pydantic Models
# ------------------------------
class SlideContent(BaseModel):
    bullets: List[str] = Field(..., min_items=1, max_items=7)
    speaker_notes: str
    
    image_caption: List[str] = Field(default_factory=list)
    flowchart: Literal["True", "False"]
    flowchart_description: Optional[str] = None

class SlideOutput(BaseModel):
    section: str
    sub_slide: int
    slide_content: SlideContent

# ------------------------------
# 🧠 Prompt Template
# ------------------------------
prompt = ChatPromptTemplate.from_template("""
You are an AI assistant generating PowerPoint slide content. Your goal is to make the presentation look professional and engaging. Avoid suggesting images with charts or graphs, unless the slide is about a chart or graph.

Slide Title: {slide_title}
Slide Subtitle: {slide_subtitle}
Key Points:
{key_ideas}

Your task:
1. Rewrite the key points into 3–5 concise bullet points suitable for a slide.
2. Write speaker notes that expand on the bullet points.
3. Determine if an image will help explain the slide.
    If yes, Set "image_caption" to a list containing a 1-sentence caption for the image (e.g., ["A skincare shelf in a Chinese department store"], ["Flowchart showing digital vs retail touchpoints"], ["Logo of Yue Sai brand"])
    Captions should be clear, descriptive, and useful for image search.
    If no, Set "image_caption" to an empty list.

4. Decide if a flowchart is helpful. If yes:
   - Set "flowchart" to "True"
   - Provide a 1-sentence flowchart_description
   If not:
   - Set "flowchart" to "False"
   - flowchart_description may be null

Return only a **valid JSON** with the following structure:

{{
  "bullets": ["..."],
  "speaker_notes": "...",
  "image_caption": ["..."],     
  "flowchart": "True" or "False",
  "flowchart_description": "..."
}}

Do NOT use markdown formatting or backticks. Return raw JSON only.
                                          
Return only a valid JSON object. Do not include extra text. Use double quotes. Ensure proper commas and no trailing commas.
""")

# ------------------------------
# ⚙️ Initialize LangChain
# ------------------------------
llm = ChatOpenAI(temperature=0.4, model="gpt-4o")
slide_chain = LLMChain(llm=llm, prompt=prompt)

# ------------------------------
# 📂 Load Outline
# ------------------------------
with open("outline.json", "r", encoding="utf-8") as f:
    outline = json.load(f)

# ------------------------------
# 🚀 Generate Slide Content
# ------------------------------
results = []

def safe_parse_response(response_str: str) -> Optional[SlideContent]:
    try:
        response_json = json.loads(response_str)
        return SlideContent.model_validate(response_json)
    except (json.JSONDecodeError, ValidationError) as e:
        print(f"\n❌ Validation failed:\n{e}\n→ Raw response:\n{response_str}\n{'-'*60}")
        return None

for section in outline["sections"]:
    for slide in section["slide_distribution"]:
        slide_title = section["heading"]
        slide_subtitle = f"Slide {slide['sub_slide']}"
        key_ideas = "\n".join(f"- {pt}" for pt in slide["key_points"])

        try:
            # Run the LLM
            response_str = slide_chain.run({
                "slide_title": slide_title,
                "slide_subtitle": slide_subtitle,
                "key_ideas": key_ideas
            })

            # Parse and validate
            slide_content = safe_parse_response(response_str)

            if slide_content:
                result = SlideOutput(
                    section=slide_title,
                    sub_slide=slide["sub_slide"],
                    slide_content=slide_content
                )
                results.append(result.model_dump())

        except Exception as e:
            print(f"❌ Error on slide '{slide_title} - {slide['sub_slide']}': {e}")

# ------------------------------
# 💾 Save Results
# ------------------------------
with open("generated_slide_content.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

print(f"✅ Saved {len(results)} slides to 'generated_slide_content.json'")


  slide_chain = LLMChain(llm=llm, prompt=prompt)
  response_str = slide_chain.run({


✅ Saved 11 slides to 'generated_slide_content.json'


In [4]:
import json
# Load the generated slide content
with open("generated_slide_content.json", "r", encoding="utf-8") as f:
    slide_content = json.load(f)

# Print the loaded content for verification
print(f"Loaded {len(slide_content)} slides from generated_slide_content.json")


Loaded 11 slides from generated_slide_content.json


In [23]:
import json
# Load the generated slide content
with open("updated_slide_content.json", "r", encoding="utf-8") as f:
    updated_slide_content = json.load(f)

# Print the loaded content for verification
print(f"Loaded {len(updated_slide_content)} slides from updated_slide_content.json")


Loaded 11 slides from updated_slide_content.json


In [11]:
import os
import requests
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import os
import requests
from tavily import TavilyClient
from typing import Optional

load_dotenv()

TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")  # Or replace with your string
tavily = TavilyClient(api_key=TAVILY_API_KEY)

# ------------------------------
# 📥 Image Search + Download
# ------------------------------
def search_and_download_image_from_web(
    query: str,
    output_dir: str = "images",
    filename: Optional[str] = None,
    index: int = 0
) -> Optional[str]:
    """
    Search Tavily for an image and download the first result.

    Args:
        query (str): The image search query (e.g., image caption).
        output_dir (str): Directory to save the downloaded image.
        filename (str, optional): Filename for the image (defaults to slugified query).
        index (int): Which image result to download (0 = top result).

    Returns:
        str: Path to the saved image, or None if no image found.
    """
    try:
        print(f"🔍 Searching for image: {query}")
        results = tavily.search(query, include_images=True)

        if not results or not results.get("images"):
            print("⚠️ No images found for this query.")
            return None

        image_url = results["images"][index]
        print(f"📸 Found image URL: {image_url}")

        # Prepare save path
        os.makedirs(output_dir, exist_ok=True)
        safe_filename = filename or f"{query.lower().replace(' ', '_')[:50]}.jpg"
        image_path = safe_filename

        # Download and save image
        img_data = requests.get(image_url).content
        with open(image_path, "wb") as f:
            f.write(img_data)

        print(f"✅ Image saved at: {image_path}")
        return image_path

    except Exception as e:
        print(f"❌ Failed to download image: {e}")
        return None

In [16]:
from typing import List, Dict
import requests
from PIL import Image
from io import BytesIO
from tools import get_best_image
from multimodal_rag import build_image_index
import copy

image_index = build_image_index("./images")
used_images = set()

# Create a deep copy of slide_content to avoid modifying the original
updated_slide_content = copy.deepcopy(slide_content)

for slide in updated_slide_content:
    captions = slide['slide_content']['image_caption']
    if not captions:
        continue
        
    for caption in captions:
        # Try to get image from RAG first
        image_path, confidence = get_best_image(caption, image_index)

        # if confidence is less than 0.30, get from web
        if confidence < 0.30:
            image_path = search_and_download_image_from_web(caption)
            
        if image_path:
            used_images.add(image_path)
            # Add image path to slide content
            if 'image_paths' not in slide['slide_content']:
                slide['slide_content']['image_paths'] = []
            slide['slide_content']['image_paths'].append(image_path)
            print(f"Caption: {caption}")
            print(f"Image path: {image_path}\n")
            print(f"Confidence: {confidence}")

# Save updated slide content to a new JSON file
with open("updated_slide_content.json", "w", encoding="utf-8") as f:
    json.dump(updated_slide_content, f, indent=2, ensure_ascii=False)


**************************************************
Building image index...
**************************************************


TypeError: string indices must be integers, not 'str'

In [None]:
from tools import get_all_image_dimensions
get_all_image_dimensions()

In [34]:
def update_slide_content_with_dimensions(slide_content):
    """
    Update slide content with image dimensions from image_metadata.json
    
    Args:
        slide_content (list): List of slide content dictionaries
        
    Returns:
        list: Updated slide content with image dimensions
    """
    updated_content = copy.deepcopy(slide_content)
    
    for slide in updated_content:
        if 'slide_content' in slide and 'image_paths' in slide['slide_content']:
            image_paths = slide['slide_content']['image_paths']
            dimensions = []
            
            for img_path in image_paths:
                img_info = get_image_dimensions(img_path)
                if img_info:
                    name, width, height = img_info
                    dimensions.append({
                        'path': img_path,
                        'width': width,
                        'height': height
                    })
            
            slide['slide_content']['image_dimensions'] = dimensions
    
    return updated_content

# Update the slide content with dimensions
updated_slide_content = update_slide_content_with_dimensions(updated_slide_content)

# Save updated content with dimensions
with open("updated_slide_content.json", "w", encoding="utf-8") as f:
    json.dump(updated_slide_content, f, indent=2, ensure_ascii=False)


milgya
milgya
milgya
milgya
milgya
milgya
milgya


In [40]:
from openai import OpenAI
import json
from tools import get_image_dimensions

def get_slide_layout(slide_content):
    client = OpenAI()
    
    # Check if slide has images
    has_images = bool(slide_content['slide_content'].get('image_paths', []))
    
    prompt = f"""
    You are a professional presentation designer. Design a layout for this slide content. ENSURE THERE IS NO OVERLAP OF TEXT AND IMAGES.
    
    Guidelines:
    - Create a balanced layout that emphasizes key points
    - Position text and images strategically
    - Use appropriate font sizes (12-24)
    - Consider visual hierarchy
    - Ensure readability
    - {'Maintain image aspect ratio' if has_images else 'Center text in the slide since there are no images'}
    - Standard PowerPoint slide dimensions are (10x5.625 inches)
    - Ensure the text does not overflow the slide.
    
    Slide Content:
    Section: {slide_content['section']}
    Sub-slide: {slide_content['sub_slide']}
    Bullets: {slide_content['slide_content']['bullets']}
    {f"Image Caption: {slide_content['slide_content'].get('image_caption', [])}" if has_images else ""}
    {f"Image Paths: {slide_content['slide_content'].get('image_paths', [])}" if has_images else ""}
    {f"Image Dimensions: {slide_content['slide_content'].get('image_dimensions', [])}" if has_images else ""}
    
    Return the layout in this JSON format:
    {{
        "slide_dimensions": {{
            "width": 10,
            "height": 5.625
        }},
        "title_box": {{
            "x": number,
            "y": number,
            "width": number,
            "height": number,
            "font_size": number,
            "padding": number
        }},
        "subtitle_box": {{
            "x": number,
            "y": number,
            "width": number,
            "height": number,
            "font_size": number,
            "padding": number
        }},
        "bulleted": boolean,
        "content_font_size": number,
        "text_box": {{
            "layout": "left/right/top/bottom",
            "x": number,
            "y": number,
            "width": number,
            "height": number,
            "padding": number
        }},
        "image_paths": ["string"],
        "image_boxes": [
            {{
                "layout": "left/right/top/bottom",
                "x": number,
                "y": number,
                "width": number,
                "height": number,
                "padding": number
            }}
        ]
    }}
    """
    
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=1000,
    )
    
    try:
        layout = json.loads(response.choices[0].message.content.strip())
        # Add title and text from slide_content
        layout["title"] = slide_content['section']
        layout["subtitle"] = f"Sub-slide {slide_content['sub_slide']}" if slide_content['sub_slide'] > 1 else ""
        layout["text"] = slide_content['slide_content']['bullets']
        return layout
    except json.JSONDecodeError:
        print(f"Error parsing layout for slide {slide_content['section']} {slide_content['sub_slide']}")
        return None

# Process each slide and generate layouts
slide_layouts = []
for slide in updated_slide_content:
    layout = get_slide_layout(slide)
    if layout:
        slide_layouts.append(layout)

# Save layouts to JSON file
with open("slide_layouts.json", "w", encoding="utf-8") as f:
    json.dump(slide_layouts, f, indent=2, ensure_ascii=False)

print("Slide layouts have been generated and saved to slide_layouts.json")


Error parsing layout for slide Brief Assessment 1
Error parsing layout for slide Decision Problem 1
Error parsing layout for slide Criteria 1
Error parsing layout for slide Strategic Fit 1
Error parsing layout for slide Risk of Attrition 1
Error parsing layout for slide Execution Complexity 1
Error parsing layout for slide Moderate Risk 1
Error parsing layout for slide Attrition Risk 1
Error parsing layout for slide Execution 1
Error parsing layout for slide Very High 1
Error parsing layout for slide Recommendation 1
Slide layouts have been generated and saved to slide_layouts.json
