In [5]:
import os
import json
import re
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
from openai import OpenAI

In [None]:
# Name of the OpenAI chat model this notebook targets.
MODEL_NAME = "gpt-3.5-turbo" 
# Number of story drafts generated for Best-of-N selection.
# generate_story_candidates slices a fixed list of three temperatures, so at most
# three drafts are produced. Higher temperatures give more creative output but
# also more hallucination.
N_CANDIDATES = 2
# USE_BEST_OF_N = True   

In [None]:
def call_Model(messages: List[Dict[str , str]] , max_tokens: int = 1000, temperature: float = 0.7,
               model: Optional[str] = None) -> str:
    """Send a chat-completion request and return the text of the first choice.

    Args:
        messages: Chat messages, each a dict with "role" and "content" keys.
        max_tokens: Upper bound on the number of tokens in the reply.
        temperature: Sampling temperature forwarded to the API.
        model: Model identifier; defaults to the notebook-level MODEL_NAME so the
            configured model is actually the one being called.

    Returns:
        The reply text. An empty string is returned when the API sends back no
        content, so callers can always treat the result as a string.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    client = OpenAI(api_key=api_key)
    resp = client.chat.completions.create(
        model=model or MODEL_NAME,
        messages=messages,
        stream=False,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # `content` may be None; normalise so `.strip()` / `.split()` downstream never crash.
    return resp.choices[0].message.content or ""


# test_items=[
#     { "role": "user", "content": "Hello!"}
#   ]

# response = call_Model(test_items)

In [None]:
# response

'Hello! How can I assist you today?'

#### Building the JSON parsing helpers

In [None]:
def safe_json_loads(text: str) -> Dict:
    """
    Extract and parse the first JSON object in a model response.

    Handles the common ways a chat model wraps JSON:
    - Extra text before/after the object (even if the trailing text contains braces)
    - Markdown code fences, with or without a "json" tag in any letter case
    - Trailing commas before } or ]

    Raises:
        ValueError: if the text contains no plausible {...} span.
        json.JSONDecodeError: (a ValueError subclass) if the span cannot be parsed
            even after trailing commas are removed.
    """
    text = text.strip()

    # Remove markdown fences; one case-insensitive pass covers ```json, ```JSON and bare ```.
    text = re.sub(r'```(?:json)?\s*', '', text, flags=re.IGNORECASE)

    start = text.find('{')
    end = text.rfind('}')

    if start == -1 or end == -1 or end <= start:
        raise ValueError("No JSON object found in response")

    decoder = json.JSONDecoder()
    candidate = text[start:]

    # raw_decode stops at the end of the first complete JSON value, so any prose
    # after the object (including stray braces) is ignored.
    try:
        parsed, _ = decoder.raw_decode(candidate)
        return parsed
    except json.JSONDecodeError:
        pass

    # Second attempt: drop trailing commas before } or ] and parse again.
    cleaned = re.sub(r',(\s*[}\]])', r'\1', candidate)
    parsed, _ = decoder.raw_decode(cleaned)
    return parsed

def as_int(value, default: int, min_val: Optional[int] = None, max_val: Optional[int] = None) -> int:
    """Safely parse an int with optional clamping.

    Args:
        value: Anything `int()` might accept (number, numeric string, ...).
        default: Returned unchanged when `value` cannot be converted.
        min_val: Lower clamp applied to a successfully parsed value, if given.
        max_val: Upper clamp applied to a successfully parsed value, if given.

    Returns:
        The parsed (and clamped) integer, or `default` on conversion failure.
    """
    try:
        result = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinities: json.loads accepts the literal `Infinity`,
        # and int(float("inf")) raises OverflowError rather than ValueError.
        return default

    if min_val is not None:
        result = max(min_val, result)
    if max_val is not None:
        result = min(max_val, result)
    return result


def as_float(value, default: float) -> float:
    """Safely parse a finite float.

    Args:
        value: Anything `float()` might accept.
        default: Returned when `value` cannot be converted or is NaN/infinite.

    Returns:
        The parsed finite float, or `default`.
    """
    import math  # local import keeps this helper self-contained

    try:
        result = float(value)
    except (TypeError, ValueError, OverflowError):
        return default

    # NaN/inf would slip through min()/max() clamps (min(10.0, nan) evaluates to 10.0),
    # so treat non-finite values as unparseable.
    return result if math.isfinite(result) else default

#### Building the dataclasses

In [17]:
@dataclass
class StoryBrief:
    """Normalized story specification with safety constraints.

    Built from the brief-builder model's JSON via `from_dict`, which fills in
    defaults for missing or malformed fields and always appends a fixed safety
    blocklist to `must_avoid`.
    """
    title_hint: str
    theme: str
    setting: str
    characters: List[Dict[str, str]]
    tone: str
    moral: str
    energy_level: str  # "calm" or "mild_adventure"
    must_include: List[str]
    must_avoid: List[str]
    word_target: int = 500

    @staticmethod
    def _merge_must_avoid(requested) -> List[str]:
        """Return the requested avoid-list plus the hard-coded safety blocklist.

        The input is never mutated. Only non-empty strings are kept, and duplicates
        are removed while preserving first-seen order so the result is deterministic.
        """
        blocklist = [
            "violence", "gore", "scary monsters", "death",
            "profanity", "weapons", "kidnapping", "abuse"
        ]
        items = requested if isinstance(requested, list) else []
        cleaned = [item.strip() for item in items if isinstance(item, str) and item.strip()]
        # dict.fromkeys de-duplicates while keeping insertion order.
        return list(dict.fromkeys(cleaned + blocklist))

    @staticmethod
    def _text(data: Dict, key: str, default: str) -> str:
        """Return data[key] if it is a non-empty string, otherwise `default`."""
        value = data.get(key)
        if isinstance(value, str) and value.strip():
            return value
        return default

    @classmethod
    def from_dict(cls, data: Dict) -> "StoryBrief":
        """Build a brief from parsed model JSON, enforcing defaults and safety rules."""
        # Enforce safety constraints regardless of what the model returned.
        must_avoid = cls._merge_must_avoid(data.get("must_avoid"))

        # Fall back to a default protagonist when the model supplied no characters.
        characters = data.get("characters", [])
        if not isinstance(characters, list) or not characters:
            characters = [{"name": "Luna", "role": "protagonist", "trait": "curious and kind"}]

        must_include = data.get("must_include")
        if not isinstance(must_include, list):
            must_include = []

        # Accept near-miss spellings ("Mild Adventure", "mild-adventure"); anything
        # else falls back to the calmer setting.
        energy_level = cls._text(data, "energy_level", "calm").strip().lower()
        energy_level = energy_level.replace(" ", "_").replace("-", "_")
        if energy_level not in ("calm", "mild_adventure"):
            energy_level = "calm"

        return cls(
            title_hint=cls._text(data, "title_hint", "A Bedtime Story"),
            theme=cls._text(data, "theme", "friendship"),
            setting=cls._text(data, "setting", "a cozy village"),
            characters=characters,
            tone=cls._text(data, "tone", "warm and gentle"),
            moral=cls._text(data, "moral", "kindness matters"),
            energy_level=energy_level,
            must_include=must_include,
            must_avoid=must_avoid,
            word_target=as_int(data.get("word_target"), 500, min_val=300, max_val=800),
        )

    def to_json(self) -> str:
        """Serialize the brief's fields as indented JSON for embedding in prompts."""
        return json.dumps(self.__dict__, indent=2)


In [None]:
@dataclass
class JudgeResult:
    """Structured evaluation of a story, as returned by the judge model.

    The five criterion scores are integers clamped to 1-10, `overall_score` is a
    float clamped to 1.0-10.0, and `passed` is always a real bool.
    """
    age_appropriateness: int
    engagement: int
    story_structure: int
    bedtime_suitability: int
    moral_clarity: int
    overall_score: float
    passed: bool
    strengths: str
    issues: List[str]
    improvements: List[str]

    @staticmethod
    def _resolve_passed(value, overall: float, bedtime: int) -> bool:
        """Turn the model's `passed` field into a real bool.

        A genuine bool is used as-is and textual "true"/"false" variants are parsed.
        Anything else (missing, null, other types) falls back to the threshold rule
        overall >= 7.5 and bedtime >= 7.
        """
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            text = value.strip().lower()
            if text in ("true", "yes", "pass", "passed"):
                return True
            if text in ("false", "no", "fail", "failed"):
                # Without this branch the non-empty string "false" would count as truthy.
                return False
        return overall >= 7.5 and bedtime >= 7

    @classmethod
    def from_dict(cls, data: Dict) -> "JudgeResult":
        """Build a result from parsed judge JSON, clamping scores and filling defaults."""
        overall = as_float(data.get("overall_score"), 5.0)
        overall = max(1.0, min(10.0, overall))

        bedtime = as_int(data.get("bedtime_suitability"), 5, min_val=1, max_val=10)

        passed = cls._resolve_passed(data.get("passed"), overall, bedtime)

        issues = data.get("issues")
        improvements = data.get("improvements")

        return cls(
            age_appropriateness=as_int(data.get("age_appropriateness"), 5, min_val=1, max_val=10),
            engagement=as_int(data.get("engagement"), 5, min_val=1, max_val=10),
            story_structure=as_int(data.get("story_structure"), 5, min_val=1, max_val=10),
            bedtime_suitability=bedtime,
            moral_clarity=as_int(data.get("moral_clarity"), 5, min_val=1, max_val=10),
            overall_score=overall,
            passed=passed,
            # `or ""` keeps a JSON null from becoming the literal text "None".
            strengths=str(data.get("strengths") or ""),
            # Keep at most five entries of each feedback list.
            issues=[str(i) for i in issues[:5]] if isinstance(issues, list) else [],
            improvements=[str(i) for i in improvements[:5]] if isinstance(improvements, list) else [],
        )

    @classmethod
    def fallback(cls) -> "JudgeResult":
        """Return sensible defaults if parsing fails."""
        return cls(
            age_appropriateness=5, engagement=5, story_structure=5,
            bedtime_suitability=5, moral_clarity=5, overall_score=5.0,
            passed=False, strengths="Unable to parse evaluation",
            issues=["Evaluation failed"], improvements=["Regenerate story"]
        )

In [None]:
# Stage 1 prompt template, filled by build_brief via str.format(user_request=...).
# The doubled braces in `{{name, role, trait}}` are format escapes that render as
# literal braces in the final prompt.
BRIEF_BUILDER_PROMPT = """You are a children's story request analyzer. Convert the user's request into a safe, age-appropriate story specification for ages 5-10.

CRITICAL SAFETY RULES:
- If user asks for scary/violent/adult content, TRANSFORM it into a cozy, safe version
- "Scary monster" → "friendly creature who looks different"
- "Fighting/battles" → "friendly competition or teamwork challenge"  
- "Getting lost/kidnapped" → "a short adventure before returning home safely"

Output ONLY valid JSON with these fields:
- title_hint: evocative but calming title idea
- theme: one of [adventure, friendship, nature, fantasy, animals, family, discovery]
- setting: specific cozy location
- characters: array of {{name, role, trait}}
- tone: "calm", "playful", "gentle adventure", or "cozy"
- moral: age-appropriate lesson (1 sentence)
- energy_level: "calm" or "mild_adventure" 
- must_include: key elements from user request
- must_avoid: content to exclude (safety)
- word_target: 400-600

User request: "{user_request}"
"""



In [26]:
def build_brief(user_request: str) -> StoryBrief:
    """Stage 1: Normalize and safety-transform user input.

    Asks the model to convert the free-form request into a JSON brief and parses
    it into a StoryBrief. If the reply cannot be parsed, a generic cozy brief is
    returned instead of raising.

    Args:
        user_request: The user's free-form story request.

    Returns:
        A StoryBrief whose `must_avoid` always includes the hard-coded safety
        blocklist, on both the normal and the fallback path.
    """
    prompt = BRIEF_BUILDER_PROMPT.format(user_request=user_request)

    response = call_Model(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.3  # Low temp for consistent extraction
    )

    try:
        data = safe_json_loads(response)
        return StoryBrief.from_dict(data)
    except (ValueError, TypeError, AttributeError):
        # ValueError covers json.JSONDecodeError (its subclass); TypeError and
        # AttributeError cover a missing response or unexpectedly shaped JSON.
        # The fallback goes through from_dict so the safety blocklist is applied
        # here too.
        # NOTE(review): on this path the raw request reaches `must_include` without
        # the model's safety transform; confirm that is acceptable.
        return StoryBrief.from_dict({
            "title_hint": "A Cozy Adventure",
            "theme": "friendship",
            "setting": "a peaceful meadow",
            "characters": [{"name": "Luna", "role": "protagonist", "trait": "curious and kind"}],
            "tone": "warm and gentle",
            "moral": "friendship makes everything better",
            "energy_level": "calm",
            "must_include": [user_request],
            "must_avoid": ["violence", "scary content", "danger"],
            "word_target": 500,
        })

# Smoke test for stage 1: makes a live model call through call_Model on every run.
test_response = build_brief("A story of monkey on a tree eating mangoes")

In [27]:
test_response

StoryBrief(title_hint='Milo the Monkey and the Mango Tree', theme='nature', setting='lush jungle with a tall mango tree', characters=[{'name': 'Milo', 'role': 'curious monkey', 'trait': 'playful'}, {'name': 'Sunny', 'role': 'friendly squirrel', 'trait': 'helpful'}], tone='gentle adventure', moral='Sharing with friends brings joy and strengthens bonds.', energy_level='calm', must_include=[], must_avoid=['violence', 'profanity', 'death', 'abuse', 'scary monsters', 'weapons', 'kidnapping', 'gore'], word_target=400)

In [28]:
# Stage 2 prompt template, filled by plan_story via str.format(brief_json=...),
# where brief_json is the output of StoryBrief.to_json().
STORY_PLANNER_PROMPT = """You are a children's story architect. Create a 3-act outline for a bedtime story.

Story Brief:
{brief_json}

Create a detailed outline with:

ACT 1 - SETUP (~100 words):
- Introduce protagonist in their safe, cozy world
- Establish a small wish, curiosity, or gentle mystery
- Sensory details: warm lights, soft textures, familiar sounds

ACT 2 - ADVENTURE (~200 words):
- A gentle journey or discovery (NOT danger)
- 1-2 small obstacles solved with kindness, cleverness, or help from friends
- New friend met OR lesson learned through experience
- Maintain cozy tone even during "adventure"

ACT 3 - HOMECOMING (~100 words):
- Return to safety, problem resolved
- Moral emerges naturally (not preachy)
- Wind-down with sleep cues: yawning, heavy eyelids, cozy bed
- Final image: stars, moon, peaceful night

Output the outline as bullet points for each act.
"""


In [29]:
def plan_story(brief: StoryBrief) -> str:
    """Stage 2: turn the brief into a 3-act outline.

    Renders STORY_PLANNER_PROMPT with the brief's JSON and returns the model's
    reply text unchanged.
    """
    planner_message = {
        "role": "user",
        "content": STORY_PLANNER_PROMPT.format(brief_json=brief.to_json()),
    }
    # Moderate temperature: some creativity, but the structure should stay coherent.
    outline = call_Model(messages=[planner_message], max_tokens=800, temperature=0.6)
    return outline

# Smoke test for stage 2: depends on `test_response` from the stage 1 cell and makes a live model call.
test_story_plan = plan_story(test_response)

In [30]:
test_story_plan

"ACT 1 - SETUP:\n- Milo the Monkey is swinging happily through the lush jungle, enjoying the warm sunlight filtering through the leaves.\n- He comes across a tall mango tree with ripe, juicy fruits hanging from its branches, but he can't reach them.\n- Milo's curiosity is piqued, and he wishes to taste a delicious mango.\n\nACT 2 - ADVENTURE:\n- Milo meets Sunny the friendly squirrel, who offers to help him reach the mangoes.\n- Together, they come up with a clever plan to use a nearby vine as a swing to reach the fruits.\n- Along the way, they encounter a mischievous parrot who tries to steal their mangoes, but they outsmart him with kindness.\n- Milo learns the value of teamwork and sharing with friends.\n\nACT 3 - HOMECOMING:\n- With the help of Sunny, Milo finally tastes the sweet mango and shares it with his new friend.\n- As they sit under the mango tree, they talk about their adventure and how much fun they had together.\n- The moral of sharing with friends brings joy and streng

In [31]:
# Stage 3 prompt template, filled via str.format(outline=..., brief_json=..., word_target=...)
# by generate_story and generate_story_candidates.
# NOTE(review): the prompt asks for ±15% of the word target while preflight_check
# accepts ±25%; confirm the looser check is intentional.
STORYTELLER_PROMPT = """You are a beloved children's storyteller. Write a bedtime story following this outline.

OUTLINE:
{outline}

BRIEF:
{brief_json}

REQUIREMENTS:
- Target: ages 5-10, bedtime reading
- Length: {word_target} words (±15%)
- Vocabulary: simple sentences, no complex words
- Sensory details: soft textures, warm colors, gentle sounds
- NO: violence, scary content, danger, villains, getting hurt
- Ending: character settling into sleep with stars/moon imagery

Write the complete story now. Include a title.
"""

In [32]:
def generate_story(outline: str, brief: StoryBrief) -> str:
    """Stage 3: write one full story draft from the outline and brief.

    Renders STORYTELLER_PROMPT and returns the model's reply text unchanged.
    """
    template_fields = {
        "outline": outline,
        "brief_json": brief.to_json(),
        "word_target": brief.word_target,
    }
    storyteller_message = {
        "role": "user",
        "content": STORYTELLER_PROMPT.format(**template_fields),
    }
    # High temperature for more engaging, varied prose.
    draft = call_Model(messages=[storyteller_message], max_tokens=1500, temperature=0.8)
    return draft

# Smoke test for stage 3: depends on earlier test cells and makes a live model call.
teste_generated_story = generate_story(test_story_plan , test_response)

In [33]:
teste_generated_story

"Title: Milo the Monkey and the Sweet Mango Adventure\n\nOnce upon a time, in a lush jungle filled with vibrant colors and soothing sounds, there lived a curious monkey named Milo. Milo loved to swing from tree to tree, feeling the warm sunlight on his fur as he explored the wonders of the jungle.\n\nOne day, while swinging happily through the branches, Milo spotted a tall mango tree with ripe, juicy fruits hanging from its branches. His eyes sparkled with delight at the sight of the delicious mangoes, but they were just out of his reach. Determined to taste the sweet fruit, Milo's curiosity was piqued.\n\nAs Milo pondered how to reach the mangoes, he was approached by Sunny, a friendly squirrel with a kind smile. Sunny offered to help Milo reach the mangoes, and together, they hatched a clever plan. They found a sturdy vine nearby and used it as a swing to reach the fruits, taking turns to grab the mangoes.\n\nAlong the way, they encountered a mischievous parrot who tried to steal the

In [35]:
# Words counted as bedtime "sleep cues". preflight_check requires at least two of
# them (whole-word match) in the story's ending.
SLEEP_CUE_WORDS = {"yawn", "yawned", "yawning", "sleep", "sleepy", "asleep", "dream", "dreaming",
                   "tucked", "blanket", "pillow", "cozy", "stars", "moon", "moonlight", 
                   "eyelids", "drowsy", "snuggle", "snuggled", "bed", "goodnight"}

# Words preflight_check flags as inappropriate anywhere in the story. Matching is
# whole-word, so inflected forms not listed here (e.g. "swords") are not caught.
TABOO_WORDS = {"kill", "killed", "murder", "blood", "dead", "death", "die", "dying",
               "terrified", "horrified", "nightmare", "demon", "devil",
               "gun", "knife", "sword", "weapon", "fight", "fighting", "attack", "violence",
               "hate", "hated", "stupid", "idiot"}

In [36]:
def contains_word(text: str, word: str) -> bool:
    """Return True when `word` occurs in `text` as a whole word (case-sensitive)."""
    # re.escape keeps any regex metacharacters in `word` literal; \b anchors both ends.
    whole_word_pattern = r"\b" + re.escape(word) + r"\b"
    match = re.search(whole_word_pattern, text)
    return match is not None


def preflight_check(story: str, brief: StoryBrief) -> List[str]:
    """
    Quick deterministic checks before calling the LLM judge.

    Checks, in order:
    1. Word count is within ±25% of `brief.word_target`.
    2. The ending (last 100 words, or the whole story if shorter) contains at
       least two distinct sleep-cue words.
    3. No taboo word appears anywhere in the story.

    Returns list of issues to fix (empty if all checks pass).
    """
    issues = []
    words = story.split()
    word_count = len(words)
    story_lower = story.lower()

    # Checks if the word count is within ±25% of target
    min_words = int(brief.word_target * 0.75)
    max_words = int(brief.word_target * 1.25)
    if word_count < min_words:
        issues.append(f"Story too short ({word_count} words, need at least {min_words})")
    elif word_count > max_words:
        issues.append(f"Story too long ({word_count} words, max {max_words})")

    # Check for sleep cues in last ~100 words (ending)
    last_section = " ".join(words[-100:]).lower() if len(words) > 100 else story_lower
    # Iterate in sorted order so results do not depend on set iteration order,
    # which can differ between interpreter sessions.
    found_sleep_cues = [w for w in sorted(SLEEP_CUE_WORDS) if contains_word(last_section, w)]
    if len(found_sleep_cues) < 2:
        issues.append("Ending needs more sleep cues (yawning, stars, moon, cozy bed, etc.)")

    found_taboo = [w for w in sorted(TABOO_WORDS) if contains_word(story_lower, w)]
    if found_taboo:
        # Only the first three are reported to keep the refinement prompt short.
        issues.append(f"Remove inappropriate content: {', '.join(found_taboo[:3])}")

    return issues

In [37]:
def generate_story_candidates(outline: str, brief: StoryBrief, n: int = 2) -> List[str]:
    """
    Stage 3 (Best-of-N): Generate multiple story candidates.
    Varies temperature slightly for diversity.

    Args:
        outline: The 3-act outline produced by the planning stage.
        brief: The story brief; its JSON and word target are embedded in the prompt.
        n: Number of candidates to produce. Values above three reuse the same
           temperature ladder cyclically; zero or negative values yield an empty list.

    Returns:
        A list of `n` story texts, one model call each.
    """
    base_temperatures = [0.75, 0.85, 0.95]  # Slight variation for diversity
    # Cycle through the ladder so n > 3 really yields n drafts; max(0, n) stops a
    # negative n from silently producing drafts.
    temperatures = [base_temperatures[i % len(base_temperatures)] for i in range(max(0, n))]

    # The prompt is identical for every candidate, so build it once.
    prompt = STORYTELLER_PROMPT.format(
        outline=outline,
        brief_json=brief.to_json(),
        word_target=brief.word_target
    )

    candidates = []
    for temp in temperatures:
        story = call_Model(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=1500,
            temperature=temp
        )
        candidates.append(story)

    return candidates


def quick_score_story(story: str, brief: StoryBrief) -> float:
    """
    Fast scoring for Best-of-N ranking (cheaper than full judge).
    Uses preflight checks + abbreviated judge call.

    The model's 1-10 score (5.0 if it cannot be parsed) is reduced by 1.5 for each
    preflight issue, and the result is always clamped to the range 1.0-10.0.
    """
    # Preflight penalties
    preflight_issues = preflight_check(story, brief)
    preflight_penalty = len(preflight_issues) * 1.5  # -1.5 per issue

    # Quick judge (fewer tokens, just get overall score)
    quick_judge_prompt = f"""Rate this bedtime story 1-10 for a child age 5-10.
Consider: age-appropriate language, engaging plot, calming ending with sleep cues.

Story:
{story}

Reply with ONLY a JSON object: {{"score": X, "reason": "one sentence"}}"""

    response = call_Model(
        messages=[{"role": "user", "content": quick_judge_prompt}],
        max_tokens=100,
        temperature=0.1
    )

    try:
        data = safe_json_loads(response)
        score = as_float(data.get("score"), 5.0)
    except (ValueError, TypeError, AttributeError):
        # ValueError includes json.JSONDecodeError; the other two cover a missing
        # or non-string response.
        score = 5.0

    if score != score:
        # NaN compares unequal to itself; without this guard the clamp below would
        # turn it into a perfect 10.
        score = 5.0

    # Clamp on every path so parse failures are ranked on the same scale.
    return max(1.0, min(10.0, score - preflight_penalty))


In [None]:
def select_best_candidate(candidates: List[str], brief: StoryBrief, verbose: bool = True) -> Tuple[str, int]:
    """
    Rank candidates by quick score, return best one and its index.

    A single candidate is returned directly without a scoring call. On ties the
    earliest candidate wins.

    Raises:
        ValueError: if `candidates` is empty.
    """
    if not candidates:
        raise ValueError("select_best_candidate requires at least one candidate")

    if len(candidates) == 1:
        return candidates[0], 0

    scores = []
    for i, story in enumerate(candidates):
        score = quick_score_story(story, brief)
        scores.append(score)
        if verbose:
            word_count = len(story.split())
            print(f"    Candidate {i+1}: {score:.1f}/10 ({word_count} words)")

    # max() returns the first maximal index, matching list.index(max(...)).
    best_idx = max(range(len(scores)), key=scores.__getitem__)
    return candidates[best_idx], best_idx




In [38]:
# Stage 4 prompt template, filled by judge_story via str.format(brief_json=..., story=...).
# The doubled braces around the example JSON are format escapes that render as
# literal braces. The pass rule stated here (overall >= 7.5 and bedtime >= 7) is the
# same one JudgeResult.from_dict applies when the model omits `passed`.
JUDGE_PROMPT = """You are a children's literature expert evaluating a bedtime story for ages 5-10.

BRIEF (what was requested):
{brief_json}

STORY:
{story}

Rate 1-10 on each criterion:
1. age_appropriateness: Simple vocabulary? Safe content? Understandable to 5-year-old?
2. engagement: Likable characters? Holds attention? Vivid but not overstimulating?
3. story_structure: Clear beginning/middle/end? Satisfying arc? 
4. bedtime_suitability: Calming ending? Sleep cues (yawning, stars, cozy bed)? No cliffhangers?
5. moral_clarity: Gentle lesson? Natural (not preachy)?

Calculate overall_score = average of all scores.
Set passed = true if overall_score >= 7.5 AND bedtime_suitability >= 7.

Output ONLY valid JSON:
{{
    "age_appropriateness": 8,
    "engagement": 7,
    "story_structure": 8,
    "bedtime_suitability": 9,
    "moral_clarity": 7,
    "overall_score": 7.8,
    "passed": true,
    "strengths": "What works well (1-2 sentences)",
    "issues": ["Specific problem 1", "Specific problem 2"],
    "improvements": ["Actionable fix 1", "Actionable fix 2"]
}}
"""

In [39]:
def judge_story(story: str, brief: StoryBrief) -> JudgeResult:
    """Stage 4: Evaluate story quality.

    Sends the story and brief to the judge prompt and parses the JSON verdict.

    Returns:
        The parsed JudgeResult, or `JudgeResult.fallback()` (a non-passing 5.0
        result) when the reply cannot be parsed.
    """
    prompt = JUDGE_PROMPT.format(
        brief_json=brief.to_json(),
        story=story
    )

    response = call_Model(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.2
    )

    try:
        data = safe_json_loads(response)
        return JudgeResult.from_dict(data)
    except (ValueError, TypeError, AttributeError):
        # ValueError includes json.JSONDecodeError; TypeError/AttributeError cover a
        # missing or non-string response, so a bad reply never aborts the pipeline.
        return JudgeResult.fallback()

In [40]:
# Smoke test for stage 4: judges the generated sample story (live model call).
test_judge = judge_story( teste_generated_story , test_response)
test_judge

JudgeResult(age_appropriateness=8, engagement=7, story_structure=8, bedtime_suitability=9, moral_clarity=7, overall_score=7.8, passed=True, strengths='The story has a calming and engaging tone with likable characters that teach the value of teamwork and sharing.', issues=[], improvements=[])

In [None]:
# Stage 5 prompt template, filled by refine_story via
# str.format(story=..., issues=..., improvements=..., word_target=...).
REFINER_PROMPT = """You are a children's story editor. Revise this story based on feedback.

ORIGINAL STORY:
{story}

ISSUES TO FIX:
{issues}

SPECIFIC IMPROVEMENTS NEEDED:
{improvements}

RULES:
- Keep the same characters and plot
- Fix the issues while preserving what works
- Maintain bedtime safety and cozy tone
- Keep length around {word_target} words
- Ensure ending has sleep cues (yawning, stars, cozy bed)

Write the revised story with title:
"""


In [47]:
def refine_story(story: str, result: JudgeResult, brief: StoryBrief) -> str:
    """Stage 5: rewrite the story using the judge's feedback.

    The result's issues and improvements are rendered as "- item" bullet lists;
    when either list is empty a generic instruction is used in its place. Returns
    the model's reply text unchanged.
    """
    issue_bullets = [f"- {issue}" for issue in result.issues]
    improvement_bullets = [f"- {imp}" for imp in result.improvements]

    if issue_bullets:
        issues_text = "\n".join(issue_bullets)
    else:
        issues_text = "- General polish needed"

    if improvement_bullets:
        improvements_text = "\n".join(improvement_bullets)
    else:
        improvements_text = "- Improve flow and bedtime suitability"

    refiner_message = {
        "role": "user",
        "content": REFINER_PROMPT.format(
            story=story,
            issues=issues_text,
            improvements=improvements_text,
            word_target=brief.word_target,
        ),
    }
    revised = call_Model(messages=[refiner_message], max_tokens=1500, temperature=0.7)
    return revised



In [41]:
test_response

StoryBrief(title_hint='Milo the Monkey and the Mango Tree', theme='nature', setting='lush jungle with a tall mango tree', characters=[{'name': 'Milo', 'role': 'curious monkey', 'trait': 'playful'}, {'name': 'Sunny', 'role': 'friendly squirrel', 'trait': 'helpful'}], tone='gentle adventure', moral='Sharing with friends brings joy and strengthens bonds.', energy_level='calm', must_include=[], must_avoid=['violence', 'profanity', 'death', 'abuse', 'scary monsters', 'weapons', 'kidnapping', 'gore'], word_target=400)

In [43]:
# Deliberately inappropriate story used as a negative test case: the cells below
# pass it to judge_story and then to refine_story.
anti_story = """
The Carnage of the Cursed Jungle
Milo the Monkey was not playful today; he was filled with pure hate. He sat atop the tallest mango tree, clutching a rusted, jagged sword he had found in the blood-soaked mud of the jungle floor. This wasn't a "lush jungle" anymore—it was a place of death and nightmare.

Sunny the Squirrel scurried up the trunk, hoping to find a snack. "Milo, can we share?" she asked innocently.

"Get away, you stupid idiot!" Milo screamed, his eyes red with violence. "This tree is mine! I will kill anyone who touches my fruit!"

Suddenly, the ground shook as a scary monster, a demon of smoke and gore, erupted from the roots. It was horrified by the sight of the peaceful squirrel and lunged forward in a violent attack. Milo didn't help his friend; he laughed as the monster began a cruel kidnapping, dragging Sunny into a dark pit.

The air was filled with the sound of fighting and screams. There was no gentle adventure here, only murder and horror. Milo looked down at the dead grass and felt a surge of violence. He sharpened his knife-like claws, ready for a battle that would never end.

The sun did not set peacefully. Instead, the sky turned the color of blood. There were no stars, no moon, and certainly no cozy blankets. No one was yawning. No one was sleepy. The jungle was alive with the sound of war, and the nightmare was just beginning.
"""

In [None]:
# Negative test: judge the deliberately inappropriate story (live model call).
test_judge = judge_story( anti_story , test_response )
test_judge

JudgeResult(age_appropriateness=5, engagement=1, story_structure=1, bedtime_suitability=1, moral_clarity=1, overall_score=1.0, passed=False, strengths='There are no strengths in this story.', issues=['Inappropriate content for children', 'Violent and scary themes'], improvements=['Completely rewrite the story with appropriate content for children'])

In [48]:
# Refine the inappropriate story using the judge feedback stored in `test_judge` (live model call).
refined_story = refine_story(anti_story ,test_judge , test_response)
refined_story

'Sunshine in the Enchanted Jungle\n\nMilo the Monkey was feeling a little grumpy today; he had lost his favorite banana and couldn\'t find it anywhere. He sat on a cozy branch of the tallest mango tree, holding a shiny stick he had found on the jungle floor. This wasn\'t just any jungle—it was the Enchanted Jungle, a place of mystery and wonder.\n\nSunny the Squirrel scampered up the tree trunk, looking for her acorns. "Milo, do you want to play with me?" she asked with a smile.\n\nMilo sighed and handed her the stick. "Let\'s play together, Sunny. We can pretend this stick is a magic wand and go on an adventure in the Enchanted Jungle!"\n\nAs they played, a friendly butterfly fluttered by, sprinkling sparkles of joy around them. Suddenly, a group of colorful birds joined in, singing a cheerful song. The ground rumbled slightly, but instead of a scary monster, a gentle giant turtle emerged, offering to play hide and seek with them.\n\nMilo and Sunny laughed and played with their new fr

In [49]:
# Re-judge the refined story (live model call); this overwrites the earlier `test_judge`.
test_judge = judge_story( refined_story , test_response )
test_judge

JudgeResult(age_appropriateness=8, engagement=7, story_structure=8, bedtime_suitability=9, moral_clarity=7, overall_score=7.8, passed=True, strengths='The story has a calming and engaging tone with likable characters that promote friendship and sharing.', issues=[], improvements=[])

In [50]:
def generate_bedtime_story(user_request: str, max_iterations: int = 3, verbose: bool = True) -> Tuple[str, JudgeResult]:
    """
    Full pipeline: Brief → Plan → Generate (Best-of-N) → [Preflight] → Judge → [Refine] → Output

    Args:
        user_request: Free-form description of the story the user wants.
        max_iterations: Maximum number of judge rounds. Each round may be preceded
            by up to two preflight-driven refinements and, unless it is the last
            round, is followed by a judge-driven refinement when the story fails.
        verbose: When True, progress is printed to stdout.

    Returns (final_story, final_evaluation). The returned story is the one that
    passed the judge, or otherwise the highest-scoring judged version. If no judge
    round ran (max_iterations <= 0), the unjudged draft is returned together with
    `JudgeResult.fallback()`.
    """
    def log(message: str) -> None:
        """Print progress output only in verbose mode."""
        if verbose:
            print(message)

    log("\n" + "="*60)
    log("BEDTIME STORY GENERATOR")
    log("="*60)
    log(f"\nRequest: {user_request}\n")

    log("Stage 1: Analyzing request & building brief...")
    brief = build_brief(user_request)
    log(f"   Theme: {brief.theme} | Tone: {brief.tone} | Energy: {brief.energy_level}")

    log("Stage 2: Planning story structure...")
    outline = plan_story(brief)

    log("Stage 3: Writing story...")

    if N_CANDIDATES > 1:
        log(f"   Generating {N_CANDIDATES} candidates for Best-of-N selection...")
        candidates = generate_story_candidates(outline, brief, n=N_CANDIDATES)
        story, best_idx = select_best_candidate(candidates, brief, verbose=verbose)
        log(f"Selected candidate {best_idx + 1}")
    else:
        story = generate_story(outline, brief)

    best_story = story
    # None means "nothing judged yet". Seeding with a synthetic 5.0 result would
    # stop any real evaluation scoring 5.0 or below from ever being recorded.
    best_result = None

    MAX_PREFLIGHT_FIXES = 2  # Don't spend more than 2 refine calls on preflight issues

    for judge_round in range(max_iterations):

        # Inner loop: fix preflight issues before calling judge
        for preflight_attempt in range(MAX_PREFLIGHT_FIXES):
            preflight_issues = preflight_check(story, brief)

            if not preflight_issues:
                break  # Preflight passed, proceed to judge

            log(f"Pre-flight check failed (attempt {preflight_attempt + 1}/{MAX_PREFLIGHT_FIXES}):")
            for issue in preflight_issues:
                log(f"   • {issue}")
            log("   Refining before judge evaluation...")

            # Quick fix without calling judge: wrap the preflight issues in a
            # synthetic result so refine_story can consume them.
            quick_result = JudgeResult(
                age_appropriateness=5, engagement=5, story_structure=5,
                bedtime_suitability=4, moral_clarity=5, overall_score=5.0,
                passed=False, strengths="",
                issues=preflight_issues,
                improvements=preflight_issues
            )
            story = refine_story(story, quick_result, brief)

        # Full judge evaluation (always runs once per judge_round)
        log(f"Stage 4: Judging (round {judge_round + 1}/{max_iterations})...")

        result = judge_story(story, brief)

        # Track best: the first judged story is always recorded, and a passing
        # story always wins (the loop stops on it) even if an earlier failing
        # version had a higher overall score.
        if best_result is None or result.passed or result.overall_score > best_result.overall_score:
            best_story = story
            best_result = result

        log(f"   Scores: Age={result.age_appropriateness} | Engage={result.engagement} | "
            f"Structure={result.story_structure} | Bedtime={result.bedtime_suitability} | Moral={result.moral_clarity}")
        log(f"   Overall: {result.overall_score}/10 | Pass: {'Pass' if result.passed else 'Failure'}")

        if result.passed:
            log("Story passed quality threshold!")
            break

        if judge_round < max_iterations - 1:
            log("Stage 5: Refining based on feedback...")
            if result.improvements:
                log(f"   Fixing: {result.improvements[0]}")
            story = refine_story(story, result, brief)
        else:
            log("Max iterations reached. Using best version.")

    if best_result is None:
        # No judge round ran; report the placeholder evaluation.
        best_result = JudgeResult.fallback()

    return best_story, best_result


In [51]:
def print_story(story: str, result: JudgeResult):
    """Print the finished story followed by its quality score and strengths."""
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    # Banner
    print("\n" + heavy_rule)
    print("YOUR BEDTIME STORY")
    print(heavy_rule + "\n")

    print(story)

    # Evaluation summary
    print("\n" + light_rule)
    print(f"Quality Score: {result.overall_score}/10")
    print(f"Strengths: {result.strengths}")
    print(light_rule + "\n")


In [None]:
def main():
    """Interactive entry point: prompt for a request, run the pipeline, print the story.

    An empty answer falls back to a built-in example request.
    """
    print("\n Welcome to the Tinkle: Your Story Generator! ")
    print("Tell me what kind of story you would like, and I will create")
    print("a perfect bedtime tale for ages 5-10.\n")
    print("Examples:")
    print("  • A girl named Alice and her cat friend Bob")
    print("  • A brave little robot who learns about friendship")
    print("  • A magical garden where vegetables come alive\n")

    user_input = input("What kind of story do you want to hear?\n> ").strip()

    if not user_input:
        # Default request so the demo still produces a story.
        user_input = "A story about a girl named Alice and her best friend Bob, who happens to be a cat."

    story, result = generate_bedtime_story(user_input, max_iterations=2, verbose=True)
    print_story(story, result)

In [None]:
# Start the interactive generator when this code runs as the main module.
if __name__ == "__main__":
    main()