In [None]:
from IPython.display import display
from zipfile import ZipFile
from ipywidgets import (Button, Output, Layout, Textarea, FileUpload, Label, 
                        ToggleButtons, HBox, VBox, HTML, Text, AppLayout, Box, 
                        ToggleButton, Audio as WAudio, GridBox, Dropdown)
from PyPDF2 import PdfReader
from io import BytesIO, StringIO
from datetime import datetime, UTC
import string, re, pandas as pd, csv, json, wave, tiktoken
from os import getenv
from base64 import b64encode
from pathlib import Path
from time import sleep
from random import random
from pandas import DataFrame, concat
from dotenv import load_dotenv
from openai import OpenAI

In [None]:
# Read settings from the local env file before any getenv() lookups below.
load_dotenv("api.env")

api_key = getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY not found in api.env!")

# Client-wide default timeout; save_clip passes REQUEST_TIMEOUT per request instead.
client = OpenAI(api_key=api_key, timeout=60.0)
REQUEST_TIMEOUT = 90
# pricing inputs
# `or <default>` covers a variable that is present but blank: float("") would raise ValueError.
TTS_PRICE_PER_1K_TOKENS = float(getenv("TTS_PRICE_PER_1K_TOKENS", "0") or 0)
SECS_PER_TOKEN_EST = float(getenv("TTS_SECS_PER_TOKEN_EST", "0.22") or 0.22)  # ~0.22s/token baseline
HUMAN_VO_RATE_PER_FINISHED_MIN = float(getenv("HUMAN_VO_RATE_PER_FINISHED_MIN", "0") or 0)


In [None]:
# Page-level stylesheet injected as an HTML widget: animated gradient on <body>,
# transparent notebook containers, and hidden Jupyter chrome (toolbar, input areas, prompts).
global_styles = HTML("""
<style>
/* Global reset and background setup */
body {
  margin: 0 !important;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  background-size: 400% 400%;
  animation: gradientShift 15s ease infinite;
}

@keyframes gradientShift {
  0% { background-position: 0% 50%; }
  50% { background-position: 100% 50%; }
  100% { background-position: 0% 50%; }
}                 

/* Ensure Jupyter elements are transparent */
.jp-Notebook, .jp-NotebookPanel, .jp-Cell, .jp-OutputArea, .jp-OutputArea-child, 
.jp-OutputArea-output, .lm-Widget, .p-Widget, .widget-area, .output_wrapper,
.output, .output_area, .prompt, .container {
  background: transparent !important;
  overflow: visible !important;
}

/* Hide Jupyter UI elements */
.jp-Toolbar, .jp-Cell-inputWrapper, .jp-InputArea, .jp-InputPrompt,
.jp-OutputPrompt, .prompt, #header, #site {
  display: none !important;
}

/* Widget container fix */
.widget-container {
  background: transparent !important;
  width: 100% !important;
  overflow: visible !important;
}
                                          
</style>
""")
# Displaying the widget is what actually inserts the <style> element into the page.
display(global_styles)

In [None]:
def _add_class_safe(w, cls):
    if hasattr(w, "add_class"):
        try:
            w.add_class(cls)
            return
        except Exception:
            pass 

    classes = list(getattr(w, "_dom_classes", ()))
    if cls not in classes:
        classes.append(cls)
    w._dom_classes = tuple(classes)

In [None]:
# --- Studio input widgets -------------------------------------------------
# PDF uploader: single file, restricted to .pdf, hidden until PDF mode is chosen.
upload = FileUpload(accept = ".pdf", multiple = False)
upload.description = "Upload PDF"  
upload.layout.display = "none"   # start hidden

# Script editor; the placeholder doubles as in-app usage instructions.
text_area = Textarea(
    value = "",
    placeholder =       
    "Type or paste your script here:\n"
    "• Each non-empty line becomes one audio clip (first 3 lines preview).\n"
    "• To auto-detect speakers, start lines with either:\n"
    "    [Alice] Hello there    —or—    Alice: Hello there\n"
    "• Tips: keep 1–3 sentences per line; long text auto-chunks.\n"
    "• Set a default voice below; per-speaker voices are in the Speakers tab.\n",
    description = "",
    layout = Layout(width = "100%", height = "200px")
)

# Input-mode switch; the values "type" / "upload" are what _toggle_inputs checks.
choice = ToggleButtons(
    options = [("Use text box", "type"), ("Use PDF",  "upload")], 
    button_style = "primary",    
    tooltips=["Generate audio from written text in text box", "Generate audio from uploaded PDF"],
    value = "type"
)

generate_button = Button(
    description = "Generate Audio", 
    button_style = "primary"
)

# Shared output area; save_clip also prints its final TTS error here.
output_box = Output() 

# (voice id, display label) pairs; the order here drives the voice toggle and the voices grid.
OPENAI_TTS_VOICES = [
    ("alloy",   "Alloy"),
    ("ash",     "Ash"),
    ("ballad",  "Ballad"),
    ("coral",   "Coral"),
    ("echo",    "Echo"),
    ("fable",   "Fable"),
    ("nova",    "Nova"),
    ("onyx",    "Onyx"),
    ("sage",    "Sage"),
    ("shimmer", "Shimmer"),
]

# Short blurb per voice id, used for toggle tooltips and voice cards.
VOICE_DESCRIPTIONS = {
    "alloy":"Neutral, warm; general narration",
    "ash":"Bright, light; upbeat explainers",
    "ballad":"Narrative, measured; long reads",
    "coral":"Friendly, clear; training content",
    "echo":"Crisp, precise; technical text",
    "fable":"Storyteller; rich prosody",
    "nova":"Energetic, modern; promos",
    "onyx":"Deep, authoritative; briefings",
    "sage":"Calm, balanced; reports",
    "shimmer":"Expressive, dynamic; emphasis"
}

# (original, replacement) rows merged into the pronunciation library while demo_mode is on.
DEMO_SEED = [
    ("NASA", "National Aeronautics Space Administration"),
    ("AI", "Artificial Intelligence"),
    ("JWST", "James Webb Space Telescope"),
    ("SLS", "Space Launch System")
]

demo_mode = False  # toggled on by the demo button

# Defaults used by save_clip when no voice/model is passed; overridable via environment.
OPENAI_TTS_VOICE  = getenv("OPENAI_TTS_VOICE", "alloy")
OPENAI_TTS_MODEL  = getenv("OPENAI_TTS_MODEL", "gpt-4o-mini-tts")
# Output folders (relative to the working directory), created at cell execution time.
SAMPLES_DIR = Path("samples"); 
SAMPLES_DIR.mkdir(exist_ok=True)
CLIPS_DIR   = Path("clips");   
CLIPS_DIR.mkdir(exist_ok=True)
# Map id -> display label (e.g., "alloy" -> "Alloy")
VOICE_NAME = dict(OPENAI_TTS_VOICES)

# Cache folder for previews
VOICE_PREVIEW_DIR = SAMPLES_DIR / "voice_previews"
VOICE_PREVIEW_DIR.mkdir(parents=True, exist_ok=True)

def get_or_build_voice_preview(vid: str) -> str:
    """Return the path of the cached preview clip for voice ``vid``, rendering it on first use.

    The clip lives at ``VOICE_PREVIEW_DIR/<vid>.wav``; when that file is absent
    a short introduction line is synthesized with ``save_clip``.
    """
    cached = VOICE_PREVIEW_DIR / f"{vid}.wav"
    if cached.exists():
        return str(cached)

    line = f"This is the {VOICE_NAME.get(vid, vid)} voice for the NASA AI Lab Text-to-Speech Studio."
    save_clip(line, str(cached), engine="openai", voice=vid, model=OPENAI_TTS_MODEL)
    return str(cached)

def _is_wav(path: str) -> bool:
    try:
        with open(path, "rb") as f:
            return f.read(4) == b"RIFF"
    except Exception:
        return False
    
voice_label = HTML("<b>Voice:</b>")

# Default-voice picker: option values are voice ids, shown under their display labels.
voice_toggle = ToggleButtons(
    options=[(label, vid) for vid, label in OPENAI_TTS_VOICES],
    value="alloy",
    description="",  
    layout=Layout(width="100%")
)

voice_box = VBox([voice_label, voice_toggle])
voice_toggle.layout = Layout(width="100%")
voice_toggle.add_class("segmented")  # picks up  segmented styling
# One tooltip per option, in the same order as OPENAI_TTS_VOICES.
voice_toggle.tooltips = [VOICE_DESCRIPTIONS.get(vid, vid) for vid, _ in OPENAI_TTS_VOICES]

# Speaker-detection modes; the values match the `mode` strings handled by _parse_line_with_mode.
SPEAKER_MODES = [
    ("Single voice", "single"),
    ("NAME: prefix", "colon"),
    ("[Name] bracketed", "bracket"),
]
speaker_mode = ToggleButtons(options=SPEAKER_MODES, value="single", description="")
speaker_mode.add_class("segmented")

def save_clip(text, out_path, engine="openai", voice=None, model=None, tries=4):
    """Synthesize ``text`` to a WAV file and return the path that was written.

    Args:
        text: Text to speak.
        out_path: Destination path; its suffix is always replaced with ``.wav``.
        engine: Accepted for call-site compatibility; not used by this function.
        voice: Voice id; defaults to ``OPENAI_TTS_VOICE``.
        model: Model name; defaults to ``OPENAI_TTS_MODEL``.
        tries: Maximum number of attempts. Values below 1 are treated as 1 so
            the function never returns ``None`` without having tried.

    Returns:
        The ``.wav`` path as a string.

    Raises:
        Exception: whatever the final failed attempt raised (also printed into
            ``output_box``), or ``RuntimeError`` when the response was empty or
            not a WAV file.
    """
    voice = voice or OPENAI_TTS_VOICE
    model = model or OPENAI_TTS_MODEL
    target = Path(out_path).with_suffix(".wav")
    out_path = str(target)

    tries = max(1, int(tries))
    # A missing folder is not a transient error: create it once up front
    # instead of failing (and backing off) on every attempt.
    target.parent.mkdir(parents=True, exist_ok=True)

    for i in range(tries):
        try:
            with client.audio.speech.with_streaming_response.create(
                model=model,
                voice=voice,
                input=text,
                response_format="wav",
                timeout=REQUEST_TIMEOUT
            ) as resp:
                resp.stream_to_file(out_path)

            # sanity check
            if target.exists() and target.stat().st_size > 64 and _is_wav(out_path):
                return out_path
            raise RuntimeError("empty or non-WAV audio returned")

        except Exception as e:
            # cleanup corrupt partial
            try:
                target.unlink(missing_ok=True)
            except Exception:
                pass
            if i < tries - 1:
                # Exponential backoff with jitter before the next attempt.
                sleep((2**i) * 0.5 + random()*0.5)
            else:
                with output_box:
                    print(f"[TTS ERROR] {e}")
                raise


def now_utc_iso():
    return datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")

def get_current_settings():
    """Return the active TTS settings: the configured model and the voice selected in the UI."""
    settings = {}
    settings["model"] = OPENAI_TTS_MODEL
    settings["voice"] = voice_toggle.value
    return settings

# Tokenizer used for the token estimates below.
ENC = tiktoken.get_encoding("cl100k_base")

def estimate_tokens(text: str) -> int:
    """Return the number of ``ENC`` tokens in ``text``; falsy input is counted as an empty string."""
    payload = text if text else ""
    encoded = ENC.encode(payload)
    return len(encoded)

def _zip_as_data_url(paths, extras):
    buf = BytesIO()
    with ZipFile(buf, "w") as z:
        for p in map(Path, paths):
            z.write(p, arcname=p.name)
        if extras:
            for name, data in extras.items():
                if isinstance(data, str):
                    data = data.encode("utf-8")
                z.writestr(name, data)
    buf.seek(0)
    b64 = b64encode(buf.read()).decode("ascii")
    return f"data:application/zip;base64,{b64}"

# Pronunciation-library form: two text fields, a save button, and the Output
# areas that are later placed in the sidebar (status, table, regeneration prompt).
original = Text(placeholder = "e.g., NASA", description = "Original:", layout = Layout(width = "100%"))
phonetic = Text(placeholder = "e.g., Nass-uh", description = "Phonetic:", layout = Layout(width = "100%"))
save_button = Button(description = "Save to library", button_style = "success")
pronunciation_status = Output()
pronunciation_table = Output()
regeneration_prompt = Output()


In [None]:
def _reset_studio_inputs():
    """Return the Studio inputs to their initial state.

    Empties the script box and the output area, then switches the input mode
    back to the text box and speaker detection back to single voice.
    """
    text_area.value = ""                 # restores the placeholder
    output_box.clear_output()            # clears sample players/status
    choice.value = "type"
    speaker_mode.value = "single"

def load_pronunciation(pronunciation_path):
    """Load the pronunciation library from a CSV file.

    Args:
        pronunciation_path: Path to a CSV with ``original`` and ``replacement`` columns.

    Returns:
        A DataFrame with exactly the string columns ``original`` and
        ``replacement``. Rows with a missing/empty value in either column are
        dropped. An empty frame (same columns) is returned when the file does
        not exist, is empty, or lacks the expected columns.
    """
    columns = ["original", "replacement"]
    path = Path(pronunciation_path)
    if not path.exists():
        return DataFrame(columns=columns)

    try:
        # dtype=str + keep_default_na=False: read every cell verbatim. With the
        # defaults, entries such as "NA" or "null" are parsed as missing (and then
        # dropped) and "007" is turned into the number 7.
        df = pd.read_csv(path, dtype=str, keep_default_na=False)
    except pd.errors.EmptyDataError:
        # Zero-byte file: same as having no library yet.
        return DataFrame(columns=columns)

    if not set(columns).issubset(df.columns):
        return DataFrame(columns=columns)

    df = df[columns].dropna()
    # Empty cells now arrive as "" rather than NaN, so filter them explicitly.
    filled = (df["original"] != "") & (df["replacement"] != "")
    return df[filled].astype(str)
    
def get_active_pronunciations(pronunciation_path="pronunciations.csv"):
    """Return the pronunciation library currently in effect.

    Outside demo mode this is the stored library. In demo mode the
    ``DEMO_SEED`` rows are appended and, for a repeated ``original``, the last
    occurrence (i.e. the seed row) is the one kept.
    """
    library = load_pronunciation(pronunciation_path)
    if not demo_mode:
        return library

    seed_rows = DataFrame(DEMO_SEED, columns=["original","replacement"])
    combined = concat([library, seed_rows], ignore_index=True)
    return combined.drop_duplicates(subset=["original"], keep="last")


In [None]:
def save_pronunciation(pronunciation_df, pronunciation_path):
    """Write the library to CSV, keeping only the last row for each ``original``.

    Returns the de-duplicated DataFrame that was written (without the index column).
    """
    is_repeat = pronunciation_df.duplicated(subset=["original"], keep="last")
    unique_rows = pronunciation_df[~is_repeat]
    unique_rows.to_csv(pronunciation_path, index=False)
    return unique_rows

def add_pronunciation(original, replacement, pronunciation_path):
    """Insert or update one pronunciation entry and persist the library.

    Both values are stripped first. An existing entry whose ``original``
    matches case-insensitively has its replacement overwritten; otherwise a
    new row is appended.

    Returns:
        ``(ok, message)`` — ``(False, ...)`` when either value is blank,
        otherwise ``(True, "Saved: ...")``.
    """
    original = (original or "").strip()
    replacement = (replacement or "").strip()
    if not (original and replacement):
        return False, "Both fields are required."

    library = load_pronunciation(pronunciation_path)

    # Case-insensitive lookup of an existing entry.
    needle = original.casefold()
    hit_mask = library["original"].str.casefold() == needle

    if hit_mask.any():
        library.loc[hit_mask, "replacement"] = replacement
    else:
        addition = DataFrame([{"original": original, "replacement": replacement}])
        library = concat([library, addition], ignore_index = True)

    save_pronunciation(library, pronunciation_path)
    return True, f"Saved: {original} → {replacement}"

def render_pronunciation(pronunciation_df):
    """Build the sidebar HTML widget: the total entry count plus a table of the first 10 rows."""
    total = len(pronunciation_df)
    first_rows = pronunciation_df.head(10).rename(
        columns={"original": "Original", "replacement": "Replacement"}
    )
    # escape=True keeps user-entered text from being interpreted as markup.
    table_html = first_rows.to_html(index=False, escape=True, table_id="pronunciation-table")
    pieces = [
        f"<b>Pronunciations in library:</b> {total}",
        "<br><small>(showing up to 10)</small><br><br>",
        f"{table_html}",
    ]
    return HTML("".join(pieces))

def apply_pronunciation(text, pronunciation_df):
    """Replace words in ``text`` using the pronunciation library.

    The text is split on whitespace; each token has leading/trailing ASCII
    punctuation peeled off, the remaining word is looked up case-insensitively,
    and the punctuation is re-attached. Besides exact matches, possessives
    (``NASA's``) and simple ``s`` / ``es`` plurals of a library entry are
    replaced, keeping the suffix.

    Args:
        text: Input text. Falsy input is returned unchanged.
        pronunciation_df: DataFrame with ``original`` and ``replacement``
            columns; an empty frame returns ``text`` unchanged.

    Returns:
        The rewritten text with tokens joined by single spaces (runs of
        whitespace, including newlines, are collapsed).
    """
    if not text or pronunciation_df.empty:
        return text

    # casefold() rather than lower(): this is the comparison add_pronunciation
    # uses, so every entry that can be saved can also be matched here.
    mapping = {}
    for original, replacement in zip(pronunciation_df["original"], pronunciation_df["replacement"]):
        key = str(original).casefold()
        if key:  # a blank key would otherwise match the bare token "s" below
            mapping[key] = replacement

    punctuation = set(string.punctuation)

    def _swap(word):
        """Return the replacement for one punctuation-free word, or the word itself."""
        key = word.casefold()
        if key in mapping:
            return mapping[key]
        # Possessive: keep the apostrophe-s exactly as typed.
        for mark in ("'s", "\u2019s"):
            if key.endswith(mark) and key[:-2] in mapping:
                return mapping[key[:-2]] + word[-2:]
        if key.endswith("s") and key[:-1] in mapping:
            return mapping[key[:-1]] + "s"
        if key.endswith("es") and key[:-2] in mapping:
            return mapping[key[:-2]] + "es"
        return word

    result = []
    for raw in text.split():
        # Peel leading punctuation.
        start = 0
        while start < len(raw) and raw[start] in punctuation:
            start += 1

        # Peel trailing punctuation.
        end = len(raw)
        while end > start and raw[end - 1] in punctuation:
            end -= 1

        word = raw[start:end]
        if word:
            word = _swap(word)
        result.append(f"{raw[:start]}{word}{raw[end:]}")

    return " ".join(result)

def refresh_pronunciation(pronunciation_path = "pronunciations.csv"):
    """Redraw the sidebar table from the currently active pronunciation library."""
    pronunciation_table.clear_output()
    rendered = render_pronunciation(get_active_pronunciations(pronunciation_path))
    with pronunciation_table:
        display(rendered)


In [None]:
SPEAKER_FALLBACK = "Narrator"

def _parse_line_with_mode(line, mode, custom_pat):
    s = line.strip()
    if not s:
        return None, "", False

    if mode == "single":
        return SPEAKER_FALLBACK, s, False

    if mode == "colon":
        # Alice: Hello there
        m = re.match(r"^([A-Za-z][A-Za-z0-9 _'\-]{0,50})\s*:\s*(.*)$", s)
        if m:
            name = m.group(1).strip()
            body = m.group(2).strip()
            return name or SPEAKER_FALLBACK, body, True
        return None, s, False

    if mode == "bracket":
        # [Alice] Hello there
        m = re.match(r"^\s*\[([^\]]+)\]\s*(.*)$", s)
        if m:
            name = (m.group(1) or "").strip()
            body = (m.group(2) or "").strip()
            return name or SPEAKER_FALLBACK, body, True
        return None, s, False

    if mode == "regex":
        try:
            rx = re.compile(custom_pat or "")
        except Exception:
            # Bad pattern -> treat as single speaker
            return SPEAKER_FALLBACK, s, False
        m = rx.match(s)
        if m:
            name = (m.group(1) or "").strip()
            if m.lastindex and m.lastindex >= 2 and m.group(2) is not None:
                # Prefer group 2 if provided as the content
                body = (m.group(2) or "").strip()
            else:
                # Otherwise: content is the rest of the line after the name match
                body = s[m.end(1):].lstrip(": -\t").strip()
            return name or SPEAKER_FALLBACK, body, True
        return None, s, False

    return SPEAKER_FALLBACK, s, False


def build_script_items(lines, mode, custom_pat):
    """Turn script lines into a list of ``{"speaker", "text"}`` items.

    Each line is parsed with ``_parse_line_with_mode``. A line with a
    recognised speaker prefix sets the current speaker; lines without one are
    attributed to the most recent speaker, or to ``SPEAKER_FALLBACK`` when none
    has been seen yet. Lines with no text produce no item.

    A speaker-only line (``[Alice]`` or ``Alice:`` with nothing after it) still
    switches the current speaker, so the lines that follow it are attributed to
    that speaker.
    """
    items = []
    last_speaker = None
    for ln in lines:
        spk, body, matched = _parse_line_with_mode(ln, mode, custom_pat)
        if matched:
            # Record the speaker before the empty-body check so a header line
            # on its own is not lost.
            last_speaker = spk
        if not body:
            continue
        speaker = spk if matched else (last_speaker or SPEAKER_FALLBACK)
        items.append({"speaker": speaker, "text": body})
    return items


In [None]:
# Populate the pronunciation table once before it is placed in the sidebar.
refresh_pronunciation()

# Right-hand panel: entry form, status line, library table, regeneration prompt.
pronunciation_sidebar = VBox([
    HTML("<h3>Pronunciation Library</h3>"),
    original,
    phonetic,
    save_button,
    pronunciation_status,
    HTML("<hr>"),
    pronunciation_table,
    HTML("<hr>"),
    regeneration_prompt
])

# Left-hand panel: input mode, the two input widgets, voice picker, action button, results.
left_ui = VBox([
    choice,
    upload,
    text_area,
    voice_box,
    generate_button,
    output_box
])

left_ui.layout = Layout(width="100%", gap="14px")

# CSS hooks; these class names are the ones styled by the notebook's theme stylesheet.
_add_class_safe(left_ui, "panel")
_add_class_safe(pronunciation_sidebar, "panel")
_add_class_safe(choice, "segmented")
_add_class_safe(output_box, "players")
_add_class_safe(generate_button, "btn")
_add_class_safe(generate_button, "btn-primary")
_add_class_safe(save_button, "btn") 
_add_class_safe(save_button, "btn-success")

# These assignments replace the Layout objects given when the widgets were created.
original.layout = Layout(width="100%")
phonetic.layout = Layout(width="100%")
upload.layout = Layout(width="100%", display="none")  # keep it hidden on load
text_area.layout = Layout(width = "100%", height = "200px", min_height = "150px")
generate_button.layout = Layout(width = "auto")


In [None]:
def _toggle_inputs():
    """Show the input widget that matches ``choice.value`` and hide the other one.

    ``"type"`` shows the text area; any other value shows the PDF uploader.
    """
    if choice.value == "type":
        shown, hidden = text_area, upload
    else:
        shown, hidden = upload, text_area
    shown.layout.display = ""
    hidden.layout.display = "none"
        
def _on_choice_change(change):
    """Observer for ``choice``; the change payload is ignored and the inputs are simply re-synced."""
    _toggle_inputs()

# Re-sync on every change of the input-mode toggle, and once now for the initial state.
choice.observe(_on_choice_change, names="value")
_toggle_inputs() 


In [None]:
# --- Shared header / footer chrome -----------------------------------------
footer = HTML("<div class='app-footer'>© NASA AI Lab — generated locally in browser via data URLs</div>")
title = HTML("<div class='app-header'><b>Text-to-Speech Studio</b></div>")
# Show/hide switch for the pronunciation library pane.
collapse_btn = ToggleButton(
    value=False,                     # False = library shown, True = collapsed
    description="Hide Library",
    tooltip="Show/Hide Pronunciation Library",
    icon="chevron-right",
    layout=Layout(width="140px", height="32px")
)

# Navigation buttons (their click handlers are attached further down).
home_btn = Button(
    description="Home",
    icon="home",
    tooltip="Return to start",
    layout=Layout(width="96px", height="32px")
)

voices_btn = Button(
    description="Voices",
    icon="music",
    tooltip="Explore voices",
    layout=Layout(width="110px", height="32px")
)

studio_btn = Button(
    description="Studio",
    icon="sliders",
    tooltip="Go to Studio",
    layout=Layout(width="110px", height="32px")
)

speakers_btn = Button(
    description="Speakers",
    icon="users",
    tooltip="Detect speakers & assign voices",
    layout=Layout(width="120px", height="32px")
)
# All nav buttons start in the neutral "ghost" style; _set_tab_active promotes the active one.
_add_class_safe(speakers_btn, "btn"); _add_class_safe(speakers_btn, "btn-ghost")

_add_class_safe(studio_btn, "btn"); _add_class_safe(studio_btn, "btn-ghost")
_add_class_safe(voices_btn, "btn"); _add_class_safe(voices_btn, "btn-ghost")

# Header row: navigation + title on the left, library toggle on the right.
header_left = HBox([home_btn, voices_btn, studio_btn, speakers_btn, title],
                   layout=Layout(align_items="center", gap="8px"))
header_bar  = HBox([header_left, collapse_btn],
                   layout=Layout(justify_content="space-between", align_items="center"))
header = header_bar

_add_class_safe(home_btn, "btn"); _add_class_safe(home_btn, "btn-ghost")
_add_class_safe(collapse_btn, "btn")
_add_class_safe(collapse_btn, "btn-primary")

def make_voice_card(vid: str):
    """Build the card widget for voice ``vid``: heading, Listen/Use buttons and a player area.

    "Listen" renders (or reuses) the cached preview clip and shows an audio
    player inside the card. "Use" selects the voice in ``voice_toggle`` and
    navigates to the Studio page.
    """
    display_name = VOICE_NAME.get(vid, vid)
    blurb = VOICE_DESCRIPTIONS.get(vid, "")
    heading = HTML(f"<b style='font-size:14px'>{display_name}</b><br><small style='color:#64748b'>{blurb}</small>")
    player_area = Output()

    listen_btn = Button(description="Listen", button_style="primary")
    for css in ("btn", "btn-primary"):
        _add_class_safe(listen_btn, css)
    use_btn = Button(description="Use", button_style="success")
    for css in ("btn", "btn-success"):
        _add_class_safe(use_btn, css)

    def _on_preview(_=None):
        # Everything runs inside the card's Output so the player appears in the card.
        with player_area:
            player_area.clear_output()
            clip = WAudio.from_file(get_or_build_voice_preview(vid))
            clip.autoplay = False
            clip.loop = False
            display(clip)

    def _on_use(_=None):
        voice_toggle.value = vid
        _go_studio()  # navigate to Studio

    listen_btn.on_click(_on_preview)
    use_btn.on_click(_on_use)

    button_row = HBox([listen_btn, use_btn], layout=Layout(gap='8px'))
    card_layout = Layout(padding='12px', border='1px solid #e5e7eb',
                         border_radius='12px', width='100%')
    card = VBox([heading, button_row, player_area], layout=card_layout)
    card.add_class("panel")
    return card

def _set_tab_active(active_btn):
    for b in (home_btn, voices_btn, studio_btn, speakers_btn):
        cls = [c for c in getattr(b, "_dom_classes", ()) if c not in ("btn-primary","btn-ghost")]
        cls += ["btn","btn-ghost"]
        b._dom_classes = tuple(cls)
    cls = [c for c in getattr(active_btn, "_dom_classes", ()) if c != "btn-ghost"]
    if "btn-primary" not in cls: cls.append("btn-primary")
    active_btn._dom_classes = tuple(cls)

# One card per voice, in OPENAI_TTS_VOICES order.
voice_cards = [make_voice_card(v) for v, _ in OPENAI_TTS_VOICES]

voices_grid = GridBox(
    children=voice_cards,
    layout=Layout(grid_template_columns="repeat(3, minmax(240px, 1fr))",
                  grid_gap="12px", width="100%")
)

# Overrides the three-column template set in the constructor above.
voices_grid.layout.grid_template_columns = "repeat(auto-fit, minmax(220px, 1fr))"
# A/B comparison controls: two voice pickers, a render button and an output area.
ab_label = HTML("<b>Quick A/B compare</b>")
ab_a = Dropdown(options=[(VOICE_NAME[v], v) for v, _ in OPENAI_TTS_VOICES], value="alloy")
ab_b = Dropdown(options=[(VOICE_NAME[v], v) for v, _ in OPENAI_TTS_VOICES], value="shimmer")
ab_a.layout = Layout(width="240px", height="36px")
ab_b.layout = Layout(width="240px", height="36px")
ab_play = Button(description="Render A & B", button_style="primary")
ab_out = Output()
_add_class_safe(ab_play, "btn"); _add_class_safe(ab_play, "btn-primary")

def _ab_render(_=None):
    """Show labelled audio players for the two voices selected in the A/B dropdowns.

    Both preview clips are resolved (and rendered if not cached) before
    anything is written to ``ab_out``.
    """
    ab_out.clear_output()
    clips = [get_or_build_voice_preview(picker.value) for picker in (ab_a, ab_b)]

    def _show(caption_html, wav_path):
        display(HTML(caption_html))
        player = WAudio.from_file(wav_path)
        player.autoplay = False
        player.loop = False
        display(player)

    with ab_out:
        _show(f"<small>A: <b>{VOICE_NAME[ab_a.value]}</b></small>", clips[0])
        _show(f"<small>B: <b>{VOICE_NAME[ab_b.value]}</b></small>", clips[1])

ab_play.on_click(_ab_render)

# --- STUDIO PAGE (editor on the left, pronunciation library on the right) ---
left_shell_studio  = VBox([left_ui]);  _add_class_safe(left_shell_studio,  "bubble-shield"); _add_class_safe(left_shell_studio,  "left-pane")
right_shell_studio = VBox([pronunciation_sidebar]); _add_class_safe(right_shell_studio, "bubble-shield"); _add_class_safe(right_shell_studio, "right-pane")

app_studio = AppLayout(
    header=header, left_sidebar=left_shell_studio, center=None, right_sidebar=right_shell_studio, footer=footer,
    pane_widths=("2fr","0fr","1fr"), pane_heights=("56px","1fr","28px"),
    layout=Layout(width="100%", height="auto")
)
studio_shell = VBox([app_studio]); _add_class_safe(studio_shell, "app-bubble")

# --- VOICES PAGE (full-width center; no right sidebar) ---
voices_panel = VBox([  
    HTML("<h3 style='margin:0 0 6px 0'>Explore voices</h3>"
         "<small>Select a voice to preview, then set it as your default.</small>"),
    voices_grid, HTML("<hr>"),
    VBox([ab_label, HBox([ab_a, ab_b, ab_play], layout=Layout(gap='8px')), ab_out],
         layout=Layout(gap='8px')),
], layout=Layout(width="100%", gap="12px"))
_add_class_safe(voices_panel, "panel")

center_voices = VBox([voices_panel]); _add_class_safe(center_voices, "bubble-shield")

# Uses the same header and footer widgets as the Studio layout.
app_voices = AppLayout(
    header=header, left_sidebar=None, center=center_voices, right_sidebar=None, footer=footer,
    pane_widths=("0fr","1fr","0fr"), pane_heights=("56px","1fr","28px"),
    layout=Layout(width="100%", height="auto")
)
voices_shell = VBox([app_voices]); _add_class_safe(voices_shell, "app-bubble")

# --- INTRO PAGE (landing card with the three-step overview) ---
eyebrow = HTML("<div class='intro-eyebrow'>NASA AI Lab</div>")
intro_title = HTML("<div class='intro-title'>Text-to-Speech Studio</div>")
intro_sub = HTML("<div class='intro-sub'>Generate clear speech from text or PDFs, fine-tune pronunciations, and download selected clips.</div>")

intro_grid = HTML("""
  <div class='intro-grid'>
    <div class='step'><div class='num'>1</div><div><b>Choose input.</b> Toggle between <i>Text</i> and <i>PDF</i>.</div></div>
    <div class='step'><div class='num'>2</div><div><b>Preview 3 samples.</b> We read the first three lines with your pronunciations.</div></div>
    <div class='step'><div class='num'>3</div><div><b>Approve to generate all.</b> Download a ZIP of every line.</div></div>
  </div>
""")

enter_btn = Button(description="Enter Studio", icon="play", button_style="primary")
enter_btn.add_class("btn"); enter_btn.add_class("btn-primary")

demo_btn = Button(description="Load Demo", icon="magic", button_style="success")
demo_btn.add_class("btn"); demo_btn.add_class("btn-success")

intro_actions = HBox([enter_btn, demo_btn]); intro_actions.add_class("intro-actions")

intro_card = VBox([eyebrow, intro_title, intro_sub, intro_grid, intro_actions])
intro_card.add_class("intro-hero")

intro_shell = Box([intro_card], layout=Layout(width="100%", height="100%"))
intro_shell.add_class("intro-shell")

# Layouts are assigned after construction, replacing each widget's default Layout.
enter_btn.layout = Layout(height="44px", padding="0 18px", width="auto", flex="0 0 auto")
demo_btn.layout  = Layout(height="44px", padding="0 18px", width="auto", flex="0 0 auto")
intro_actions.layout = Layout(justify_content="center", flex_flow="row wrap")

def _go_home(b=None):
    """Navigate to the intro page.

    Leaves demo mode, resets the Studio inputs, redraws the pronunciation
    table, swaps the stage content to the intro shell, hides the library
    toggle and highlights the Home tab. ``b`` is the clicked button and is unused.
    """
    global demo_mode
    demo_mode = False
    _reset_studio_inputs()
    refresh_pronunciation("pronunciations.csv")
    stage.children = [intro_shell]
    collapse_btn.layout.display = "none"
    _set_tab_active(home_btn)

def _go_studio(b=None):
    """Navigate to the Studio page.

    Restores the header title, swaps the stage content to the Studio shell,
    shows the library toggle, redraws the pronunciation table and highlights
    the Studio tab. Unlike the other navigation handlers it does not change
    ``demo_mode``. ``b`` is the clicked button and is unused.
    """
    title.value = "<div class='app-header'><b>Text-to-Speech Studio</b></div>"
    stage.children = [studio_shell]
    collapse_btn.layout.display = ""
    refresh_pronunciation("pronunciations.csv")   
    _set_tab_active(studio_btn)

def _go_voices(b=None):
    """Navigate to the Voices page.

    Leaves demo mode, redraws the pronunciation table, swaps the stage content
    to the Voices shell, hides the library toggle and highlights the Voices
    tab. ``b`` is the clicked button and is unused.
    """
    global demo_mode
    demo_mode = False
    refresh_pronunciation("pronunciations.csv")
    stage.children = [voices_shell]
    collapse_btn.layout.display = "none"
    _set_tab_active(voices_btn)      

# Hook the navigation buttons up to their page handlers.
home_btn.on_click(_go_home)
studio_btn.on_click(_go_studio)
voices_btn.on_click(_go_voices)

def enter(b=None):
    """Open the Studio from the intro page.

    Redraws the pronunciation table, resets the Studio inputs unless demo mode
    is active (so a loaded demo script is kept), navigates to the Studio and
    adds the ``fade-in`` class to its shell. ``b`` is the clicked button and is unused.
    """
    refresh_pronunciation("pronunciations.csv")
    if not demo_mode:                    # normal entry
        _reset_studio_inputs()
    _go_studio()
    studio_shell.add_class("fade-in")

def demo(b=None):
    """Load the demo script and open the Studio in demo mode.

    Turns ``demo_mode`` on (which makes the DEMO_SEED pronunciations active),
    selects bracketed speaker detection and text input, fills the script box
    with a five-line multi-speaker sample, then enters the Studio. ``b`` is the
    clicked button and is unused.
    """
    global demo_mode
    demo_mode = True

    speaker_mode.value = "bracket"   # use one of the detection styles
    choice.value = "type"
    text_area.value = (
        "[David] Welcome to the NASA AI Lab demo. Today we will test speaker detection.\n"
        "[Anna] Great! I will introduce the JWST which observes distant galaxies and exoplanets.\n"
        "[Krista] Perfect. The SLS is the primary launch vehicle of the Artemis Moon landing program.\n"
        "[Evan] That concludes our demo.\n"
        "[Susan] I love NASA!"
    )
    refresh_pronunciation("pronunciations.csv")
    enter()


enter_btn.on_click(enter)
demo_btn.on_click(demo)

# Root container: the navigation handlers swap its single child to change pages.
# It starts on the intro page.
stage = Box([intro_shell], layout=Layout(
    width="100vw",
    height="auto",            # was "100vh"
    min_height="100vh",
    display="flex",
    justify_content="center",
    align_items="center",
    overflow="visible"
))

_add_class_safe(stage, "app-stage")

display(stage)
# Highlight the Home tab to match the initial page.
_set_tab_active(home_btn)

In [None]:
def _sync_library_visibility(collapsed):
    """Apply the library toggle state to the Studio layout.

    When ``collapsed`` is truthy the right pane is hidden, the left pane gets
    the full width plus the ``solo`` class, and the toggle reads
    "Show Library". Otherwise the two-pane layout is restored, ``solo`` is
    removed and the toggle reads "Hide Library".
    """
    right_shell_studio.layout.display = "none" if collapsed else ""
    app_studio.pane_widths = ("1fr","0fr","0fr") if collapsed else ("2fr","0fr","1fr")

    if collapsed:
        _add_class_safe(left_shell_studio, "solo")
    elif hasattr(left_shell_studio, "_dom_classes"):
        left_shell_studio._dom_classes = tuple(
            name for name in left_shell_studio._dom_classes if name != "solo"
        )

    collapse_btn.description = "Show Library" if collapsed else "Hide Library"
    collapse_btn.icon = "chevron-left" if collapsed else "chevron-right"

# Apply the toggle's current value once so the layout matches it on load.
_sync_library_visibility(collapse_btn.value)

def _on_toggle(change):
    """Observer for ``collapse_btn``: forward new ``value`` changes to the layout sync."""
    if change["name"] != "value":
        return
    _sync_library_visibility(change["new"])

collapse_btn.observe(_on_toggle, names="value")


In [None]:
ui_theme = HTML("""
<style>
:root{
  --frame: clamp(16px, 6vmin, 36px);                /* equal whitespace all around */
  --bg:#f7f8fb;
  --card:#ffffff;
  --ink:#0f172a;
  --muted:#475569;
  --brand:#2563eb;
  --brand-600:#2563eb;
  --brand-700:#1d4ed8;
  --brand-800:#1e40af;
  --accent:#16a34a;
  --accent-700:#15803d;
  --ring:#93c5fd;
  --shadow: 0 10px 25px rgba(15, 23, 42, 0.08);
  --shadow-lg: 0 18px 45px rgba(15, 23, 42, 0.12);
  --radius: 16px;
  --radius-sm: 12px;
  --radius-xs: 10px;
  --speed: .18s;
}

/* Typography */
html, body, .jp-Notebook, .jp-NotebookPanel, .voila, .widget-container, .panel{
  font-family: "Segoe UI","Inter","Roboto","Helvetica Neue",Arial,sans-serif !important;
  font-size:15px !important; 
  line-height:1.5 !important; 
  color:var(--ink) !important;
}

/* Cards */
.panel{
  background:var(--card);
  border-radius:var(--radius);
  box-shadow:var(--shadow);
  padding:18px 20px;
  transition:box-shadow var(--speed) ease, transform var(--speed) ease;
  width:100% !important;
  max-width:100% !important;
  overflow-x:hidden !important;
}
.panel:hover{ box-shadow:var(--shadow-lg); transform:translateY(-1px); }

/* Inputs */
.panel .widget-text, .panel .widget-textarea, .panel .widget-file-upload{
  width:100% !important;
  border-radius:var(--radius-xs) !important;
  border:1px solid #e5e7eb !important;
  background:#fff !important;
  box-shadow:none !important;
}
.panel .widget-text input{ padding:10px 12px !important; width:100% !important; }
.panel .widget-textarea textarea{
  padding:12px 14px !important;
  line-height:1.45 !important;
  resize:vertical !important;
  min-height:180px !important;
  width:100% !important;
  font-family:"Segoe UI","Inter","Roboto",sans-serif !important;
  font-size:15px !important;
  color:#0f172a !important;
}

/* Buttons */
.btn{
  border:none !important; border-radius:12px !important; font-weight:700 !important;
  transition:transform var(--speed), box-shadow var(--speed), background var(--speed);
  box-shadow:var(--shadow);
}
.btn-ghost{
  background:#eef2f7 !important; 
  color:#0f172a !important;
}
                
.btn{
  display:inline-flex;                 /* so <a> adopts button look */
  align-items:center; justify-content:center;
  padding:10px 14px !important;
  text-decoration:none !important;     /* remove underline on <a> */
}

                
.btn-ghost:hover{ background:#e2e8f0 !important; }
.btn:hover{ transform:translateY(-1px); box-shadow:var(--shadow-lg); }
.btn:active{ transform:translateY(0); }
.btn-primary{ background:var(--brand-600) !important; color:#fff !important; }
.btn-primary:hover{ background:var(--brand-700) !important; }
.btn-success{ background:var(--accent) !important; color:#fff !important; }
.btn-success:hover{ background:var(--accent-700) !important; }
.btn-danger{ background:#dc2626 !important; color:#fff !important; }
.btn-danger:hover{ background:#b91c1c !important; }

/* Focus states */
.btn:focus, .panel .widget-text input:focus, .panel .widget-textarea textarea:focus{
  outline:none !important; box-shadow:0 0 0 3px var(--ring) !important;
}

/* ToggleButtons */
.segmented .widget-toggle-buttons{ background:#eef2ff; border-radius:var(--radius-sm); padding:4px; box-shadow:var(--shadow); width:100% !important; }
.segmented .widget-toggle-button{ border-radius:10px !important; margin:2px !important; transition:background var(--speed),color var(--speed),transform var(--speed); }
.segmented .widget-toggle-button.mod-active{ background:var(--brand) !important; color:#fff !important; transform:translateY(-1px); }

/* Tables */
.panel table, #pronunciation-table{ width:100% !important; table-layout:auto !important; word-wrap:break-word !important; }
                
.panel table td,
#pronunciation-table td{
  overflow-wrap: break-word !important;
}

#pronunciation-table th{
  max-width: none !important;
  white-space: nowrap !important;   /* keep "Original" / "Replacement" readable */
}                
#pronunciation-table th, #pronunciation-table td{ text-align:center !important; vertical-align:middle !important; }
#pronunciation-table th { color: var(--ink) !important; font-weight: 700 !important; }

/* Output and links */
.players .widget-output{ margin-bottom:6px; }
a.download-link{
  color:var(--brand-600) !important; text-decoration:none !important;
  border-bottom:2px solid rgba(37,99,235,.25); padding-bottom:1px;
  transition:border-color var(--speed), color var(--speed); word-break:break-all;
}
a.download-link:hover{ color:var(--brand-700) !important; border-color:rgba(29,78,216,.5); }

/* Audio and misc */
audio{ width:100% !important; max-width:100% !important; margin:4px 0 6px 0 !important; }
.widget-hbox, .widget-vbox{ width:100% !important; max-width:100% !important; overflow-x:hidden !important; }
.widget-checkbox label{ font-weight:700 !important; font-size:15px !important; color:var(--ink) !important; letter-spacing:0.1px; }
.widget-checkbox{ margin:6px 0 16px 0 !important; }

/* Reduced motion */
@media (prefers-reduced-motion: reduce){
  body, body::before, body::after, .app-stage::before, .app-stage::after{ animation: none !important; }
}

/* Ensure no body-level blobs are active */
body::before, body::after{ content:none !important; display:none !important; }

/* ---------- Stage: perfect centering & animated background ---------- */
.app-stage{
  display:flex;
  justify-content:center;
  align-items:flex-start;      /* don't pin content to vertical center */
  width:100vw;
  height:auto;                 /* let it grow with content */
  min-height:100vh;            /* still fill at least one screen */
  padding:var(--frame);
  box-sizing:border-box;
  overflow:visible;            /* allow the page to scroll */
  position:relative;
  z-index:0;
}

/* Livelier but slightly toned-down background */
@keyframes swoopMove{
  0%   { transform: rotate(-12deg) translate(-6vmax,-3vmax) scale(1.00); }
  30%  { transform: rotate(-9deg)  translate( 2vmax, 1vmax)  scale(1.05); }
  70%  { transform: rotate(-15deg) translate(12vmax, 6vmax)  scale(0.97); }
  100% { transform: rotate(-12deg) translate(18vmax, 9vmax)  scale(1.02); }
}
@keyframes swirlSpin{
  0% { transform: rotate(0deg); }
  100%{ transform: rotate(360deg); }
}

/* Big blue swoop (faster, slightly less saturated/opaque) */
.app-stage::before{
  content:"";
  position:absolute;
  width:110vmax; height:64vmax;
  top:-24vmax; left:-34vmax;
  border-radius:50%;
  background: radial-gradient(closest-side, rgba(59,130,246,0.78), rgba(59,130,246,0) 65%);
  filter: blur(70px) saturate(140%);
  opacity:.82;
  transform: rotate(-12deg);
  z-index:0;
  animation: swoopMove 12s ease-in-out infinite alternate;  /* faster */
}

/* Aurora swirl (faster spin, softer colors) */
.app-stage::after{
  content:"";
  position:absolute;
  inset:-20vmax;
  background:
    conic-gradient(
      from 0deg at 70% 120%,
      rgba(99,102,241,0.22),
      rgba(168,85,247,0.26),
      rgba(56,189,248,0.20),
      rgba(16,185,129,0.18),
      rgba(99,102,241,0.22)
    );
  filter: blur(70px) saturate(150%);
  mix-blend-mode: normal;
  z-index:0;
  animation: swirlSpin 25s linear infinite;                 /* faster */
}

.app-bubble{
  width:clamp(860px, 82vw, 1180px);
  max-height:calc(100vh - (2 * var(--frame))) !important;
  border-radius:24px;
  box-shadow:0 25px 50px -12px rgba(0,0,0,.25);
  padding:18px;
  overflow-y: auto !important;
  overflow-x: hidden !important;
  background:transparent;
  isolation:isolate;
  position:relative;
  z-index:1;
}

                
.app-bubble::before{
  content:"";
  position:absolute;
  inset:0;
  background:#f9fafb;   /* very light gray center background */
  border-radius:inherit;
  z-index:0;
}

.app-bubble > *{ position:relative; z-index:1; }

/* ---------- Keep panels/cards white but with consistent corners ---------- */
.panel{
  background:#ffffff !important;
  border-radius:24px !important;   /* match the bubble corners */
  box-shadow:var(--shadow);
  padding:24px 26px;
  transition:box-shadow var(--speed) ease, transform var(--speed) ease;
  width:100% !important;
  max-width:100% !important;
  overflow-x:hidden !important;
}
.panel:hover{ box-shadow:var(--shadow-lg); transform:translateY(-1px); }

/* ---------- Inputs inside panels also inherit the same rounded style ---------- */
.panel .widget-text,
.panel .widget-textarea,
.panel .widget-file-upload{
  border-radius:16px !important;   /* slightly smaller radius for nested inputs */
}

/* Clip list rows */
.playlist-row{
  background:#fff;
  border:1px solid #e5e7eb;
  border-radius:12px;
  padding:12px 14px;         /* was 10px 12px */
  display:flex;
  align-items:center;
  gap:12px;
}
.playlist-title{
  font-weight:600;
  font-size:14px;
  white-space:nowrap;
  overflow:hidden;
  text-overflow:ellipsis;
  max-width: 28ch;
}
.playlist-audio audio{
  width:100% !important;
}
        
.solo {
  max-width: 960px;         /* tweak width you want */
  margin: 0 auto;           /* center inside the center column */
}

.app-bubble{
  background:#fff !important;
}

/* When content is long (e.g., 3 audio rows), leave room above the sticky footer */
            
.app-bubble:has(.playlist-row) .bubble-shield{
  padding-bottom: 72px;   /* tweak value if you want more/less room */
}
                
/* Slightly more space before/after the playlist+download block */
.players .widget-output{ margin-bottom:10px; }  /* was 6px */
                
.players .widget-output:last-child{ margin-bottom:18px !important; }  /* was 16px */             

/* --- Textarea: make it synonymous with rounded look --- */
.panel .widget-textarea textarea{
  border-radius:16px !important;            /* match other controls */
  background:#ffffff !important;
  border:1px solid #e5e7eb !important;
}

/* keep focus ring consistent */
.panel .widget-textarea textarea:focus{
  box-shadow:0 0 0 3px var(--ring) !important;
  outline:none !important;
}

/* Gutter between columns */
.left-pane  { padding-right: 16px; }   /* space on the right of left column */
.right-pane { padding-left: 16px; }    /* space on the left of right column */

/* Tight screens: reduce the gutter a bit */
@media (max-width: 1200px){
  .left-pane  { padding-right: 12px; }
  .right-pane { padding-left: 12px; }
}
                

/* Add a clean gutter between columns */
.left-pane  { padding-right: 20px !important; }
.right-pane { padding-left: 20px !important; }

/* A touch more air inside cards and between playlist rows */
.panel{ padding:24px 26px !important; }
.playlist-row{ padding:12px 14px !important; }

/* Make the outer bubble the only scroll container */
.app-bubble{
  overflow-y: auto !important;
  overflow-x: hidden !important;
  max-height: calc(100vh - (2 * var(--frame))) !important;
}

/* Prevent inner columns from creating their own scrollbars - REMOVED overflow: visible */
.bubble-shield{
  max-height: none !important;
}

/* Keep the footer pinned inside the bubble */
.app-footer{
  position: sticky !important;
  bottom: 0 !important;
}

/* Optional: remove the textarea resize grabber so it never looks like a tiny scrollbar */
.panel .widget-textarea textarea{
  resize: none !important;
}
                
/* --- Intro v2 --- */
.intro-shell{
  width:100%; height:100%;
  display:flex; align-items:center; justify-content:center;
}

.intro-eyebrow{
  display:inline-block;
  padding:6px 10px;
  font-size:12px; letter-spacing:.12em; text-transform:uppercase;
  border-radius:999px; background:rgba(37,99,235,.10);
  color:#1e3a8a; border:1px solid rgba(37,99,235,.25);
  margin-bottom:10px;
}
.intro-title{
  font-size:42px; font-weight:900; letter-spacing:.2px; margin:6px 0 8px 0;
  background:linear-gradient(90deg,#0ea5e9 0%, #6366f1 50%, #a855f7 100%);
  -webkit-background-clip:text; background-clip:text; color:transparent;
}
.intro-sub{
  color:#334155; font-size:16px; margin:0 auto 18px; max-width:62ch;
}
.intro-grid{
  display:grid; grid-template-columns: repeat(3, minmax(0,1fr));
  gap:14px; margin:12px 0 18px;
}
@media (max-width:1100px){ .intro-grid{ grid-template-columns:1fr; } }

.step{
  background:#fff; border:1px solid #e5e7eb; border-radius:16px;
  padding:14px 16px; display:flex; gap:12px; text-align:left; align-items:flex-start;
}
.step .num{
  width:28px; height:28px; border-radius:999px; flex:0 0 28px;
  display:grid; place-items:center; font-weight:800; font-size:14px;
  background:#2563eb; color:#fff; margin-top:2px;
}
.step b{ font-weight:700; }

.intro-actions{ display:flex; gap:12px; justify-content:center; margin-top:8px; }
.intro-actions .widget-button{ height:44px; padding:0 18px; font-weight:800; }

/* subtle entrance */
@keyframes fadeUp { from{opacity:0; transform:translateY(10px)} to{opacity:1; transform:none} }
.intro-hero{ animation:fadeUp .28s ease-out 1; }
                
.app-stage { position: relative; z-index: 0; }
.app-stage::before,
.app-stage::after{
  z-index: 0 !important;
  pointer-events: none !important;   /* clicks pass through */
}

/* Lift the intro above the stage effects */
.intro-hero{
  width: clamp(900px, 72vw, 1200px);
  padding: 36px 40px;
  border-radius: 28px;
  background: #f9fafb;              /* solid light grey */
  border: 1px solid #e5e7eb;
  box-shadow: 0 30px 80px -20px rgba(15,23,42,.25);
  text-align: center;

  /* ensure no background bleed */
  backdrop-filter: none !important;
  -webkit-backdrop-filter: none !important;
}


/* 2) Title spacing/stacking fix */
.intro-title{
  line-height: 1.12 !important;
  margin-top: 4px !important;
  position: relative;
  z-index: 3;
}

/* 3) Make buttons fit and never wrap/cut off */
.intro-actions{
  display: flex; gap: 12px; justify-content: center; flex-wrap: wrap;
}
.intro-actions .widget-button{
  min-width: 200px;           /* room for "Load Demo & Enter" */
  white-space: nowrap;        /* no line wrap inside the button */
  flex: 0 0 auto;             /* do not shrink */
}
                
.intro-hero,
.btn,
.widget-button,
.panel{
  font-family: "Segoe UI","Inter","Roboto","Helvetica Neue",Arial,sans-serif;
}

/* Ensure intro sits above the animated background & stays clickable */
.intro-shell,
.intro-hero{
  position: relative;
  z-index: 2;
}

/* Apply the fade-in you add to app_shell on enter */
.fade-in{ animation: fadeUp .28s ease-out 1; }
                
.app-stage { position: relative; z-index: 0; }
.app-stage::before,
.app-stage::after { z-index: -1; pointer-events: none; }

/* Lift the centered intro card and make it solid */
.intro-hero{
  position: relative;
  z-index: 2;
  background: #f9fafb !important;
  backdrop-filter: none !important;
  -webkit-backdrop-filter: none !important;
  isolation: isolate;
}   

.app-footer{
  position: sticky !important;
  bottom: 0 !important;
  background: transparent !important;   /* remove white slab */
  border: 0 !important;
  box-shadow: none !important;
  padding: 4px 0 !important;            /* tighter */
  border-radius: 0 !important;          /* no rounded bar */
  color: #64748b !important;            /* subtle text */
}
                
.segmented .widget-toggle-buttons{
  background:#eef2ff;
  border-radius:14px;
  padding:4px;
  box-shadow:var(--shadow);
}

.segmented .widget-toggle-button{
  border-radius:10px !important;
  margin:2px !important;
  transition:background var(--speed), color var(--speed), transform var(--speed), box-shadow var(--speed);
  background:#e0e7ff;           /* lighter for unselected */
  color:#0f172a;
}

.segmented .widget-toggle-button:hover{
  background:#c7d2fe;            /* hover for unselected */
}

.segmented .widget-toggle-button.mod-active{
  background:var(--brand-800) !important; /* clearly darker */
  color:#fff !important;
  transform:translateY(-1px);
  box-shadow:0 8px 20px rgba(29,78,216,.25), inset 0 -2px 0 rgba(0,0,0,.18);
}

.segmented .widget-toggle-button.mod-active:hover,
.segmented .widget-toggle-button.mod-active:focus{
  background:var(--brand-700) !important; /* slightly lighter on hover/focus */
}

.spoken-as { color:#64748b; font-size:12px; margin-left:6px; }

.panel { overflow: visible !important; }
.left-pane, .right-pane, .panel, .widget-vbox, .widget-hbox { 
  overflow: visible !important; 
  position: relative !important; 
  z-index: 1 !important;
}

/* Avoid body-level clipping */
body { overflow-y: auto !important; overflow-x: hidden !important; }
                
html, body {
  overflow-x: hidden !important;
  overflow-y: auto !important;
  height: auto !important;
  min-height: 100vh !important;
  position: static !important;   /* undo global fixed body */
}

/* Let the stage grow taller than the viewport if needed */
.app-stage{
  height: auto !important;
  min-height: 100vh !important;
  padding: min(6vmin, 28px) !important;  /* smaller frame on narrow screens */
}

/* Bubble should fit small screens and be scrollable when needed */
.app-bubble{
  width: min(96vw, 1180px) !important;
  /* keep the scrollbar on the bubble itself */
  overflow-y: auto !important;
  overflow-x: hidden !important;
  max-height: calc(100vh - (2 * var(--frame))) !important;
}

/* Keep all columns/panels from clipping child popovers/players */
.left-pane, .right-pane, .panel, .widget-vbox, .widget-hbox{
  overflow: visible !important;
}

/* Make audio rows wrap gracefully on cramped widths */
.playlist-row{
  flex-wrap: wrap;
  align-items: flex-start;
}
.playlist-row .widget-html { flex: 1 1 240px; min-width: 220px; }
.playlist-row .widget-audio { flex: 1 1 320px; min-width: 260px; }

/* Voice pills: full width on small screens */
@media (max-width: 900px){
  .segmented .widget-toggle-buttons{ width: 100% !important; }
}

/* Stack layout nicely on narrow screens: give left pane more room */
@media (max-width: 1024px){
  .right-pane { display: none !important; }  /* hide pronunciation panel */
  .left-pane  { padding-right: 0 !important; }
  .app-bubble { width: 98vw !important; }
}

.app-bubble {
  width: min(1500px, 96vw) !important;   /* allow up to 1500px or 96% of viewport */
  max-width: 1500px !important;
}

.app-stage {
  padding: max(12px, 2vmin) !important;  /* less padding so card isn't squeezed */
}

/* Wider center when Library is hidden */
.solo { max-width: 1200px !important; }  
                
/* 1) Stop the page from scrolling */
html, body {
  height: 100% !important;
  overflow: hidden !important;    /* no page scroll */
}

/* 2) Pin the stage to the viewport and keep the animated bg */
.app-stage {
  position: fixed !important;
  inset: 0 !important;            /* top/right/bottom/left: 0 */
  padding: var(--frame) !important;
  overflow: hidden !important;    /* stage itself never scrolls */
  min-height: 100vh !important;
}

/* 3) Make the bubble the ONLY scroll area */
.app-bubble{
  height: auto !important;                                  /* <- was a fixed calc(...) */
  max-height: calc(100vh - (2 * var(--frame))) !important;  /* cap at viewport */
  overflow-y: auto !important;
  overflow-x: hidden !important;
  display: flex !important;
  flex-direction: column !important;
}

/* Don’t force the AppLayout to fill vertical space (this created the big gap) */
.app-bubble > .widget-app-layout,
.app-bubble > .jupyter-widgets.widget-app-layout,
.app-bubble > .widget-box{
  flex: 0 0 auto !important;   /* was 1 1 auto */
  min-height: auto !important; /* was 0 */
  overflow: visible !important;
}

/* Keep footer neat; it’ll sit after content when short, and stay visible when tall */
.app-footer{
  position: sticky !important;
  bottom: 0 !important;
}
                
.btn-ghost { background:#eef2f7 !important; color:#0f172a !important; }
.btn-ghost:hover { background:#e2e8f0 !important; }
                
.panel .widget-dropdown { 
  font-family:"Segoe UI","Inter","Roboto","Helvetica Neue",Arial,sans-serif !important;
  border:1px solid #e5e7eb !important;
  border-radius:12px !important;
  background:#fff !important;
  box-shadow:var(--shadow);
}
.panel .widget-dropdown select{
  font-family:inherit !important;
  font-size:14px !important;
  height:36px !important;
  padding:8px 12px !important;
  border:0 !important;
  background:transparent !important;
  border-radius:12px !important;
}
.panel .widget-dropdown:focus-within{ box-shadow:0 0 0 3px var(--ring) !important; }
                
/* Speakers tab (scoped so nothing leaks elsewhere) */
.spk-scope .spk-hero{display:flex;align-items:flex-start;gap:12px;margin:0 0 8px}
.spk-scope .spk-hero h3{margin:0;font-size:18px}
.spk-scope .spk-hero p{margin:2px 0 0;color:#475569}
.spk-scope .spk-steps{display:flex;flex-wrap:wrap;gap:8px;margin:8px 0 2px}
.spk-scope .spk-step{display:inline-flex;align-items:center;gap:8px;padding:8px 10px;border:1px dashed #dbeafe;border-radius:999px;background:#eff6ff;color:#1e40af;font-weight:700}
.spk-scope .spk-step .num{display:inline-grid;place-items:center;width:20px;height:20px;border-radius:999px;background:#2563eb;color:#fff;font-size:12px;font-weight:800}

.spk-scope .spk-grid{display:grid;grid-template-columns:1.2fr 1fr;gap:16px}
@media (max-width:980px){ .spk-scope .spk-grid{grid-template-columns:1fr} }

.spk-scope .spk-card{background:#fff;border:1px solid #e5e7eb;border-radius:16px;padding:14px 16px}
.spk-scope .spk-card h4{margin:0 0 8px 0;font-size:15px}

.spk-scope .pill-row{display:flex;flex-wrap:wrap;gap:8px;margin-top:8px}
.spk-scope .pill{display:inline-flex;align-items:center;padding:6px 10px;border-radius:999px;font-size:12px;font-weight:700;border:1px solid #e5e7eb;background:#f8fafc}
.spk-scope .pill-blue{border-color:#bfdbfe;background:#eff6ff;color:#1e3a8a}
.spk-scope .pill-green{border-color:#bbf7d0;background:#ecfdf5;color:#065f46}

.spk-scope .spk-counts table{border-collapse:separate;border-spacing:0 6px}
.spk-scope .spk-counts td{padding:4px 10px;border-radius:10px;background:#f8fafc;border:1px solid #e5e7eb}

/* Speakers: center the numbered pills */
.spk-scope .spk-step{ align-items:center; }
.spk-scope .spk-step .num{
  display:flex; align-items:center; justify-content:center;
  width:22px; height:22px; line-height:22px; margin-top:0;
  font-weight:800; font-size:12px;
}

/* Speakers: hide the thin separator rules */
.spk-scope hr{ display:none !important; }
                
/* Speakers — keep Detection style toggles in one horizontal row */
.spk-scope .widget-toggle-buttons.segmented{
  background: transparent !important;   /* kill blue bar */
  box-shadow: none !important;
  border: 0 !important;
  padding: 0 !important;

  display: flex !important;
  flex-wrap: nowrap !important;
  gap: 8px !important;                  /* space between the buttons */
  margin-bottom: 8px !important;       /* space before Quick tester */
}

.spk-scope .widget-toggle-buttons.segmented > .widget-toggle-button{
  display: inline-flex !important;
  align-items: center !important;
  justify-content: center !important;
  flex: 0 0 auto !important;
  min-height: 36px !important;
  padding: 8px 12px !important;
  white-space: nowrap !important;
}
                
.spk-scope .chips{display:flex;flex-wrap:wrap;gap:8px;margin:8px 0 4px}
.spk-scope .chip{display:inline-flex;align-items:center;gap:8px;padding:6px 10px;border:1px solid #e5e7eb;background:#f8fafc;border-radius:999px;font-weight:700}
.spk-scope .chip-count{
  background: transparent !important;
  color: inherit !important;
  padding: 0 !important;
  border: 0 !important;
  font-weight: 800 !important;
}
.spk-scope .spk-tile{background:#fff;border:1px solid #e5e7eb;border-radius:12px;padding:10px}
.spk-scope .spk-label{font-weight:700;font-size:13px;color:#0f172a}
.spk-scope .spk-tile{display:flex;flex-direction:column;height:100%}

</style>  
""")
# Render the stylesheet widget so its <style> rules are injected into the page.
# NOTE(review): the visible top of this notebook binds a stylesheet to
# `global_styles`; confirm that `ui_theme` is the name the HTML(...) literal
# ending just above is actually assigned to, otherwise this raises NameError
# on a fresh kernel.
display(ui_theme)



In [None]:
def extract_pdf(file):
    """Return the concatenated text of every page of a PDF.

    ``file`` is passed straight to ``PdfReader``. Each page contributes its
    extracted text (an empty string when extraction yields nothing) followed
    by a newline; the combined result is stripped of surrounding whitespace.
    """
    pdf = PdfReader(file)
    page_blocks = [(pg.extract_text() or "") + "\n" for pg in pdf.pages]
    return "".join(page_blocks).strip()

def preview_text(t, n = 48):
    """Return a short, display-friendly version of ``t``.

    The text is stripped first (``None`` is treated as empty). If it is longer
    than ``n`` characters it is cut to ``n`` characters and an ellipsis
    character is appended; otherwise it is returned unchanged.
    """
    cleaned = (t or "").strip()
    return cleaned if len(cleaned) <= n else cleaned[:n] + "…"

In [None]:
def conversion_len(text, max_len = 28):
    """Turn arbitrary text into a short, filename-safe slug.

    The text is lower-cased, every run of characters outside ``a-z``/``0-9``
    is collapsed into a single ``-``, and leading/trailing dashes are removed.
    The slug is then cut to ``max_len`` characters (dropping any dash left
    dangling by the cut).

    Args:
        text: Source text. ``None`` or an empty string is accepted.
        max_len: Maximum length of the returned slug.

    Returns:
        The slug, or ``"clip"`` when nothing usable remains.
    """
    # One regex pass replaces the per-character loop and repeated slicing.
    slug = re.sub(r"[^a-z0-9]+", "-", (text or "").lower()).strip("-")
    # Truncation may leave a separator at the end; trim it again.
    slug = slug[:max_len].rstrip("-")
    return slug or "clip"

In [None]:
def get_input():
    """Return the text the user wants rendered.

    In "upload" mode the first uploaded file is inspected; both the tuple
    form and the dict form of ``upload.value`` are handled. When that file
    has a ``.pdf`` name and non-empty content, its extracted text is
    returned. In every other case the stripped contents of the text area are
    returned (an empty string when there is none).
    """
    uploaded = upload.value if choice.value == "upload" else None

    if uploaded:
        name, payload = "", None
        if isinstance(uploaded, tuple) and len(uploaded) > 0:
            entry = uploaded[0]
            name = entry.get("name") or "uploaded.pdf"
            payload = entry["content"]
        elif isinstance(uploaded, dict):
            entry = next(iter(uploaded.values()))
            name = entry.get("name") or entry.get("metadata", {}).get("name") or "uploaded.pdf"
            payload = entry["content"]

        if name and name.lower().endswith(".pdf") and payload:
            return extract_pdf(BytesIO(payload))

    # No usable PDF: fall back to whatever was typed.
    return (text_area.value or "").strip()


In [None]:
def split_script_lines(text, keep_blank_lines= False):
    """Split text into lines after normalising line endings to ``\\n``.

    With ``keep_blank_lines`` every line is kept and only right-stripped.
    Otherwise each line is stripped on both sides and empty lines are dropped.
    ``None`` is treated as an empty string.
    """
    normalized = (text or "").replace("\r\n", "\n").replace("\r", "\n")
    raw_lines = normalized.split("\n")

    if keep_blank_lines:
        return [raw.rstrip() for raw in raw_lines]

    stripped = (raw.strip() for raw in raw_lines)
    return [line for line in stripped if line != ""]

def split_sentences(text: str):
    """Split text into sentences.

    A boundary is whitespace that follows ``.``, ``!`` or ``?`` and precedes
    an upper-case ASCII letter or a quote character. Returns a list of
    non-empty, stripped sentences; blank or ``None`` input gives ``[]``.
    """
    body = (text or "").strip()
    if not body:
        return []
    pieces = re.split(r'(?<=[.!?])\s+(?=[A-Z"\'])', body)
    return [frag for frag in map(str.strip, pieces) if frag]

def group_sentences(sentences, target_chars = 240, hard_max_chars = 500):
    """Pack consecutive sentences into space-joined chunks.

    A sentence starts a new chunk when appending it (plus a joining space)
    would push the current chunk past ``target_chars``. Independently, a chunk
    is closed as soon as its length reaches ``hard_max_chars``. Returns the
    list of chunk strings in input order.
    """
    chunks = []
    pending = []      # sentences collected for the chunk being built
    pending_len = 0   # length of " ".join(pending)

    for sentence in sentences:
        size = len(sentence)
        if pending and pending_len + 1 + size > target_chars:
            # Close the current chunk and start a fresh one with this sentence.
            chunks.append(" ".join(pending))
            pending = [sentence]
            pending_len = size
        elif pending_len:
            pending.append(sentence)
            pending_len += size + 1   # account for the joining space
        else:
            pending.append(sentence)
            pending_len += size

        if pending_len >= hard_max_chars:
            chunks.append(" ".join(pending))
            pending = []
            pending_len = 0

    if pending:
        chunks.append(" ".join(pending))
    return chunks

# Number of sample clips offered for preview before the full set is approved.
SAMPLE_COUNT = 3  # how many samples to preview before approval

def _samples_ui(files, titles):
    """Build the preview list: one labelled audio player row per sample.

    ``files`` and ``titles`` are paired positionally. Each row is an ``HBox``
    tagged with the ``playlist-row`` CSS class, holding the title (inserted
    into the markup as-is) and a non-autoplaying, non-looping player loaded
    from the file. The rows are stacked in a ``VBox`` that is returned.
    """
    def _row(path, caption):
        caption_html = HTML(f"<span class='playlist-title'>{caption}</span>")
        player = WAudio.from_file(path)
        player.autoplay = False
        player.loop = False
        player.layout = Layout(width='100%', flex='1 1 auto')
        box = HBox(
            [caption_html, player],
            layout=Layout(width='100%', align_items='center', gap='12px'),
        )
        box.add_class('playlist-row')
        return box

    return VBox(
        [_row(path, caption) for path, caption in zip(files, titles)],
        layout=Layout(width='100%', gap='16px'),
    )

def _read_wav_to_np(path):
    import wave, numpy as np
    with wave.open(str(path), "rb") as w:
        nch, sw, sr, nframes = w.getnchannels(), w.getsampwidth(), w.getframerate(), w.getnframes()
        raw = w.readframes(nframes)
    # map sample width -> dtype
    if sw == 1:  # 8-bit unsigned PCM
        a = np.frombuffer(raw, dtype=np.uint8).astype(np.int16) - 128
        a = (a << 8)  # scale to int16 range
        sw_norm = 2
    elif sw == 2:  # 16-bit
        a = np.frombuffer(raw, dtype=np.int16)
        sw_norm = 2
    elif sw == 3:  # 24-bit packed -> int32 then scale
        b = np.frombuffer(raw, dtype=np.uint8).reshape(-1, 3)
        a = (b[:,0].astype(np.int32) | (b[:,1].astype(np.int32) << 8) | (b[:,2].astype(np.int32) << 16))
        a = (a.view(np.int32) << 8) // 256  # approximate to 24->16
        a = a.astype(np.int16)
        sw_norm = 2
    elif sw == 4:  # 32-bit PCM -> down to int16
        a = (np.frombuffer(raw, dtype=np.int32) >> 16).astype(np.int16)
        sw_norm = 2
    else:
        raise RuntimeError(f"unsupported sample width: {sw}")
    # shape to (frames, channels)
    a = a.reshape(-1, nch)
    return a, nch, sw_norm, sr

def _resample_linear(x, src_sr, dst_sr):
    import numpy as np
    if src_sr == dst_sr or x.size == 0:
        return x
    ratio = dst_sr / float(src_sr)
    n_src = x.shape[0]
    n_dst = int(round(n_src * ratio))
    if n_dst <= 1:
        return x[:1]
    src_idx = np.arange(n_dst) / ratio
    lo = np.floor(src_idx).astype(int).clip(0, n_src - 1)
    hi = np.minimum(lo + 1, n_src - 1)
    frac = (src_idx - lo)[:, None]
    return (x[lo] * (1 - frac) + x[hi] * frac).astype(x.dtype)

def _to_channels(x, src_ch, dst_ch):
    import numpy as np
    if src_ch == dst_ch:
        return x
    if dst_ch == 1:
        # mixdown: average channels
        return x.mean(axis=1, keepdims=True).astype(x.dtype)
    if src_ch == 1 and dst_ch == 2:
        # upmix mono -> stereo
        return np.repeat(x, 2, axis=1)
    # generic: tile or truncate to match
    if src_ch < dst_ch:
        reps = (dst_ch + src_ch - 1) // src_ch
        y = np.tile(x, (1, reps))[:, :dst_ch]
    else:
        y = x[:, :dst_ch]
    return y

def concat_wavs_robust(wav_paths, out_path):
    """
    Join several WAV clips into one 16-bit WAV, using the first clip's
    sample rate and channel count as the target format.

    Inputs that do not exist, are 64 bytes or smaller, or fail ``_is_wav`` are
    ignored. The output is always written with a ``.wav`` suffix (parent
    folders are created) and its path is returned as a string.
    Raises RuntimeError when no usable input remains or the written file is
    smaller than 64 bytes. No ffmpeg involved.
    """
    from pathlib import Path
    import wave, numpy as np

    usable = []
    for candidate in wav_paths:
        as_path = Path(candidate)
        if as_path.exists() and as_path.stat().st_size > 64 and _is_wav(candidate):
            usable.append(as_path)
    if not usable:
        raise RuntimeError("no valid WAVs to merge")

    # The first clip defines the target format.
    first, *rest = usable
    base, tgt_ch, _unused_width, tgt_sr = _read_wav_to_np(first)

    pieces = [base]
    for clip in rest:
        samples, ch, _sw, sr = _read_wav_to_np(clip)
        # Match the sample rate first, then the channel layout.
        samples = _resample_linear(samples, sr, tgt_sr)
        samples = _to_channels(samples, ch, tgt_ch)
        pieces.append(samples.astype(np.int16))

    merged = np.vstack(pieces).astype(np.int16)  # (frames, channels)

    target = Path(out_path).with_suffix(".wav")
    target.parent.mkdir(parents=True, exist_ok=True)
    with wave.open(str(target), "wb") as w:
        w.setnchannels(tgt_ch)
        w.setsampwidth(2)          # always emitted as 16-bit
        w.setframerate(tgt_sr)
        w.writeframes(merged.tobytes())

    if target.stat().st_size < 64:
        raise RuntimeError("full_audio.wav empty")
    return str(target)


def _norm_name(s: str) -> str:
    return re.sub(r"\s+", " ", (s or "").strip()).casefold()

def _approve_generate_all(btn):
    s = btn.state
    plan = s["full_plan"]          # (filename, spoken_text, original_text, speaker)
    status = s["status"]; dl_html = s["dl_html"]; regen_btn = s["regen_btn"]

    settings = get_current_settings()
    model = settings["model"]; default_voice = settings["voice"]
    speaker_voices = s.get("speaker_voices", {})
    sv_norm = {_norm_name(k): v for k, v in speaker_voices.items() }

    btn.disabled = True; s["reject_btn"].disabled = True; regen_btn.layout.display = "none"

    for p in CLIPS_DIR.glob("*.wav"):
        try:
            p.unlink()
        except Exception:
            pass
        
    out_files, manifest_rows = [], []
    total_tokens = 0; total_bytes = 0; total_seconds = 0.0
    sr = ch = sw = None  # audio params

    for audio, (orig_filename, spoken_text, original_text, speaker) in enumerate(plan, start=1):
        base_audio = Path(orig_filename).stem
        wav_path = CLIPS_DIR / f"{base_audio}.wav"
        status.value = f"<i>Downloading {audio}/{len(plan)}…</i>"
        key = _norm_name(speaker or SPEAKER_FALLBACK)
        voice_to_use = sv_norm.get(key, default_voice)

        save_clip(spoken_text, str(wav_path), engine="openai", voice=voice_to_use, model=model)

        if _is_wav(str(wav_path)) and Path(wav_path).stat().st_size > 64:
            out_files.append(str(wav_path))
            b = Path(wav_path).stat().st_size
            total_bytes += b
            dur = 0.0
            try:
                with wave.open(str(wav_path), "rb") as w:
                    if sr is None:
                        ch = w.getnchannels(); sw = w.getsampwidth(); sr = w.getframerate()
                    dur = w.getnframes() / float(max(1, w.getframerate()))
            except Exception:
                pass
            total_seconds += dur
        else:
            print(f"[WARN] Skipping non-WAV or empty clip: {wav_path}")
            b = 0
            dur = 0.0

        tks = estimate_tokens(spoken_text); total_tokens += tks

        manifest_rows.append({
            "index": audio,
            "filename": Path(wav_path).name,
            "speaker": speaker or SPEAKER_FALLBACK,
            "original_text": original_text,
            "spoken_text": spoken_text,
            "char_count": len(spoken_text),
            "est_tokens": tks,
            "duration_seconds": round(dur, 3),
            "bytes": b,
            "sample_rate": sr, "channels": ch, "sample_width_bytes": sw,
            "model": model, "voice": voice_to_use,
            "generated_at": now_utc_iso(),
        })

    if not out_files:
        dl_html.value = ""
        return

    # ---- Full audio recording (concatenated from all clips) ----
    status.value = "<i>Building full audio recording…</i>"
    full_audio_path = CLIPS_DIR / "full_audio.wav"
    full_audio_created = False
    full_audio_seconds = 0.0

    try:
        concat_wavs_robust(out_files, full_audio_path)
        with wave.open(str(full_audio_path), "rb") as w:
            full_audio_seconds = w.getnframes() / float(w.getframerate())
        full_audio_created = True
        status.value = f"<i>Full audio created ({full_audio_seconds:.1f}s)…</i>"
    except Exception as e:
        full_audio_created = False
        err = f"Concat failed: {e}"
        status.value = f"<b>{err}</b>"
        with output_box:
            print("[FULL-AUDIO ERROR]", err)

    # ---- manifest.csv ----
    fieldnames = ["index","filename","speaker","original_text","spoken_text","char_count",
                  "est_tokens","duration_seconds","bytes","sample_rate","channels",
                  "sample_width_bytes","model","voice","generated_at"]

    manifest_csv_io = StringIO()
    writer = csv.DictWriter(manifest_csv_io, fieldnames=fieldnames)
    writer.writeheader()
    for row in manifest_rows:
        writer.writerow(row)
    manifest_csv = manifest_csv_io.getvalue()

    # ---- usage.json ----
    pricing_inputs = {}
    if TTS_PRICE_PER_1K_TOKENS > 0: pricing_inputs["tts_price_per_1k_tokens_usd"] = TTS_PRICE_PER_1K_TOKENS
    if HUMAN_VO_RATE_PER_FINISHED_MIN > 0: pricing_inputs["human_vo_rate_per_finished_min_usd"] = HUMAN_VO_RATE_PER_FINISHED_MIN

    usage = {
        "total_clips": len(manifest_rows),
        "total_characters": sum(len(r["spoken_text"]) for r in manifest_rows),
        "total_est_tokens": int(total_tokens),
        "total_audio_seconds": round(total_seconds, 3),
        "total_bytes": int(total_bytes),
        "model": model,
        "default_voice": default_voice,
        "speaker_voices": speaker_voices,
        "audio_params": {"sample_rate": sr, "channels": ch, "sample_width_bytes": sw},
        "generated_at": now_utc_iso(),
    }
    
    # Add full audio info if it was created successfully
    if full_audio_created:
        usage["full_audio_seconds"] = round(full_audio_seconds, 3)
        usage["total_bytes"] += int(full_audio_path.stat().st_size)
    
    if TTS_PRICE_PER_1K_TOKENS > 0:
        usage["est_tts_cost_usd"] = round((total_tokens / 1000.0) * TTS_PRICE_PER_1K_TOKENS, 4)
    if HUMAN_VO_RATE_PER_FINISHED_MIN > 0 and full_audio_created:
        usage["est_human_vo_cost_usd"] = round((full_audio_seconds / 60.0) * HUMAN_VO_RATE_PER_FINISHED_MIN, 2)

    # ---- bundle ----
    pron_df = get_active_pronunciations("pronunciations.csv")
    pronunciations_snapshot = pron_df.to_csv(index=False)
    input_text = "\n".join(s.get("original_lines", [])) if isinstance(s.get("original_lines"), list) else ""

    extras = {
        "manifest.csv": manifest_csv,
        "usage.json": json.dumps(usage, indent=2),
        "pronunciations.csv": pronunciations_snapshot,
        "input_text.txt": input_text,
        "README.txt": (
            "Text-to-Speech Studio bundle\n"
            "--------------------------------\n"
            "- manifest.csv: per-clip metadata (includes speaker & voice used)\n"
            + ("- full_audio.wav: single concatenated recording of entire text\n- full_audio.m3u: playlist pointing at the full recording\n" if full_audio_created else "")
            + "- usage.json: counters, durations, pricing estimates\n"
            "- pronunciations.csv: snapshot used\n"
            "- input_text.txt: exact text/chunks rendered\n"
        ),
    }
    
    # Add playlist file only if full audio was created
    if full_audio_created:
        extras["full_audio.m3u"] = "\n".join([
            "#EXTM3U",
            f"#EXTINF:{int(round(full_audio_seconds))},Full Audio Recording",
            Path(full_audio_path).name
        ]) + "\n"

    # Include full audio in ZIP - this is the key part that ensures it gets included
    files_to_zip = out_files.copy()  # Start with individual clips
    if full_audio_created and full_audio_path.exists():
        files_to_zip.append(str(full_audio_path))  # Add the full audio file

    status.value = "<i>Zipping files…</i>"
    data_url = _zip_as_data_url(files_to_zip, extras=extras)
    dl_html.value = (
        f"<a class='btn btn-success download-link' href='{data_url}' "
        f"download='Audio_{datetime.now().strftime('%Y_%m_%d-%H_%M_%S')}.zip'>"
        f"Download ZIP ({'individual clips + full audio + metadata' if full_audio_created else 'individual clips + metadata only'})</a>"
    )
    
    final_message = "Ready. Full set generated"
    if full_audio_created:
        final_message += f" including full audio file ({full_audio_seconds:.1f}s)."
    else:
        final_message += " with individual clips only."
    
    status.value = f"<b>{final_message}</b>"

def _reject_edit(btn):
    s = btn.state
    s["approve_btn"].disabled = False
    btn.disabled = False
    s["dl_html"].value = ""
    s["status"].value = (
        "Okay — tweak the <b>Pronunciation Library</b> on the right, then click "
        "<b>Regenerate samples</b>."
    )
    s["regen_btn"].layout.display = ""

In [None]:
# Saved voice choice per speaker; keys are produced by _norm_name(speaker).
SPEAKER_PREFS = {}         
# Speaker display name -> the Dropdown currently shown for that speaker.
SPEAKERS_DD = {}       

# Button that triggers speaker detection on the Speakers page.
# Its description is overwritten further down in this cell.
detect_btn = Button(description="Detect from current input", button_style="primary")
_add_class_safe(detect_btn, "btn"); _add_class_safe(detect_btn, "btn-primary")

# Filled by _render_counts() with the per-speaker chips (or an error message).
speakers_counts = HTML()
# Container for the grid of per-speaker dropdown tiles.
speakers_list_box = VBox([])

def _build_items_for_preview():
    """Parse the current input into speaker-tagged items for the Speakers page.

    Returns:
        ``(items, None)`` on success, or ``(None, message)`` when there is no
        input or no items could be built.
    """
    source_text = get_input()
    if not source_text:
        return None, "No input found. Enter text or upload a PDF on the Studio tab."

    script_lines = split_script_lines(source_text)
    if len(script_lines) <= 1:
        # Zero or one line: fall back to sentence groups built from that line
        # (or from the whole input when no line was produced).
        seed = script_lines[0] if script_lines else source_text
        script_lines = group_sentences(split_sentences(seed), 240, 500)

    # No custom pattern is supplied from this page, hence the trailing None.
    parsed = build_script_items(script_lines, speaker_mode.value, None)
    return (parsed, None) if parsed else (None, "No lines detected.")

def _make_speaker_dropdowns(speakers):
    """Rebuild the per-speaker voice pickers shown on the Speakers page.

    For every name in ``speakers`` a tile (HTML-escaped name label plus a voice
    ``Dropdown``) is created, the dropdown is registered in ``SPEAKERS_DD``,
    and all tiles are placed in a ``GridBox`` that becomes the only child of
    ``speakers_list_box``. Changing a dropdown immediately stores the new
    voice in ``SPEAKER_PREFS`` under the normalized speaker name.

    Args:
        speakers: iterable of speaker display names.
    """
    from html import escape  # function-scope import keeps this cell self-contained

    SPEAKERS_DD.clear()

    voice_ids = [v for v, _ in OPENAI_TTS_VOICES]
    if not voice_ids:
        # Nothing to offer: show an empty list instead of failing on the
        # modulo below.
        speakers_list_box.children = []
        return
    # Loop-invariant, so built once; each dropdown receives its own copy.
    voice_options = [(VOICE_NAME[v], v) for v in voice_ids]

    # Rotate fallback voices so the first unsaved speaker != current default.
    # The default is only appended when it is an offered voice, otherwise it
    # could end up as an invalid dropdown value.
    default = voice_toggle.value
    rotated = [v for v in voice_ids if v != default]
    if default in voice_ids:
        rotated.append(default)

    tiles = []
    for idx, spk in enumerate(speakers):
        saved = SPEAKER_PREFS.get(_norm_name(spk))
        # Honour a saved voice only while it is still offered; Dropdown rejects
        # a value that is not among its options.
        chosen = saved if saved in voice_ids else rotated[idx % len(rotated)]

        # Speaker names come from user-supplied text, so escape them for HTML.
        label = HTML(f"<div class='spk-label'>{escape(str(spk))}</div>")
        dd = Dropdown(options=list(voice_options), value=chosen, description="")
        dd.layout = Layout(width="100%", height="36px")

        # `who=spk` binds the current speaker at definition time; a plain
        # closure would see the loop's last value.
        def _on_change(change, who=spk):
            if change["name"] == "value":
                SPEAKER_PREFS[_norm_name(who)] = change["new"]
        dd.observe(_on_change, names="value")
        SPEAKERS_DD[spk] = dd

        tile = VBox([label, dd], layout=Layout(gap="6px"))
        _add_class_safe(tile, "spk-tile")  # same helper the rest of the file uses
        tiles.append(tile)

    grid = GridBox(
        children=tiles,
        layout=Layout(grid_template_columns="repeat(auto-fit, minmax(220px, 1fr))",
                      grid_gap="10px", width="100%")
    )
    speakers_list_box.children = [grid]

def _render_samples(sample_items, pronunciation_df, label, speaker_voices=None):
    """Synthesize one preview clip per item and build its HTML caption.

    Args:
        sample_items: iterable of dicts with a "text" key and an optional
            "speaker" key.
        pronunciation_df: pronunciation table handed to ``apply_pronunciation``.
        label: caption prefix placed before the 1-based clip number.
        speaker_voices: optional mapping of speaker name -> voice id; keys are
            normalized with ``_norm_name`` before lookup.

    Returns:
        ``(files, titles)``: parallel lists with the written clip paths (as
        strings) and the HTML caption for each clip.
    """
    from html import escape  # function-scope import keeps this cell self-contained

    settings = get_current_settings()
    voice_by_speaker = {_norm_name(k): v for k, v in (speaker_voices or {}).items()}

    files, titles = [], []
    for i, item in enumerate(sample_items, start=1):
        orig = item["text"]
        spk = item.get("speaker") or SPEAKER_FALLBACK
        spoken = apply_pronunciation(orig, pronunciation_df)

        # Pick the voice per speaker, falling back to the current setting.
        voice_to_use = voice_by_speaker.get(_norm_name(spk), settings["voice"])
        vname = VOICE_NAME.get(voice_to_use, voice_to_use)

        fname = SAMPLES_DIR / f"sample_{i:02d}_{conversion_len(orig)}.wav"
        save_clip(spoken, str(fname), voice=voice_to_use, model=settings["model"])

        # Only show the "spoken as" hint when pronunciations changed the text.
        hint = "" if orig == spoken else f' <span class="spoken-as">({preview_text(spoken,43)})</span>'
        # The speaker name comes from user-supplied text, so it is escaped.
        # NOTE(review): preview_text() output is inserted as-is — confirm
        # whether that helper escapes HTML.
        title = (
            f'{label} {i} — <b>[{escape(str(spk))}]</b> “{preview_text(orig,60)}” '
            f'<span class="spoken-as">({vname})</span>{hint}'
        )
        files.append(str(fname))
        titles.append(title)
    return files, titles


def _on_detect_click(_=None):
    """Re-run speaker detection and refresh the counts and voice pickers.

    On failure the error message is shown in ``speakers_counts`` and the
    picker list is emptied.
    """
    detected, problem = _build_items_for_preview()
    if not problem:
        _make_speaker_dropdowns(_render_counts(detected))
        return

    speakers_counts.value = f"<i>{problem}</i>"
    speakers_list_box.children = []

# Run detection whenever the Detect button is clicked.
detect_btn.on_click(_on_detect_click)

# Button that copies the dropdown selections into SPEAKER_PREFS, plus the
# HTML widget used to confirm the save.
save_speakers_btn = Button(description="Save voices to Studio", button_style="success")
_add_class_safe(save_speakers_btn, "btn"); _add_class_safe(save_speakers_btn, "btn-success")
save_msg = HTML()

save_speakers_btn.icon = "save"             
save_speakers_btn.tooltip = "Persist speaker→voice map for Studio"

# size parity with Detect
# (detect_btn.layout is assigned the same value again further down this cell)
detect_btn.layout = Layout(height="44px", width="auto", flex="0 0 auto")
save_speakers_btn.layout = Layout(height="44px", width="auto", flex="0 0 auto")
# when saving from Speakers tab
def _on_save_voices(_=None):
    """Store every dropdown's selected voice in ``SPEAKER_PREFS`` and confirm it.

    Keys are the normalized speaker names (``_norm_name``).
    """
    SPEAKER_PREFS.update(
        (_norm_name(name), dropdown.value) for name, dropdown in SPEAKERS_DD.items()
    )
    save_msg.value = "<small>Saved. Your per-speaker voices will be used in Studio.</small>"

# Persist the current dropdown selections when the Save button is clicked.
save_speakers_btn.on_click(_on_save_voices)

def _go_speakers(b=None):
    """Show the Speakers page on the stage and refresh speaker detection."""
    header_markup = "<div class='app-header'><b>Text-to-Speech Studio</b></div>"
    title.value = header_markup

    stage.children = [speakers_shell]
    collapse_btn.layout.display = "none"  # hidden while this page is shown
    _set_tab_active(speakers_btn)

    # Detect right away so the page reflects the current input.
    _on_detect_click()

# Open the Speakers page when its tab button is clicked.
speakers_btn.on_click(_go_speakers)

# Icons & sizing for action buttons
# NOTE: this description replaces the one passed to Button() when detect_btn
# was created, and the layout repeats the value assigned earlier in this cell.
detect_btn.description = "Detect speakers from current input"
detect_btn.icon = "search"
detect_btn.layout = Layout(height="44px", width="auto", flex="0 0 auto")

# Live regex tester
# Input for a single script line; _update_regex_preview() reads it.
regex_test_in = Text(
    placeholder="e.g., Alice: Hi, welcome to the show!",
    description="Try a line:",
    layout=Layout(width="100%")
)
# Shows the parsed speaker/text pills, or this hint while the input is empty.
regex_test_out = HTML("<small>Type a line to preview parsing.</small>")

def _update_regex_preview(_=None):
    line = (regex_test_in.value or "").strip()
    mode = speaker_mode.value
    pat = None
    spk, body, matched = _parse_line_with_mode(line, mode, pat)
    if not line:
        regex_test_out.value = "<small>Type a line to preview parsing.</small>"
    else:
        spk = spk or SPEAKER_FALLBACK
        body_preview = preview_text(body, 140) if body else "—"
        regex_test_out.value = (
            "<div class='pill-row'>"
            f"<span class='pill pill-blue'>Speaker: {spk}</span>"
            f"<span class='pill pill-green'>Text: {body_preview}</span>"
            "</div>"
        )

# Re-render the preview whenever the test line or the detection mode changes,
# and render it once now so the output starts in a consistent state.
regex_test_in.observe(_update_regex_preview, names="value")
speaker_mode.observe(lambda ch: _update_regex_preview(), names="value")
_update_regex_preview()

# Counts box renderer remains; we just wrap it in a styled container when shown
def _render_counts(items):
    counts = {}
    for it in items:
        spk = it.get("speaker") or SPEAKER_FALLBACK
        counts[spk] = counts.setdefault(spk, 0) + 1
    chips = " ".join(
        f"<span class='chip'>{name}<span class='chip-count'>{n}</span></span>"
        for name, n in sorted(counts.items())
    )
    speakers_counts.value = f"<div class='chips'>{chips}</div>"
    return sorted(counts.keys())

# Static "1-2-3-4" step indicator shown at the top of the Speakers panel.
steps_row = HTML(
    "<div class='spk-steps'>"
    "<span class='spk-step'><span class='num'>1</span> Pick style</span>"
    "<span class='spk-step'><span class='num'>2</span> Detect</span>"
    "<span class='spk-step'><span class='num'>3</span> Assign voices</span>"
    "<span class='spk-step'><span class='num'>4</span> Save</span>"
    "</div>"
)

# Card wrapping the single-line parsing tester (input + live result).
quick_card = VBox([
    HTML("<h4>Quick tester</h4><small>Paste a single line from your script to preview how it’s parsed.</small>"),
    regex_test_in, regex_test_out
], layout=Layout(gap="6px", width="100%"))
_add_class_safe(quick_card, "spk-card")      # rounded “bubble”
quick_card.layout.margin = "8px 0 0 0"

# Main Speakers panel: intro, steps, mode selector, tester, action buttons,
# detection results and the save confirmation, stacked top to bottom.
speakers_inline = VBox([
    HTML("<h4>Speakers (optional)</h4><small>Choose a detection style, run detection, then assign voices per speaker. Your choices are remembered in Studio.</small>"),
    steps_row,
    HTML("<b>Detection style</b>"),
    speaker_mode,
    quick_card,
    HBox([detect_btn, save_speakers_btn], layout=Layout(gap="8px", flex_flow="row wrap")),
    speakers_counts,          # compact chips row
    speakers_list_box,        # responsive grid of dropdown tiles
    save_msg
], layout=Layout(gap="12px"))
_add_class_safe(speakers_inline, "panel"); _add_class_safe(speakers_inline, "spk-scope")

# speakers page
center_speakers = VBox([speakers_inline]); _add_class_safe(center_speakers, "bubble-shield")

# Page frame: header and footer widgets around the panel, with both sidebars
# given zero width.
app_speakers = AppLayout(
    header=header, left_sidebar=None, center=center_speakers, right_sidebar=None, footer=footer,
    pane_widths=("0fr","1fr","0fr"), pane_heights=("56px","1fr","28px"),
    layout=Layout(width="100%", height="auto")
)
# Outer wrapper that _go_speakers() places on the stage.
speakers_shell = VBox([app_speakers]); _add_class_safe(speakers_shell, "app-bubble")

# (Re)assign the children of left_ui, which is defined outside this cell.
left_ui.children = (choice, upload, text_area, voice_box, generate_button, output_box)


In [None]:
def button_output(b=None):
    """Render preview samples for the current input and show the review controls.

    Steps:
      1. Read the input and split it into lines, falling back to sentence
         groups ("chunks") when at most one line is found.
      2. Tag every line with its speaker using the current detection mode.
      3. Delete earlier ``sample_*.wav`` files and synthesize the first
         ``SAMPLE_COUNT`` items, using the voices saved in ``SPEAKER_PREFS``.
      4. Plan the full render (file name, spoken text, original text, speaker)
         and estimate tokens, duration and cost.
      5. Display the samples with approve / reject / regenerate buttons in
         ``output_box``.

    The generate button is disabled and relabelled while this runs and is
    always restored afterwards, including on early returns and errors.

    Args:
        b: the clicked button when used as an ``on_click`` handler; unused.
    """
    generate_button.disabled = True
    old_desc = generate_button.description
    generate_button.description = "Rendering…"
    try:
        raw_text = get_input()
        if not raw_text:
            with output_box:
                print("Please upload a PDF with text or type into the text box.")
            return

        pronunciation_df = get_active_pronunciations("pronunciations.csv")

        # 1) Split input into lines (or chunks if needed)
        lines = split_script_lines(raw_text)
        used_fallback = len(lines) <= 1
        if used_fallback:
            base = lines[0] if lines else raw_text
            sents = split_sentences(base)
            lines = group_sentences(sents, target_chars=240, hard_max_chars=500)

        # 2) Build speaker-tagged items (no custom pattern is supplied here)
        mode = speaker_mode.value
        custom_pat = None
        items = build_script_items(lines, mode, custom_pat)

        label = "Chunk" if used_fallback else "Line"
        total = len(items)
        if total == 0:
            with output_box:
                output_box.clear_output(wait=True)
                print("No text found.")
            return

        # 3) Samples (first N items)
        for p in SAMPLES_DIR.glob("sample_*.wav"):
            try:
                p.unlink()
            except OSError:
                # Best effort: a stale sample that cannot be removed must not
                # block rendering. Only filesystem errors are ignored, so
                # KeyboardInterrupt and programming errors still surface.
                pass

        sample_items = items[:SAMPLE_COUNT]
        sample_files, sample_titles = _render_samples(
            sample_items, pronunciation_df, label, speaker_voices=SPEAKER_PREFS
        )

        # 4) Plan the full generation with speaker preserved
        full_plan = []
        for i, it in enumerate(items, start=1):
            spoken_line = apply_pronunciation(it["text"], pronunciation_df)
            filename = f"{label.lower()}{i:02d}_{conversion_len(it['text'])}.wav"
            full_plan.append((filename, spoken_line, it["text"], it.get("speaker") or SPEAKER_FALLBACK))

        # Estimates (price parts are shown only when the rate is non-zero)
        est_tokens_total = sum(estimate_tokens(sp) for _, sp, _, _ in full_plan)
        est_seconds_pred = round(est_tokens_total * SECS_PER_TOKEN_EST, 1)
        est_html = HTML(
            f"<small>Estimate: ~{est_seconds_pred}s audio, ~{est_tokens_total} tokens"
            + (f", TTS ≈ ${round((est_tokens_total/1000.0)*TTS_PRICE_PER_1K_TOKENS, 4)}" if TTS_PRICE_PER_1K_TOKENS else "")
            + (f", Human VO ≈ ${round((est_seconds_pred/60.0)*HUMAN_VO_RATE_PER_FINISHED_MIN, 2)}" if HUMAN_VO_RATE_PER_FINISHED_MIN else "")
            + "</small>"
        )

        # Controls & state (no per-speaker dropdowns in Studio)
        approve_btn = Button(description="Sounds good!", button_style="success")
        _add_class_safe(approve_btn, "btn"); _add_class_safe(approve_btn, "btn-success")
        reject_btn = Button(description="Not yet!", button_style="danger")
        _add_class_safe(reject_btn, "btn"); _add_class_safe(reject_btn, "btn-danger")
        regen_btn = Button(description="Regenerate samples", button_style="primary")
        _add_class_safe(regen_btn, "btn"); _add_class_safe(regen_btn, "btn-primary")
        regen_btn.layout.display = "none"  # revealed by _reject_edit()

        def _on_regen_click(_=None):
            button_output()
        regen_btn.on_click(_on_regen_click)

        status = HTML()
        dl_html = HTML()
        state = {
            "full_plan": full_plan,
            "status": status,
            "dl_html": dl_html,
            "approve_btn": approve_btn,
            "reject_btn": reject_btn,
            "regen_btn": regen_btn,
            "original_lines": lines,
            # use only saved speakers→voices from the Speakers tab
            "speaker_voices": { _norm_name(k): v for k, v in SPEAKER_PREFS.items() },
        }
        # The click handlers read this shared dict from the button they receive.
        approve_btn.state = state
        reject_btn.state = state

        def _on_approve(btn):
            # Re-read the saved voices at click time so changes made after the
            # samples were rendered are picked up.
            state["speaker_voices"] = { _norm_name(k): v for k, v in SPEAKER_PREFS.items() }
            _approve_generate_all(btn)
        approve_btn.on_click(_on_approve)
        reject_btn.on_click(_reject_edit)

        # Render (no per-speaker grid here)
        with output_box:
            output_box.clear_output(wait=True)
            # Named `summary` so it is not confused with the module-level
            # `header` widget used by the page layouts.
            summary = f"Listening to {len(sample_files)} sample(s) out of {total} total."
            display(Label(summary))
            display(_samples_ui(sample_files, sample_titles))
            status.value = "<i>Review the samples, then generate the full set.</i>"
            display(VBox([
                est_html,
                HBox([approve_btn, reject_btn, regen_btn], layout=Layout(gap='8px')),
                status,
                dl_html
            ]))
    finally:
        generate_button.description = old_desc
        generate_button.disabled = False


In [None]:
def regeneration_clicked(button = None):
    """Handle "Yes" on the regenerate prompt: dismiss it, then re-render samples.

    The prompt is cleared before rendering starts so it cannot be clicked a
    second time while rendering is in progress and does not stay on screen if
    rendering raises.

    Args:
        button: the clicked button when used as an ``on_click`` handler; unused.
    """
    regeneration_prompt.clear_output()
    button_output()

def not_regeneration_clicked(button = None):
    """Handle "No" on the regenerate prompt: just dismiss the prompt.

    Args:
        button: the clicked button when used as an ``on_click`` handler; unused.
    """
    prompt_area = regeneration_prompt
    prompt_area.clear_output()

def save_clicked(button = None, pronunciation_path = "pronunciations.csv"):
    """Save the pronunciation entered in the form and offer to regenerate audio.

    The status and prompt areas are cleared, the original/phonetic pair is
    passed to ``add_pronunciation``, its message is printed, and the
    pronunciation view is refreshed. When the entry was accepted, a Yes/No
    prompt asking whether to regenerate the audio is displayed.

    Args:
        button: the clicked button when used as an ``on_click`` handler; unused.
        pronunciation_path: file passed to ``add_pronunciation`` and
            ``refresh_pronunciation``.
    """
    pronunciation_status.clear_output()
    regeneration_prompt.clear_output()

    accepted, message = add_pronunciation(original.value, phonetic.value, pronunciation_path)
    with pronunciation_status:
        print(message)
    refresh_pronunciation(pronunciation_path)

    if not accepted:
        return

    question = HTML("<b>Regenerate audio with updated pronunciations?</b>")
    confirm_btn = Button(description="Yes", button_style="primary")
    decline_btn = Button(description="No", button_style="danger")
    for widget, variant in ((confirm_btn, "btn-primary"), (decline_btn, "btn-danger")):
        _add_class_safe(widget, "btn")
        _add_class_safe(widget, variant)

    confirm_btn.on_click(regeneration_clicked)
    decline_btn.on_click(not_regeneration_clicked)

    with regeneration_prompt:
        display(HBox([question, confirm_btn, decline_btn]))


In [None]:
# Wire the Studio buttons: saving a pronunciation and rendering samples.
save_button.on_click(save_clicked)
generate_button.on_click(button_output)