<a href="https://colab.research.google.com/github/bbanzai88/Book_Writing_Crew/blob/main/Book_writing_Crew_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This code doesnt use crewAI or crewai  flow and seems to produce better  results i.e. it has some character development and the number of chapters produced agrees with what was expected. This version includes:

stripping <think>…</think> everywhere from the output

stronger outline de-dupe

chapter repetition detection + automatic revision

an editor that scores “interestingness” + punch-up pass

cleaner prompts that forbid meta commentary

mixed models: DeepSeek-R1 for planning/eval, Llama 3.1 for prose

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm

import os, threading, subprocess, time, requests
from google.colab import files

# Unset any OpenAI/LiteLLM envs that could hijack LangChain
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"  # catch stray typos
]:
    os.environ.pop(v, None)

os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"

# Install & start Ollama
!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama status:", requests.get("http://127.0.0.1:11434").status_code)

# Pull models: planner/evaluator (DeepSeek-R1) and writer (Llama 3.1)
!ollama pull deepseek-r1:1.5b
!ollama pull llama3.1:8b

# ─────────────────────────────────────────────────────────────────────────────
# 1) Imports & Inputs
# ─────────────────────────────────────────────────────────────────────────────
import json, re
from typing import List, Any, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx

from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate  # modern import

# ─────────────────────────────────────────────────────────────────────────────
# User parameters
# ─────────────────────────────────────────────────────────────────────────────
NUM_CH       = 70  # set 20–30 for faster iteration
SEED_IDEA    = ("Dr. Lena Park — a brilliant but introverted data scientist at Datum, "
                "a social-media analytics startup. She notices impossible engagement "
                "patterns in a rising star; her investigation unravels a conspiracy "
                "of AI-driven “influencers” masquerading as humans—and she must decide "
                "whether to expose the truth or risk blowing up the platform.")
BOOK_TITLE   = "Artificial Influencers 2"
MODE         = "fiction"  # or "philosophy"

# ─────────────────────────────────────────────────────────────────────────────
# 2) LLM & Prompts (using the | operator, not LLMChain)
# ─────────────────────────────────────────────────────────────────────────────
planner_llm = OllamaLLM(
    model="deepseek-r1:1.5b",
    base_url="http://127.0.0.1:11434",
    temperature=0.5,  # a bit lower to help structure
)
writer_llm  = OllamaLLM(
    model="llama3.1:8b",
    base_url="http://127.0.0.1:11434",
    temperature=0.8,
)

# NDJSON prompts (MUCH easier to parse than JSON arrays)
outline_prompt = PromptTemplate(
    input_variables=["topic","count"],
    template=(
"You are a creative fiction author. Generate exactly {count} UNIQUE chapter seeds "
"for the book below. OUTPUT FORMAT: **NDJSON**, one compact JSON object per line, "
"no leading numbering, no extra text, no trailing commas, no code fences.\n\n"
"Each line MUST be like:\n"
'{{"title":"...","description":"..."}}\n\n'
"Book idea:\n{topic}\n"
    )
)

character_prompt = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"Given this chapter outline (JSON list):\n{outline}\n\n"
"Create exactly {num_chars} main characters.\n"
"OUTPUT FORMAT: **NDJSON**, one compact JSON object per line (no array, no extra text):\n"
'{{"name":"...","role":"...","development_arc":"..."}}\n'
    )
)

chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"

Constraints:
- NO meta commentary, NO analysis of your process, NO decision making.
- Assume the reader remembers prior chapters; do not re-explain backstory.
- Maintain continuity, but introduce at least one fresh obstacle, one vivid sensory beat, and one believable surprise.
- Use concrete, precise details; avoid clichés.
- End with a small but real unresolved tension.
Return TEXT ONLY.
"""
    )
)

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise the chapter to reduce repetition with earlier chapters while improving novelty and tension.
Keep the same characters and continuity, but change scene dynamics, setting details, and micro-beats.

Rules:
- Do NOT re-explain backstory already known.
- Add at least one fresh obstacle, one specific sensory detail, and one surprising but plausible turn.
- Preserve voice and POV.
Return TEXT ONLY.

Title: {title}
Description: {description}

Do-not-repeat ledger (phrases/scenes to avoid): {ledger}

Chapter draft:
{chapter}
"""
    )
)

EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
- pacing
- tension
- voice
- imagery
- dialogue
- novelty

Return STRICT JSON only:
{"scores":{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x},"one_sentence_note":"...","three_micro_edits":["...","...","..."]}

Chapter:
{chapter}
"""
    )
)

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits to strengthen the chapter without changing the plot:
- {edits}

Rules:
- Keep POV, continuity, and length roughly the same (±10%).
- Add concrete sensory details.
- Tighten weak sentences; remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

# build chains with the pipe operator (no deprecation warnings)
outline_chain   = outline_prompt   | planner_llm
character_chain = character_prompt | planner_llm
chapter_chain   = chapter_prompt   | writer_llm
revision_chain  = REVISION_PROMPT  | writer_llm
eval_chain      = EVAL_PROMPT      | planner_llm
punchup_chain   = PUNCHUP_PROMPT   | writer_llm

# ─────────────────────────────────────────────────────────────────────────────
# 3) Utilities: strip <think>…</think>, parse NDJSON, similarity guards
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s)
    s = FENCE_RE.sub("", s)
    return s.strip()

def parse_ndjson(text: str, expected: int = None) -> List[dict]:
    out = []
    for ln in text.splitlines():
        ln = ln.strip()
        if not ln: continue
        # ignore accidental bullets/numbering
        if ln[:2] in ("- ", "* "): ln = ln[2:].strip()
        if ln and ln[0].isdigit() and ln.lstrip().split(" ",1)[0].rstrip(".").isdigit():
            # "1. {...}" → "{...}"
            ln = re.sub(r"^\d+\.\s*", "", ln)
        try:
            obj = json.loads(ln)
            if isinstance(obj, dict):
                out.append(obj)
        except json.JSONDecodeError:
            # attempt tiny fixes: replace smart quotes, stray trailing commas
            fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
            fix = re.sub(r",\s*}", "}", fix)
            fix = re.sub(r",\s*]", "]", fix)
            try:
                obj = json.loads(fix)
                if isinstance(obj, dict):
                    out.append(obj)
            except Exception:
                continue
    if expected is not None and len(out) != expected:
        # keep best-effort, caller may retry
        pass
    return out

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", a.lower()))
    B = set(re.findall(r"[a-z0-9']+", b.lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

def too_similar(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.65) or (t_sim > 0.45 and d_sim > 0.55)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", s.lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", text.lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

# ─────────────────────────────────────────────────────────────────────────────
# 4) Outline generation with NDJSON + retry + strict dedupe
# ─────────────────────────────────────────────────────────────────────────────
print(f"→ Generating {NUM_CH}-chapter outline in chunks of ~10…")
outline, seen_titles = [], set()
chunk = 10
attempts = 0
MAX_ATTEMPTS = 40

while len(outline) < NUM_CH and attempts < MAX_ATTEMPTS:
    ask = min(chunk, NUM_CH - len(outline))
    raw = outline_chain.invoke({"topic": SEED_IDEA, "count": ask})
    attempts += 1
    lines = parse_ndjson(strip_think(raw), expected=ask)
    if not lines:
        print("⚠️ Outline chunk parse failed (empty); retrying…")
        continue
    added = 0
    for ch in lines:
        cand = {
            "title": (ch.get("title") or "").strip(),
            "description": (ch.get("description") or "").strip()
        }
        if not cand["title"] or not cand["description"]:
            continue
        if cand["title"] in seen_titles:
            continue
        if any(too_similar(cand, existing) for existing in outline):
            continue
        outline.append(cand)
        seen_titles.add(cand["title"])
        added += 1
        if len(outline) >= NUM_CH:
            break
    if added == 0:
        # tighten retry loop if nothing useful came back
        print("ℹ️ No new unique chapters from this chunk; retrying…")

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < NUM_CH:
    print("⚠️ Could not reach target count; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 5) Character Bible (NDJSON + retry)
# ─────────────────────────────────────────────────────────────────────────────
NUM_CHAR = max(3, min(10, NUM_CH//8))
print(f"→ Generating {NUM_CHAR} characters…")
characters = []
for _ in range(6):  # a few retries if parsing fails
    raw_chars = character_chain.invoke({
        "outline": json.dumps(outline, ensure_ascii=False),
        "num_chars": NUM_CHAR
    })
    characters = parse_ndjson(strip_think(raw_chars), expected=NUM_CHAR)
    if len(characters) == NUM_CHAR:
        break
print(f"✔ Got {len(characters)} characters\n")

# ─────────────────────────────────────────────────────────────────────────────
# 6) Chapter Generation (+ repetition revision + eval + punch-up)
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters in parallel…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)

MAX_WORKERS = 3  # adjust if your session is slower/faster

def write_one(idx):
    meta = outline[idx]
    res = chapter_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA
    })
    chapter_txt = strip_think(res)

    # Repetition check against earlier completed chapters
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > 0.22:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]  # unique + cap

    if ledger:
        revised = revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        })
        chapter_txt = strip_think(revised)

    # Evaluate interestingness
    report_raw = eval_chain.invoke({"chapter": chapter_txt})
    report_txt = strip_think(report_raw)
    data = None
    try:
        # extract last JSON-ish block
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
    except Exception:
        data = None

    # Punch-up if needed
    if data and "scores" in data:
        scores = data["scores"]
        try:
            avg_score = sum(float(scores[k]) for k in scores)/len(scores)
        except Exception:
            avg_score = 10.0
        if avg_score < 7.5:
            edits = " | ".join(data.get("three_micro_edits", [])) or "Tighten weak lines; add fresh sensory detail; remove clichés."
            punched = punchup_chain.invoke({"chapter": chapter_txt, "edits": edits})
            chapter_txt = strip_think(punched)

    return chapter_txt, data

from concurrent.futures import ThreadPoolExecutor, as_completed
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in range(len(outline)) }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt
        editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 7) Save to Word + Download (includes Editor’s Notes appendix)
# ─────────────────────────────────────────────────────────────────────────────
doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")

# Character Development Section
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role","")
        arc  = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes (Auto-Eval)
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in notes:
        doc.add_paragraph("No evaluation available.")
        continue
    scores = notes["scores"]
    one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try:
        avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception:
        avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None:
        doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner:
        doc.add_paragraph(f"Note: {one_liner}")
    if edits:
        for e in edits:
            doc.add_paragraph(f"• {e}")

fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


✅ Ollama status: 200
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l
→ Generating 70-chapter outline in chunks of ~10…
✔ Final outline: 70 chapters

→ Generating 8 characters…
✔ Got 8 characters

→ Generating chapters in parallel…


Chapters: 100%|██████████| 70/70 [18:07:15<00:00, 931.93s/it]


📘 Saved Artificial_Influencers_2.docx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm

import os, threading, subprocess, time, requests
from google.colab import files

# Unset any OpenAI/LiteLLM envs that could hijack LangChain
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"  # catch stray typos
]:
    os.environ.pop(v, None)

os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"

# Install & start Ollama
!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama status:", requests.get("http://127.0.0.1:11434").status_code)

# Pull models: planner/evaluator (DeepSeek-R1) and writer (Llama 3.1)
!ollama pull deepseek-r1:1.5b
!ollama pull llama3.1:8b

# ─────────────────────────────────────────────────────────────────────────────
# 1) Imports & Inputs
# ─────────────────────────────────────────────────────────────────────────────
import json, re
from typing import List, Any, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx

from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate  # modern import

# ─────────────────────────────────────────────────────────────────────────────
# User parameters
# ─────────────────────────────────────────────────────────────────────────────
NUM_CH       = 20  # set 70 when you're happy with results
SEED_IDEA    = ("Dr. Lena Park — a brilliant but introverted data scientist at Datum, "
                "a social-media analytics startup. She notices impossible engagement "
                "patterns in a rising star; her investigation unravels a conspiracy "
                "of AI-driven “influencers” masquerading as humans—and she must decide "
                "whether to expose the truth or risk blowing up the platform.")
BOOK_TITLE   = "Artificial Influencers 2"
MODE         = "fiction"  # or "philosophy"

# ─────────────────────────────────────────────────────────────────────────────
# 2) LLM & Prompts (using the | operator, not LLMChain)
# ─────────────────────────────────────────────────────────────────────────────
planner_llm = OllamaLLM(
    model="deepseek-r1:1.5b",
    base_url="http://127.0.0.1:11434",
    temperature=0.3,  # lower ⇒ stricter structure for NDJSON
)
writer_llm  = OllamaLLM(
    model="llama3.1:8b",
    base_url="http://127.0.0.1:11434",
    temperature=0.8,
)

# NDJSON prompts (MUCH easier to parse than JSON arrays)
outline_prompt = PromptTemplate(
    input_variables=["topic","count"],
    template=(
"You are a creative fiction author. Generate exactly {count} UNIQUE chapter seeds "
"for the book below. OUTPUT FORMAT: **NDJSON**, one compact JSON object per line, "
"no leading numbering, no extra text, no trailing commas, no code fences.\n\n"
"Each line MUST be like:\n"
'{{\"title\":\"...\",\"description\":\"...\"}}\\n'
"\nBook idea:\n{topic}\n"
    )
)

character_prompt = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"Given this chapter outline (JSON list):\n{outline}\n\n"
"Create exactly {num_chars} main characters.\n"
"OUTPUT FORMAT: **NDJSON**, one compact JSON object per line (no array, no extra text):\n"
'{{\"name\":\"...\",\"role\":\"...\",\"development_arc\":\"...\"}}'
    )
)

chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"

Constraints:
- NO meta commentary, NO analysis of your process, NO decision making.
- Assume the reader remembers prior chapters; do not re-explain backstory.
- Maintain continuity, but introduce at least one fresh obstacle, one vivid sensory beat, and one believable surprise.
- Use concrete, precise details; avoid clichés.
- End with a small but real unresolved tension.
Return TEXT ONLY.
"""
    )
)

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise the chapter to reduce repetition with earlier chapters while improving novelty and tension.
Keep the same characters and continuity, but change scene dynamics, setting details, and micro-beats.

Rules:
- Do NOT re-explain backstory already known.
- Add at least one fresh obstacle, one specific sensory detail, and one surprising but plausible turn.
- Preserve voice and POV.
Return TEXT ONLY.

Title: {title}
Description: {description}

Do-not-repeat ledger (phrases/scenes to avoid): {ledger}

Chapter draft:
{chapter}
"""
    )
)

# NOTE: braces escaped below ({{ … }}) so LangChain doesn't treat them as variables
EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
- pacing
- tension
- voice
- imagery
- dialogue
- novelty

Return STRICT JSON only:
{{"scores":{{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x}},"one_sentence_note":"...","three_micro_edits":["...","...","..."]}}

Chapter:
{chapter}
"""
    )
)

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits to strengthen the chapter without changing the plot:
- {edits}

Rules:
- Keep POV, continuity, and length roughly the same (±10%).
- Add concrete sensory details.
- Tighten weak sentences; remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

# build chains with the pipe operator (no deprecation warnings)
outline_chain   = outline_prompt   | planner_llm
character_chain = character_prompt | planner_llm
chapter_chain   = chapter_prompt   | writer_llm
revision_chain  = REVISION_PROMPT  | writer_llm
eval_chain      = EVAL_PROMPT      | planner_llm
punchup_chain   = PUNCHUP_PROMPT   | writer_llm

# ─────────────────────────────────────────────────────────────────────────────
# 3) Utilities: strip <think>…</think>, parse NDJSON, similarity guards
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s)
    s = FENCE_RE.sub("", s)
    return s.strip()

def parse_ndjson(text: str, expected: int = None) -> List[dict]:
    out = []
    for ln in text.splitlines():
        ln = ln.strip()
        if not ln:
            continue
        # ignore accidental bullets/numbering
        if ln[:2] in ("- ", "* "):
            ln = ln[2:].strip()
        if ln and ln[0].isdigit() and ln.lstrip().split(" ",1)[0].rstrip(".").isdigit():
            ln = re.sub(r"^\d+\.\s*", "", ln)  # "1. { ... }" → "{ ... }"
        try:
            obj = json.loads(ln)
            if isinstance(obj, dict):
                out.append(obj)
        except json.JSONDecodeError:
            # tiny auto-fix: smart quotes / trailing commas
            fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
            fix = re.sub(r",\s*}", "}", fix)
            fix = re.sub(r",\s*]", "]", fix)
            try:
                obj = json.loads(fix)
                if isinstance(obj, dict):
                    out.append(obj)
            except Exception:
                continue
    # we allow best-effort even if count != expected
    return out

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", a.lower()))
    B = set(re.findall(r"[a-z0-9']+", b.lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

def too_similar(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.65) or (t_sim > 0.45 and d_sim > 0.55)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", s.lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", text.lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

# ─────────────────────────────────────────────────────────────────────────────
# 4) Outline generation with NDJSON + retry + strict dedupe
# ─────────────────────────────────────────────────────────────────────────────
print(f"→ Generating {NUM_CH}-chapter outline in chunks of ~10…")
outline, seen_titles = [], set()
chunk = 10
attempts = 0
MAX_ATTEMPTS = 40

while len(outline) < NUM_CH and attempts < MAX_ATTEMPTS:
    ask = min(chunk, NUM_CH - len(outline))
    raw = outline_chain.invoke({"topic": SEED_IDEA, "count": ask})
    attempts += 1
    lines = parse_ndjson(strip_think(raw), expected=ask)
    if not lines:
        print("⚠️ Outline chunk parse failed (empty); retrying…")
        continue
    added = 0
    for ch in lines:
        cand = {
            "title": (ch.get("title") or "").strip(),
            "description": (ch.get("description") or "").strip()
        }
        if not cand["title"] or not cand["description"]:
            continue
        if cand["title"] in seen_titles:
            continue
        if any(too_similar(cand, existing) for existing in outline):
            continue
        outline.append(cand)
        seen_titles.add(cand["title"])
        added += 1
        if len(outline) >= NUM_CH:
            break
    if added == 0:
        print("ℹ️ No new unique chapters from this chunk; retrying…")

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < NUM_CH:
    print("⚠️ Could not reach target count; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 5) Character Bible (NDJSON + retry)
# ─────────────────────────────────────────────────────────────────────────────
NUM_CHAR = max(3, min(10, NUM_CH//8))
print(f"→ Generating {NUM_CHAR} characters…")
characters = []
for _ in range(6):  # a few retries if parsing fails
    raw_chars = character_chain.invoke({
        "outline": json.dumps(outline, ensure_ascii=False),
        "num_chars": NUM_CHAR
    })
    characters = parse_ndjson(strip_think(raw_chars), expected=NUM_CHAR)
    if len(characters) == NUM_CHAR:
        break
print(f"✔ Got {len(characters)} characters\n")

# ─────────────────────────────────────────────────────────────────────────────
# 6) Chapter Generation (+ repetition revision + eval + punch-up) with guards
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters in parallel…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)

MAX_WORKERS = 3  # adjust if your session is slower/faster

def write_one(idx):
    meta = outline[idx]
    # First draft
    res = chapter_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA
    })
    chapter_txt = strip_think(res)

    # Repetition check against earlier completed chapters
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > 0.22:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]  # unique + cap

    if ledger:
        revised = revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        })
        chapter_txt = strip_think(revised)

    # Evaluate interestingness (GUARDED)
    data = None
    try:
        report_raw = eval_chain.invoke({"chapter": chapter_txt})
        report_txt = strip_think(report_raw)
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
        if data and "scores" in data:
            scores = data["scores"]
            avg_score = sum(float(scores[k]) for k in scores)/len(scores)
            if avg_score < 7.5:
                edits = " | ".join(data.get("three_micro_edits", [])) or \
                        "Tighten weak lines; add fresh sensory detail; remove clichés."
                punched = punchup_chain.invoke({"chapter": chapter_txt, "edits": edits})
                chapter_txt = strip_think(punched)
    except Exception:
        data = None  # keep chapter as-is if eval/punch-up fails

    return chapter_txt, data

from concurrent.futures import ThreadPoolExecutor, as_completed
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in range(len(outline)) }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt
        editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 7) Save to Word + Download (includes Editor’s Notes appendix)
# ─────────────────────────────────────────────────────────────────────────────
doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")

# Character Development Section
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role","")
        arc  = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes (Auto-Eval)
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in notes:
        doc.add_paragraph("No evaluation available.")
        continue
    scores = notes["scores"]
    one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try:
        avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception:
        avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None:
        doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner:
        doc.add_paragraph(f"Note: {one_liner}")
    if edits:
        for e in edits:
            doc.add_paragraph(f"• {e}")

fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/253.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m245.8/253.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Ollama status: 200
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026

Chapters: 100%|██████████| 20/20 [11:44:45<00:00, 2114.26s/it]


📘 Saved Artificial_Influencers_2.docx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#This verson has knobs to make it better maybe

# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama (planner/evaluator + writer)
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm

import os, threading, subprocess, time, requests, json, re
from typing import List, Any, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx
from google.colab import files

# Unset any OpenAI/LiteLLM envs that could hijack LangChain
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"  # catch stray typos
]:
    os.environ.pop(v, None)

os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"

# Install & start Ollama
!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama status:", requests.get("http://127.0.0.1:11434").status_code)

# Pull models: planner/evaluator (DeepSeek-R1) and writer (Llama 3.1)
!ollama pull deepseek-r1:1.5b
!ollama pull llama3.1:8b

# ─────────────────────────────────────────────────────────────────────────────
# 1) Imports (LangChain) & User Inputs
# ─────────────────────────────────────────────────────────────────────────────
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate  # modern import

# ─────────────────────────────────────────────────────────────────────────────
# User parameters
# ─────────────────────────────────────────────────────────────────────────────
NUM_CH       = 20  # set 70 when you're happy with results
SEED_IDEA    = ("This idea is for a technothriller: Dr. Lena Park — a brilliant but introverted data scientist at Datum, "
                "a social-media analytics startup. She notices impossible engagement "
                "patterns in a rising star; her investigation unravels a conspiracy "
                "of AI-driven “influencers” masquerading as humans—and she must decide "
                "whether to expose the truth or risk blowing up the platform.")
BOOK_TITLE   = "Artificial Influencers 2"
MODE         = "fiction"  # or "philosophy"

# ─────────────────────────────────────────────────────────────────────────────
# 2) LLMs & Core Prompts (using the | operator, not LLMChain)
# ─────────────────────────────────────────────────────────────────────────────
planner_llm = OllamaLLM(
    model="deepseek-r1:1.5b",
    base_url="http://127.0.0.1:11434",
    temperature=0.3,  # lower ⇒ stricter structure for NDJSON/JSON
)
writer_llm  = OllamaLLM(
    model="llama3.1:8b",
    base_url="http://127.0.0.1:11434",
    temperature=0.8,
)

# NDJSON prompts (MUCH easier to parse than JSON arrays)
outline_prompt = PromptTemplate(
    input_variables=["topic","count"],
    template=(
"You are a creative fiction author. Generate exactly {count} UNIQUE chapter seeds "
"for the book below. OUTPUT FORMAT: **NDJSON**, one compact JSON object per line, "
"no leading numbering, no extra text, no trailing commas, no code fences.\n\n"
"Each line MUST be like:\n"
'{{\"title\":\"...\",\"description\":\"...\"}}\\n'
"\nBook idea:\n{topic}\n"
    )
)

character_prompt = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"Given this chapter outline (JSON list):\n{outline}\n\n"
"Create exactly {num_chars} main characters.\n"
"OUTPUT FORMAT: **NDJSON**, one compact JSON object per line (no array, no extra text):\n"
'{{\"name\":\"...\",\"role\":\"...\",\"development_arc\":\"...\"}}'
    )
)

chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"

Constraints:
- NO meta commentary, NO analysis of your process, NO decision making.
- Assume the reader remembers prior chapters; do not re-explain backstory.
- Maintain continuity, but introduce at least one fresh obstacle, one vivid sensory beat, and one believable surprise.
- Use concrete, precise details; avoid clichés.
- End with a small but real unresolved tension.
Return TEXT ONLY.
"""
    )
)

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise the chapter to reduce repetition with earlier chapters while improving novelty and tension.
Keep the same characters and continuity, but change scene dynamics, setting details, and micro-beats.

Rules:
- Do NOT re-explain backstory already known.
- Add at least one fresh obstacle, one specific sensory detail, and one surprising but plausible turn.
- Preserve voice and POV.
Return TEXT ONLY.

Title: {title}
Description: {description}

Do-not-repeat ledger (phrases/scenes to avoid): {ledger}

Chapter draft:
{chapter}
"""
    )
)

# NOTE: braces escaped below ({{ … }}) so LangChain doesn't treat them as variables
EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
- pacing
- tension
- voice
- imagery
- dialogue
- novelty

Return STRICT JSON only:
{{"scores":{{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x}},"one_sentence_note":"...","three_micro_edits":["...","...","..."]}}

Chapter:
{chapter}
"""
    )
)

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits to strengthen the chapter without changing the plot:
- {edits}

Rules:
- Keep POV, continuity, and length roughly the same (±10%).
- Add concrete sensory details.
- Tighten weak sentences; remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

# ─────────────────────────────────────────────────────────────────────────────
# 2.a THEME/MOTIF bible, BEATS planner, dialogue tuner, anti-cliché, marketing
# ─────────────────────────────────────────────────────────────────────────────
THEME_PROMPT = PromptTemplate(
    input_variables=["topic","outline"],
    template=(
"""You are a development editor. From the seed idea and outline, extract:
- 3–5 core THEMES (short phrases)
- 6–12 recurring MOTIFS or props (e.g., "glitch-art", "rain-soaked streets")
- 3–6 PROMISES to the reader (e.g., "ethical tension", "reveal of hidden mastermind")
- A one-sentence LOGLINE pitched to a general audience
- Genre expectations to signal with setting & imagery (GENRE_SIGNALS, 5–8 phrases)

Return STRICT JSON:
{{
  "themes": ["..."],
  "motifs": ["..."],
  "promises": ["..."],
  "logline": "...",
  "genre_signals": ["..."]
}}

Seed idea: {topic}
Outline: {outline}
"""
    )
)

BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","theme_bible","motif_ledger"],
    template=(
"""Plan a tight beat sheet for the chapter with approx. 8–12 beats following a tension curve.
Enforce: strong opening hook line, mid-chapter reversal, and a cliffhanger/stinger.

Constraints:
- Integrate 1–2 MOTIFS from motif_ledger (callbacks) and 1 FRESH setting detail aligned to genre_signals.
- Target dialogue ratio: 30–45% of lines contain dialogue.
- Specify a SENSORY PALETTE (2–3 senses to emphasize).
- Map an EMOTION ARC over beats (e.g., curiosity → dread → resolve).

Return STRICT JSON:
{{
  "beats": [
    {{"name":"Hook","goal":"...","conflict":"...","setting":"...","emotion":"..."}},
    ...
  ],
  "dialogue_target_pct": 0.38,
  "sensory_palette": ["sound","smell"],
  "foreshadow":"...",
  "callback_motif":"..."
}}

Title: {title}
Description: {description}
THEME_BIBLE: {theme_bible}
MOTIF_LEDGER: {motif_ledger}
"""
    )
)

CHAPTER_WITH_BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","idea","plan","themes","sensory_palette","dialogue_target"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".

Seed idea: {idea}
Mini-brief: {description}
Plan (beats): {plan}

Must do:
- OPEN with a punchy 1–2 sentence HOOK that raises a concrete question.
- Emphasize SENSORY PALETTE: {sensory_palette}
- Aim for DIALOGUE DENSITY ≈ {dialogue_target:.2f} (about 30–45% lines include dialogue).
- Integrate 1 motif or prop from the plan naturally.
- Midpoint reversal that reframes stakes.
- END with a plausible CLIFFHANGER/STINGER (no meta).

Style:
- Concrete details, crisp verbs; avoid clichés.
- Maintain POV and continuity; no backstory dumps.

Themes to subtly reinforce: {themes}
Return TEXT ONLY.
"""
    )
)

DIALOGUE_TUNER_PROMPT = PromptTemplate(
    input_variables=["chapter","target"],
    template=(
"""Revise the chapter to adjust dialogue density to ≈ {target:.2f} (±0.08).
Keep plot and beats intact. Do not shorten by more than 10% or lengthen by more than 10%.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

DECLICHE_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Rewrite at sentence-level to remove clichés, generic metaphors, and filler.
Replace with specific, concrete imagery fitting a near-future techno-thriller vibe.
Preserve plot, POV, beats, and length (±5%). Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

MOTIF_MINER_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Extract 1–3 recurring motifs/props/images present in this chapter (short noun phrases).
Return STRICT JSON: {{"motifs":["..."]}}

Chapter:
{chapter}
"""
    )
)

BLURB_PROMPT = PromptTemplate(
    input_variables=["title","logline","themes","promises"],
    template=(
"""Write:
1) A high-impact back-cover BLURB (120–160 words) using the logline and themes.
2) A 1-sentence hook for retailer product pages.
3) 3 short BookTok/Bookstagram snippets (≤140 chars each).

Return STRICT JSON:
{{
  "blurb":"...",
  "product_hook":"...",
  "snippets": ["...","...","..."]
}}

Title: {title}
Logline: {logline}
Themes: {themes}
Promises: {promises}
"""
    )
)

# Build chains with the pipe operator
outline_chain     = outline_prompt               | planner_llm
character_chain   = character_prompt             | planner_llm
chapter_chain     = chapter_prompt               | writer_llm
revision_chain    = REVISION_PROMPT              | writer_llm
eval_chain        = EVAL_PROMPT                  | planner_llm
punchup_chain     = PUNCHUP_PROMPT               | writer_llm

theme_chain       = THEME_PROMPT                 | planner_llm
beats_chain       = BEATS_PROMPT                 | planner_llm
chapter_beats_llm = CHAPTER_WITH_BEATS_PROMPT    | writer_llm
dialogue_tuner    = DIALOGUE_TUNER_PROMPT        | writer_llm
decliche_chain    = DECLICHE_PROMPT              | writer_llm
motif_miner       = MOTIF_MINER_PROMPT           | planner_llm
blurb_chain       = BLURB_PROMPT                 | planner_llm

# ─────────────────────────────────────────────────────────────────────────────
# 3) Utilities: strip <think>…</think>, parse NDJSON/JSON, similarity guards
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s)
    s = FENCE_RE.sub("", s)
    return s.strip()

def parse_ndjson(text: str, expected: int = None) -> List[dict]:
    out = []
    for ln in text.splitlines():
        ln = ln.strip()
        if not ln:
            continue
        # ignore accidental bullets/numbering
        if ln[:2] in ("- ", "* "):
            ln = ln[2:].strip()
        if ln and ln[0].isdigit() and ln.lstrip().split(" ",1)[0].rstrip(".").isdigit():
            ln = re.sub(r"^\d+\.\s*", "", ln)  # "1. { ... }" → "{ ... }"
        try:
            obj = json.loads(ln)
            if isinstance(obj, dict):
                out.append(obj)
        except json.JSONDecodeError:
            # tiny auto-fix: smart quotes / trailing commas
            fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
            fix = re.sub(r",\s*}", "}", fix)
            fix = re.sub(r",\s*]", "]", fix)
            try:
                obj = json.loads(fix)
                if isinstance(obj, dict):
                    out.append(obj)
            except Exception:
                continue
    return out

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", a.lower()))
    B = set(re.findall(r"[a-z0-9']+", b.lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

def too_similar(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.65) or (t_sim > 0.45 and d_sim > 0.55)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", s.lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", text.lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

# 3.a Dialogue ratio & strict JSON helper
def approx_dialogue_ratio(text: str) -> float:
    # crude but robust: count lines with quotes or leading em-dash dialogue
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if not lines: return 0.0
    dial = sum(1 for ln in lines if re.search(r'["“”]|^—', ln))
    return dial / max(1, len(lines))

def parse_strict_json(s: str) -> dict:
    s = strip_think(s)
    m = re.search(r"\{[\s\S]*\}\s*$", s)
    if not m: return {}
    try:
        return json.loads(m.group(0))
    except Exception:
        # lenient fixes
        fix = m.group(0).replace("“","\"").replace("”","\"").replace("’","'")
        fix = re.sub(r",\s*}", "}", fix)
        fix = re.sub(r",\s*]", "]", fix)
        try: return json.loads(fix)
        except Exception: return {}

# ─────────────────────────────────────────────────────────────────────────────
# 4) Outline generation with NDJSON + retry + strict dedupe
# ─────────────────────────────────────────────────────────────────────────────
print(f"→ Generating {NUM_CH}-chapter outline in chunks of ~10…")
outline, seen_titles = [], set()
chunk = 10
attempts = 0
MAX_ATTEMPTS = 40

while len(outline) < NUM_CH and attempts < MAX_ATTEMPTS:
    ask = min(chunk, NUM_CH - len(outline))
    raw = outline_chain.invoke({"topic": SEED_IDEA, "count": ask})
    attempts += 1
    lines = parse_ndjson(strip_think(raw), expected=ask)
    if not lines:
        print("⚠️ Outline chunk parse failed (empty); retrying…")
        continue
    added = 0
    for ch in lines:
        cand = {
            "title": (ch.get("title") or "").strip(),
            "description": (ch.get("description") or "").strip()
        }
        if not cand["title"] or not cand["description"]:
            continue
        if cand["title"] in seen_titles:
            continue
        if any(too_similar(cand, existing) for existing in outline):
            continue
        outline.append(cand)
        seen_titles.add(cand["title"])
        added += 1
        if len(outline) >= NUM_CH:
            break
    if added == 0:
        print("ℹ️ No new unique chapters from this chunk; retrying…")

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < NUM_CH:
    print("⚠️ Could not reach target count; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 5) Character Bible (NDJSON + retry) + Theme/Motif Bible
# ─────────────────────────────────────────────────────────────────────────────
NUM_CHAR = max(3, min(10, NUM_CH//8))
print(f"→ Generating {NUM_CHAR} characters…")
characters = []
for _ in range(6):  # a few retries if parsing fails
    raw_chars = character_chain.invoke({
        "outline": json.dumps(outline, ensure_ascii=False),
        "num_chars": NUM_CHAR
    })
    characters = parse_ndjson(strip_think(raw_chars), expected=NUM_CHAR)
    if len(characters) == NUM_CHAR:
        break
print(f"✔ Got {len(characters)} characters\n")

# 5.a Theme/Motif bible from outline
print("→ Building theme/motif bible…")
theme_raw = theme_chain.invoke({
    "topic": SEED_IDEA,
    "outline": json.dumps(outline, ensure_ascii=False)
})
THEME_BIBLE = parse_strict_json(theme_raw) or {
    "themes": [],
    "motifs": [],
    "promises": [],
    "logline": "",
    "genre_signals": []
}
MOTIF_LEDGER = list(THEME_BIBLE.get("motifs", []))  # seed with global motifs
print("✔ Theme bible ready.\n")

# ─────────────────────────────────────────────────────────────────────────────
# 6) Chapter Generation (+ beats + dialogue tuner + anti-cliché) with guards
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters in parallel…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)

MAX_WORKERS = 3  # adjust if your session is slower/faster
BIGRAM_THRESHOLD = 0.22  # repetition strictness

def write_one(idx):
    meta = outline[idx]

    # 1) Plan beats with callbacks to existing motif ledger
    plan_raw = beats_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "theme_bible": json.dumps(THEME_BIBLE, ensure_ascii=False),
        "motif_ledger": json.dumps(MOTIF_LEDGER[-12:], ensure_ascii=False)  # recent motifs
    })
    plan = parse_strict_json(plan_raw)
    dialogue_target = float(plan.get("dialogue_target_pct", 0.36))
    sensory_palette = plan.get("sensory_palette", ["sight","sound"])
    plan_json = json.dumps(plan.get("beats", []), ensure_ascii=False)

    # 2) Draft with hooks, sensory palette, dialogue target
    res = chapter_beats_llm.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA,
        "plan": plan_json,
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "sensory_palette": ", ".join(sensory_palette),
        "dialogue_target": dialogue_target
    })
    chapter_txt = strip_think(res)

    # 3) Repetition guard vs earlier chapters
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > BIGRAM_THRESHOLD:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]
    if ledger:
        revised = revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        })
        chapter_txt = strip_think(revised)

    # 4) Dialogue density tuner (±8%)
    dr = approx_dialogue_ratio(chapter_txt)
    if abs(dr - dialogue_target) > 0.08:
        tuned = dialogue_tuner.invoke({"chapter": chapter_txt, "target": dialogue_target})
        chapter_txt = strip_think(tuned)

    # 5) Anti-cliché polish (tight, specific imagery)
    polished = decliche_chain.invoke({"chapter": chapter_txt})
    chapter_txt = strip_think(polished)

    # 6) Auto-evaluation + optional punch-up
    data = None
    try:
        report_raw = eval_chain.invoke({"chapter": chapter_txt})
        report_txt = strip_think(report_raw)
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
        if data and "scores" in data:
            scores = data["scores"]
            avg_score = sum(float(scores[k]) for k in scores)/len(scores)
            if avg_score < 7.5:
                edits = " | ".join(data.get("three_micro_edits", [])) or \
                        "Sharpen hooks; escalate mid-point reversal; add concrete sensory beats."
                punched = punchup_chain.invoke({"chapter": chapter_txt, "edits": edits})
                chapter_txt = strip_think(punched)
    except Exception:
        data = None

    # 7) Mine motifs from this chapter → update global ledger (for callbacks)
    try:
        mined_raw = motif_miner.invoke({"chapter": chapter_txt})
        mined = parse_strict_json(mined_raw)
        for m in (mined.get("motifs") or []):
            if m not in MOTIF_LEDGER:
                MOTIF_LEDGER.append(m)
    except Exception:
        pass

    return chapter_txt, data

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in range(len(outline)) }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt
        editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 7) Save to Word + Back-cover copy + Download (includes Editor’s Notes)
# ─────────────────────────────────────────────────────────────────────────────
doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")

# Character Development Section
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role","")
        arc  = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes (Auto-Eval)
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in notes:
        doc.add_paragraph("No evaluation available.")
        continue
    scores = notes["scores"]
    one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try:
        avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception:
        avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None:
        doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner:
        doc.add_paragraph(f"Note: {one_liner}")
    if edits:
        for e in edits:
            doc.add_paragraph(f"• {e}")

# 7.a Back-cover blurb + retailer hook + social snippets
blurb_data = {}
try:
    blurb_raw = blurb_chain.invoke({
        "title": BOOK_TITLE,
        "logline": THEME_BIBLE.get("logline",""),
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "promises": ", ".join(THEME_BIBLE.get("promises", []))
    })
    blurb_data = parse_strict_json(blurb_raw) or {}
except Exception:
    blurb_data = {}

doc.add_page_break()
doc.add_heading("Back-Cover Copy & Retailer Hook", level=1)
if blurb_data.get("blurb"):
    doc.add_paragraph(blurb_data["blurb"])
if blurb_data.get("product_hook"):
    doc.add_paragraph(f"\nRetailer Hook: {blurb_data['product_hook']}")
if blurb_data.get("snippets"):
    doc.add_heading("Short Social Snippets", level=2)
    for s in blurb_data["snippets"]:
        doc.add_paragraph(f"• {s}")

# Save & download
fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/253.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Ollama status: 200
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G

KeyboardInterrupt: 

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama (with speed toggles & checkpointing)
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm

import os, threading, subprocess, time, requests, json, re, shutil, pathlib, math
from typing import List, Any, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx
from google.colab import files

# Avoid LangChain provider hijacks
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"  # catch stray typos
]:
    os.environ.pop(v, None)

# —— Speed/robustness knobs ——
FAST_MODE = True                 # Flip to False when you're happy with outputs
CHECKPOINT_DIR = "book_ckpt"     # Saves each chapter as it completes
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Make Ollama conservative about parallelism in Colab
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "1"
os.environ["OLLAMA_NUM_PARALLEL"] = "1"

# Launch Ollama daemon
os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"

!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama health:", requests.get("http://127.0.0.1:11434").status_code)

# GPU detection to size workers
def _has_gpu():
    try:
        return shutil.which("nvidia-smi") and (subprocess.run(["nvidia-smi"], capture_output=True).returncode==0)
    except Exception:
        return False
HAS_GPU = bool(_has_gpu())
print("🖥️ GPU:", "available" if HAS_GPU else "not detected")

# ─────────────────────────────────────────────────────────────────────────────
# 1) User parameters (auto size by target pages → chapters)
# ─────────────────────────────────────────────────────────────────────────────
BOOK_TITLE   = "Artificial Influencers 2"
MODE         = "fiction"  # or "nonfiction"

# Either set an explicit target page count, or leave None to auto-center the profile
TARGET_PAGES = 320 if MODE == "fiction" else 280

GENRE_PROFILE = {
    "fiction":   {"pages_min": 280, "pages_max": 360, "chapter_words_typical": (2400, 3600)},
    "nonfiction":{"pages_min": 220, "pages_max": 320, "chapter_words_typical": (3000, 4500)}
}[MODE]

if TARGET_PAGES is None:
    TARGET_PAGES = (GENRE_PROFILE["pages_min"] + GENRE_PROFILE["pages_max"]) // 2

# Words-per-page assumption (trade paperback-ish)
WORDS_PER_PAGE = 275
TARGET_WORDS   = int(TARGET_PAGES * WORDS_PER_PAGE)

# Choose a target chapter length (midpoint of typical band), and derive initial chapter count:
CH_MIN, CH_MAX = GENRE_PROFILE["chapter_words_typical"]
CH_TARGET_WORDS = int((CH_MIN + CH_MAX) / 2)
NUM_CH = max(12, min(80, (TARGET_WORDS + CH_TARGET_WORDS - 1) // CH_TARGET_WORDS))

# Your story seed:
SEED_IDEA = (
    "idea for technothriller: Dr. Lena Park — a brilliant but introverted data scientist at Datum, "
    "a social-media analytics startup. She notices impossible engagement "
    "patterns in a rising star; her investigation unravels a conspiracy "
    "of AI-driven 'influencers' masquerading as humans—and she must decide "
    "whether to expose the truth or risk blowing up the platform."
)

print(f"🎯 Target pages: {TARGET_PAGES}  → target words ≈ {TARGET_WORDS:,}")
print(f"📏 Chapter target: ~{CH_TARGET_WORDS:,} words → initial chapters: {NUM_CH}")

# ─────────────────────────────────────────────────────────────────────────────
# 2) LLMs & Prompts (with speed-aware models/options)
# ─────────────────────────────────────────────────────────────────────────────
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate  # modern import

PLANNER_MODEL = "deepseek-r1:1.5b"
WRITER_MODEL  = "llama3.2:3b-instruct" if FAST_MODE else "llama3.1:8b"
PLANNER_NUM_PREDICT = 384 if FAST_MODE else 640
WRITER_NUM_PREDICT  = 1800 if FAST_MODE else 3000
MAX_WORKERS = 1 if (FAST_MODE or not HAS_GPU) else 3   # be conservative on CPU

# Pull only what's needed for this run
subprocess.run(["ollama","pull", PLANNER_MODEL], check=False)
subprocess.run(["ollama","pull", WRITER_MODEL],  check=False)

planner_llm = OllamaLLM(
    model=PLANNER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.25,
    num_predict=PLANNER_NUM_PREDICT,
)

writer_llm  = OllamaLLM(
    model=WRITER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.8,
    num_ctx=4096,
    num_predict=WRITER_NUM_PREDICT,
)

# NDJSON prompts (easier to parse than JSON arrays)
outline_prompt = PromptTemplate(
    input_variables=["topic","count"],
    template=(
"You are a creative fiction author. Generate exactly {count} UNIQUE chapter seeds "
"for the book below. OUTPUT FORMAT: **NDJSON**, one compact JSON object per line, "
"no leading numbering, no extra text, no trailing commas, no code fences.\n\n"
"Each line MUST be like:\n"
'{{\"title\":\"...\",\"description\":\"...\"}}\\n'
"\nBook idea:\n{topic}\n"
    )
)

character_prompt = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"Given this chapter outline (JSON list):\n{outline}\n\n"
"Create exactly {num_chars} main characters.\n"
"OUTPUT FORMAT: **NDJSON**, one compact JSON object per line (no array, no extra text):\n"
'{{\"name\":\"...\",\"role\":\"...\",\"development_arc\":\"...\"}}'
    )
)

chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"

Constraints:
- NO meta commentary, NO analysis of your process, NO decision making.
- Assume the reader remembers prior chapters; do not re-explain backstory.
- Maintain continuity, but introduce at least one fresh obstacle, one vivid sensory beat, and one believable surprise.
- Use concrete, precise details; avoid clichés.
- End with a small but real unresolved tension.
Return TEXT ONLY.
"""
    )
)

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise the chapter to reduce repetition with earlier chapters while improving novelty and tension.
Keep the same characters and continuity, but change scene dynamics, setting details, and micro-beats.

Rules:
- Do NOT re-explain backstory already known.
- Add at least one fresh obstacle, one specific sensory detail, and one surprising but plausible turn.
- Preserve voice and POV.
Return TEXT ONLY.

Title: {title}
Description: {description}

Do-not-repeat ledger (phrases/scenes to avoid): {ledger}

Chapter draft:
{chapter}
"""
    )
)

# NOTE: braces escaped below ({{ … }})
EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
- pacing
- tension
- voice
- imagery
- dialogue
- novelty

Return STRICT JSON only:
{{"scores":{{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x}},"one_sentence_note":"...","three_micro_edits":["...","...","..."]}}

Chapter:
{chapter}
"""
    )
)

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits to strengthen the chapter without changing the plot:
- {edits}

Rules:
- Keep POV, continuity, and length roughly the same (±10%).
- Add concrete sensory details.
- Tighten weak sentences; remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

# 2.a Theme/Motif bible, beats planner, dialogue tuner, anti-cliché, marketing
THEME_PROMPT = PromptTemplate(
    input_variables=["topic","outline"],
    template=(
"""You are a development editor. From the seed idea and outline, extract:
- 3–5 core THEMES (short phrases)
- 6–12 recurring MOTIFS or props (e.g., "glitch-art", "rain-soaked streets")
- 3–6 PROMISES to the reader (e.g., "ethical tension", "reveal of hidden mastermind")
- A one-sentence LOGLINE pitched to a general audience
- Genre expectations to signal with setting & imagery (GENRE_SIGNALS, 5–8 phrases)

Return STRICT JSON:
{{
  "themes": ["..."],
  "motifs": ["..."],
  "promises": ["..."],
  "logline": "...",
  "genre_signals": ["..."]
}}

Seed idea: {topic}
Outline: {outline}
"""
    )
)

BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","theme_bible","motif_ledger"],
    template=(
"""Plan a tight beat sheet for the chapter with approx. 8–12 beats following a tension curve.
Enforce: strong opening hook line, mid-chapter reversal, and a cliffhanger/stinger.

Constraints:
- Integrate 1–2 MOTIFS from motif_ledger (callbacks) and 1 FRESH setting detail aligned to genre_signals.
- Target dialogue ratio: 30–45% of lines contain dialogue.
- Specify a SENSORY PALETTE (2–3 senses to emphasize).
- Map an EMOTION ARC over beats (e.g., curiosity → dread → resolve).

Return STRICT JSON:
{{
  "beats": [
    {{"name":"Hook","goal":"...","conflict":"...","setting":"...","emotion":"..."}},
    ...
  ],
  "dialogue_target_pct": 0.38,
  "sensory_palette": ["sound","smell"],
  "foreshadow":"...",
  "callback_motif":"..."
}}

Title: {title}
Description: {description}
THEME_BIBLE: {theme_bible}
MOTIF_LEDGER: {motif_ledger}
"""
    )
)

CHAPTER_WITH_BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","idea","plan","themes","sensory_palette","dialogue_target"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".

Seed idea: {idea}
Mini-brief: {description}
Plan (beats): {plan}

Must do:
- OPEN with a punchy 1–2 sentence HOOK that raises a concrete question.
- Emphasize SENSORY PALETTE: {sensory_palette}
- Aim for DIALOGUE DENSITY ≈ {dialogue_target:.2f} (about 30–45% lines include dialogue).
- Integrate 1 motif or prop from the plan naturally.
- Midpoint reversal that reframes stakes.
- END with a plausible CLIFFHANGER/STINGER (no meta).

Style:
- Concrete details, crisp verbs; avoid clichés.
- Maintain POV and continuity; no backstory dumps.

Themes to subtly reinforce: {themes}
Return TEXT ONLY.
"""
    )
)

DIALOGUE_TUNER_PROMPT = PromptTemplate(
    input_variables=["chapter","target"],
    template=(
"""Revise the chapter to adjust dialogue density to ≈ {target:.2f} (±0.08).
Keep plot and beats intact. Do not shorten by more than 10% or lengthen by more than 10%.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

DECLICHE_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Rewrite at sentence-level to remove clichés, generic metaphors, and filler.
Replace with specific, concrete imagery fitting a near-future techno-thriller vibe.
Preserve plot, POV, beats, and length (±5%). Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

MOTIF_MINER_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Extract 1–3 recurring motifs/props/images present in this chapter (short noun phrases).
Return STRICT JSON: {{"motifs":["..."]}}

Chapter:
{chapter}
"""
    )
)

BLURB_PROMPT = PromptTemplate(
    input_variables=["title","logline","themes","promises"],
    template=(
"""Write:
1) A high-impact back-cover BLURB (120–160 words) using the logline and themes.
2) A 1-sentence hook for retailer product pages.
3) 3 short BookTok/Bookstagram snippets (≤140 chars each).

Return STRICT JSON:
{{
  "blurb":"...",
  "product_hook":"...",
  "snippets": ["...","...","..."]
}}

Title: {title}
Logline: {logline}
Themes: {themes}
Promises: {promises}
"""
    )
)

# Build chains with the pipe operator
outline_chain     = outline_prompt               | planner_llm
character_chain   = character_prompt             | planner_llm
chapter_chain     = chapter_prompt               | writer_llm
revision_chain    = REVISION_PROMPT              | writer_llm
eval_chain        = EVAL_PROMPT                  | planner_llm
punchup_chain     = PUNCHUP_PROMPT               | writer_llm

theme_chain       = THEME_PROMPT                 | planner_llm
beats_chain       = BEATS_PROMPT                 | planner_llm
chapter_beats_llm = CHAPTER_WITH_BEATS_PROMPT    | writer_llm
dialogue_tuner    = DIALOGUE_TUNER_PROMPT        | writer_llm
decliche_chain    = DECLICHE_PROMPT              | writer_llm
motif_miner       = MOTIF_MINER_PROMPT           | planner_llm
blurb_chain       = BLURB_PROMPT                 | planner_llm

# ─────────────────────────────────────────────────────────────────────────────
# 3) Utilities: strip <think>, parse NDJSON/JSON, similarity, IO helpers
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s)
    s = FENCE_RE.sub("", s)
    return s.strip()

def parse_ndjson(text: str, expected: int = None) -> List[dict]:
    out = []
    for ln in text.splitlines():
        ln = ln.strip()
        if not ln:
            continue
        if ln[:2] in ("- ", "* "): ln = ln[2:].strip()  # ignore bullets
        if ln and ln[0].isdigit() and ln.lstrip().split(" ",1)[0].rstrip(".").isdigit():
            ln = re.sub(r"^\d+\.\s*", "", ln)
        try:
            obj = json.loads(ln)
            if isinstance(obj, dict): out.append(obj)
        except json.JSONDecodeError:
            fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
            fix = re.sub(r",\s*}", "}", fix); fix = re.sub(r",\s*]", "]", fix)
            try:
                obj = json.loads(fix)
                if isinstance(obj, dict): out.append(obj)
            except Exception:
                continue
    return out

def parse_strict_json(s: str) -> dict:
    s = strip_think(s)
    m = re.search(r"\{[\s\S]*\}\s*$", s)
    if not m: return {}
    try:
        return json.loads(m.group(0))
    except Exception:
        fix = m.group(0).replace("“","\"").replace("”","\"").replace("’","'")
        fix = re.sub(r",\s*}", "}", fix); fix = re.sub(r",\s*]", "]", fix)
        try: return json.loads(fix)
        except Exception: return {}

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", a.lower()))
    B = set(re.findall(r"[a-z0-9']+", b.lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

def too_similar(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.65) or (t_sim > 0.45 and d_sim > 0.55)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", s.lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", text.lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

def approx_dialogue_ratio(text: str) -> float:
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if not lines: return 0.0
    dial = sum(1 for ln in lines if re.search(r'["“”]|^—', ln))
    return dial / max(1, len(lines))

def word_count(text: str) -> int:
    return len(re.findall(r"[A-Za-z0-9']+", text or ""))

# Checkpoint helpers
def _slug(s):
    return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:60]

def _ck_paths(i, title):
    base = f"{i:03d}-{_slug(title)}"
    p = pathlib.Path(CHECKPOINT_DIR)
    return p / (base + ".txt"), p / (base + ".json")

def save_ckpt(i, title, text, notes):
    p_txt, p_meta = _ck_paths(i, title)
    p_txt.write_text(text or "", encoding="utf-8")
    meta = {"chapter_index": i, "title": title, "notes": notes}
    p_meta.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

def load_ckpt_if_any(i, title):
    p_txt, p_meta = _ck_paths(i, title)
    if p_txt.exists():
        text = p_txt.read_text(encoding="utf-8")
        notes = None
        if p_meta.exists():
            try: notes = json.loads(p_meta.read_text(encoding="utf-8"))
            except Exception: notes = None
        return text, notes
    return None, None

# ─────────────────────────────────────────────────────────────────────────────
# 4) Outline generation with NDJSON + retry + strict dedupe
# ─────────────────────────────────────────────────────────────────────────────
print(f"→ Generating {NUM_CH}-chapter outline in chunks of ~10…")
outline, seen_titles = [], set()
chunk = 10
attempts = 0
MAX_ATTEMPTS = 40

while len(outline) < NUM_CH and attempts < MAX_ATTEMPTS:
    ask = min(chunk, NUM_CH - len(outline))
    raw = outline_chain.invoke({"topic": SEED_IDEA, "count": ask})
    attempts += 1
    lines = parse_ndjson(strip_think(raw), expected=ask)
    if not lines:
        print("⚠️ Outline chunk parse failed (empty); retrying…")
        continue
    added = 0
    for ch in lines:
        cand = {
            "title": (ch.get("title") or "").strip(),
            "description": (ch.get("description") or "").strip()
        }
        if not cand["title"] or not cand["description"]:
            continue
        if cand["title"] in seen_titles:
            continue
        if any(too_similar(cand, existing) for existing in outline):
            continue
        outline.append(cand)
        seen_titles.add(cand["title"])
        added += 1
        if len(outline) >= NUM_CH:
            break
    if added == 0:
        print("ℹ️ No new unique chapters from this chunk; retrying…")

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < NUM_CH:
    print("⚠️ Could not reach target count; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 5) Character Bible + Theme/Motif Bible
# ─────────────────────────────────────────────────────────────────────────────
NUM_CHAR = max(3, min(10, len(outline)//8))
print(f"→ Generating {NUM_CHAR} characters…")
characters = []
for _ in range(6):  # a few retries if parsing fails
    raw_chars = character_chain.invoke({
        "outline": json.dumps(outline, ensure_ascii=False),
        "num_chars": NUM_CHAR
    })
    characters = parse_ndjson(strip_think(raw_chars), expected=NUM_CHAR)
    if len(characters) == NUM_CHAR:
        break
print(f"✔ Got {len(characters)} characters\n")

print("→ Building theme/motif bible…")
theme_raw = theme_chain.invoke({
    "topic": SEED_IDEA,
    "outline": json.dumps(outline, ensure_ascii=False)
})
THEME_BIBLE = parse_strict_json(theme_raw) or {
    "themes": [],
    "motifs": [],
    "promises": [],
    "logline": "",
    "genre_signals": []
}
MOTIF_LEDGER = list(THEME_BIBLE.get("motifs", []))  # seed with global motifs
print("✔ Theme bible ready.\n")

# ─────────────────────────────────────────────────────────────────────────────
# 6) Chapter Generation with beats + checkpointing + early re-calibration
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)
BIGRAM_THRESHOLD = 0.22  # repetition strictness

def write_one(idx):
    meta = outline[idx]
    title = meta["title"]

    # Resume if checkpoint exists
    cached_txt, cached_notes = load_ckpt_if_any(idx, title)
    if cached_txt:
        return cached_txt, cached_notes

    # 1) Plan beats with callbacks to existing motif ledger
    plan_raw = beats_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "theme_bible": json.dumps(THEME_BIBLE, ensure_ascii=False),
        "motif_ledger": json.dumps(MOTIF_LEDGER[-12:], ensure_ascii=False)  # recent motifs
    })
    plan = parse_strict_json(plan_raw)
    dialogue_target = float(plan.get("dialogue_target_pct", 0.36))
    sensory_palette = plan.get("sensory_palette", ["sight","sound"])
    plan_json = json.dumps(plan.get("beats", []), ensure_ascii=False)

    # 2) Draft with hooks, sensory palette, dialogue target
    res = chapter_beats_llm.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA,
        "plan": plan_json,
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "sensory_palette": ", ".join(sensory_palette),
        "dialogue_target": dialogue_target
    })
    chapter_txt = strip_think(res)

    # 3) Repetition guard vs earlier chapters
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > BIGRAM_THRESHOLD:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]
    if ledger:
        revised = revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        })
        chapter_txt = strip_think(revised)

    # 4/5) Auto-evaluation → only run heavy passes if needed
    data = None
    try:
        report_raw = eval_chain.invoke({"chapter": chapter_txt})
        report_txt = strip_think(report_raw)
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
        if data and "scores" in data:
            scores = data["scores"]
            avg_score = sum(float(scores[k]) for k in scores)/len(scores)

            # Dialogue tuner if notably off target
            dr = approx_dialogue_ratio(chapter_txt)
            if abs(dr - dialogue_target) > 0.10:
                tuned = dialogue_tuner.invoke({"chapter": chapter_txt, "target": dialogue_target})
                chapter_txt = strip_think(tuned)

            # Decliché only when quality is mid or FAST_MODE off
            if (avg_score < 7.6) or (not FAST_MODE):
                polished = decliche_chain.invoke({"chapter": chapter_txt})
                chapter_txt = strip_think(polished)

            # Punch-up if still low
            if avg_score < 7.4:
                edits = " | ".join(data.get("three_micro_edits", [])) or \
                        "Sharpen hooks; escalate midpoint; add concrete sensory beats."
                punched = punchup_chain.invoke({"chapter": chapter_txt, "edits": edits})
                chapter_txt = strip_think(punched)
    except Exception:
        data = None

    # 6) Mine motifs from this chapter → update global ledger
    try:
        mined_raw = motif_miner.invoke({"chapter": chapter_txt})
        mined = parse_strict_json(mined_raw)
        for m in (mined.get("motifs") or []):
            if m not in MOTIF_LEDGER:
                MOTIF_LEDGER.append(m)
    except Exception:
        pass

    # Save checkpoint
    save_ckpt(idx, title, chapter_txt, data)
    return chapter_txt, data

# —— Early calibration: write first few chapters serially, resize outline
PREGEN = min(3, len(outline))
for i in range(PREGEN):
    ch_txt, notes = write_one(i)
    chap_texts[i] = ch_txt
    editor_notes[i] = notes

# Measure actual words/chapter and re-size remaining outline to hit target pages
actual_avg = max(500, sum(word_count(chap_texts[i]) for i in range(PREGEN)) // PREGEN)
recalc_num_ch = max(12, min(80, (TARGET_WORDS + actual_avg - 1) // actual_avg))

if recalc_num_ch != len(outline):
    delta = recalc_num_ch - len(outline)
    if delta > 0:
        ask_more = delta
        raw_extra = outline_chain.invoke({"topic": SEED_IDEA, "count": ask_more})
        extra = parse_ndjson(strip_think(raw_extra), expected=ask_more)
        seen_titles = set([c["title"] for c in outline])
        for ch in extra:
            cand = {"title": (ch.get("title") or "").strip(),
                    "description": (ch.get("description") or "").strip()}
            if not cand["title"] or not cand["description"]:
                continue
            if cand["title"] in seen_titles:
                continue
            if any(too_similar(cand, e) for e in outline):
                continue
            outline.append(cand); seen_titles.add(cand["title"])
            chap_texts.append(None); editor_notes.append(None)
            if len(outline) >= recalc_num_ch: break
        print(f"🔁 Resized outline: +{max(0, delta)} → total {len(outline)} chapters")
    elif delta < 0:
        keep = max(PREGEN, recalc_num_ch)
        outline = outline[:keep]
        chap_texts = chap_texts[:keep]
        editor_notes = editor_notes[:keep]
        print(f"✂️  Trimmed outline to {len(outline)} chapters")

# Generate remaining chapters in parallel/respectful workers
remaining_idxs = [i for i, t in enumerate(chap_texts) if t is None]
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in remaining_idxs }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt
        editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 7) Pre-save totals → Save to Word + Back-cover copy + Download
# ─────────────────────────────────────────────────────────────────────────────
total_words = sum(word_count(t or "") for t in chap_texts)
est_pages  = total_words / WORDS_PER_PAGE
suggested_ch = max(12, min(80, round(total_words / CH_TARGET_WORDS)))
print(f"🧮 Total words: {total_words:,}  → est. pages ≈ {est_pages:.0f}")
print(f"🔧 If you rerun, suggested chapters for this style ≈ {suggested_ch}")

doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")
doc.add_paragraph(f"Estimated pages: ~{est_pages:.0f}")

# Character Development Section
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role","")
        arc  = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes (Auto-Eval)
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in (notes or {}):
        doc.add_paragraph("No evaluation available.")
        continue
    scores = notes["scores"]
    one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try:
        avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception:
        avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None:
        doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner:
        doc.add_paragraph(f"Note: {one_liner}")
    if edits:
        for e in edits:
            doc.add_paragraph(f"• {e}")

# Back-cover blurb + retailer hook + social snippets
blurb_data = {}
try:
    blurb_raw = blurb_chain.invoke({
        "title": BOOK_TITLE,
        "logline": THEME_BIBLE.get("logline",""),
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "promises": ", ".join(THEME_BIBLE.get("promises", []))
    })
    blurb_data = parse_strict_json(blurb_raw) or {}
except Exception:
    blurb_data = {}

doc.add_page_break()
doc.add_heading("Back-Cover Copy & Retailer Hook", level=1)
if blurb_data.get("blurb"):
    doc.add_paragraph(blurb_data["blurb"])
if blurb_data.get("product_hook"):
    doc.add_paragraph(f"\nRetailer Hook: {blurb_data['product_hook']}")
if blurb_data.get("snippets"):
    doc.add_heading("Short Social Snippets", level=2)
    for s in blurb_data["snippets"]:
        doc.add_paragraph(f"• {s}")

# Save & download
fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


✅ Ollama health: 200
🖥️ GPU: not detected
🎯 Target pages: 320  → target words ≈ 88,000
📏 Chapter target: ~3,000 words → initial chapters: 30
→ Generating 30-chapter outline in chunks of ~10…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse failed (empty); retrying…
⚠️ Outline chunk parse fa

KeyboardInterrupt: 

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama (speed toggles, checkpoints, resolver)
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm

import os, threading, subprocess, time, requests, json, re, shutil, pathlib, math, sys
from typing import List, Any, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx
from google.colab import files

# Avoid LangChain provider hijacks
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"  # catch stray typos
]:
    os.environ.pop(v, None)

# —— Speed/robustness knobs ——
FAST_MODE = True                 # Flip to False when you're happy with the outputs
CHECKPOINT_DIR = "book_ckpt"     # Saves each chapter & metadata
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Make Ollama conservative about parallelism in Colab
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "1"
os.environ["OLLAMA_NUM_PARALLEL"] = "1"

# Launch Ollama daemon
os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"

!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama health:", requests.get("http://127.0.0.1:11434").status_code)

# GPU detection to size workers
def _has_gpu():
    try:
        return shutil.which("nvidia-smi") and (subprocess.run(["nvidia-smi"], capture_output=True).returncode==0)
    except Exception:
        return False
HAS_GPU = bool(_has_gpu())
print("🖥️ GPU:", "available" if HAS_GPU else "not detected")

# ─────────────────────────────────────────────────────────────────────────────
# Helper: choose models that actually exist locally/remote via pull
# ─────────────────────────────────────────────────────────────────────────────
def pick_first_available(candidates: List[str]) -> str:
    for m in candidates:
        try:
            r = subprocess.run(["ollama", "pull", m], capture_output=True, text=True)
            if r.returncode == 0:
                print(f"✔ Using model: {m}")
                return m
            else:
                print(f"✖ Pull failed for {m} → {r.stderr.strip() or r.stdout.strip()}")
        except Exception as e:
            print(f"✖ Error pulling {m}: {e}")
    raise RuntimeError(f"No candidate models could be pulled: {candidates}")

# Good planner-size candidates (small & widely available)
PLANNER_CANDIDATES = [
    "llama3.2:3b",
    "qwen2.5:3b",
    "phi3:3.8b-mini",
    "gemma2:2b",
    "mistral:7b",
    "llama3.1:8b",
]

# Writer candidates
WRITER_FAST_CANDIDATES = [
    "llama3.2:3b",
    "qwen2.5:3b",
    "phi3:3.8b-mini",
    "gemma2:2b",
    "mistral:7b",
    "llama3.1:8b",
]
WRITER_QUALITY_CANDIDATES = [
    "llama3.1:8b",
    "mistral:7b",
    "qwen2.5:7b",
]

# Resolve models now
PLANNER_MODEL = pick_first_available(PLANNER_CANDIDATES)
WRITER_MODEL  = pick_first_available(WRITER_FAST_CANDIDATES if FAST_MODE else WRITER_QUALITY_CANDIDATES)

# ─────────────────────────────────────────────────────────────────────────────
# 1) User parameters (auto size by target pages → chapters)
# ─────────────────────────────────────────────────────────────────────────────
BOOK_TITLE   = "Artificial Influencers 2"
MODE         = "fiction"  # or "nonfiction"

# Either set an explicit target page count, or leave None to auto-center the profile
TARGET_PAGES = 320 if MODE == "fiction" else 280

GENRE_PROFILE = {
    "fiction":   {"pages_min": 280, "pages_max": 360, "chapter_words_typical": (2400, 3600)},
    "nonfiction":{"pages_min": 220, "pages_max": 320, "chapter_words_typical": (3000, 4500)}
}[MODE]

if TARGET_PAGES is None:
    TARGET_PAGES = (GENRE_PROFILE["pages_min"] + GENRE_PROFILE["pages_max"]) // 2

# Words-per-page assumption (trade paperback-ish)
WORDS_PER_PAGE = 275
TARGET_WORDS   = int(TARGET_PAGES * WORDS_PER_PAGE)

# Choose a target chapter length (midpoint of typical band), and derive initial chapter count:
CH_MIN, CH_MAX = GENRE_PROFILE["chapter_words_typical"]
CH_TARGET_WORDS = int((CH_MIN + CH_MAX) / 2)
NUM_CH = max(12, min(80, (TARGET_WORDS + CH_TARGET_WORDS - 1) // CH_TARGET_WORDS))

# Your story seed:
SEED_IDEA = (
    "Dr. Lena Park — a brilliant but introverted data scientist at Datum, "
    "a social-media analytics startup. She notices impossible engagement "
    "patterns in a rising star; her investigation unravels a conspiracy "
    "of AI-driven 'influencers' masquerading as humans—and she must decide "
    "whether to expose the truth or risk blowing up the platform."
)

print(f"🎯 Target pages: {TARGET_PAGES}  → target words ≈ {TARGET_WORDS:,}")
print(f"📏 Chapter target: ~{CH_TARGET_WORDS:,} words → initial chapters: {NUM_CH}")

# ─────────────────────────────────────────────────────────────────────────────
# 2) LLMs & Prompts (planner JSON+fallbacks baked in later)
# ─────────────────────────────────────────────────────────────────────────────
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate  # modern import

PLANNER_NUM_PREDICT = 900 if FAST_MODE else 1400
WRITER_NUM_PREDICT  = 1600 if FAST_MODE else 3000
MAX_WORKERS = 1 if (FAST_MODE or not HAS_GPU) else 3   # conservative on CPU

# Two planner clients: free-form and JSON-locked
planner_llm = OllamaLLM(
    model=PLANNER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.25,
    num_predict=PLANNER_NUM_PREDICT,
)
planner_llm_json = OllamaLLM(
    model=PLANNER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.2,
    num_predict=PLANNER_NUM_PREDICT,
    format="json",                 # ask for pure JSON (we still robustly fallback)
)

writer_llm  = OllamaLLM(
    model=WRITER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.8,
    num_ctx=4096,
    num_predict=WRITER_NUM_PREDICT,
)

# — Prompts —
# Outline prompts: JSON and NDJSON
OUTLINE_JSON_PROMPT = PromptTemplate(
    input_variables=["topic","count"],
    template=(
"""Generate exactly {count} UNIQUE chapter seeds for this novel as a JSON ARRAY.
Each item is an object: {{"title":"...", "description":"..."}}
Return JSON ONLY. No commentary.

Book idea: {topic}
"""
    )
)

OUTLINE_NDJSON_PROMPT = PromptTemplate(
    input_variables=["topic","count"],
    template=(
"""Generate exactly {count} UNIQUE chapter seeds for this novel as NDJSON (one JSON object per line).
Each line: {{"title":"...", "description":"..."}}
No commentary, no numbering, no code fences.

Book idea: {topic}
"""
    )
)

# Character prompt (JSON)
CHAR_JSON_PROMPT = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"""Given this chapter outline (JSON list): {outline}
Create exactly {num_chars} MAIN CHARACTERS as a JSON ARRAY.
Each item: {{"name":"...","role":"...","development_arc":"..."}}
Return JSON ONLY. No commentary."""
    )
)

# Chapter & editing prompts
chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"

Constraints:
- NO meta commentary, NO analysis of your process, NO decision making.
- Assume the reader remembers prior chapters; do not re-explain backstory.
- Maintain continuity, but introduce at least one fresh obstacle, one vivid sensory beat, and one believable surprise.
- Use concrete, precise details; avoid clichés.
- End with a small but real unresolved tension.
Return TEXT ONLY.
"""
    )
)

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise the chapter to reduce repetition with earlier chapters while improving novelty and tension.
Keep the same characters and continuity, but change scene dynamics, setting details, and micro-beats.

Rules:
- Do NOT re-explain backstory already known.
- Add at least one fresh obstacle, one specific sensory detail, and one surprising but plausible turn.
- Preserve voice and POV.
Return TEXT ONLY.

Title: {title}
Description: {description}

Do-not-repeat ledger (phrases/scenes to avoid): {ledger}

Chapter draft:
{chapter}
"""
    )
)

EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
- pacing
- tension
- voice
- imagery
- dialogue
- novelty

Return STRICT JSON only:
{{"scores":{{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x}},"one_sentence_note":"...","three_micro_edits":["...","...","..."]}}

Chapter:
{chapter}
"""
    )
)

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits to strengthen the chapter without changing the plot:
- {edits}

Rules:
- Keep POV, continuity, and length roughly the same (±10%).
- Add concrete sensory details.
- Tighten weak sentences; remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

# Theme/Motif bible, beats, dialogue tuner, decliche, motif miner, blurb
THEME_PROMPT = PromptTemplate(
    input_variables=["topic","outline"],
    template=(
"""From the seed idea and outline, produce STRICT JSON:
{{
  "themes": ["..."],         # 3–5 core themes
  "motifs": ["..."],         # 6–12 recurring props/images
  "promises": ["..."],       # 3–6 promises to the reader
  "logline": "...",          # one sentence
  "genre_signals": ["..."]   # 5–8 setting/imagery signals
}}
Seed idea: {topic}
Outline: {outline}
"""
    )
)

BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","theme_bible","motif_ledger"],
    template=(
"""Plan a beat sheet for the chapter as STRICT JSON:
{{
  "beats": [
    {{"name":"Hook","goal":"...","conflict":"...","setting":"...","emotion":"..."}},
    ...
  ],
  "dialogue_target_pct": 0.38,
  "sensory_palette": ["sound","smell"],
  "foreshadow":"...",
  "callback_motif":"..."
}}
Requirements:
- 8–12 beats, with a mid-chapter reversal and a cliffhanger/stinger.
- Use 1–2 motifs from MOTIF_LEDGER and add 1 fresh setting element aligned to GENRE_SIGNALS.

TITLE: {title}
DESC: {description}
THEME_BIBLE: {theme_bible}
MOTIF_LEDGER: {motif_ledger}
"""
    )
)

CHAPTER_WITH_BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","idea","plan","themes","sensory_palette","dialogue_target"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".

Seed idea: {idea}
Mini-brief: {description}
Plan (beats): {plan}

Must do:
- OPEN with a punchy 1–2 sentence HOOK that raises a concrete question.
- Emphasize SENSORY PALETTE: {sensory_palette}
- Aim for DIALOGUE DENSITY ≈ {dialogue_target:.2f} (about 30–45% lines include dialogue).
- Integrate 1 motif or prop from the plan naturally.
- Midpoint reversal that reframes stakes.
- END with a plausible CLIFFHANGER/STINGER (no meta).

Style:
- Concrete details, crisp verbs; avoid clichés.
- Maintain POV and continuity; no backstory dumps.

Themes to subtly reinforce: {themes}
Return TEXT ONLY.
"""
    )
)

DIALOGUE_TUNER_PROMPT = PromptTemplate(
    input_variables=["chapter","target"],
    template=(
"""Revise the chapter to adjust dialogue density to ≈ {target:.2f} (±0.08).
Keep plot and beats intact. Do not shorten by more than 10% or lengthen by more than 10%.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

DECLICHE_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Rewrite at sentence-level to remove clichés and filler.
Replace with specific, concrete imagery fitting a near-future techno-thriller vibe.
Preserve plot, POV, beats, and length (±5%). Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

MOTIF_MINER_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Extract 1–3 recurring motifs/props/images present in this chapter (short noun phrases).
Return STRICT JSON: {{"motifs":["..."]}}

Chapter:
{chapter}
"""
    )
)

BLURB_PROMPT = PromptTemplate(
    input_variables=["title","logline","themes","promises"],
    template=(
"""Write STRICT JSON:
{{
  "blurb":"...",           # 120–160 words
  "product_hook":"...",    # 1 sentence
  "snippets": ["...","...","..."]  # ≤140 chars each
}}
Title: {title}
Logline: {logline}
Themes: {themes}
Promises: {promises}
"""
    )
)

# Build chains
outline_chain_json          = OUTLINE_JSON_PROMPT       | planner_llm_json  # JSON mode
outline_chain_ndjson_plan   = OUTLINE_NDJSON_PROMPT     | planner_llm       # NDJSON (planner)
outline_chain_ndjson_writer = OUTLINE_NDJSON_PROMPT     | writer_llm        # NDJSON (writer)
# Writer JSON chain created on demand later if needed

character_chain_json = CHAR_JSON_PROMPT | planner_llm_json

chapter_chain     = chapter_prompt               | writer_llm
revision_chain    = REVISION_PROMPT              | writer_llm
eval_chain        = EVAL_PROMPT                  | planner_llm
punchup_chain     = PUNCHUP_PROMPT               | writer_llm

theme_chain       = THEME_PROMPT                 | planner_llm
beats_chain       = BEATS_PROMPT                 | planner_llm
chapter_beats_llm = CHAPTER_WITH_BEATS_PROMPT    | writer_llm
dialogue_tuner    = DIALOGUE_TUNER_PROMPT        | writer_llm
decliche_chain    = DECLICHE_PROMPT              | writer_llm
motif_miner       = MOTIF_MINER_PROMPT           | planner_llm
blurb_chain       = BLURB_PROMPT                 | planner_llm

# ─────────────────────────────────────────────────────────────────────────────
# 3) Utilities: parse, similarity, IO helpers
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s)
    s = FENCE_RE.sub("", s)
    return s.strip()

def parse_json_value(s: str):
    """Parse last JSON value ({...} or [...]) in a string; return Python obj or None."""
    s = strip_think(s)
    m = re.search(r"(\{.*\}|\[.*\])\s*$", s, flags=re.S)
    if not m: return None
    blob = m.group(1)
    try:
        return json.loads(blob)
    except Exception:
        fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
        fix = re.sub(r",\s*}", "}", fix); fix = re.sub(r",\s*]", "]", fix)
        try: return json.loads(fix)
        except Exception: return None

def parse_strict_json(s: str) -> dict:
    obj = parse_json_value(s)
    return obj if isinstance(obj, dict) else {}

# Extract individual JSON objects even if an array is truncated
OBJ_RE = re.compile(r"\{(?:[^{}]|\"[^\"\\]*(?:\\.[^\"\\]*)*\")*\}")
def extract_json_objects(text: str):
    text = strip_think(text)
    objs = []
    for m in OBJ_RE.finditer(text):
        blob = m.group(0)
        try:
            obj = json.loads(blob)
            if isinstance(obj, dict):
                objs.append(obj)
        except Exception:
            try:
                fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                obj = json.loads(fix)
                if isinstance(obj, dict):
                    objs.append(obj)
            except Exception:
                pass
    return objs

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", a.lower()))
    B = set(re.findall(r"[a-z0-9']+", b.lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

def too_similar(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.65) or (t_sim > 0.45 and d_sim > 0.55)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", s.lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", text.lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

def approx_dialogue_ratio(text: str) -> float:
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if not lines: return 0.0
    dial = sum(1 for ln in lines if re.search(r'["“”]|^—', ln))
    return dial / max(1, len(lines))

def word_count(text: str) -> int:
    return len(re.findall(r"[A-Za-z0-9']+", text or ""))

# Checkpoint helpers
def _slug(s):
    return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:60]

def _ck_paths(i, title):
    base = f"{i:03d}-{_slug(title)}"
    p = pathlib.Path(CHECKPOINT_DIR)
    return p / (base + ".txt"), p / (base + ".json")

def save_ckpt(i, title, text, notes):
    p_txt, p_meta = _ck_paths(i, title)
    p_txt.write_text(text or "", encoding="utf-8")
    meta = {"chapter_index": i, "title": title, "notes": notes}
    p_meta.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

def load_ckpt_if_any(i, title):
    p_txt, p_meta = _ck_paths(i, title)
    if p_txt.exists():
        text = p_txt.read_text(encoding="utf-8")
        notes = None
        if p_meta.exists():
            try: notes = json.loads(p_meta.read_text(encoding="utf-8"))
            except Exception: notes = None
        return text, notes
    return None, None

# ─────────────────────────────────────────────────────────────────────────────
# 4) Robust multi-strategy OUTLINE generator (JSON → NDJSON; planner → writer)
# ─────────────────────────────────────────────────────────────────────────────
print(f"→ Generating {NUM_CH}-chapter outline with robust fallbacks …")
chunk = 5 if not HAS_GPU else 10  # smaller on CPU
attempts = 0
MAX_ATTEMPTS = 40

def clean_outline_items(items):
    out = []
    for obj in items:
        if not isinstance(obj, dict):
            continue
        title = (obj.get("title") or "").strip()
        desc  = (obj.get("description") or "").strip()
        if title and desc:
            out.append({"title": title, "description": desc})
    return out

def robust_outline_batch(topic: str, ask: int):
    # (A) Planner JSON mode
    try:
        raw = (OUTLINE_JSON_PROMPT | planner_llm_json).invoke({"topic": topic, "count": ask})
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ Outline via planner JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ Outline via planner JSON (partial array rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · Planner JSON failed: {e}")

    # (B) Planner NDJSON
    try:
        raw = (OUTLINE_NDJSON_PROMPT | planner_llm).invoke({"topic": topic, "count": ask})
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try:
                objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ Outline via planner NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · Planner NDJSON failed: {e}")

    # (C) Writer JSON mode
    try:
        outline_chain_json_writer = OUTLINE_JSON_PROMPT | writer_llm
        raw = outline_chain_json_writer.invoke({"topic": topic, "count": ask})
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ Outline via writer JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ Outline via writer JSON (partial array rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · Writer JSON failed: {e}")

    # (D) Writer NDJSON
    try:
        raw = (OUTLINE_NDJSON_PROMPT | writer_llm).invoke({"topic": topic, "count": ask})
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try:
                objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ Outline via writer NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · Writer NDJSON failed: {e}")

    # (E) Last-resort stub generation so the pipeline can proceed
    print("   ⚠️ Using stubbed outline seeds (fallback).")
    return [
        {"title": f"Thread {i+1}: Anomalous Signal",
         "description": "A troubling pattern emerges in the influencer data; tension escalates and a new obstacle appears."}
        for i in range(ask)
    ]

# Build the outline with dedupe
outline, seen_titles = [], set()
needed = NUM_CH
while len(outline) < needed and attempts < MAX_ATTEMPTS:
    ask = min(chunk, needed - len(outline))
    batch = robust_outline_batch(SEED_IDEA, ask)
    attempts += 1
    added = 0
    for ch in batch:
        if not ch["title"] or not ch["description"]:
            continue
        if ch["title"] in seen_titles:
            continue
        if any(too_similar(ch, e) for e in outline):
            continue
        outline.append(ch)
        seen_titles.add(ch["title"])
        added += 1
        if len(outline) >= needed:
            break
    if added == 0:
        print("   · No unique chapters accepted from batch; retrying…")

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < needed:
    print("⚠️ Could not reach target count; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 5) Character Bible (robust JSON) + Theme/Motif Bible
# ─────────────────────────────────────────────────────────────────────────────
def robust_characters(outline_list, n_chars):
    # Try planner JSON first
    for stage in ["planner_json", "writer_json", "planner_fallback"]:
        try:
            if stage == "planner_json":
                raw = character_chain_json.invoke({
                    "outline": json.dumps(outline_list, ensure_ascii=False),
                    "num_chars": n_chars
                })
                arr = parse_json_value(raw)
                if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
                    return [c for c in arr if isinstance(c, dict)][:n_chars]
            elif stage == "writer_json":
                char_chain_json_writer = CHAR_JSON_PROMPT | writer_llm
                raw = char_chain_json_writer.invoke({
                    "outline": json.dumps(outline_list, ensure_ascii=False),
                    "num_chars": n_chars
                })
                arr = parse_json_value(raw)
                if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
                    return [c for c in arr if isinstance(c, dict)][:n_chars]
            else:
                # last-resort minimal characters
                return [
                    {"name": "Lena Park", "role": "Protagonist",
                     "development_arc": "From isolated analyst to whistleblower forging unlikely alliances."},
                    {"name": "Mara Voss", "role": "Rising Influencer",
                     "development_arc": "Charismatic star reveals engineered persona; torn between truth and fame."},
                    {"name": "Rex Calder", "role": "Datum Executive",
                     "development_arc": "From mentor figure to antagonist entangled in AI-influence scheme."},
                ][:n_chars]
        except Exception:
            continue
    return []

NUM_CHAR = max(3, min(10, len(outline)//8))
print(f"→ Generating {NUM_CHAR} characters (robust)…")
characters = robust_characters(outline, NUM_CHAR)
print(f"✔ Got {len(characters)} characters\n")

print("→ Building theme/motif bible…")
theme_raw = theme_chain.invoke({
    "topic": SEED_IDEA,
    "outline": json.dumps(outline, ensure_ascii=False)
})
THEME_BIBLE = parse_strict_json(theme_raw) or {
    "themes": [],
    "motifs": [],
    "promises": [],
    "logline": "",
    "genre_signals": []
}
MOTIF_LEDGER = list(THEME_BIBLE.get("motifs", []))  # seed with global motifs
print("✔ Theme bible ready.\n")

# ─────────────────────────────────────────────────────────────────────────────
# 6) Chapter Generation with beats + checkpointing + early re-calibration
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)
BIGRAM_THRESHOLD = 0.22  # repetition strictness

def write_one(idx):
    meta = outline[idx]
    title = meta["title"]

    # Resume if checkpoint exists
    cached_txt, cached_notes = load_ckpt_if_any(idx, title)
    if cached_txt:
        return cached_txt, cached_notes

    # 1) Plan beats with callbacks to existing motif ledger
    plan_raw = beats_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "theme_bible": json.dumps(THEME_BIBLE, ensure_ascii=False),
        "motif_ledger": json.dumps(MOTIF_LEDGER[-12:], ensure_ascii=False)
    })
    plan = parse_strict_json(plan_raw)
    dialogue_target = float(plan.get("dialogue_target_pct", 0.36))
    sensory_palette = plan.get("sensory_palette", ["sight","sound"])
    plan_json = json.dumps(plan.get("beats", []), ensure_ascii=False)

    # 2) Draft with hooks, sensory palette, dialogue target
    res = chapter_beats_llm.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA,
        "plan": plan_json,
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "sensory_palette": ", ".join(sensory_palette),
        "dialogue_target": dialogue_target
    })
    chapter_txt = strip_think(res)

    # 3) Repetition guard vs earlier chapters
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > BIGRAM_THRESHOLD:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]
    if ledger:
        revised = revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        })
        chapter_txt = strip_think(revised)

    # 4/5) Auto-evaluation → only run heavy passes if needed
    data = None
    try:
        report_raw = eval_chain.invoke({"chapter": chapter_txt})
        report_txt = strip_think(report_raw)
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
        if data and "scores" in data:
            scores = data["scores"]
            avg_score = sum(float(scores[k]) for k in scores)/len(scores)

            # Dialogue tuner if notably off target
            dr = approx_dialogue_ratio(chapter_txt)
            if abs(dr - dialogue_target) > 0.10:
                tuned = dialogue_tuner.invoke({"chapter": chapter_txt, "target": dialogue_target})
                chapter_txt = strip_think(tuned)

            # Decliché only when quality is mid or FAST_MODE off
            if (avg_score < 7.6) or (not FAST_MODE):
                polished = decliche_chain.invoke({"chapter": chapter_txt})
                chapter_txt = strip_think(polished)

            # Punch-up if still low
            if avg_score < 7.4:
                edits = " | ".join(data.get("three_micro_edits", [])) or \
                        "Sharpen hooks; escalate midpoint; add concrete sensory beats."
                punched = punchup_chain.invoke({"chapter": chapter_txt, "edits": edits})
                chapter_txt = strip_think(punched)
    except Exception:
        data = None

    # 6) Mine motifs from this chapter → update global ledger
    try:
        mined_raw = motif_miner.invoke({"chapter": chapter_txt})
        mined = parse_strict_json(mined_raw)
        for m in (mined.get("motifs") or []):
            if m not in MOTIF_LEDGER:
                MOTIF_LEDGER.append(m)
    except Exception:
        pass

    # Save checkpoint
    save_ckpt(idx, title, chapter_txt, data)
    return chapter_txt, data

# —— Early calibration: write first few chapters serially, resize outline
PREGEN = min(3, len(outline))
for i in range(PREGEN):
    ch_txt, notes = write_one(i)
    chap_texts[i] = ch_txt
    editor_notes[i] = notes

# Measure actual words/chapter and re-size remaining outline to hit target pages
actual_avg = max(500, sum(word_count(chap_texts[i]) for i in range(PREGEN)) // PREGEN)
recalc_num_ch = max(12, min(80, (TARGET_WORDS + actual_avg - 1) // actual_avg))

if recalc_num_ch != len(outline):
    delta = recalc_num_ch - len(outline)
    if delta > 0:
        ask_more = delta
        extra = robust_outline_batch(SEED_IDEA, ask_more)
        seen_titles = set([c["title"] for c in outline])
        for obj in extra:
            cand = {"title": (obj.get("title") or "").strip(),
                    "description": (obj.get("description") or "").strip()}
            if not cand["title"] or not cand["description"]:
                continue
            if cand["title"] in seen_titles:
                continue
            if any(too_similar(cand, e) for e in outline):
                continue
            outline.append(cand); seen_titles.add(cand["title"])
            chap_texts.append(None); editor_notes.append(None)
            if len(outline) >= recalc_num_ch: break
        print(f"🔁 Resized outline: +{max(0, delta)} → total {len(outline)} chapters")
    elif delta < 0:
        keep = max(PREGEN, recalc_num_ch)
        outline = outline[:keep]
        chap_texts = chap_texts[:keep]
        editor_notes = editor_notes[:keep]
        print(f"✂️  Trimmed outline to {len(outline)} chapters")

# Generate remaining chapters
remaining_idxs = [i for i, t in enumerate(chap_texts) if t is None]
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in remaining_idxs }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt
        editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 7) Pre-save totals → Save to Word + Back-cover copy + Download
# ─────────────────────────────────────────────────────────────────────────────
total_words = sum(word_count(t or "") for t in chap_texts)
est_pages  = total_words / WORDS_PER_PAGE
suggested_ch = max(12, min(80, round(total_words / CH_TARGET_WORDS)))
print(f"🧮 Total words: {total_words:,}  → est. pages ≈ {est_pages:.0f}")
print(f"🔧 If you rerun, suggested chapters for this style ≈ {suggested_ch}")

doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")
doc.add_paragraph(f"Estimated pages: ~{est_pages:.0f}")

# Character Development Section
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role","")
        arc  = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes (Auto-Eval)
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in (notes or {}):
        doc.add_paragraph("No evaluation available.")
        continue
    scores = notes["scores"]
    one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try:
        avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception:
        avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None:
        doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner:
        doc.add_paragraph(f"Note: {one_liner}")
    if edits:
        for e in edits:
            doc.add_paragraph(f"• {e}")

# Back-cover blurb + retailer hook + social snippets
blurb_data = {}
try:
    blurb_raw = blurb_chain.invoke({
        "title": BOOK_TITLE,
        "logline": THEME_BIBLE.get("logline",""),
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "promises": ", ".join(THEME_BIBLE.get("promises", []))
    })
    blurb_data = parse_strict_json(blurb_raw) or {}
except Exception:
    blurb_data = {}

doc.add_page_break()
doc.add_heading("Back-Cover Copy & Retailer Hook", level=1)
if blurb_data.get("blurb"):
    doc.add_paragraph(blurb_data["blurb"])
if blurb_data.get("product_hook"):
    doc.add_paragraph(f"\nRetailer Hook: {blurb_data['product_hook']}")
if blurb_data.get("snippets"):
    doc.add_heading("Short Social Snippets", level=2)
    for s in blurb_data["snippets"]:
        doc.add_paragraph(f"• {s}")

# Save & download
fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


✅ Ollama health: 200
🖥️ GPU: not detected
✔ Using model: llama3.2:3b
✔ Using model: llama3.2:3b
🎯 Target pages: 320  → target words ≈ 88,000
📏 Chapter target: ~3,000 words → initial chapters: 30
→ Generating 30-chapter outline with robust fallbacks …
   ✓ Outline via planner JSON (partial array rescued)
   ✓ Outline via planner JSON (partial array rescued)
   ✓ Outline via planner JSON (partial array rescued)
   ✓ Outline via planner JSON (partial array rescued)
   · No unique chapters accepted from batch; retrying…
   ✓ Outline via planner JSON (partial array rescued)
   · No unique chapters accepted from batch; retrying…
   ✓ Outline via planner JSON (partial array rescued)
   · No unique chapters accepted from batch; retrying…
   ✓ Outline via planner JSON (partial array rescued)
   · No unique chapters accepted from batch; retrying…
   ✓ Outline via planner JSON (partial array rescued)
   · No unique chapters accepted from batch; retrying…
   ✓ Outline via planner JSON (partial arr

Chapters: 100%|██████████| 1/1 [20:37<00:00, 1237.66s/it]


🧮 Total words: 3,043  → est. pages ≈ 11
🔧 If you rerun, suggested chapters for this style ≈ 12
📘 Saved Artificial_Influencers_2.docx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama (speed toggles, checkpoints, resolver)
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm

import os, threading, subprocess, time, requests, json, re, shutil, pathlib, math, sys, random
from typing import List, Any, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx
from google.colab import files

# Avoid LangChain provider hijacks
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"  # catch stray typos
]:
    os.environ.pop(v, None)

# —— Speed/robustness knobs ——
FAST_MODE = True                 # Flip to False when you're happy with the outputs
CHECKPOINT_DIR = "book_ckpt"     # Saves each chapter & metadata
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Make Ollama conservative about parallelism in Colab
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "1"
os.environ["OLLAMA_NUM_PARALLEL"] = "1"

# Launch Ollama daemon
os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"

!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama health:", requests.get("http://127.0.0.1:11434").status_code)

# GPU detection to size workers
def _has_gpu():
    try:
        return shutil.which("nvidia-smi") and (subprocess.run(["nvidia-smi"], capture_output=True).returncode==0)
    except Exception:
        return False
HAS_GPU = bool(_has_gpu())
print("🖥️ GPU:", "available" if HAS_GPU else "not detected")

# ─────────────────────────────────────────────────────────────────────────────
# Helper: choose models that actually exist locally/remote via pull
# ─────────────────────────────────────────────────────────────────────────────
def pick_first_available(candidates: List[str]) -> str:
    for m in candidates:
        try:
            r = subprocess.run(["ollama", "pull", m], capture_output=True, text=True)
            if r.returncode == 0:
                print(f"✔ Using model: {m}")
                return m
            else:
                print(f"✖ Pull failed for {m} → {r.stderr.strip() or r.stdout.strip()}")
        except Exception as e:
            print(f"✖ Error pulling {m}: {e}")
    raise RuntimeError(f"No candidate models could be pulled: {candidates}")

# Good planner-size candidates (small & widely available)
PLANNER_CANDIDATES = [
    "llama3.2:3b",
    "qwen2.5:3b",
    "phi3:3.8b-mini",
    "gemma2:2b",
    "mistral:7b",
    "llama3.1:8b",
]

# Writer candidates
WRITER_FAST_CANDIDATES = [
    "llama3.2:3b",
    "qwen2.5:3b",
    "phi3:3.8b-mini",
    "gemma2:2b",
    "mistral:7b",
    "llama3.1:8b",
]
WRITER_QUALITY_CANDIDATES = [
    "llama3.1:8b",
    "mistral:7b",
    "qwen2.5:7b",
]

# Resolve models now
PLANNER_MODEL = pick_first_available(PLANNER_CANDIDATES)
WRITER_MODEL  = pick_first_available(WRITER_FAST_CANDIDATES if FAST_MODE else WRITER_QUALITY_CANDIDATES)

# ─────────────────────────────────────────────────────────────────────────────
# 1) User parameters (auto size by target pages → chapters)
# ─────────────────────────────────────────────────────────────────────────────
BOOK_TITLE   = "Artificial Influencers 2"
MODE         = "fiction"  # or "nonfiction"

# Either set an explicit target page count, or leave None to auto-center the profile
TARGET_PAGES = 320 if MODE == "fiction" else 280

GENRE_PROFILE = {
    "fiction":   {"pages_min": 280, "pages_max": 360, "chapter_words_typical": (2400, 3600)},
    "nonfiction":{"pages_min": 220, "pages_max": 320, "chapter_words_typical": (3000, 4500)}
}[MODE]

if TARGET_PAGES is None:
    TARGET_PAGES = (GENRE_PROFILE["pages_min"] + GENRE_PROFILE["pages_max"]) // 2

# Words-per-page assumption (trade paperback-ish)
WORDS_PER_PAGE = 275
TARGET_WORDS   = int(TARGET_PAGES * WORDS_PER_PAGE)

# Choose a target chapter length (midpoint of typical band), and derive initial chapter count:
CH_MIN, CH_MAX = GENRE_PROFILE["chapter_words_typical"]
CH_TARGET_WORDS = int((CH_MIN + CH_MAX) / 2)
NUM_CH = max(12, min(80, (TARGET_WORDS + CH_TARGET_WORDS - 1) // CH_TARGET_WORDS))

# Your story seed:
SEED_IDEA = (
    "an idea for an alternative history story: What would have happended if Hillary Clinton  won the 2016 election instead of Donald J Trump."
)

print(f"🎯 Target pages: {TARGET_PAGES}  → target words ≈ {TARGET_WORDS:,}")
print(f"📏 Chapter target: ~{CH_TARGET_WORDS:,} words → initial chapters: {NUM_CH}")

# ─────────────────────────────────────────────────────────────────────────────
# 2) LLMs & Prompts (planner JSON+fallbacks baked in)
# ─────────────────────────────────────────────────────────────────────────────
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate  # modern import

PLANNER_NUM_PREDICT = 900 if FAST_MODE else 1400
WRITER_NUM_PREDICT  = 1600 if FAST_MODE else 3000
MAX_WORKERS = 1 if (FAST_MODE or not HAS_GPU) else 3   # conservative on CPU

# Planner: free-form and JSON-locked
planner_llm = OllamaLLM(
    model=PLANNER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.25,
    num_predict=PLANNER_NUM_PREDICT,
)
planner_llm_json = OllamaLLM(
    model=PLANNER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.2,
    num_predict=PLANNER_NUM_PREDICT,
    format="json",  # ask for pure JSON (we still robustly fallback)
)

writer_llm  = OllamaLLM(
    model=WRITER_MODEL,
    base_url="http://127.0.0.1:11434",
    temperature=0.8,
    num_ctx=4096,
    num_predict=WRITER_NUM_PREDICT,
)

# Outline prompts: JSON and NDJSON with "avoid" lists for diversity
OUTLINE_JSON_PROMPT = PromptTemplate(
    input_variables=["topic","count","avoid_titles","avoid_phrases"],
    template=(
"""Generate exactly {count} DIFFERENT chapter seeds for this novel as a JSON ARRAY.
Each item is an object: {{"title":"...", "description":"..."}}

HARD RULES:
- Vary SETTING, MODE OF CONFLICT, and REVERSAL TYPE across items.
- Avoid any titles in AVOID_TITLES and any phrases in AVOID_PHRASES.
- Return JSON ONLY. No commentary.

Book idea: {topic}
AVOID_TITLES: {avoid_titles}
AVOID_PHRASES: {avoid_phrases}
"""
    )
)

OUTLINE_NDJSON_PROMPT = PromptTemplate(
    input_variables=["topic","count","avoid_titles","avoid_phrases"],
    template=(
"""Generate exactly {count} DIFFERENT chapter seeds as NDJSON (one JSON object per line).
Each line: {{"title":"...", "description":"..."}}

HARD RULES:
- Vary SETTING, MODE OF CONFLICT, and REVERSAL TYPE across items.
- Avoid any titles in AVOID_TITLES and any phrases in AVOID_PHRASES.
- No numbering, no code fences, no commentary.

Book idea: {topic}
AVOID_TITLES: {avoid_titles}
AVOID_PHRASES: {avoid_phrases}
"""
    )
)

# Character prompt (JSON)
CHAR_JSON_PROMPT = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"""Given this chapter outline (JSON list): {outline}
Create exactly {num_chars} MAIN CHARACTERS as a JSON ARRAY.
Each item: {{"name":"...","role":"...","development_arc":"..."}}
Return JSON ONLY. No commentary."""
    )
)

# Chapter & editing prompts
chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"

Constraints:
- NO meta commentary, NO analysis of your process, NO decision making.
- Assume the reader remembers prior chapters; do not re-explain backstory.
- Maintain continuity, but introduce at least one fresh obstacle, one vivid sensory beat, and one believable surprise.
- Use concrete, precise details; avoid clichés.
- End with a small but real unresolved tension.
Return TEXT ONLY.
"""
    )
)

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise the chapter to reduce repetition with earlier chapters while improving novelty and tension.
Keep the same characters and continuity, but change scene dynamics, setting details, and micro-beats.

Rules:
- Do NOT re-explain backstory already known.
- Add at least one fresh obstacle, one specific sensory detail, and one surprising but plausible turn.
- Preserve voice and POV.
Return TEXT ONLY.

Title: {title}
Description: {description}

Do-not-repeat ledger (phrases/scenes to avoid): {ledger}

Chapter draft:
{chapter}
"""
    )
)

EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
- pacing
- tension
- voice
- imagery
- dialogue
- novelty

Return STRICT JSON only:
{{"scores":{{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x}},"one_sentence_note":"...","three_micro_edits":["...","...","..."]}}

Chapter:
{chapter}
"""
    )
)

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits to strengthen the chapter without changing the plot:
- {edits}

Rules:
- Keep POV, continuity, and length roughly the same (±10%).
- Add concrete sensory details.
- Tighten weak sentences; remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

# Theme/Motif bible, beats, dialogue tuner, decliche, motif miner, blurb
THEME_PROMPT = PromptTemplate(
    input_variables=["topic","outline"],
    template=(
"""From the seed idea and outline, produce STRICT JSON:
{{
  "themes": ["..."],         # 3–5 core themes
  "motifs": ["..."],         # 6–12 recurring props/images
  "promises": ["..."],       # 3–6 promises to the reader
  "logline": "...",          # one sentence
  "genre_signals": ["..."]   # 5–8 setting/imagery signals
}}
Seed idea: {topic}
Outline: {outline}
"""
    )
)

BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","theme_bible","motif_ledger"],
    template=(
"""Plan a beat sheet for the chapter as STRICT JSON:
{{
  "beats": [
    {{"name":"Hook","goal":"...","conflict":"...","setting":"...","emotion":"..."}},
    ...
  ],
  "dialogue_target_pct": 0.38,
  "sensory_palette": ["sound","smell"],
  "foreshadow":"...",
  "callback_motif":"..."
}}
Requirements:
- 8–12 beats, with a mid-chapter reversal and a cliffhanger/stinger.
- Use 1–2 motifs from MOTIF_LEDGER and add 1 fresh setting element aligned to GENRE_SIGNALS.

TITLE: {title}
DESC: {description}
THEME_BIBLE: {theme_bible}
MOTIF_LEDGER: {motif_ledger}
"""
    )
)

CHAPTER_WITH_BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","idea","plan","themes","sensory_palette","dialogue_target"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".

Seed idea: {idea}
Mini-brief: {description}
Plan (beats): {plan}

Must do:
- OPEN with a punchy 1–2 sentence HOOK that raises a concrete question.
- Emphasize SENSORY PALETTE: {sensory_palette}
- Aim for DIALOGUE DENSITY ≈ {dialogue_target:.2f} (about 30–45% lines include dialogue).
- Integrate 1 motif or prop from the plan naturally.
- Midpoint reversal that reframes stakes.
- END with a plausible CLIFFHANGER/STINGER (no meta).

Style:
- Concrete details, crisp verbs; avoid clichés.
- Maintain POV and continuity; no backstory dumps.

Themes to subtly reinforce: {themes}
Return TEXT ONLY.
"""
    )
)

DIALOGUE_TUNER_PROMPT = PromptTemplate(
    input_variables=["chapter","target"],
    template=(
"""Revise the chapter to adjust dialogue density to ≈ {target:.2f} (±0.08).
Keep plot and beats intact. Do not shorten by more than 10% or lengthen by more than 10%.
Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

DECLICHE_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Rewrite at sentence-level to remove clichés and filler.
Replace with specific, concrete imagery fitting a near-future techno-thriller vibe.
Preserve plot, POV, beats, and length (±5%). Return TEXT ONLY.

Chapter:
{chapter}
"""
    )
)

MOTIF_MINER_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Extract 1–3 recurring motifs/props/images present in this chapter (short noun phrases).
Return STRICT JSON: {{"motifs":["..."]}}

Chapter:
{chapter}
"""
    )
)

BLURB_PROMPT = PromptTemplate(
    input_variables=["title","logline","themes","promises"],
    template=(
"""Write STRICT JSON:
{{
  "blurb":"...",           # 120–160 words
  "product_hook":"...",    # 1 sentence
  "snippets": ["...","...","..."]  # ≤140 chars each
}}
Title: {title}
Logline: {logline}
Themes: {themes}
Promises: {promises}
"""
    )
)

# Build chains
outline_chain_json_planner  = OUTLINE_JSON_PROMPT   | planner_llm_json   # JSON mode
outline_chain_ndjson_plan   = OUTLINE_NDJSON_PROMPT | planner_llm        # NDJSON (planner)
outline_chain_ndjson_writer = OUTLINE_NDJSON_PROMPT | writer_llm         # NDJSON (writer)

character_chain_json = CHAR_JSON_PROMPT | planner_llm_json

chapter_chain     = chapter_prompt               | writer_llm
revision_chain    = REVISION_PROMPT              | writer_llm
eval_chain        = EVAL_PROMPT                  | planner_llm
punchup_chain     = PUNCHUP_PROMPT               | writer_llm

theme_chain       = THEME_PROMPT                 | planner_llm
beats_chain       = BEATS_PROMPT                 | planner_llm
chapter_beats_llm = CHAPTER_WITH_BEATS_PROMPT    | writer_llm
dialogue_tuner    = DIALOGUE_TUNER_PROMPT        | writer_llm
decliche_chain    = DECLICHE_PROMPT              | writer_llm
motif_miner       = MOTIF_MINER_PROMPT           | planner_llm
blurb_chain       = BLURB_PROMPT                 | planner_llm

# ─────────────────────────────────────────────────────────────────────────────
# 3) Utilities: parse, similarity, IO helpers
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s)
    s = FENCE_RE.sub("", s)
    return s.strip()

def parse_json_value(s: str):
    """Parse last JSON value ({...} or [...]) in a string; return Python obj or None."""
    s = strip_think(s)
    m = re.search(r"(\{.*\}|\[.*\])\s*$", s, flags=re.S)
    if not m: return None
    blob = m.group(1)
    try:
        return json.loads(blob)
    except Exception:
        fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
        fix = re.sub(r",\s*}", "}", fix); fix = re.sub(r",\s*]", "]", fix)
        try: return json.loads(fix)
        except Exception: return None

def parse_strict_json(s: str) -> dict:
    obj = parse_json_value(s)
    return obj if isinstance(obj, dict) else {}

# Extract individual JSON objects even if an array is truncated
OBJ_RE = re.compile(r"\{(?:[^{}]|\"[^\"\\]*(?:\\.[^\"\\]*)*\")*\}")
def extract_json_objects(text: str):
    text = strip_think(text)
    objs = []
    for m in OBJ_RE.finditer(text):
        blob = m.group(0)
        try:
            obj = json.loads(blob)
            if isinstance(obj, dict):
                objs.append(obj)
        except Exception:
            try:
                fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                obj = json.loads(fix)
                if isinstance(obj, dict):
                    objs.append(obj)
            except Exception:
                pass
    return objs

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", (a or "").lower()))
    B = set(re.findall(r"[a-z0-9']+", (b or "").lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

# Dedup thresholds — slightly relaxed to avoid over-filtering on small models
def too_similar_relaxed(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.80) or (t_sim > 0.55 and d_sim > 0.62)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", (s or "").lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", (text or "").lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

def approx_dialogue_ratio(text: str) -> float:
    lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
    if not lines: return 0.0
    dial = sum(1 for ln in lines if re.search(r'["“”]|^—', ln))
    return dial / max(1, len(lines))

def word_count(text: str) -> int:
    return len(re.findall(r"[A-Za-z0-9']+", text or ""))

# Checkpoint helpers
def _slug(s):
    return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:60]

def _ck_paths(i, title):
    base = f"{i:03d}-{_slug(title)}"
    p = pathlib.Path(CHECKPOINT_DIR)
    return p / (base + ".txt"), p / (base + ".json")

def save_ckpt(i, title, text, notes):
    p_txt, p_meta = _ck_paths(i, title)
    p_txt.write_text(text or "", encoding="utf-8")
    meta = {"chapter_index": i, "title": title, "notes": notes}
    p_meta.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

def load_ckpt_if_any(i, title):
    p_txt, p_meta = _ck_paths(i, title)
    if p_txt.exists():
        text = p_txt.read_text(encoding="utf-8")
        notes = None
        if p_meta.exists():
            try: notes = json.loads(p_meta.read_text(encoding="utf-8"))
            except Exception: notes = None
        return text, notes
    return None, None

# ─────────────────────────────────────────────────────────────────────────────
# 4) Booster-grade OUTLINE generator (JSON→NDJSON; planner→writer; avoid lists)
# ─────────────────────────────────────────────────────────────────────────────
def clean_outline_items(items):
    out = []
    for obj in items:
        if not isinstance(obj, dict):
            continue
        title = (obj.get("title") or "").strip()
        desc  = (obj.get("description") or "").strip()
        if title and desc:
            out.append({"title": title, "description": desc})
    return out

def robust_outline_batch(topic: str, ask: int, avoid_titles: List[str], avoid_phrases: List[str]):
    payload = {
        "topic": topic,
        "count": ask,
        "avoid_titles": ", ".join(sorted(set(avoid_titles))[:50]),
        "avoid_phrases": ", ".join(sorted(set(avoid_phrases))[:50]),
    }
    # (A) Planner JSON
    try:
        raw = outline_chain_json_planner.invoke(payload)
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ planner JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ planner JSON (rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · planner JSON failed: {e}")

    # (B) Planner NDJSON
    try:
        raw = outline_chain_ndjson_plan.invoke(payload)
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try:
                objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ planner NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · planner NDJSON failed: {e}")

    # (C) Writer NDJSON
    try:
        raw = outline_chain_ndjson_writer.invoke(payload)
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try:
                objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ writer NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · writer NDJSON failed: {e}")

    # (D) Writer JSON (on demand) — sometimes strong for small asks
    try:
        outline_chain_json_writer = OUTLINE_JSON_PROMPT | writer_llm
        raw = outline_chain_json_writer.invoke(payload)
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ writer JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ writer JSON (rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · writer JSON failed: {e}")

    # (E) Stub fallback to guarantee forward progress
    print("   ⚠️ stub fallback")
    TEMPLATES = [
        ("A Cold Start", "A data blackout forces Lena to rely on analog sleuthing; a new ally reveals a risky lead."),
        ("Flood the Zone", "Bots overwhelm a protest; Lena must choose between saving one friend or saving the dataset."),
        ("Paper Trail", "A mundane invoice uncovers a laundering loop; a quiet break-in turns into a chase."),
        ("Mirror Test", "An influencer glitches live; the team stages a Turing-style trap with unintended consequences."),
        ("Dead Channel", "The platform buries a scandal; Lena weaponizes a forgotten API endpoint to surface the truth."),
        ("The Honeypot", "A romance subplot intersects with an op; trust fractures as a private archive leaks."),
        ("Proxy War", "A rival startup dangles a deal; double agents swap models and the ground truth shifts."),
    ]
    random.shuffle(TEMPLATES)
    return [{"title": t, "description": d} for t,d in TEMPLATES[:ask]]

print(f"→ Generating {NUM_CH}-chapter outline (booster mode) …")
outline, seen_titles = [], set()
needed = NUM_CH
chunk = 3  # small asks are more reliable on CPU
attempts, MAX_ATTEMPTS = 0, 60

while len(outline) < needed and attempts < MAX_ATTEMPTS:
    attempts += 1
    ask = min(chunk, needed - len(outline))
    # Build avoid lists from what we already have to promote variety
    avoid_titles = list(seen_titles)
    recent_phrases = []
    for it in outline[-12:]:
        recent_phrases.extend(top_trigrams(it.get("description",""), k=6))
    batch = robust_outline_batch(SEED_IDEA, ask, avoid_titles, recent_phrases)
    added = 0
    for ch in batch:
        if not ch["title"] or not ch["description"]:
            continue
        if ch["title"] in seen_titles:
            continue
        if any(too_similar_relaxed(ch, e) for e in outline):
            continue
        outline.append(ch)
        seen_titles.add(ch["title"])
        added += 1
        if len(outline) >= needed:
            break
    print(f"   → accepted {added}; total now {len(outline)}/{needed}")
    if added == 0:
        print("   · No unique chapters accepted from batch; retrying…")

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < needed:
    print("⚠️ Could not reach target; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 5) Character Bible (robust JSON) + Theme/Motif Bible
# ─────────────────────────────────────────────────────────────────────────────
def robust_characters(outline_list, n_chars):
    # Try planner JSON first; then writer JSON; finally stub trio
    try:
        raw = character_chain_json.invoke({
            "outline": json.dumps(outline_list, ensure_ascii=False),
            "num_chars": n_chars
        })
        arr = parse_json_value(raw)
        if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
            return [c for c in arr if isinstance(c, dict)][:n_chars]
    except Exception:
        pass
    try:
        char_chain_json_writer = CHAR_JSON_PROMPT | writer_llm
        raw = char_chain_json_writer.invoke({
            "outline": json.dumps(outline_list, ensure_ascii=False),
            "num_chars": n_chars
        })
        arr = parse_json_value(raw)
        if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
            return [c for c in arr if isinstance(c, dict)][:n_chars]
    except Exception:
        pass
    return [
        {"name": "Lena Park", "role": "Protagonist",
         "development_arc": "From isolated analyst to whistleblower forging unlikely alliances."},
        {"name": "Mara Voss", "role": "Rising Influencer",
         "development_arc": "Charismatic star reveals engineered persona; torn between truth and fame."},
        {"name": "Rex Calder", "role": "Datum Executive",
         "development_arc": "Mentor becomes antagonist entangled in AI-influence scheme."},
    ][:n_chars]

NUM_CHAR = max(3, min(10, len(outline)//8))
print(f"→ Generating {NUM_CHAR} characters (robust)…")
characters = robust_characters(outline, NUM_CHAR)
print(f"✔ Got {len(characters)} characters\n")

print("→ Building theme/motif bible…")
theme_raw = theme_chain.invoke({
    "topic": SEED_IDEA,
    "outline": json.dumps(outline, ensure_ascii=False)
})
THEME_BIBLE = parse_strict_json(theme_raw) or {
    "themes": [],
    "motifs": [],
    "promises": [],
    "logline": "",
    "genre_signals": []
}
MOTIF_LEDGER = list(THEME_BIBLE.get("motifs", []))  # seed with global motifs
print("✔ Theme bible ready.\n")

# ─────────────────────────────────────────────────────────────────────────────
# 6) Chapter Generation with beats + checkpointing + early re-calibration
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)
BIGRAM_THRESHOLD = 0.22  # repetition strictness

def write_one(idx):
    meta = outline[idx]
    title = meta["title"]

    # Resume if checkpoint exists
    cached_txt, cached_notes = load_ckpt_if_any(idx, title)
    if cached_txt:
        return cached_txt, cached_notes

    # 1) Plan beats with callbacks to existing motif ledger
    plan_raw = beats_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "theme_bible": json.dumps(THEME_BIBLE, ensure_ascii=False),
        "motif_ledger": json.dumps(MOTIF_LEDGER[-12:], ensure_ascii=False)
    })
    plan = parse_strict_json(plan_raw)
    dialogue_target = float(plan.get("dialogue_target_pct", 0.36))
    sensory_palette = plan.get("sensory_palette", ["sight","sound"])
    plan_json = json.dumps(plan.get("beats", []), ensure_ascii=False)

    # 2) Draft with hooks, sensory palette, dialogue target
    res = chapter_beats_llm.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA,
        "plan": plan_json,
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "sensory_palette": ", ".join(sensory_palette),
        "dialogue_target": dialogue_target
    })
    chapter_txt = strip_think(res)

    # 3) Repetition guard vs earlier chapters
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > BIGRAM_THRESHOLD:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]
    if ledger:
        revised = revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        })
        chapter_txt = strip_think(revised)

    # 4/5) Auto-evaluation → conditional polish
    data = None
    try:
        report_raw = eval_chain.invoke({"chapter": chapter_txt})
        report_txt = strip_think(report_raw)
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
        if data and "scores" in data:
            scores = data["scores"]
            avg_score = sum(float(scores[k]) for k in scores)/len(scores)

            # Dialogue tuner if notably off target
            dr = approx_dialogue_ratio(chapter_txt)
            if abs(dr - dialogue_target) > 0.10:
                tuned = dialogue_tuner.invoke({"chapter": chapter_txt, "target": dialogue_target})
                chapter_txt = strip_think(tuned)

            # Decliché only when quality is mid or FAST_MODE off
            if (avg_score < 7.6) or (not FAST_MODE):
                polished = decliche_chain.invoke({"chapter": chapter_txt})
                chapter_txt = strip_think(polished)

            # Punch-up if still low
            if avg_score < 7.4:
                edits = " | ".join(data.get("three_micro_edits", [])) or \
                        "Sharpen hooks; escalate midpoint; add concrete sensory beats."
                punched = punchup_chain.invoke({"chapter": chapter_txt, "edits": edits})
                chapter_txt = strip_think(punched)
    except Exception:
        data = None

    # 6) Mine motifs from this chapter → update global ledger
    try:
        mined_raw = motif_miner.invoke({"chapter": chapter_txt})
        mined = parse_strict_json(mined_raw)
        for m in (mined.get("motifs") or []):
            if m not in MOTIF_LEDGER:
                MOTIF_LEDGER.append(m)
    except Exception:
        pass

    # Save checkpoint
    save_ckpt(idx, title, chapter_txt, data)
    return chapter_txt, data

# —— Early calibration: write first few chapters serially, resize outline
PREGEN = min(3, len(outline))
for i in range(PREGEN):
    ch_txt, notes = write_one(i)
    chap_texts[i] = ch_txt
    editor_notes[i] = notes

# Measure actual words/chapter and re-size remaining outline to hit target pages
actual_avg = max(500, sum(word_count(chap_texts[i]) for i in range(PREGEN)) // PREGEN)
recalc_num_ch = max(12, min(80, (TARGET_WORDS + actual_avg - 1) // actual_avg))

if recalc_num_ch != len(outline):
    delta = recalc_num_ch - len(outline)
    if delta > 0:
        ask_more = delta
        print(f"🔁 Resizing outline: need +{delta} more chapters…")
        extra = robust_outline_batch(SEED_IDEA, ask_more, list(seen_titles), [])
        for obj in extra:
            cand = {"title": (obj.get("title") or "").strip(),
                    "description": (obj.get("description") or "").strip()}
            if not cand["title"] or not cand["description"]:
                continue
            if cand["title"] in seen_titles:
                continue
            if any(too_similar_relaxed(cand, e) for e in outline):
                continue
            outline.append(cand); seen_titles.add(cand["title"])
            chap_texts.append(None); editor_notes.append(None)
            if len(outline) >= recalc_num_ch: break
        print(f"   → total {len(outline)} chapters after resize")
    elif delta < 0:
        keep = max(PREGEN, recalc_num_ch)
        outline = outline[:keep]
        chap_texts = chap_texts[:keep]
        editor_notes = editor_notes[:keep]
        print(f"✂️  Trimmed outline to {len(outline)} chapters")

# Generate remaining chapters
remaining_idxs = [i for i, t in enumerate(chap_texts) if t is None]
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in remaining_idxs }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt
        editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 7) Pre-save totals → Save to Word + Back-cover copy + Download
# ─────────────────────────────────────────────────────────────────────────────
total_words = sum(word_count(t or "") for t in chap_texts)
est_pages  = total_words / WORDS_PER_PAGE
suggested_ch = max(12, min(80, round(total_words / CH_TARGET_WORDS)))
print(f"🧮 Total words: {total_words:,}  → est. pages ≈ {est_pages:.0f}")
print(f"🔧 If you rerun, suggested chapters for this style ≈ {suggested_ch}")

doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")
doc.add_paragraph(f"Estimated pages: ~{est_pages:.0f}")

# Character Development Section
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role","")
        arc  = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes (Auto-Eval)
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in (notes or {}):
        doc.add_paragraph("No evaluation available.")
        continue
    scores = notes["scores"]
    one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try:
        avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception:
        avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None:
        doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner:
        doc.add_paragraph(f"Note: {one_liner}")
    if edits:
        for e in edits:
            doc.add_paragraph(f"• {e}")

# Back-cover blurb + retailer hook + social snippets
blurb_data = {}
try:
    blurb_raw = blurb_chain.invoke({
        "title": BOOK_TITLE,
        "logline": THEME_BIBLE.get("logline",""),
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "promises": ", ".join(THEME_BIBLE.get("promises", []))
    })
    blurb_data = parse_strict_json(blurb_raw) or {}
except Exception:
    blurb_data = {}

doc.add_page_break()
doc.add_heading("Back-Cover Copy & Retailer Hook", level=1)
if blurb_data.get("blurb"):
    doc.add_paragraph(blurb_data["blurb"])
if blurb_data.get("product_hook"):
    doc.add_paragraph(f"\nRetailer Hook: {blurb_data['product_hook']}")
if blurb_data.get("snippets"):
    doc.add_heading("Short Social Snippets", level=2)
    for s in blurb_data["snippets"]:
        doc.add_paragraph(f"• {s}")

# Save & download
fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/253.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m245.8/253.0 kB[0m [31m12.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Ollama health: 200
🖥️ GPU: not detected
✔ Using model: llama3.2:3b
✔ Using model: llama3.2:3b
🎯 Target pages: 320  → target words ≈ 88,000
📏 Chapter target: ~3,000 words → initial chapters: 30
→ Generating 30-chapter outline (booster mode) …
   ✓ planner JSON (rescued)
   → accepted 1; total now 1/30
   ✓ planner JSON (rescued)
   → accepted 1; total now 2/30
   ✓ planner JSON (rescued)
   → accepted 1; total now 3/30
   ✓ planner JSON (rescued)
   → accepted 1; total now 4/30
   ✓ planner JSON (rescued)
   → accepted 1; total now 5/30
   ✓ planner JSON (rescued)
   → accepted 1; total now 6/30
   ✓ planner JSON 

Chapters: 100%|██████████| 16/16 [6:15:53<00:00, 1409.59s/it]


🧮 Total words: 15,899  → est. pages ≈ 58
🔧 If you rerun, suggested chapters for this style ≈ 12
📘 Saved Artificial_Influencers_2.docx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama (+ research agent deps, toggles)
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm duckduckgo-search trafilatura readability-lxml

import os, threading, subprocess, time, requests, json, re, shutil, pathlib, sys, random, math
from typing import List, Any, Dict, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx
from google.colab import files

# Avoid LangChain provider hijacks
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"
]:
    os.environ.pop(v, None)

# —— Toggles ——
FAST_MODE = True                   # faster pass with lighter sampling
RESEARCH_AGENT_ENABLED = True      # turn on/off the web research step
CHECKPOINT_DIR = "book_ckpt"       # per-chapter cache
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Conservative Ollama parallelism for Colab
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "1"
os.environ["OLLAMA_NUM_PARALLEL"] = "1"

# Launch Ollama
os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"
!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama health:", requests.get("http://127.0.0.1:11434").status_code)

def _has_gpu():
    try:
        return shutil.which("nvidia-smi") and (subprocess.run(["nvidia-smi"], capture_output=True).returncode==0)
    except Exception:
        return False
HAS_GPU = bool(_has_gpu())
print("🖥️ GPU:", "available" if HAS_GPU else "not detected")

# ─────────────────────────────────────────────────────────────────────────────
# 1) Model resolver: pick the first available model from candidates
# ─────────────────────────────────────────────────────────────────────────────
def pick_first_available(candidates: List[str]) -> str:
    for m in candidates:
        try:
            r = subprocess.run(["ollama", "pull", m], capture_output=True, text=True)
            if r.returncode == 0:
                print(f"✔ Using model: {m}")
                return m
            else:
                print(f"✖ Pull failed for {m} → {r.stderr.strip() or r.stdout.strip()}")
        except Exception as e:
            print(f"✖ Error pulling {m}: {e}")
    raise RuntimeError(f"No candidate models could be pulled: {candidates}")

PLANNER_CANDIDATES = ["llama3.2:3b","qwen2.5:3b","phi3:3.8b-mini","gemma2:2b","mistral:7b","llama3.1:8b"]
WRITER_FAST_CANDIDATES = ["llama3.2:3b","qwen2.5:3b","phi3:3.8b-mini","gemma2:2b","mistral:7b","llama3.1:8b"]
WRITER_QUALITY_CANDIDATES = ["llama3.1:8b","mistral:7b","qwen2.5:7b"]

PLANNER_MODEL = pick_first_available(PLANNER_CANDIDATES)
WRITER_MODEL  = pick_first_available(WRITER_FAST_CANDIDATES if FAST_MODE else WRITER_QUALITY_CANDIDATES)

# ─────────────────────────────────────────────────────────────────────────────
# 2) User targets: auto pages → words → chapters
# ─────────────────────────────────────────────────────────────────────────────
BOOK_TITLE   = "What Could Have Been: An Alternative History"
MODE         = "fiction"  # or "nonfiction"
TARGET_PAGES = 320 if MODE == "fiction" else 280

GENRE_PROFILE = {
    "fiction":   {"pages_min": 280, "pages_max": 360, "chapter_words_typical": (2400, 3600)},
    "nonfiction":{"pages_min": 220, "pages_max": 320, "chapter_words_typical": (3000, 4500)}
}[MODE]

if TARGET_PAGES is None:
    TARGET_PAGES = (GENRE_PROFILE["pages_min"] + GENRE_PROFILE["pages_max"]) // 2

WORDS_PER_PAGE = 275
TARGET_WORDS   = int(TARGET_PAGES * WORDS_PER_PAGE)

CH_MIN, CH_MAX = GENRE_PROFILE["chapter_words_typical"]
CH_TARGET_WORDS = int((CH_MIN + CH_MAX) / 2)
NUM_CH = max(12, min(80, (TARGET_WORDS + CH_TARGET_WORDS - 1) // CH_TARGET_WORDS))

SEED_IDEA = ("Counterfactual: Hillary Clinton wins the 2016 U.S. election. "
             "Track real 2017–2021 events as baseline, then explore plausible divergences "
             "in domestic policy, foreign affairs, courts, and tech/social media dynamics.")

print(f"🎯 Target pages: {TARGET_PAGES}  → target words ≈ {TARGET_WORDS:,}")
print(f"📏 Chapter target: ~{CH_TARGET_WORDS:,} words → initial chapters: {NUM_CH}")

# ─────────────────────────────────────────────────────────────────────────────
# 3) LLMs & prompts (incl. research-aware chapter prompt)
# ─────────────────────────────────────────────────────────────────────────────
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate

PLANNER_NUM_PREDICT = 900 if FAST_MODE else 1400
WRITER_NUM_PREDICT  = 1600 if FAST_MODE else 3000
MAX_WORKERS = 1 if (FAST_MODE or not HAS_GPU) else 3

planner_llm = OllamaLLM(model=PLANNER_MODEL, base_url="http://127.0.0.1:11434",
                        temperature=0.25, num_predict=PLANNER_NUM_PREDICT)
planner_llm_json = OllamaLLM(model=PLANNER_MODEL, base_url="http://127.0.0.1:11434",
                             temperature=0.2, num_predict=PLANNER_NUM_PREDICT, format="json")
writer_llm  = OllamaLLM(model=WRITER_MODEL, base_url="http://127.0.0.1:11434",
                        temperature=0.8, num_ctx=4096, num_predict=WRITER_NUM_PREDICT)

# Outline prompts with diversity/avoid lists
OUTLINE_JSON_PROMPT = PromptTemplate(
    input_variables=["topic","count","avoid_titles","avoid_phrases"],
    template=(
"""Generate exactly {count} DIFFERENT chapter seeds for this novel as a JSON ARRAY.
Each item: {{"title":"...", "description":"..."}}
Rules:
- Vary SETTING, MODE OF CONFLICT, and REVERSAL TYPE across items.
- Avoid any titles in AVOID_TITLES and any phrases in AVOID_PHRASES.
- Return JSON ONLY (no commentary).
Book idea: {topic}
AVOID_TITLES: {avoid_titles}
AVOID_PHRASES: {avoid_phrases}
"""))

OUTLINE_NDJSON_PROMPT = PromptTemplate(
    input_variables=["topic","count","avoid_titles","avoid_phrases"],
    template=(
"""Generate exactly {count} DIFFERENT chapter seeds as NDJSON (one JSON object per line).
Each line: {{"title":"...", "description":"..."}}
Rules:
- Vary SETTING, MODE OF CONFLICT, and REVERSAL TYPE across items.
- Avoid any titles in AVOID_TITLES and any phrases in AVOID_PHRASES.
- No numbering, no code fences, no commentary.
Book idea: {topic}
AVOID_TITLES: {avoid_titles}
AVOID_PHRASES: {avoid_phrases}
"""))

CHAR_JSON_PROMPT = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"""Given this chapter outline (JSON list): {outline}
Create exactly {num_chars} MAIN CHARACTERS as a JSON ARRAY.
Each item: {{"name":"...","role":"...","development_arc":"..."}}
Return JSON ONLY."""
))

# Research-aware chapter flow
THEME_PROMPT = PromptTemplate(
    input_variables=["topic","outline"],
    template=(
"""From the seed idea and outline, produce STRICT JSON:
{{
  "themes": ["..."], "motifs": ["..."], "promises": ["..."],
  "logline": "...", "genre_signals": ["..."]
}}
Seed idea: {topic}
Outline: {outline}
"""))

BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","theme_bible","motif_ledger","worldbrief"],
    template=(
"""Plan a beat sheet as STRICT JSON:
{{
  "beats": [
    {{"name":"Hook","goal":"...","conflict":"...","setting":"...","emotion":"..."}}, ...
  ],
  "dialogue_target_pct": 0.36,
  "sensory_palette": ["sound","smell"],
  "foreshadow":"...",
  "callback_motif":"..."
}}
Requirements:
- 8–12 beats with a midpoint reversal and a stinger.
- Weave in WORLD BRIEF lightly (do not info-dump): {worldbrief}
TITLE: {title}
DESC: {description}
THEME_BIBLE: {theme_bible}
MOTIF_LEDGER: {motif_ledger}
"""))

CHAPTER_WITH_BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","idea","plan","themes","sensory_palette","dialogue_target","worldbrief"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Mini-brief: {description}
Plan (beats): {plan}
Context to weave subtly (no info-dumps; show, don't tell): {worldbrief}

Must do:
- Open with a punchy 1–2 sentence hook.
- Emphasize SENSORY PALETTE: {sensory_palette}
- Aim for DIALOGUE DENSITY ≈ {dialogue_target:.2f}.
- Integrate 1 motif/prop from the plan naturally.
- Midpoint reversal that reframes stakes.
- End with a plausible cliffhanger/stinger.

Style: concrete details, crisp verbs; avoid clichés; maintain POV & continuity.
Themes to reinforce: {themes}
Return TEXT ONLY.
"""))

chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"
(If WORLD BRIEF is present in your system prompt, weave it subtly.)
Return TEXT ONLY.
"""))

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise to reduce repetition with earlier chapters while improving novelty and tension.
Keep continuity; change micro-beats and setting details.
- Add one fresh obstacle, one specific sensory detail, one plausible surprise.
Return TEXT ONLY.

Title: {title}
Description: {description}
Do-not-repeat ledger: {ledger}
Chapter draft:
{chapter}
"""))

EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
pacing, tension, voice, imagery, dialogue, novelty.
Return STRICT JSON only:
{{"scores":{{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x}},
 "one_sentence_note":"...", "three_micro_edits":["...","...","..."]}}
Chapter:
{chapter}
"""))

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits without changing plot:
- {edits}
Keep length ±10%. Add concrete sensory details. Remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""))

DIALOGUE_TUNER_PROMPT = PromptTemplate(
    input_variables=["chapter","target"],
    template=(
"""Revise chapter to adjust dialogue density to ≈ {target:.2f} (±0.08).
Keep plot and beats intact. Length change ≤10%.
Return TEXT ONLY.

Chapter:
{chapter}
"""))

DECLICHE_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Line-edit to remove clichés and filler. Replace with specific, concrete imagery.
Preserve plot, POV, beats, and length (±5%). Return TEXT ONLY.

Chapter:
{chapter}
"""))

MOTIF_MINER_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Extract 1–3 recurring motifs/props/images (short noun phrases).
Return STRICT JSON: {{"motifs":["..."]}}
Chapter:
{chapter}
"""))

BLURB_PROMPT = PromptTemplate(
    input_variables=["title","logline","themes","promises"],
    template=(
"""Write STRICT JSON:
{{"blurb":"(120–160 words)","product_hook":"(1 sentence)","snippets":["...","...","..."]}}
Title: {title}
Logline: {logline}
Themes: {themes}
Promises: {promises}
"""))

# Build chains
outline_chain_json_planner  = OUTLINE_JSON_PROMPT   | planner_llm_json
outline_chain_ndjson_plan   = OUTLINE_NDJSON_PROMPT | planner_llm
outline_chain_ndjson_writer = OUTLINE_NDJSON_PROMPT | writer_llm
character_chain_json        = CHAR_JSON_PROMPT      | planner_llm_json
theme_chain                 = THEME_PROMPT          | planner_llm
beats_chain                 = BEATS_PROMPT          | planner_llm
chapter_beats_llm           = CHAPTER_WITH_BEATS_PROMPT | writer_llm
chapter_chain               = chapter_prompt        | writer_llm
revision_chain              = REVISION_PROMPT       | writer_llm
eval_chain                  = EVAL_PROMPT           | planner_llm
punchup_chain               = PUNCHUP_PROMPT        | writer_llm
dialogue_tuner              = DIALOGUE_TUNER_PROMPT | writer_llm
decliche_chain              = DECLICHE_PROMPT       | writer_llm
motif_miner                 = MOTIF_MINER_PROMPT    | planner_llm
blurb_chain                 = BLURB_PROMPT          | planner_llm

# Counterfactual scaffolder (for HRC presidency)
COUNTERFACTUAL_PROMPT = PromptTemplate(
    input_variables=["history_brief","premise"],
    template=(
"""Based on this real history brief:\n{history_brief}\n\n
Propose 10 plausible DIVERGENCE POINTS if Hillary Clinton had won in 2016.
For each: {{ "title": "...", "what_changes": "...", "downstream_ripples": "...", "conflicts": ["...","..."] }}
Return a STRICT JSON array of 10 items only.
Premise: {premise}
"""))
counterfactual_chain = COUNTERFACTUAL_PROMPT | planner_llm_json

# ─────────────────────────────────────────────────────────────────────────────
# 4) Utilities: parsing, similarity, checkpoints, etc.
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s); s = FENCE_RE.sub("", s)
    return s.strip()

def parse_json_value(s: str):
    s = strip_think(s)
    m = re.search(r"(\{.*\}|\[.*\])\s*$", s, flags=re.S)
    if not m: return None
    blob = m.group(1)
    try:
        return json.loads(blob)
    except Exception:
        fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
        fix = re.sub(r",\s*}", "}", fix); fix = re.sub(r",\s*]", "]", fix)
        try: return json.loads(fix)
        except Exception: return None

def parse_strict_json(s: str) -> dict:
    obj = parse_json_value(s)
    return obj if isinstance(obj, dict) else {}

OBJ_RE = re.compile(r"\{(?:[^{}]|\"[^\"\\]*(?:\\.[^\"\\]*)*\")*\}")
def extract_json_objects(text: str):
    text = strip_think(text); objs = []
    for m in OBJ_RE.finditer(text):
        blob = m.group(0)
        try:
            obj = json.loads(blob)
            if isinstance(obj, dict): objs.append(obj)
        except Exception:
            try:
                fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                obj = json.loads(fix)
                if isinstance(obj, dict): objs.append(obj)
            except Exception: pass
    return objs

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", (a or "").lower()))
    B = set(re.findall(r"[a-z0-9']+", (b or "").lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

def too_similar_relaxed(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.80) or (t_sim > 0.55 and d_sim > 0.62)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", (s or "").lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", (text or "").lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

def approx_dialogue_ratio(text: str) -> float:
    lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
    if not lines: return 0.0
    dial = sum(1 for ln in lines if re.search(r'["“”]|^—', ln))
    return dial / max(1, len(lines))

def word_count(text: str) -> int:
    return len(re.findall(r"[A-Za-z0-9']+", text or ""))

def _slug(s):
    return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:60]

def _ck_paths(i, title):
    base = f"{i:03d}-{_slug(title)}"
    p = pathlib.Path(CHECKPOINT_DIR)
    return p / (base + ".txt"), p / (base + ".json")

def save_ckpt(i, title, text, notes):
    p_txt, p_meta = _ck_paths(i, title)
    p_txt.write_text(text or "", encoding="utf-8")
    meta = {"chapter_index": i, "title": title, "notes": notes}
    p_meta.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

def load_ckpt_if_any(i, title):
    p_txt, p_meta = _ck_paths(i, title)
    if p_txt.exists():
        text = p_txt.read_text(encoding="utf-8")
        notes = None
        if p_meta.exists():
            try: notes = json.loads(p_meta.read_text(encoding="utf-8"))
            except Exception: notes = None
        return text, notes
    return None, None

# ─────────────────────────────────────────────────────────────────────────────
# 5) Research Agent: duckduckgo + trafilatura → History Brief JSON/MD
# ─────────────────────────────────────────────────────────────────────────────
from duckduckgo_search import DDGS
import trafilatura

def ddg_text(q: str, max_results=6):
    with DDGS() as ddgs:
        return list(ddgs.text(q, max_results=max_results, region="us-en", safesearch="moderate"))

def ddg_news(q: str, max_results=6):
    with DDGS() as ddgs:
        return list(ddgs.news(q, max_results=max_results, region="us-en", safesearch="moderate"))

def fetch_clean(url: str, timeout=20_000) -> str:
    try:
        downloaded = trafilatura.fetch_url(url, timeout=timeout)
        if not downloaded: return ""
        text = trafilatura.extract(downloaded, include_comments=False, include_tables=False, no_fallback=False)
        return text or ""
    except Exception:
        return ""

def research_pack(topic: str, seed_queries: List[str], per_query=5) -> Dict[str, Any]:
    print("🔎 Research agent: collecting sources…")
    hits = []
    for q in seed_queries:
        try:
            hits.extend(ddg_text(q, max_results=per_query))
        except Exception:
            continue
    # Deduplicate by URL
    seen = set(); hits2 = []
    for h in hits:
        url = h.get("href") or h.get("url")
        if not url or url in seen: continue
        seen.add(url); hits2.append({"title": h.get("title",""), "url": url})

    docs = []
    for h in tqdm(hits2[:25], desc="Fetching"):
        txt = fetch_clean(h["url"])
        if not txt: continue
        docs.append({"title": h["title"], "url": h["url"], "text": txt[:120000]})

    if not docs:
        return {"topic": topic, "facts": [], "timeline": [], "citations": [], "summary": ""}

    # Summarize with the planner (JSON)
    SUMM_PROMPT = PromptTemplate(
        input_variables=["topic","docs"],
        template=(
"""You are an impartial researcher. From these sources, produce STRICT JSON:
{{
 "facts": ["..."],                       # 12–18 atomic, dated facts
 "timeline": [{{"date":"YYYY-MM","event":"...","why_it_matters":"..."}}, ...],  # 10–14 items
 "policy_buckets": {{
   "economy_tax":"...", "immigration":"...", "trade":"...", "foreign_policy":"...", "covid":"...", "justice_impeachments":"..."
 }},
 "summary": "(200–280 words, neutral)",
 "citations": [{{"title":"...","url":"..."}}, ...]  # 10–16 items
}}
Topic: {topic}
Sources (title + excerpts): {docs}
"""))
    chain = SUMM_PROMPT | planner_llm_json
    # compact sources text
    doc_blurbs = [{"title": d["title"], "url": d["url"], "snippet": (d["text"][:1000] + ("…" if len(d["text"])>1000 else ""))} for d in docs[:14]]
    raw = chain.invoke({"topic": topic, "docs": json.dumps(doc_blurbs, ensure_ascii=False)})
    data = parse_json_value(raw) or {}
    data.setdefault("citations", [])
    # add any missing citations
    for d in doc_blurbs:
        if not any(c.get("url")==d["url"] for c in data["citations"]):
            data["citations"].append({"title": d["title"], "url": d["url"]})
    return data

def brief_to_md(brief: Dict[str,Any]) -> str:
    lines = [f"# Research Brief: {brief.get('topic','')}", ""]
    if brief.get("summary"):
        lines += ["## Summary", brief["summary"], ""]
    if brief.get("timeline"):
        lines += ["## Timeline"]
        for t in brief["timeline"]:
            lines.append(f"- **{t.get('date','')}** — {t.get('event','')} — _{t.get('why_it_matters','')}_")
        lines.append("")
    if brief.get("policy_buckets"):
        lines += ["## Policy Buckets"]
        for k,v in brief["policy_buckets"].items():
            lines.append(f"- **{k}**: {v}")
        lines.append("")
    if brief.get("facts"):
        lines += ["## Facts"]
        for f in brief["facts"][:20]:
            lines.append(f"- {f}")
        lines.append("")
    if brief.get("citations"):
        lines += ["## Sources"]
        for c in brief["citations"][:20]:
            lines.append(f"- [{c.get('title','source')}]({c.get('url','')})")
    return "\n".join(lines)

# Default research queries for 2017–2021 baseline
DEFAULT_TRUMP_QUERIES = [
    "Presidency of Donald Trump 2017 2021 summary",
    "Tax Cuts and Jobs Act 2017 summary site:wikipedia.org",
    "Executive Order 13769 travel ban summary",
    "USMCA enters into force July 1 2020 site:ustr.gov",
    "First impeachment of Donald Trump 2019 summary",
    "Second impeachment of Donald Trump 2021 summary",
    "COVID-19 response CARES Act March 2020 CRS summary site:crsreports.congress.gov",
    "Operation Warp Speed overview site:gao.gov"
]

# ─────────────────────────────────────────────────────────────────────────────
# 6) Booster-grade OUTLINE generator (JSON→NDJSON; planner→writer; avoid lists)
# ─────────────────────────────────────────────────────────────────────────────
def clean_outline_items(items):
    out = []
    for obj in items:
        if not isinstance(obj, dict): continue
        title = (obj.get("title") or "").strip()
        desc  = (obj.get("description") or "").strip()
        if title and desc: out.append({"title": title, "description": desc})
    return out

def robust_outline_batch(topic: str, ask: int, avoid_titles: List[str], avoid_phrases: List[str]):
    payload = {
        "topic": topic,
        "count": ask,
        "avoid_titles": ", ".join(sorted(set(avoid_titles))[:50]),
        "avoid_phrases": ", ".join(sorted(set(avoid_phrases))[:50]),
    }
    # Planner JSON
    try:
        raw = outline_chain_json_planner.invoke(payload)
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ planner JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ planner JSON (rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · planner JSON failed: {e}")
    # Planner NDJSON
    try:
        raw = outline_chain_ndjson_plan.invoke(payload)
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try: objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ planner NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · planner NDJSON failed: {e}")
    # Writer NDJSON
    try:
        raw = outline_chain_ndjson_writer.invoke(payload)
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try: objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ writer NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · writer NDJSON failed: {e}")
    # Writer JSON last
    try:
        raw = (OUTLINE_JSON_PROMPT | writer_llm).invoke(payload)
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ writer JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ writer JSON (rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · writer JSON failed: {e}")
    # Stub fallback
    print("   ⚠️ stub fallback")
    TEMPLATES = [
        ("First 100 Days, Rewired", "A new administration rewrites norms; a misread memo sets up the season’s first conflict."),
        ("The Unseen Docket", "A Supreme Court vacancy collides with backchannel promises and an ethics snag."),
        ("Trade Winds", "Tariffs, treaties, and a leak force an unlikely coalition to form—or fracture."),
        ("Outbreak Narratives", "A public-health rehearsal becomes real; data, politics, and trust fall out of sync."),
        ("Backchannel Summit", "A surprise foreign breakthrough carries a personal cost that ricochets at home."),
        ("Platform Immunities", "A tech-policy skirmish pulls private lives into the open, with legal fallout."),
        ("Counting Rooms", "An election-year rule change triggers a chain of unintended consequences.")
    ]
    random.shuffle(TEMPLATES)
    return [{"title": t, "description": d} for t,d in TEMPLATES[:ask]]

print(f"→ Generating {NUM_CH}-chapter outline (booster mode) …")
outline, seen_titles = [], set()
needed = NUM_CH
chunk = 3
attempts, MAX_ATTEMPTS = 0, 60

while len(outline) < needed and attempts < MAX_ATTEMPTS:
    attempts += 1
    ask = min(chunk, needed - len(outline))
    avoid_titles = list(seen_titles)
    recent_phrases = []
    for it in outline[-12:]:
        recent_phrases.extend(top_trigrams(it.get("description",""), k=6))
    batch = robust_outline_batch(SEED_IDEA, ask, avoid_titles, recent_phrases)
    added = 0
    for ch in batch:
        if not ch["title"] or not ch["description"]: continue
        if ch["title"] in seen_titles: continue
        if any(too_similar_relaxed(ch, e) for e in outline): continue
        outline.append(ch); seen_titles.add(ch["title"]); added += 1
        if len(outline) >= needed: break
    print(f"   → accepted {added}; total now {len(outline)}/{needed}")
    if added == 0:
        print("   · No unique chapters accepted from batch; retrying…")

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < needed:
    print("⚠️ Could not reach target; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 7) Characters + Theme/Motif Bible
# ─────────────────────────────────────────────────────────────────────────────
def robust_characters(outline_list, n_chars):
    try:
        raw = character_chain_json.invoke({"outline": json.dumps(outline_list, ensure_ascii=False),
                                           "num_chars": n_chars})
        arr = parse_json_value(raw)
        if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
            return [c for c in arr if isinstance(c, dict)][:n_chars]
    except Exception: pass
    try:
        raw = (CHAR_JSON_PROMPT | writer_llm).invoke({"outline": json.dumps(outline_list, ensure_ascii=False),
                                                      "num_chars": n_chars})
        arr = parse_json_value(raw)
        if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
            return [c for c in arr if isinstance(c, dict)][:n_chars]
    except Exception: pass
    return [
        {"name":"Alex Vega","role":"Chief of Staff","development_arc":"From risk-averse gatekeeper to bold coalition-builder."},
        {"name":"Ruth Delgado","role":"Solicitor General","development_arc":"Learns to balance principle with political reality."},
        {"name":"Jonah Price","role":"Data Journalist","development_arc":"Truth-telling collides with personal loyalties."},
    ][:n_chars]

NUM_CHAR = max(3, min(10, len(outline)//8))
print(f"→ Generating {NUM_CHAR} characters…")
characters = robust_characters(outline, NUM_CHAR)
print(f"✔ Got {len(characters)} characters\n")

print("→ Building theme/motif bible…")
THEME_BIBLE = parse_strict_json(theme_chain.invoke({
    "topic": SEED_IDEA,
    "outline": json.dumps(outline, ensure_ascii=False)
})) or {"themes":[],"motifs":[],"promises":[],"logline":"","genre_signals":[]}
MOTIF_LEDGER = list(THEME_BIBLE.get("motifs", []))
print("✔ Theme bible ready.\n")

# ─────────────────────────────────────────────────────────────────────────────
# 8) Run Research Agent (if enabled) → History Brief + Counterfactual seeds
# ─────────────────────────────────────────────────────────────────────────────
WORLD_BRIEF = ""
COUNTERFACTUAL_POINTS = []
if RESEARCH_AGENT_ENABLED:
    brief = research_pack(
        topic="U.S. Presidency 2017–2021 baseline for counterfactual (HRC wins 2016).",
        seed_queries=DEFAULT_TRUMP_QUERIES, per_query=5
    )
    # Save brief
    pathlib.Path("research").mkdir(exist_ok=True)
    with open("research/history_brief.json","w",encoding="utf-8") as f:
        json.dump(brief, f, ensure_ascii=False, indent=2)
    md = brief_to_md(brief)
    pathlib.Path("research/history_brief.md").write_text(md, encoding="utf-8")
    print("🧾 Saved research/history_brief.{json,md}")

    # Compact bullets for prompts
    bullets = []
    for t in (brief.get("timeline") or [])[:10]:
        bullets.append(f"{t.get('date','')}: {t.get('event','')}")
    if not bullets and brief.get("facts"):
        bullets = (brief["facts"])[:10]
    WORLD_BRIEF = " | ".join(bullets)[:1200]

    # Counterfactual divergence points
    cf_raw = counterfactual_chain.invoke({
        "history_brief": md[:6000],
        "premise": "Hillary Clinton wins 2016; explore plausible policy and geopolitical divergences 2017–2021."
    })
    COUNTERFACTUAL_POINTS = parse_json_value(cf_raw) or []
else:
    print("ℹ️ Research agent disabled; continuing without WORLD_BRIEF.")

# ─────────────────────────────────────────────────────────────────────────────
# 9) Chapter Generation (beats → draft → eval/polish) + checkpointing
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)
BIGRAM_THRESHOLD = 0.22

def write_one(idx):
    meta = outline[idx]
    title = meta["title"]

    # Resume if cached
    cached_txt, cached_notes = load_ckpt_if_any(idx, title)
    if cached_txt:
        return cached_txt, cached_notes

    # Plan beats (with world brief if available)
    plan = parse_strict_json(beats_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "theme_bible": json.dumps(THEME_BIBLE, ensure_ascii=False),
        "motif_ledger": json.dumps(MOTIF_LEDGER[-12:], ensure_ascii=False),
        "worldbrief": WORLD_BRIEF or "(none)"
    })) or {}
    dialogue_target = float(plan.get("dialogue_target_pct", 0.36))
    sensory_palette = plan.get("sensory_palette", ["sight","sound"])
    plan_json = json.dumps(plan.get("beats", []), ensure_ascii=False)

    # Draft
    res = chapter_beats_llm.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA,
        "plan": plan_json,
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "sensory_palette": ", ".join(sensory_palette),
        "dialogue_target": dialogue_target,
        "worldbrief": WORLD_BRIEF or "(none)"
    })
    chapter_txt = strip_think(res)

    # Repetition guard
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > BIGRAM_THRESHOLD:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]
    if ledger:
        chapter_txt = strip_think(revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        }))

    # Evaluation + conditional polish
    data = None
    try:
        report_txt = strip_think(eval_chain.invoke({"chapter": chapter_txt}))
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
        if data and "scores" in data:
            scores = data["scores"]
            avg = sum(float(scores[k]) for k in scores)/len(scores)
            dr = approx_dialogue_ratio(chapter_txt)
            if abs(dr - dialogue_target) > 0.10:
                chapter_txt = strip_think(dialogue_tuner.invoke({"chapter": chapter_txt, "target": dialogue_target}))
            if (avg < 7.6) or (not FAST_MODE):
                chapter_txt = strip_think(decliche_chain.invoke({"chapter": chapter_txt}))
            if avg < 7.4:
                edits = " | ".join(data.get("three_micro_edits", [])) or "Sharpen hook; escalate midpoint; add concrete sensory beats."
                chapter_txt = strip_think(punchup_chain.invoke({"chapter": chapter_txt, "edits": edits}))
    except Exception:
        data = None

    # Mine motifs
    try:
        mined = parse_strict_json(motif_miner.invoke({"chapter": chapter_txt})) or {}
        for m in (mined.get("motifs") or []):
            if m not in MOTIF_LEDGER: MOTIF_LEDGER.append(m)
    except Exception: pass

    save_ckpt(idx, title, chapter_txt, data)
    return chapter_txt, data

# Early calibration on first 3
PREGEN = min(3, len(outline))
for i in range(PREGEN):
    ch_txt, notes = write_one(i); chap_texts[i] = ch_txt; editor_notes[i] = notes

# Resize outline based on observed words/chapter to hit page target
actual_avg = max(500, sum(word_count(chap_texts[i]) for i in range(PREGEN)) // PREGEN)
recalc_num_ch = max(12, min(80, (TARGET_WORDS + actual_avg - 1) // actual_avg))
if recalc_num_ch != len(outline):
    delta = recalc_num_ch - len(outline)
    if delta > 0:
        print(f"🔁 Resizing outline: need +{delta} chapters…")
        extra = robust_outline_batch(SEED_IDEA, delta, list(seen_titles), [])
        for obj in extra:
            cand = {"title": (obj.get("title") or "").strip(),
                    "description": (obj.get("description") or "").strip()}
            if not cand["title"] or not cand["description"]: continue
            if cand["title"] in seen_titles: continue
            if any(too_similar_relaxed(cand, e) for e in outline): continue
            outline.append(cand); seen_titles.add(cand["title"])
            chap_texts.append(None); editor_notes.append(None)
            if len(outline) >= recalc_num_ch: break
        print(f"   → total {len(outline)} chapters after resize")
    elif delta < 0:
        keep = max(PREGEN, recalc_num_ch)
        outline = outline[:keep]; chap_texts = chap_texts[:keep]; editor_notes = editor_notes[:keep]
        print(f"✂️  Trimmed outline to {len(outline)} chapters")

# Finish remaining chapters
remaining_idxs = [i for i, t in enumerate(chap_texts) if t is None]
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in remaining_idxs }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt; editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 10) Build DOCX: Characters, Research Brief, Counterfactual Points, Chapters
# ─────────────────────────────────────────────────────────────────────────────
total_words = sum(word_count(t or "") for t in chap_texts)
est_pages  = total_words / WORDS_PER_PAGE
suggested_ch = max(12, min(80, round(total_words / CH_TARGET_WORDS)))
print(f"🧮 Total words: {total_words:,}  → est. pages ≈ {est_pages:.0f}")
print(f"🔧 If you rerun, suggested chapters for this style ≈ {suggested_ch}")

doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")
doc.add_paragraph(f"Estimated pages: ~{est_pages:.0f}")

# Character Bible
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role",""); arc = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Research Brief
if RESEARCH_AGENT_ENABLED and pathlib.Path("research/history_brief.md").exists():
    doc.add_page_break()
    doc.add_heading("Historical Brief (2017–2021 Baseline)", level=1)
    md_text = pathlib.Path("research/history_brief.md").read_text(encoding="utf-8")
    # Keep it short in the docx (top sections only)
    for para in md_text.splitlines()[:300]:
        if para.startswith("#"):
            if para.startswith("## "): doc.add_heading(para.replace("## ",""), level=2)
            elif para.startswith("# "): doc.add_heading(para.replace("# ",""), level=1)
        elif para.strip():
            doc.add_paragraph(para.strip())

# Counterfactual Divergence Points
if COUNTERFACTUAL_POINTS:
    doc.add_page_break()
    doc.add_heading("Counterfactual: 10 Divergence Points (HRC 2017–2021)", level=1)
    for i, d in enumerate(COUNTERFACTUAL_POINTS, 1):
        doc.add_heading(f"{i}. {d.get('title','')}", level=2)
        doc.add_paragraph("What changes: " + d.get("what_changes",""))
        doc.add_paragraph("Downstream ripples: " + d.get("downstream_ripples",""))
        for c in d.get("conflicts",[])[:3]:
            doc.add_paragraph("• " + c)

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in (notes or {}):
        doc.add_paragraph("No evaluation available."); continue
    scores = notes["scores"]; one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try: avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception: avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None: doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner: doc.add_paragraph(f"Note: {one_liner}")
    for e in (edits or []): doc.add_paragraph(f"• {e}")

# Back-cover copy
blurb_data = {}
try:
    blurb_data = parse_strict_json(blurb_chain.invoke({
        "title": BOOK_TITLE,
        "logline": THEME_BIBLE.get("logline",""),
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "promises": ", ".join(THEME_BIBLE.get("promises", []))
    })) or {}
except Exception:
    blurb_data = {}

doc.add_page_break()
doc.add_heading("Back-Cover Copy & Retailer Hook", level=1)
if blurb_data.get("blurb"): doc.add_paragraph(blurb_data["blurb"])
if blurb_data.get("product_hook"): doc.add_paragraph(f"\nRetailer Hook: {blurb_data['product_hook']}")
if blurb_data.get("snippets"):
    doc.add_heading("Short Social Snippets", level=2)
    for s in blurb_data["snippets"]:
        doc.add_paragraph(f"• {s}")

# Save & download
fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.6/132.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m837.9/837.9 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.7/274.7 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Ollama health: 200
🖥️ GPU: not detected
✔ Using model: llama3.2:3b
✔ Using model: llama3.2:3b
🎯 Target pages: 320  → target words ≈ 88,000
📏 Chapter target: ~3,000 words → initial chapters: 30
→ Generating 30-chapter outline (booster mode) …
   ✓ planner JSON (rescued)
   → accepted 1; total now 1/30
   ✓ planner JSON (rescued)
   → accepted 1; total now 2/30
   ✓ planner JSON (rescued)
   → a

  with DDGS() as ddgs:
  with DDGS() as ddgs:
  with DDGS() as ddgs:
  with DDGS() as ddgs:
  with DDGS() as ddgs:
  with DDGS() as ddgs:
  with DDGS() as ddgs:
  with DDGS() as ddgs:
Fetching: 100%|██████████| 25/25 [00:00<00:00, 126182.43it/s]


🧾 Saved research/history_brief.{json,md}
→ Generating chapters…
🔁 Resizing outline: need +64 chapters…
   ✓ planner JSON (rescued)
   → total 17 chapters after resize


Chapters: 100%|██████████| 14/14 [4:50:48<00:00, 1246.33s/it]


🧮 Total words: 13,786  → est. pages ≈ 50
🔧 If you rerun, suggested chapters for this style ≈ 12


AttributeError: 'str' object has no attribute 'get'

In [1]:
# ─────────────────────────────────────────────────────────────────────────────
# 0) Colab Setup: install & launch Ollama (+ research agent, robust fallbacks)
# ─────────────────────────────────────────────────────────────────────────────
!pip install --quiet langchain-ollama python-docx tqdm ddgs trafilatura readability-lxml

import os, threading, subprocess, time, requests, json, re, shutil, pathlib, sys, random, math
from typing import List, Any, Dict, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from tqdm import tqdm
import docx
from google.colab import files

# Avoid LangChain provider hijacks
for v in [
    "OPENAI_API_KEY",
    "LITELLM_PROVIDER", "LITELLM_MODEL", "LITELLM_BASE_URL",
    "LITELL M_PROVIDER", "LITELL M_MODEL", "LITELL M_BASE_URL"
]:
    os.environ.pop(v, None)

# —— Toggles / sizing knobs ——
FAST_MODE = True                   # lighter sampling for speed on CPU
RESEARCH_AGENT_ENABLED = True      # set False to disable web brief
CPU_CHAPTER_CAP = 24               # soft cap for CPU-only runs; set to None to disable
CHECKPOINT_DIR = "book_ckpt"       # per-chapter cache on disk
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Conservative Ollama parallelism for Colab
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "1"
os.environ["OLLAMA_NUM_PARALLEL"] = "1"

# Launch Ollama
os.environ["OLLAMA_HOST"]    = "127.0.0.1:11434"
os.environ["OLLAMA_ORIGINS"] = "*"
!curl -fsSL https://ollama.com/install.sh -o install.sh
!bash install.sh >/dev/null 2>&1 || true

def _serve_ollama():
    subprocess.Popen(["ollama","serve"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
threading.Thread(target=_serve_ollama, daemon=True).start()
time.sleep(8)
print("✅ Ollama health:", requests.get("http://127.0.0.1:11434").status_code)

def _has_gpu():
    try:
        return shutil.which("nvidia-smi") and (subprocess.run(["nvidia-smi"], capture_output=True).returncode==0)
    except Exception:
        return False
HAS_GPU = bool(_has_gpu())
print("🖥️ GPU:", "available" if HAS_GPU else "not detected")

# ─────────────────────────────────────────────────────────────────────────────
# 1) Model resolver: pick the first available model from candidates
# ─────────────────────────────────────────────────────────────────────────────
def pick_first_available(candidates: List[str]) -> str:
    for m in candidates:
        try:
            r = subprocess.run(["ollama", "pull", m], capture_output=True, text=True)
            if r.returncode == 0:
                print(f"✔ Using model: {m}")
                return m
            else:
                print(f"✖ Pull failed for {m} → {r.stderr.strip() or r.stdout.strip()}")
        except Exception as e:
            print(f"✖ Error pulling {m}: {e}")
    raise RuntimeError(f"No candidate models could be pulled: {candidates}")

PLANNER_CANDIDATES = ["llama3.2:3b","qwen2.5:3b","phi3:3.8b-mini","gemma2:2b","mistral:7b","llama3.1:8b"]
WRITER_FAST_CANDIDATES = ["llama3.2:3b","qwen2.5:3b","phi3:3.8b-mini","gemma2:2b","mistral:7b","llama3.1:8b"]
WRITER_QUALITY_CANDIDATES = ["llama3.1:8b","mistral:7b","qwen2.5:7b"]

PLANNER_MODEL = pick_first_available(PLANNER_CANDIDATES)
WRITER_MODEL  = pick_first_available(WRITER_FAST_CANDIDATES if FAST_MODE else WRITER_QUALITY_CANDIDATES)

# ─────────────────────────────────────────────────────────────────────────────
# 2) User targets: auto pages → words → chapters
# ─────────────────────────────────────────────────────────────────────────────
BOOK_TITLE   = "What Could Have Been: An Alternative History"
MODE         = "fiction"  # or "nonfiction"
TARGET_PAGES = 320 if MODE == "fiction" else 280

GENRE_PROFILE = {
    "fiction":   {"pages_min": 280, "pages_max": 360, "chapter_words_typical": (2400, 3600)},
    "nonfiction":{"pages_min": 220, "pages_max": 320, "chapter_words_typical": (3000, 4500)}
}[MODE]

if TARGET_PAGES is None:
    TARGET_PAGES = (GENRE_PROFILE["pages_min"] + GENRE_PROFILE["pages_max"]) // 2

WORDS_PER_PAGE = 275
TARGET_WORDS   = int(TARGET_PAGES * WORDS_PER_PAGE)

CH_MIN, CH_MAX = GENRE_PROFILE["chapter_words_typical"]
CH_TARGET_WORDS = int((CH_MIN + CH_MAX) / 2)

# Base chapter count from pages goal
NUM_CH = max(12, min(80, (TARGET_WORDS + CH_TARGET_WORDS - 1) // CH_TARGET_WORDS))
# Optional CPU cap
if CPU_CHAPTER_CAP and not HAS_GPU:
    if NUM_CH > CPU_CHAPTER_CAP:
        print(f"⚙️ CPU chapter cap active → {CPU_CHAPTER_CAP} (was {NUM_CH})")
        NUM_CH = CPU_CHAPTER_CAP

SEED_IDEA = ("Counterfactual: Hillary Clinton wins the 2016 U.S. election. "
             "Track real 2017–2021 events as baseline, then explore plausible divergences "
             "in domestic policy, foreign affairs, courts, and tech/social media dynamics.")

print(f"🎯 Target pages: {TARGET_PAGES}  → target words ≈ {TARGET_WORDS:,}")
print(f"📏 Chapter target: ~{CH_TARGET_WORDS:,} words → initial chapters: {NUM_CH}")

# ─────────────────────────────────────────────────────────────────────────────
# 3) LLMs & prompts (incl. research-aware chapter prompt)
# ─────────────────────────────────────────────────────────────────────────────
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate

PLANNER_NUM_PREDICT = 900 if FAST_MODE else 1400
WRITER_NUM_PREDICT  = 1600 if FAST_MODE else 3000
MAX_WORKERS = 1 if (FAST_MODE or not HAS_GPU) else 3

planner_llm = OllamaLLM(model=PLANNER_MODEL, base_url="http://127.0.0.1:11434",
                        temperature=0.25, num_predict=PLANNER_NUM_PREDICT)
planner_llm_json = OllamaLLM(model=PLANNER_MODEL, base_url="http://127.0.0.1:11434",
                             temperature=0.2, num_predict=PLANNER_NUM_PREDICT, format="json")
writer_llm  = OllamaLLM(model=WRITER_MODEL, base_url="http://127.0.0.1:11434",
                        temperature=0.8, num_ctx=4096, num_predict=WRITER_NUM_PREDICT)

# Outline prompts with diversity/avoid lists (variety seed to shake outputs)
OUTLINE_JSON_PROMPT = PromptTemplate(
    input_variables=["topic","count","avoid_titles","avoid_phrases","vseed"],
    template=(
"""Generate exactly {count} DIFFERENT chapter seeds for this novel as a JSON ARRAY.
Each item: {{"title":"...", "description":"..."}}
Rules:
- Vary SETTING, MODE OF CONFLICT, and REVERSAL TYPE across items.
- Avoid any titles in AVOID_TITLES and any phrases in AVOID_PHRASES.
- Return JSON ONLY (no commentary).
VARIETY_SEED: {vseed}
Book idea: {topic}
AVOID_TITLES: {avoid_titles}
AVOID_PHRASES: {avoid_phrases}
"""))

OUTLINE_NDJSON_PROMPT = PromptTemplate(
    input_variables=["topic","count","avoid_titles","avoid_phrases","vseed"],
    template=(
"""Generate exactly {count} DIFFERENT chapter seeds as NDJSON (one JSON object per line).
Each line: {{"title":"...", "description":"..."}}
Rules:
- Vary SETTING, MODE OF CONFLICT, and REVERSAL TYPE across items.
- Avoid any titles in AVOID_TITLES and any phrases in AVOID_PHRASES.
- No numbering, no code fences, no commentary.
VARIETY_SEED: {vseed}
Book idea: {topic}
AVOID_TITLES: {avoid_titles}
AVOID_PHRASES: {avoid_phrases}
"""))

CHAR_JSON_PROMPT = PromptTemplate(
    input_variables=["outline","num_chars"],
    template=(
"""Given this chapter outline (JSON list): {outline}
Create exactly {num_chars} MAIN CHARACTERS as a JSON ARRAY.
Each item: {{"name":"...","role":"...","development_arc":"..."}}
Return JSON ONLY."""
))

# Research-aware chapter flow
THEME_PROMPT = PromptTemplate(
    input_variables=["topic","outline"],
    template=(
"""From the seed idea and outline, produce STRICT JSON:
{{
  "themes": ["..."], "motifs": ["..."], "promises": ["..."],
  "logline": "...", "genre_signals": ["..."]
}}
Seed idea: {topic}
Outline: {outline}
"""))

BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","theme_bible","motif_ledger","worldbrief"],
    template=(
"""Plan a beat sheet as STRICT JSON:
{{
  "beats": [
    {{"name":"Hook","goal":"...","conflict":"...","setting":"...","emotion":"..."}}, ...
  ],
  "dialogue_target_pct": 0.36,
  "sensory_palette": ["sound","smell"],
  "foreshadow":"...",
  "callback_motif":"..."
}}
Requirements:
- 8–12 beats with a midpoint reversal and a stinger.
- Weave in WORLD BRIEF lightly (no info-dumps): {worldbrief}
TITLE: {title}
DESC: {description}
THEME_BIBLE: {theme_bible}
MOTIF_LEDGER: {motif_ledger}
"""))

CHAPTER_WITH_BEATS_PROMPT = PromptTemplate(
    input_variables=["title","description","idea","plan","themes","sensory_palette","dialogue_target","worldbrief"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Mini-brief: {description}
Plan (beats): {plan}
Context to weave subtly (no info-dumps; show, don't tell): {worldbrief}

Must do:
- Open with a punchy 1–2 sentence hook.
- Emphasize SENSORY PALETTE: {sensory_palette}
- Aim for DIALOGUE DENSITY ≈ {dialogue_target:.2f}.
- Integrate 1 motif/prop from the plan naturally.
- Midpoint reversal that reframes stakes.
- End with a plausible cliffhanger/stinger.

Style: concrete details, crisp verbs; avoid clichés; maintain POV & continuity.
Themes to reinforce: {themes}
Return TEXT ONLY.
"""))

chapter_prompt = PromptTemplate(
    input_variables=["title","description","idea"],
    template=(
"""Write a ~2200–3200 word chapter titled "{title}".
Seed idea: {idea}
Chapter description: "{description}"
(If WORLD BRIEF is present in your system prompt, weave it subtly.)
Return TEXT ONLY.
"""))

REVISION_PROMPT = PromptTemplate(
    input_variables=["chapter","title","description","ledger"],
    template=(
"""Revise to reduce repetition with earlier chapters while improving novelty and tension.
Keep continuity; change micro-beats and setting details.
- Add one fresh obstacle, one specific sensory detail, one plausible surprise.
Return TEXT ONLY.

Title: {title}
Description: {description}
Do-not-repeat ledger: {ledger}
Chapter draft:
{chapter}
"""))

EVAL_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""You are a tough fiction editor. Rate the chapter (1–10) on:
pacing, tension, voice, imagery, dialogue, novelty.
Return STRICT JSON only:
{{"scores":{{"pacing":x,"tension":x,"voice":x,"imagery":x,"dialogue":x,"novelty":x}},
 "one_sentence_note":"...", "three_micro_edits":["...","...","..."]}}
Chapter:
{chapter}
"""))

PUNCHUP_PROMPT = PromptTemplate(
    input_variables=["chapter","edits"],
    template=(
"""Apply these micro-edits without changing plot:
- {edits}
Keep length ±10%. Add concrete sensory details. Remove clichés.
Return TEXT ONLY.

Chapter:
{chapter}
"""))

DIALOGUE_TUNER_PROMPT = PromptTemplate(
    input_variables=["chapter","target"],
    template=(
"""Revise chapter to adjust dialogue density to ≈ {target:.2f} (±0.08).
Keep plot and beats intact. Length change ≤10%.
Return TEXT ONLY.

Chapter:
{chapter}
"""))

DECLICHE_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Line-edit to remove clichés and filler. Replace with specific, concrete imagery.
Preserve plot, POV, beats, and length (±5%). Return TEXT ONLY.

Chapter:
{chapter}
"""))

MOTIF_MINER_PROMPT = PromptTemplate(
    input_variables=["chapter"],
    template=(
"""Extract 1–3 recurring motifs/props/images (short noun phrases).
Return STRICT JSON: {{"motifs":["..."]}}
Chapter:
{chapter}
"""))

BLURB_PROMPT = PromptTemplate(
    input_variables=["title","logline","themes","promises"],
    template=(
"""Write STRICT JSON:
{{"blurb":"(120–160 words)","product_hook":"(1 sentence)","snippets":["...","...","..."]}}
Title: {title}
Logline: {logline}
Themes: {themes}
Promises: {promises}
"""))

# Build chains
outline_chain_json_planner  = OUTLINE_JSON_PROMPT   | planner_llm_json
outline_chain_ndjson_plan   = OUTLINE_NDJSON_PROMPT | planner_llm
outline_chain_ndjson_writer = OUTLINE_NDJSON_PROMPT | writer_llm
character_chain_json        = CHAR_JSON_PROMPT      | planner_llm_json
theme_chain                 = THEME_PROMPT          | planner_llm
beats_chain                 = BEATS_PROMPT          | planner_llm
chapter_beats_llm           = CHAPTER_WITH_BEATS_PROMPT | writer_llm
chapter_chain               = chapter_prompt        | writer_llm
revision_chain              = REVISION_PROMPT       | writer_llm
eval_chain                  = EVAL_PROMPT           | planner_llm
punchup_chain               = PUNCHUP_PROMPT        | writer_llm
dialogue_tuner              = DIALOGUE_TUNER_PROMPT | writer_llm
decliche_chain              = DECLICHE_PROMPT       | writer_llm
motif_miner                 = MOTIF_MINER_PROMPT    | planner_llm
blurb_chain                 = BLURB_PROMPT          | planner_llm

# Counterfactual scaffolder (for HRC presidency)
COUNTERFACTUAL_PROMPT = PromptTemplate(
    input_variables=["history_brief","premise"],
    template=(
"""Based on this real history brief:\n{history_brief}\n\n
Propose 10 plausible DIVERGENCE POINTS if Hillary Clinton had won in 2016.
For each: {{ "title": "...", "what_changes": "...", "downstream_ripples": "...", "conflicts": ["...","..."] }}
Return a STRICT JSON array of 10 items only.
Premise: {premise}
"""))
counterfactual_chain = COUNTERFACTUAL_PROMPT | planner_llm_json

# ─────────────────────────────────────────────────────────────────────────────
# 4) Utilities: parsing, similarity, checkpoints, etc.
# ─────────────────────────────────────────────────────────────────────────────
THINK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.S|re.I)
FENCE_RE = re.compile(r"```(?:json)?|```", flags=re.I)

def strip_think(x: Any) -> str:
    s = x["text"] if isinstance(x, dict) and "text" in x else str(x)
    s = THINK_RE.sub("", s); s = FENCE_RE.sub("", s)
    return s.strip()

def parse_json_value(s: str):
    s = strip_think(s)
    m = re.search(r"(\{.*\}|\[.*\])\s*$", s, flags=re.S)
    if not m: return None
    blob = m.group(1)
    try:
        return json.loads(blob)
    except Exception:
        fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
        fix = re.sub(r",\s*}", "}", fix); fix = re.sub(r",\s*]", "]", fix)
        try: return json.loads(fix)
        except Exception: return None

def parse_strict_json(s: str) -> dict:
    obj = parse_json_value(s)
    return obj if isinstance(obj, dict) else {}

OBJ_RE = re.compile(r"\{(?:[^{}]|\"[^\"\\]*(?:\\.[^\"\\]*)*\")*\}")
def extract_json_objects(text: str):
    text = strip_think(text); objs = []
    for m in OBJ_RE.finditer(text):
        blob = m.group(0)
        try:
            obj = json.loads(blob)
            if isinstance(obj, dict): objs.append(obj)
        except Exception:
            try:
                fix = blob.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                obj = json.loads(fix)
                if isinstance(obj, dict): objs.append(obj)
            except Exception: pass
    return objs

def jaccard(a: str, b: str) -> float:
    A = set(re.findall(r"[a-z0-9']+", (a or "").lower()))
    B = set(re.findall(r"[a-z0-9']+", (b or "").lower()))
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

# Dedup thresholds — slightly relaxed to avoid over-filtering on small models
def too_similar_relaxed(ch1: Dict[str,str], ch2: Dict[str,str]) -> bool:
    t_sim = jaccard(ch1["title"], ch2["title"])
    d_sim = jaccard(ch1["description"], ch2["description"])
    return (t_sim > 0.80) or (t_sim > 0.55 and d_sim > 0.62)

def bigram_overlap(a: str, b: str) -> float:
    def bigrams(s):
        toks = re.findall(r"[a-z0-9']+", (s or "").lower())
        return set(zip(toks, toks[1:])) if len(toks) > 1 else set()
    A, B = bigrams(a), bigrams(b)
    denom = len(A | B) if (A or B) else 1
    return len(A & B) / denom

def top_trigrams(text: str, k: int = 30) -> List[str]:
    toks = re.findall(r"[a-z0-9']+", (text or "").lower())
    tris = Counter(zip(toks, toks[1:], toks[2:]))
    return [" ".join(t) for t,_ in tris.most_common(k)]

def approx_dialogue_ratio(text: str) -> float:
    lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
    if not lines: return 0.0
    dial = sum(1 for ln in lines if re.search(r'["“”]|^—', ln))
    return dial / max(1, len(lines))

def word_count(text: str) -> int:
    return len(re.findall(r"[A-Za-z0-9']+", text or ""))

def _slug(s):
    return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:60]

def _ck_paths(i, title):
    base = f"{i:03d}-{_slug(title)}"
    p = pathlib.Path(CHECKPOINT_DIR)
    return p / (base + ".txt"), p / (base + ".json")

def save_ckpt(i, title, text, notes):
    p_txt, p_meta = _ck_paths(i, title)
    p_txt.write_text(text or "", encoding="utf-8")
    meta = {"chapter_index": i, "title": title, "notes": notes}
    p_meta.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

def load_ckpt_if_any(i, title):
    p_txt, p_meta = _ck_paths(i, title)
    if p_txt.exists():
        text = p_txt.read_text(encoding="utf-8")
        notes = None
        if p_meta.exists():
            try: notes = json.loads(p_meta.read_text(encoding="utf-8"))
            except Exception: notes = None
        return text, notes
    return None, None

# ─────────────────────────────────────────────────────────────────────────────
# 5) Research Agent: ddgs + trafilatura → History Brief JSON/MD
# ─────────────────────────────────────────────────────────────────────────────
try:
    from ddgs import DDGS   # new package name
except Exception:
    from duckduckgo_search import DDGS  # fallback

import trafilatura

def ddg_text(q: str, max_results=6):
    with DDGS() as ddgs:
        return list(ddgs.text(q, max_results=max_results, region="us-en", safesearch="moderate"))

def ddg_news(q: str, max_results=6):
    with DDGS() as ddgs:
        return list(ddgs.news(q, max_results=max_results, region="us-en", safesearch="moderate"))

def fetch_clean(url: str, timeout=20_000) -> str:
    try:
        downloaded = trafilatura.fetch_url(url, timeout=timeout)
        if not downloaded: return ""
        text = trafilatura.extract(downloaded, include_comments=False, include_tables=False, no_fallback=False)
        return text or ""
    except Exception:
        return ""

def research_pack(topic: str, seed_queries: List[str], per_query=4) -> Dict[str, Any]:
    print("🔎 Research agent: collecting sources…")
    hits = []
    for q in seed_queries:
        try:
            hits.extend(ddg_text(q, max_results=per_query))
        except Exception:
            continue
    # Deduplicate by URL
    seen = set(); hits2 = []
    for h in hits:
        url = h.get("href") or h.get("url")
        if not url or url in seen: continue
        seen.add(url); hits2.append({"title": h.get("title",""), "url": url})

    docs = []
    for h in tqdm(hits2[:22], desc="Fetching"):
        txt = fetch_clean(h["url"])
        if not txt: continue
        docs.append({"title": h["title"], "url": h["url"], "text": txt[:120000]})

    if not docs:
        return {"topic": topic, "facts": [], "timeline": [], "citations": [], "summary": ""}

    # Summarize with the planner (JSON)
    SUMM_PROMPT = PromptTemplate(
        input_variables=["topic","docs"],
        template=(
"""You are an impartial researcher. From these sources, produce STRICT JSON:
{{
 "facts": ["..."],                       # 12–18 atomic, dated facts
 "timeline": [{{"date":"YYYY-MM","event":"...","why_it_matters":"..."}}, ...],  # 10–14 items
 "policy_buckets": {{
   "economy_tax":"...", "immigration":"...", "trade":"...", "foreign_policy":"...", "covid":"...", "justice_impeachments":"..."
 }},
 "summary": "(200–280 words, neutral)",
 "citations": [{{"title":"...","url":"..."}}, ...]  # 10–16 items
}}
Topic: {topic}
Sources (title + excerpts): {docs}
"""))
    chain = SUMM_PROMPT | planner_llm_json
    doc_blurbs = [{"title": d["title"], "url": d["url"], "snippet": (d["text"][:1000] + ("…" if len(d["text"])>1000 else ""))} for d in docs[:14]]
    raw = chain.invoke({"topic": topic, "docs": json.dumps(doc_blurbs, ensure_ascii=False)})
    data = parse_json_value(raw) or {}
    data.setdefault("citations", [])
    for d in doc_blurbs:
        if not any(c.get("url")==d["url"] for c in data["citations"]):
            data["citations"].append({"title": d["title"], "url": d["url"]})
    return data

def brief_to_md(brief: Dict[str,Any]) -> str:
    lines = [f"# Research Brief: {brief.get('topic','')}", ""]
    if brief.get("summary"):
        lines += ["## Summary", brief["summary"], ""]
    if brief.get("timeline"):
        lines += ["## Timeline"]
        for t in brief["timeline"]:
            lines.append(f"- **{t.get('date','')}** — {t.get('event','')} — _{t.get('why_it_matters','')}_")
        lines.append("")
    if brief.get("policy_buckets"):
        lines += ["## Policy Buckets"]
        for k,v in brief["policy_buckets"].items():
            lines.append(f"- **{k}**: {v}")
        lines.append("")
    if brief.get("facts"):
        lines += ["## Facts"]
        for f in brief["facts"][:20]:
            lines.append(f"- {f}")
        lines.append("")
    if brief.get("citations"):
        lines += ["## Sources"]
        for c in brief["citations"][:20]:
            lines.append(f"- [{c.get('title','source')}]({c.get('url','')})")
    return "\n".join(lines)

DEFAULT_TRUMP_QUERIES = [
    "Presidency of Donald Trump 2017 2021 summary",
    "Tax Cuts and Jobs Act 2017 summary site:wikipedia.org",
    "Executive Order 13769 travel ban summary",
    "USMCA enters into force July 1 2020 site:ustr.gov",
    "First impeachment of Donald Trump 2019 summary",
    "Second impeachment of Donald Trump 2021 summary",
    "COVID-19 response CARES Act March 2020 CRS summary site:crsreports.congress.gov",
    "Operation Warp Speed overview site:gao.gov"
]

# ─────────────────────────────────────────────────────────────────────────────
# 6) Booster-grade OUTLINE generator (JSON→NDJSON; planner→writer; avoid lists)
# ─────────────────────────────────────────────────────────────────────────────
def clean_outline_items(items):
    out = []
    for obj in items:
        if not isinstance(obj, dict): continue
        title = (obj.get("title") or "").strip()
        desc  = (obj.get("description") or "").strip()
        if title and desc: out.append({"title": title, "description": desc})
    return out

def robust_outline_batch(topic: str, ask: int, avoid_titles: List[str], avoid_phrases: List[str]):
    vseed = random.randint(10_000, 999_999)
    payload = {
        "topic": topic,
        "count": ask,
        "avoid_titles": ", ".join(sorted(set(avoid_titles))[:50]),
        "avoid_phrases": ", ".join(sorted(set(avoid_phrases))[:50]),
        "vseed": vseed,
    }
    # Planner JSON
    try:
        raw = outline_chain_json_planner.invoke(payload)
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ planner JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ planner JSON (rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · planner JSON failed: {e}")
    # Planner NDJSON
    try:
        raw = outline_chain_ndjson_plan.invoke(payload)
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try: objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ planner NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · planner NDJSON failed: {e}")
    # Writer NDJSON
    try:
        raw = outline_chain_ndjson_writer.invoke(payload)
        lines = [ln for ln in strip_think(raw).splitlines() if ln.strip()]
        objs = []
        for ln in lines:
            try: objs.append(json.loads(ln))
            except Exception:
                fix = ln.replace("“","\"").replace("”","\"").replace("’","'")
                fix = re.sub(r",\s*}", "}", fix)
                try: objs.append(json.loads(fix))
                except Exception: pass
        if objs:
            print("   ✓ writer NDJSON")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · writer NDJSON failed: {e}")
    # Writer JSON last
    try:
        raw = (OUTLINE_JSON_PROMPT | writer_llm).invoke(payload)
        arr = parse_json_value(raw)
        if isinstance(arr, list) and arr:
            print("   ✓ writer JSON")
            return clean_outline_items(arr)
        objs = extract_json_objects(str(raw))
        if objs:
            print("   ✓ writer JSON (rescued)")
            return clean_outline_items(objs)
    except Exception as e:
        print(f"   · writer JSON failed: {e}")
    # Stub fallback
    print("   ⚠️ stub fallback")
    TEMPLATES = [
        ("First 100 Days, Rewired", "A new administration rewrites norms; a misread memo sets up the season’s first conflict."),
        ("The Unseen Docket", "A Supreme Court vacancy collides with backchannel promises and an ethics snag."),
        ("Trade Winds", "Tariffs, treaties, and a leak force an unlikely coalition to form—or fracture."),
        ("Outbreak Narratives", "A public-health rehearsal becomes real; data, politics, and trust fall out of sync."),
        ("Backchannel Summit", "A surprise foreign breakthrough carries a personal cost that ricochets at home."),
        ("Platform Immunities", "A tech-policy skirmish pulls private lives into the open, with legal fallout."),
        ("Counting Rooms", "An election-year rule change triggers a chain of unintended consequences.")
    ]
    random.shuffle(TEMPLATES)
    return [{"title": t, "description": d} for t,d in TEMPLATES[:ask]]

print(f"→ Generating {NUM_CH}-chapter outline (booster mode) …")
outline, seen_titles = [], set()
needed = NUM_CH
chunk = 3
attempts, MAX_ATTEMPTS = 0, 60
stalled = 0

while len(outline) < needed and attempts < MAX_ATTEMPTS:
    attempts += 1
    ask = min(chunk, needed - len(outline))
    avoid_titles = list(seen_titles)
    recent_phrases = []
    for it in outline[-12:]:
        recent_phrases.extend(top_trigrams(it.get("description",""), k=6))
    batch = robust_outline_batch(SEED_IDEA, ask, avoid_titles, recent_phrases)
    added = 0
    for ch in batch:
        if not ch["title"] or not ch["description"]:
            continue
        if ch["title"] in seen_titles:
            continue
        if any(too_similar_relaxed(ch, e) for e in outline):
            continue
        outline.append(ch)
        seen_titles.add(ch["title"])
        added += 1
        if len(outline) >= needed:
            break
    print(f"   → accepted {added}; total now {len(outline)}/{needed}")
    if added == 0:
        stalled += 1
        if stalled >= 6:
            # relax dedupe and push stubs
            print("   · stalled; relaxing filter and adding stub seeds")
            while len(outline) < needed and len(batch) > 0:
                ch = batch.pop()
                title = (ch.get("title") or f"Thread {len(outline)+1}").strip()
                desc  = (ch.get("description") or "An escalated conflict reframes stakes.").strip()
                if title in seen_titles: title += f" #{random.randint(10,99)}"
                outline.append({"title": title, "description": desc})
                seen_titles.add(title)
            stalled = 0
    else:
        stalled = 0

print(f"✔ Final outline: {len(outline)} chapters\n")
if len(outline) < needed:
    print("⚠️ Could not reach target; proceeding with what we have.")

# ─────────────────────────────────────────────────────────────────────────────
# 7) Characters + Theme/Motif Bible
# ─────────────────────────────────────────────────────────────────────────────
def robust_characters(outline_list, n_chars):
    try:
        raw = character_chain_json.invoke({"outline": json.dumps(outline_list, ensure_ascii=False),
                                           "num_chars": n_chars})
        arr = parse_json_value(raw)
        if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
            return [c for c in arr if isinstance(c, dict)][:n_chars]
    except Exception: pass
    try:
        raw = (CHAR_JSON_PROMPT | writer_llm).invoke({"outline": json.dumps(outline_list, ensure_ascii=False),
                                                      "num_chars": n_chars})
        arr = parse_json_value(raw)
        if isinstance(arr, list) and len(arr) >= min(3, n_chars//2):
            return [c for c in arr if isinstance(c, dict)][:n_chars]
    except Exception: pass
    return [
        {"name":"Alex Vega","role":"Chief of Staff","development_arc":"From risk-averse gatekeeper to bold coalition-builder."},
        {"name":"Ruth Delgado","role":"Solicitor General","development_arc":"Learns to balance principle with political reality."},
        {"name":"Jonah Price","role":"Data Journalist","development_arc":"Truth-telling collides with personal loyalties."},
    ][:n_chars]

NUM_CHAR = max(3, min(10, len(outline)//8))
print(f"→ Generating {NUM_CHAR} characters…")
characters = robust_characters(outline, NUM_CHAR)
print(f"✔ Got {len(characters)} characters\n")

print("→ Building theme/motif bible…")
THEME_BIBLE = parse_strict_json(theme_chain.invoke({
    "topic": SEED_IDEA,
    "outline": json.dumps(outline, ensure_ascii=False)
})) or {"themes":[],"motifs":[],"promises":[],"logline":"","genre_signals":[]}
MOTIF_LEDGER = list(THEME_BIBLE.get("motifs", []))
print("✔ Theme bible ready.\n")

# ─────────────────────────────────────────────────────────────────────────────
# 8) Run Research Agent (if enabled) → History Brief + Counterfactual seeds
# ─────────────────────────────────────────────────────────────────────────────
WORLD_BRIEF = ""
COUNTERFACTUAL_POINTS = []
if RESEARCH_AGENT_ENABLED:
    brief = research_pack(
        topic="U.S. Presidency 2017–2021 baseline for counterfactual (HRC wins 2016).",
        seed_queries=DEFAULT_TRUMP_QUERIES, per_query=4
    )
    # Save brief
    pathlib.Path("research").mkdir(exist_ok=True)
    with open("research/history_brief.json","w",encoding="utf-8") as f:
        json.dump(brief, f, ensure_ascii=False, indent=2)
    md = brief_to_md(brief)
    pathlib.Path("research/history_brief.md").write_text(md, encoding="utf-8")
    print("🧾 Saved research/history_brief.{json,md}")

    # Compact bullets for prompts
    bullets = []
    for t in (brief.get("timeline") or [])[:10]:
        bullets.append(f"{t.get('date','')}: {t.get('event','')}")
    if not bullets and brief.get("facts"):
        bullets = (brief["facts"])[:10]
    WORLD_BRIEF = " | ".join(bullets)[:1200]

    # Counterfactual divergence points (robustly normalized)
    cf_raw = counterfactual_chain.invoke({
        "history_brief": md[:6000],
        "premise": "Hillary Clinton wins 2016; explore plausible policy and geopolitical divergences 2017–2021."
    })
    parsed = parse_json_value(cf_raw)
    # Normalizer: accept list of dicts or list of strings; coerce to dict skeletons
    COUNTERFACTUAL_POINTS = []
    if isinstance(parsed, list):
        for item in parsed:
            if isinstance(item, dict):
                title = str(item.get("title","")).strip() or "Divergence"
                what  = str(item.get("what_changes","")).strip() or "A plausible shift in policy or personnel."
                rip   = str(item.get("downstream_ripples","")).strip() or "Cascading effects across agencies and geopolitics."
                confs = item.get("conflicts", [])
                if not isinstance(confs, list): confs = [str(confs)]
                COUNTERFACTUAL_POINTS.append({
                    "title": title, "what_changes": what,
                    "downstream_ripples": rip, "conflicts": [str(c) for c in confs][:3]
                })
            elif isinstance(item, str):
                title = item.strip()
                if title:
                    COUNTERFACTUAL_POINTS.append({
                        "title": title,
                        "what_changes": "Policy emphasis and staffing differ; agendas and committee priorities shift.",
                        "downstream_ripples": "Budget allocations, diplomatic posture, and regulatory timelines diverge.",
                        "conflicts": ["Institutional pushback", "Media battles", "Coalition fractures"]
                    })
    # If still empty, fabricate 10 skeletons so DOCX never crashes
    if not COUNTERFACTUAL_POINTS:
        COUNTERFACTUAL_POINTS = [{
            "title": f"Divergence #{i+1}",
            "what_changes": "Course correction on a key agenda item.",
            "downstream_ripples": "Knock-on effects across agencies and allies.",
            "conflicts": ["Stakeholder backlash","Legal hurdles","Messaging wars"]
        } for i in range(10)]
else:
    print("ℹ️ Research agent disabled; continuing without WORLD_BRIEF.")

# ─────────────────────────────────────────────────────────────────────────────
# 9) Chapter Generation (beats → draft → eval/polish) + checkpointing
# ─────────────────────────────────────────────────────────────────────────────
print("→ Generating chapters…")
chap_texts = [None]*len(outline)
editor_notes = [None]*len(outline)
BIGRAM_THRESHOLD = 0.22

def write_one(idx):
    meta = outline[idx]
    title = meta["title"]

    # Resume if cached
    cached_txt, cached_notes = load_ckpt_if_any(idx, title)
    if cached_txt:
        return cached_txt, cached_notes

    # Plan beats (with world brief if available)
    plan = parse_strict_json(beats_chain.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "theme_bible": json.dumps(THEME_BIBLE, ensure_ascii=False),
        "motif_ledger": json.dumps(MOTIF_LEDGER[-12:], ensure_ascii=False),
        "worldbrief": WORLD_BRIEF or "(none)"
    })) or {}
    dialogue_target = float(plan.get("dialogue_target_pct", 0.36))
    sensory_palette = plan.get("sensory_palette", ["sight","sound"])
    plan_json = json.dumps(plan.get("beats", []), ensure_ascii=False)

    # Draft
    res = chapter_beats_llm.invoke({
        "title": meta["title"],
        "description": meta["description"],
        "idea": SEED_IDEA,
        "plan": plan_json,
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "sensory_palette": ", ".join(sensory_palette),
        "dialogue_target": dialogue_target,
        "worldbrief": WORLD_BRIEF or "(none)"
    })
    chapter_txt = strip_think(res)

    # Repetition guard
    ledger = []
    for j in range(idx):
        prev = chap_texts[j]
        if not prev: continue
        if bigram_overlap(chapter_txt, prev) > BIGRAM_THRESHOLD:
            ledger.extend(top_trigrams(prev, k=10))
    ledger = list(dict.fromkeys(ledger))[:60]
    if ledger:
        chapter_txt = strip_think(revision_chain.invoke({
            "chapter": chapter_txt,
            "title": meta["title"],
            "description": meta["description"],
            "ledger": "; ".join(ledger)
        }))

    # Evaluation + conditional polish
    data = None
    try:
        report_txt = strip_think(eval_chain.invoke({"chapter": chapter_txt}))
        m = re.search(r"\{[\s\S]*\}\s*$", report_txt)
        if m:
            data = json.loads(m.group(0))
        if data and "scores" in data:
            scores = data["scores"]
            avg = sum(float(scores[k]) for k in scores)/len(scores)
            dr = approx_dialogue_ratio(chapter_txt)
            if abs(dr - dialogue_target) > 0.10:
                chapter_txt = strip_think(dialogue_tuner.invoke({"chapter": chapter_txt, "target": dialogue_target}))
            if (avg < 7.6) or (not FAST_MODE):
                chapter_txt = strip_think(decliche_chain.invoke({"chapter": chapter_txt}))
            if avg < 7.4:
                edits = " | ".join(data.get("three_micro_edits", [])) or "Sharpen hook; escalate midpoint; add concrete sensory beats."
                chapter_txt = strip_think(punchup_chain.invoke({"chapter": chapter_txt, "edits": edits}))
    except Exception:
        data = None

    # Mine motifs
    try:
        mined = parse_strict_json(motif_miner.invoke({"chapter": chapter_txt})) or {}
        for m in (mined.get("motifs") or []):
            if m not in MOTIF_LEDGER: MOTIF_LEDGER.append(m)
    except Exception: pass

    save_ckpt(idx, title, chapter_txt, data)
    return chapter_txt, data

# Early calibration on first 3 (serial)
PREGEN = min(3, len(outline))
for i in range(PREGEN):
    ch_txt, notes = write_one(i); chap_texts[i] = ch_txt; editor_notes[i] = notes

# Resize outline based on observed words/chapter to hit page target
actual_avg = max(500, sum(word_count(chap_texts[i]) for i in range(PREGEN)) // PREGEN)
recalc_num_ch = max(12, min(80, (TARGET_WORDS + actual_avg - 1) // actual_avg))
# Respect CPU cap
if CPU_CHAPTER_CAP and not HAS_GPU:
    recalc_num_ch = min(recalc_num_ch, CPU_CHAPTER_CAP)

if recalc_num_ch != len(outline):
    delta = recalc_num_ch - len(outline)
    if delta > 0:
        print(f"🔁 Resizing outline: need +{delta} chapters…")
        extra = robust_outline_batch(SEED_IDEA, delta, list(seen_titles), [])
        for obj in extra:
            cand = {"title": (obj.get("title") or "").strip(),
                    "description": (obj.get("description") or "").strip()}
            if not cand["title"] or not cand["description"]: continue
            if cand["title"] in seen_titles: continue
            if any(too_similar_relaxed(cand, e) for e in outline): continue
            outline.append(cand); seen_titles.add(cand["title"])
            chap_texts.append(None); editor_notes.append(None)
            if len(outline) >= recalc_num_ch: break
        print(f"   → total {len(outline)} chapters after resize")
    elif delta < 0:
        keep = max(PREGEN, recalc_num_ch)
        outline = outline[:keep]; chap_texts = chap_texts[:keep]; editor_notes = editor_notes[:keep]
        print(f"✂️  Trimmed outline to {len(outline)} chapters")

# Finish remaining chapters
remaining_idxs = [i for i, t in enumerate(chap_texts) if t is None]
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = { ex.submit(write_one, i): i for i in remaining_idxs }
    for fut in tqdm(as_completed(futures), total=len(futures), desc="Chapters"):
        idx = futures[fut]
        try:
            ch_txt, notes = fut.result()
        except Exception as e:
            ch_txt, notes = f"[Generation failed: {e}]", None
        chap_texts[idx] = ch_txt; editor_notes[idx] = notes

# ─────────────────────────────────────────────────────────────────────────────
# 10) Build DOCX (robust against mixed counterfactual output) + Download
# ─────────────────────────────────────────────────────────────────────────────
total_words = sum(word_count(t or "") for t in chap_texts)
est_pages  = total_words / WORDS_PER_PAGE
suggested_ch = max(12, min(80, round(total_words / CH_TARGET_WORDS)))
print(f"🧮 Total words: {total_words:,}  → est. pages ≈ {est_pages:.0f}")
print(f"🔧 If you rerun, suggested chapters for this style ≈ {suggested_ch}")

doc = docx.Document()
doc.add_heading(BOOK_TITLE, 0)
doc.add_paragraph(f"Seed idea: {SEED_IDEA}")
doc.add_paragraph(f"Estimated pages: ~{est_pages:.0f}")

# Character Bible
if characters:
    doc.add_page_break()
    doc.add_heading("Character Development", level=1)
    for c in characters:
        name = c.get("name","(Unnamed)")
        role = c.get("role",""); arc = c.get("development_arc","")
        doc.add_heading(name, level=2)
        if role: doc.add_paragraph(f"Role: {role}")
        if arc:  doc.add_paragraph(arc)

# Research Brief
if RESEARCH_AGENT_ENABLED and pathlib.Path("research/history_brief.md").exists():
    doc.add_page_break()
    doc.add_heading("Historical Brief (2017–2021 Baseline)", level=1)
    md_text = pathlib.Path("research/history_brief.md").read_text(encoding="utf-8")
    for para in md_text.splitlines()[:300]:
        if para.startswith("#"):
            if para.startswith("## "): doc.add_heading(para.replace("## ",""), level=2)
            elif para.startswith("# "): doc.add_heading(para.replace("# ",""), level=1)
        elif para.strip():
            doc.add_paragraph(para.strip())

# Counterfactual Divergence Points (robust loop)
if COUNTERFACTUAL_POINTS:
    doc.add_page_break()
    doc.add_heading("Counterfactual: 10 Divergence Points (HRC 2017–2021)", level=1)
    for i, d in enumerate(COUNTERFACTUAL_POINTS[:10], 1):
        try:
            title = d.get("title","Divergence")
            what  = d.get("what_changes","A plausible shift in policy or personnel.")
            rip   = d.get("downstream_ripples","Knock-on effects across agencies and allies.")
            confs = d.get("conflicts", [])
        except AttributeError:
            # If d is a string, wrap it
            title = str(d)
            what  = "Policy emphasis and staffing differ; agendas and committee priorities shift."
            rip   = "Budget allocations, diplomatic posture, and regulatory timelines diverge."
            confs = ["Institutional pushback", "Media battles", "Coalition fractures"]
        doc.add_heading(f"{i}. {title}", level=2)
        doc.add_paragraph("What changes: " + what)
        doc.add_paragraph("Downstream ripples: " + rip)
        for c in (confs or [])[:3]:
            doc.add_paragraph("• " + str(c))

# Chapters
for i, (meta, text) in enumerate(zip(outline, chap_texts), start=1):
    doc.add_page_break()
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=1)
    doc.add_paragraph(meta['description'], style="Intense Quote")
    doc.add_paragraph((text or "").strip())

# Editor’s Notes
doc.add_page_break()
doc.add_heading("Editor’s Notes (Auto-Eval)", level=1)
for i, (meta, notes) in enumerate(zip(outline, editor_notes), start=1):
    doc.add_heading(f"Chapter {i}: {meta['title']}", level=2)
    if not notes or "scores" not in (notes or {}):
        doc.add_paragraph("No evaluation available."); continue
    scores = notes["scores"]; one_liner = notes.get("one_sentence_note","")
    edits = notes.get("three_micro_edits", [])
    try: avg_score = sum(float(scores[k]) for k in scores)/len(scores)
    except Exception: avg_score = None
    doc.add_paragraph("Scores: " + ", ".join(f"{k}: {scores[k]}" for k in scores))
    if avg_score is not None: doc.add_paragraph(f"Average: {avg_score:.2f}")
    if one_liner: doc.add_paragraph(f"Note: {one_liner}")
    for e in (edits or []): doc.add_paragraph(f"• {e}")

# Back-cover copy
blurb_data = {}
try:
    blurb_data = parse_strict_json(blurb_chain.invoke({
        "title": BOOK_TITLE,
        "logline": THEME_BIBLE.get("logline",""),
        "themes": ", ".join(THEME_BIBLE.get("themes", [])),
        "promises": ", ".join(THEME_BIBLE.get("promises", []))
    })) or {}
except Exception:
    blurb_data = {}

doc.add_page_break()
doc.add_heading("Back-Cover Copy & Retailer Hook", level=1)
if blurb_data.get("blurb"): doc.add_paragraph(blurb_data["blurb"])
if blurb_data.get("product_hook"): doc.add_paragraph(f"\nRetailer Hook: {blurb_data['product_hook']}")
if blurb_data.get("snippets"):
    doc.add_heading("Short Social Snippets", level=2)
    for s in blurb_data["snippets"]:
        doc.add_paragraph(f"• {s}")

# Save & download
fn = BOOK_TITLE.replace(" ", "_") + ".docx"
doc.save(fn)
print(f"📘 Saved {fn}")
files.download(fn)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.7/40.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m837.9/837.9 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.7/274.7 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Ollama health: 200
🖥️ GPU: not detected


Fetching: 100%|██████████| 22/22 [00:00<00:00, 89068.23it/s]


🧾 Saved research/history_brief.{json,md}
→ Generating chapters…
🔁 Resizing outline: need +1 chapters…
   ✓ planner JSON (rescued)
   → total 24 chapters after resize


Chapters: 100%|██████████| 21/21 [8:03:47<00:00, 1382.28s/it]


🧮 Total words: 18,233  → est. pages ≈ 66
🔧 If you rerun, suggested chapters for this style ≈ 12
📘 Saved What_Could_Have_Been:_An_Alternative_History.docx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>