# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the ../backend directory first on sys.path so the `shuscribe` package
# imported by the cells below can be resolved from this notebook.
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# CONSTANTS

# When True, each pipeline step first looks for its saved response under
# STORY_DIR/"<CHAPTER_INDEX>out" and only calls the LLM when nothing usable is cached.
USE_CACHED_RESPONSES = True
# TODO: KEEP CHANGING THIS TO GET THE NEXT CHAPTER, STARTING FROM 1
# Used as the position in CHAPTERS and to name the output directory
# "<CHAPTER_INDEX>out". The previous chapter's results are read from
# "<CHAPTER_INDEX-1>out", so those files must already exist.
CHAPTER_INDEX = 1

In [3]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
import json
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from IPython.display import Markdown

# Load variables via python-dotenv before the keys are read below.
load_dotenv()

# Provider credentials; indexing os.environ raises KeyError right here if one is missing.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Model identifiers keyed by provider name.
TEST_MODELS = dict(
    openai="gpt-4o-mini",
    anthropic="claude-3-5-haiku-20241022",
    gemini="gemini-2.0-flash-001",
)

# Counterpart table holding the "thinking" model identifiers.
TEST_THINKING_MODELS = dict(
    openai="o3-mini-2025-01-31",
    anthropic="claude-3-7-sonnet-20250219",
    gemini="gemini-2.0-flash-thinking-exp",
)

# Root folder of the story fixture that every later cell reads from and writes to.
STORY_DIR = Path("../backend", "tests", "resources", "pokemon_amber")

# Thin awaitable wrapper used by the pipeline cells below
async def run_async(coro):
    """Await ``coro`` and hand back whatever it produces."""
    outcome = await coro
    return outcome

In [4]:
# Streaming response
from typing import Type
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.llm import ThinkingConfig
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str,
    model: str,
    messages: list[Message],
    response_schema: Type[BaseOutputSchema] | None = None,
    max_tokens: int | None = None,
    temp: float | None = None,
    thinking_config: ThinkingConfig | None = None,
    config: GenerationConfig | None = None
    ) -> StreamChunk | None:
    """Stream one generation, echoing text as it arrives, and return the final chunk.

    Args:
        provider_name: Forwarded unchanged to ``session.generate_stream``.
        model: Forwarded unchanged to ``session.generate_stream``; when falsy, the
            printed header falls back to ``config.model``.
        messages: Conversation to send.
        response_schema: Used only when ``config`` is not supplied.
        max_tokens: Used only when ``config`` is not supplied (passed as
            ``max_output_tokens``).
        temp: Used only when ``config`` is not supplied; ``None`` means 0.7.
        thinking_config: Used only when ``config`` is not supplied.
        config: Ready-made generation config. When given, the four arguments
            above are ignored.

    Returns:
        The last chunk received if its status is ``COMPLETE`` or ``ERROR``;
        ``None`` if the stream produced no chunks or ended in any other status.
    """
    # Tracked separately from the loop variable: a loop over an empty stream never
    # binds its target, which previously made the check below raise UnboundLocalError.
    last_chunk: StreamChunk | None = None

    async with LLMSession.session_scope() as session:
        # Create a streaming config
        if not config:
            config = GenerationConfig(
                # `temp or 0.7` would silently replace an explicit 0.0 with 0.7.
                temperature=0.7 if temp is None else temp,
                response_schema=response_schema if response_schema else None,
                max_output_tokens=max_tokens,
                thinking_config=thinking_config
            )

        print(f"{model if model else config.model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)
            last_chunk = chunk

    # Only a terminal chunk is handed back to the caller.
    if last_chunk and last_chunk.status in (StreamStatus.COMPLETE, StreamStatus.ERROR):
        return last_chunk
    return None


In [5]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Story-level metadata. The file is opened as UTF-8 explicitly so the parsed text
# does not depend on the platform's default encoding.
with open(STORY_DIR / "story" / "_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)
    STORY_METADATA = StoryMetadata(
        title=meta.get('story_title'),
        description=meta.get('story_description'),
        genres=meta.get('genres'),
        additional_tags=meta.get('additional_tags')
    )

# One Chapter per file listed under `chapters` in the metadata, in listed order.
CHAPTERS = []
for chapter in meta.get('chapters'):
    with open(STORY_DIR / "story" / chapter, "r", encoding="utf-8") as f:
        # The chapter id is the file name up to its first dot.
        chapter_id = chapter.split('.')[0]
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            # Still best-effort, but no longer silent: CHAPTERS is indexed by
            # position later on, so a dropped chapter shifts every chapter after it.
            print(f"WARNING: skipped chapter file {chapter!r}: {type(exc).__name__}: {exc}")
            continue

CHAPTERS

 Chapter(title='[Chapter 2] All Aboard!', id=2, content='Sleep wasn\'t just elusive—it felt dangerous. Each time exhaustion started to drag me under, some part of my brain would jolt awake, terrified that closing these eyes meant losing whatever thread still connected me to myself.\nThe hotel room\'s ceiling fan spun lazy circles, each rotation collecting shadows that looked too much like the fractals of psychic energy I\'d seen during Mewtwo\'s awakening. That had been real. All of it had been real. The clone tank\'s fluid still burned in borrowed lungs whenever I breathed too deeply, a sharp chemical taste that belonged in sci-fi stories, not reality. Not supposed to be my reality.\nI caught another glimpse of mint-green hair on the dark TV screen and had to look away. Ten years old. I was ten years old again, or at least this body was. The wrongness of it sat like lead in my stomach—adult thoughts trapped in a child\'s form, muscle memory that belonged to someone else entirely. Even

# Summarization Pipeline

## Chapter [X]

In [6]:
# Load the artifacts saved for the previous chapter (directory "<CHAPTER_INDEX-1>out")
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.wiki import WikiPage
from shuscribe.schemas.wikigen.summary import ChapterSummary

prev_chapter_dir = STORY_DIR / f"{CHAPTER_INDEX-1}out"

# Wiki accumulated so far, rebuilt from the saved response's text.
COMPREHENSIVE_WIKI = WikiPage.from_wiki_content(
    "Comprehensive Wiki Page",
    json.loads((prev_chapter_dir / "comprehensive_wiki.json").read_text())["accumulated_text"],
)

# Summary of the previous chapter, rebuilt the same way.
PREV_SUMMARY = ChapterSummary.from_chapter_summary(
    CHAPTER_INDEX-1,
    json.loads((prev_chapter_dir / "chapter_summary.json").read_text())["accumulated_text"],
)

# Entities upserted for the previous chapter; this file is already in schema form.
UPSERT_ENTITIES = UpsertEntitiesOutSchema.model_validate_json(
    (prev_chapter_dir / "upsert_entities.json").read_text()
)


In [7]:
from shuscribe.schemas.wikigen.entity import TempEntityDB, TempEntityDBRepresentation

# Restore the entity DB contents that were saved for the previous chapter.
entity_db_file = STORY_DIR / f"{CHAPTER_INDEX-1}out" / "entity_db.json"
entities_db = TempEntityDBRepresentation.model_validate_json(entity_db_file.read_text())

# Fresh DB object, then attach the restored representation to it.
entity_db = TempEntityDB()
entity_db.entities_db = entities_db

!!!!!!!!!!!!megablocks not available, using torch.matmul instead
<All keys matched successfully>


### Chapter Summary

In [8]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()

summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    summary_so_far=COMPREHENSIVE_WIKI,
    recent_summaries=[PREV_SUMMARY],
)

# print(messages[-1].content)

summary_response = None

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json", "r") as f:
            summary_response = StreamChunk.model_validate_json(f.read())

# if we don't have a cached response, generate one
if not summary_response:
    summary_response = await run_async(stream(
        "", 
        "", 
        summary_messages, 
        config=templates.chapter.summary.default_config
        ))

print(summary_response.usage)
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json", "w") as f:
    f.write(summary_response.model_dump_json(indent=2))

chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)


prompt_tokens=5307 completion_tokens=972


### Extract Entities

In [9]:
# Print the chapter summary as rendered by to_prompt() for a quick visual check.
print(chapter_summary.to_prompt())

<Content>


## Lingering Trauma and Existential Dread

*   The protagonist, now "Amber," struggles to sleep, haunted by the events of Mewtwo's escape and the feeling of being disconnected from her former self. [!CHARACTER] [!THEME]
    *   She is disturbed by shadows that resemble psychic energy fractals, a callback to Mewtwo's awakening. [!SYMBOL] [!CALLBACK]
    *   The lingering physical sensations of the amber fluid reinforce the reality of her situation. [!WORLD]
*   Amber grapples with her new, young body and the adult thoughts trapped within it. [!CHARACTER]
    *   She feels alienated by the foreignness of her own body, including her fingerprints. [!THEME]
*   She feels guilt and displacement, knowing she occupies a body that belongs to the original, deceased Amber. [!CHARACTER] [!THEME]
*   Amber reflects on Dr. Fuji's actions and the implications of her existence. [!CHARACTER]
    *   She acknowledges the lengths he went to in order to bring her back.
    *   She questions he

In [10]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
extract_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
    summary_so_far=COMPREHENSIVE_WIKI,
    recent_summaries=[PREV_SUMMARY],
)

# print(extract_messages[-1].content)
extract_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json", "r") as f:
            extract_response = StreamChunk.model_validate_json(f.read())
            
# if we don't have a cached response, generate one
if not extract_response:
    extract_response = await run_async(stream(
        "", 
        "", 
        extract_messages, 
        config=templates.entity.extract.default_config
        ))
    
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json", "w") as f:
    f.write(extract_response.model_dump_json(indent=2))

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


prompt_tokens=6484 completion_tokens=1005


### Search for Entities


In [11]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl, TempEntityRecord

# Entities extracted from this chapter, filtered with the RELEVANT level.
ent_list = extracted_entities.filter_entities(EntitySigLvl.RELEVANT)

# Query the entity DB once per extracted entity (serialized as indented JSON) and
# collect the first element of every search hit; the set removes duplicates.
existing_entities: set[TempEntityRecord] = {
    hit[0]
    for candidate in ent_list
    for hit in entity_db.search(json.dumps(candidate.to_upsert_dict(ent_list), indent=2))
}

print(len(existing_entities))

for matched_record in existing_entities:
    print(matched_record)


6
TempEntityRecord(identifier=Amber Fluid (Reincarnation))
TempEntityRecord(identifier=Dr. Fuji's Laboratory)
TempEntityRecord(identifier=Dr. Fuji's Laboratory (Post-Mewtwo Rampage))
TempEntityRecord(identifier=Mewtwo)
TempEntityRecord(identifier=Dr. Fuji)
TempEntityRecord(identifier=Amber (Reincarnated))


### Upsert Entities and Relationships

In [12]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl

upsert_entities = UpsertEntitiesOutSchema(entities=[])

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json", "r") as f:
            upsert_entities = UpsertEntitiesOutSchema.model_validate_json(f.read())

if len(upsert_entities.entities) == 0:
    upsert_entities = UpsertEntitiesOutSchema(entities=[])
    total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
    templates.entity.upsert.reload()        

    for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT, chunk_size=5):
        upsert_messages: list[Message] = templates.entity.upsert.format( 
            current_chapter=CHAPTERS[CHAPTER_INDEX],
            entity_batch=batch,
            story_metadata=STORY_METADATA,
            chapter_summary=chapter_summary,
            
            existing_entities=[entity.to_dict() for entity in existing_entities],
            summary_so_far=COMPREHENSIVE_WIKI,
            recent_summaries=[PREV_SUMMARY],
        )
        upsert_response = await run_async(stream(
            "", 
            "", 
            upsert_messages, 
            config=templates.entity.upsert.default_config
            ))
        upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
        total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
        total_usage.completion_tokens += upsert_response.usage.completion_tokens
        print(upsert_response.usage)

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json", "w") as f:
    f.write(upsert_entities.model_dump_json(indent=2))


### Update Entity DB

In [13]:
# Apply this chapter's upserted entities to the entity DB object built earlier.
# NOTE(review): re-running this cell calls upsert again with the same entities;
# confirm that is harmless.
entity_db.upsert(upsert_entities.entities)
# Last expression of the cell: display the DB size after the upsert.
entity_db.size()

Upserted 4 entities, 3 new entities
entities_updated=[TempEntityRecord(identifier=Amber (Reincarnated)), TempEntityRecord(identifier=Dr. Fuji), TempEntityRecord(identifier=Mewtwo), TempEntityRecord(identifier=Dr. Fuji's Laboratory)]
entities_new=[TempEntityRecord(identifier=Existential Dread), TempEntityRecord(identifier=Reincarnation), TempEntityRecord(identifier=Pidgeot (Storm))]


10

In [14]:
# Serialize once, before any file is opened: open(..., "w") truncates the target
# immediately, so a serialization failure must not happen after that point.
entity_db_json = entity_db.entities_db.model_dump_json(indent=2)

# Per-chapter snapshot. The directory is created here too, so this cell does not
# depend on an earlier cell having made it.
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "entity_db.json", "w") as f:
    f.write(entity_db_json)

# Copy of the same snapshot under "latest_out".
os.makedirs(STORY_DIR / "latest_out", exist_ok=True)
with open(STORY_DIR / "latest_out" / "entity_db.json", "w") as f:
    f.write(entity_db_json)

### Story So Far Summary

In [15]:

from shuscribe.schemas.wikigen.wiki import WikiPage


templates.story.comprehensive_wiki.reload()
comprehensive_wiki_messages: list[Message] = templates.story.comprehensive_wiki.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    chapter_summary=chapter_summary,
    key_entities=upsert_entities,
    
    story_metadata=STORY_METADATA,
    # summary_so_far=summary_so_far,
    # recent_summaries=recent_summaries_prompt,
)

# print(comprehensive_summary_messages[-1].content)
comprehensive_wiki_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json", "r") as f:
            comprehensive_wiki_response = StreamChunk.model_validate_json(f.read())

if not comprehensive_wiki_response:
    comprehensive_wiki_response = await run_async(stream(
        "", 
        "", 
        comprehensive_wiki_messages, 
        config=templates.story.comprehensive_wiki.default_config
        ))

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json", "w") as f:
    f.write(comprehensive_wiki_response.model_dump_json(indent=2))

print(comprehensive_wiki_response.usage)
comprehensive_wiki = WikiPage.from_wiki_content("Comprehensive Wiki Page", comprehensive_wiki_response.accumulated_text)


prompt_tokens=7179 completion_tokens=1831


In [16]:
# Render the generated wiki text as formatted Markdown in the cell output
# (the Markdown object is the cell's last expression, so IPython displays it).
Markdown(comprehensive_wiki.content)




## World Overview

### The [[Pokemon]] World

The world of Pokemon: Ambertwo appears to be a complex fusion of various Pokemon media, including elements from the games, anime, and manga. This world contains actual, living Pokemon with significant physical presence and impact. The regions follow similar geography to the canonical Pokemon world, with locations like [[Cinnabar Island]] and [[Celadon City]] being key areas in the narrative.

Transportation in this world utilizes Pokemon in practical ways. The [[Celadon Express]] is a flight service that uses multiple [[Pidgeot]] for passenger transport between islands and cities. This service demonstrates how flying Pokemon are integrated into society for practical purposes beyond battling.

### [[Team Rocket]]

Team Rocket operates with significant resources and influence. They maintain secret laboratories for genetic experimentation, including the facility where [[Dr. Fuji]] worked on creating [[Mewtwo]]. Following Mewtwo's escape, they represent a looming threat, with their potential pursuit of both Mewtwo and knowledge of any surviving experiments.

## Key Characters

### [[Amber (Reincarnated)]]

The protagonist of the story is a reincarnated Pokemon fan who now inhabits the body of [[Dr. Fuji]]'s daughter, Amber. Formerly an adult Pokemon enthusiast in the real world, she died while playing Pokemon GO and found herself awakening in a clone tank in Dr. Fuji's laboratory. Her consciousness now resides in the body of a 10-year-old girl with mint-green hair.

Amber struggles significantly with her new identity, experiencing severe [[Existential Dread]] as she tries to reconcile her adult mind with her child body. She feels guilt knowing she occupies the body meant for Dr. Fuji's deceased daughter, describing herself as "an accident of consciousness in a form that should have failed." Her fingerprints, body movements, and even breathing feel foreign to her.

She retains memories of her past life, including her mother, her cat (nicknamed Snorlax), collecting Pokemon cards during lockdown, and playing Pokemon games. Her final moments involved a Pokemon battle using a Gyarados named Tsunami before being hit by a truck.

Amber possesses meta-knowledge about the Pokemon world from her previous life, understanding different potential timelines from the games, anime, and manga. This knowledge causes her anxiety about Team Rocket potentially discovering and exploiting both her and her knowledge.

### [[Dr. Fuji]]

A middle-aged scientist and Amber's father in her new life. He is emotionally invested in Amber, showing deep care and protection toward her. Dr. Fuji was responsible for creating [[Mewtwo]] and apparently succeeded in cloning his deceased daughter, though the consciousness inhabiting the clone's body is not his original daughter's.

He serves as a source of stability for Amber, adjusting her safety straps during their Pidgeot flight and generally looking after her welfare. His primary motivation appears to be protecting Amber and rebuilding their life together after the chaos of Mewtwo's escape.

### [[Mewtwo]]

A powerful Psychic-type Pokemon created through genetic engineering by Dr. Fuji. Mewtwo escaped from Dr. Fuji's laboratory in a destructive rampage that forced Dr. Fuji and Amber to flee. Described as raw, primal, and terrifyingly real with unstable form and immense power.

When Mewtwo made eye contact with Amber during its escape, there appeared to be a flicker of recognition. Amber reflects that Mewtwo is "another consciousness that never should have been," though she notes that at least Mewtwo was created with a clear purpose: to be the most powerful Pokemon in existence.

Amber contemplates the different possible futures for Mewtwo based on various Pokemon media, including it being captured by Team Rocket, escaping to Cerulean Cave, or following other canonical storylines.

## Locations

### [[Dr. Fuji's Laboratory]]

An underground facility where Amber awakened in a tank filled with amber fluid. The laboratory contained equipment for Pokemon experimentation, including multiple tanks similar to Amber's. The facility was located beneath Cinnabar Island, hidden from the knowledge of tourists and regular visitors to the island.

During Mewtwo's escape, the laboratory was severely damaged when Mewtwo blasted through the ceiling to escape.

### [[Cinnabar Island]]

A volcanic island that serves as a tourist destination, with visitors typically unaware of the secret laboratory beneath it. The island features black volcanic sand beaches and a visible volcano silhouette. After escaping the laboratory, Amber and Dr. Fuji stayed at a hotel on the island before arranging transportation to leave.

## Events

### Awakening and Escape

Amber awakened in a tank filled with amber fluid, initially confused about her situation. As she gained awareness, she realized she had been reincarnated into the Pokemon world as Dr. Fuji's daughter. During this disorientation, Mewtwo also awoke and escaped in a destructive rampage, forcing Amber and Dr. Fuji to flee the laboratory.

### Journey to [[Celadon City]]

After spending a night in a hotel on Cinnabar Island, Amber and Dr. Fuji joined other passengers for the [[Celadon Express]], a Pokemon air transport service. The service uses a formation of eight [[Pidgeot]] escorted by a [[Fearow]] and a [[Noctowl]], all under the command of Captain Lin and Captain Reed.

Amber experienced her first direct contact with Pokemon during this journey, riding on a Pidgeot named [[Pidgeot (Storm)|Storm]]. The encounter left her awestruck as she reconciled the reality of actual Pokemon with her previous understanding of them as fictional creatures. Despite her excitement, exhaustion eventually overtook her, and she fell asleep during the flight, comforted by the rhythm of Storm's wings and Dr. Fuji's protective presence.

## Ongoing Conflicts

### Internal Identity Struggle

Amber experiences significant internal conflict as she adjusts to her new existence. She struggles with insomnia due to fear that sleep might cause her to lose her connection to her former self. She grapples with feeling like an imposter in Amber's body, viewing herself as "an accident" rather than the intended consciousness for this form.

Her existential questions expand to include wondering about her purpose in this world, especially in comparison to Mewtwo who was created with the clear purpose of being the strongest Pokemon.

### Threat of [[Team Rocket]]

Though not immediately present after their escape from the laboratory, Team Rocket represents a significant looming threat. Amber worries about what they might do if they discover a "failed experiment walking around with a head full of meta knowledge." Her awareness of multiple possible timelines from different Pokemon media adds complexity to her concern about Team Rocket's future actions.

