# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Path('../backend/shuscribe').resolve()
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from IPython.display import Markdown

load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

TEST_MODELS ={
    "openai": "gpt-4o-mini",
    "anthropic": "claude-3-5-haiku-20241022",
    "gemini": "gemini-2.0-flash-001"
}

TEST_THINKING_MODELS = {
    "openai": "o3-mini-2025-01-31",
    "anthropic": "claude-3-7-sonnet-20250219",
    "gemini": "gemini-2.0-flash-thinking-exp"
}

STORY_DIR = Path("../backend/tests/resources/pokemon_amber")

# Helper function to run async code in notebook
async def run_async(coro):
    return await coro

In [3]:
USE_CACHED_RESPONSES = False

In [4]:
# Streaming response
from typing import Type
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.llm import ThinkingConfig
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str, 
    model: str,
    messages: list[Message], 
    response_schema: Type[BaseOutputSchema] | None = None, 
    max_tokens: int | None = None,
    temp: float | None = None,
    thinking_config: ThinkingConfig | None = None,
    config: GenerationConfig | None = None
    ) -> StreamChunk | None:
    async with LLMSession.session_scope() as session:
        # Create a streaming config
        if not config:
            config = GenerationConfig(
                temperature=temp or 0.7,
                response_schema=response_schema if response_schema else None,
                max_output_tokens=max_tokens,
                thinking_config=thinking_config
            )
        
        print(f"{model if model else config.model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)

    if chunk:
        if chunk.status in (StreamStatus.COMPLETE, StreamStatus.ERROR):
            return chunk
    else:
        return None


In [5]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

with open(STORY_DIR / "_meta.yaml", "r") as f:
    meta = yaml.safe_load(f)
    STORY_METADATA = StoryMetadata(
        title=meta.get('story_title'),
        description=meta.get('story_description'),
        genres=meta.get('genres'),
        additional_tags=meta.get('additional_tags')
    )

CHAPTERS = []
for chapter in meta.get('chapters'):
    with open(STORY_DIR / chapter, "r") as f:
        chapter_id = chapter.split('.')[0]
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception:
            continue



# Summarization Pipeline

## Chapter [1]

In [6]:
# Set Provider Name
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.story import WikiPage
from shuscribe.schemas.wikigen.summary import ChapterSummary


PROVIDER_NAME = "gemini"
CHAPTER_INDEX = 1

with open(STORY_DIR / f"{CHAPTER_INDEX-1}out" / "comprehensive_wiki.yaml", "r") as f:
    COMPREHENSIVE_WIKI = WikiPage.from_wiki_content("Comprehensive Wiki Page", f.read())

with open(STORY_DIR / f"{CHAPTER_INDEX-1}out" / "chapter_summary.yaml", "r") as f:
    PREV_SUMMARY = ChapterSummary.from_chapter_summary(CHAPTER_INDEX-1, f.read())

with open(STORY_DIR / f"{CHAPTER_INDEX-1}out" / "upsert_entities.yaml", "r") as f:
    UPSERT_ENTITIES = UpsertEntitiesOutSchema.model_validate_json(f.read())
# USE_CACHED_RESPONSES = False

### Chapter Summary

In [7]:
from shuscribe.schemas.wikigen.story import WikiPage
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()

summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    summary_so_far=COMPREHENSIVE_WIKI,
    recent_summaries=[PREV_SUMMARY],
)

# print(messages[-1].content)

summary_response = None

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "r") as f:
            summary_response = StreamChunk.model_validate_json(f.read())

# if we don't have a cached response, generate one
if not summary_response:
    summary_response = await run_async(stream(
        "", 
        "", 
        summary_messages, 
        config=templates.chapter.summary.default_config
        ))

print(summary_response.usage)
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "w") as f:
    f.write(summary_response.model_dump_json(indent=2))

chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)


gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## Lingering Trauma and Existential Dread

*   The protagonist, now going by **Amber**, struggles to sleep, haunted by the memory of Mewtwo's awakening and the feeling of being disconnected from her old self. [!CHARACTER]
    *   She is disturbed by the mint-green hair she sees on the TV screen, a reminder of her new, ten-year-old body. [!CHARACTER]
    *   She feels like an adult trapped in a child's body, with muscle memory that doesn't belong to her. [!CHARACTER]
*   Amber reflects on Dr. Fuji's actions, acknowledging his desperate attempt to bring his daughter back to life. [!CHARACTER]
    *   She feels like an accident, a consciousness that shouldn't exist in this form. [!CHARACTER]
    *   She grapples with the guilt of occupying a body that belongs to someone who is deceased. [!CHARACTER] [!THEME]
*   Amber contemplates the ignorance of the island's tourists regarding the hidden laboratory and its experiments. [!WORLD]
    *   She consi

Database module not implemented. Skipping save.


prompt_tokens=5066 completion_tokens=1074


### Extract Entities

In [8]:
print(chapter_summary.to_prompt())

<Content>


## Lingering Trauma and Existential Dread

*   The protagonist, now going by **Amber**, struggles to sleep, haunted by the memory of Mewtwo's awakening and the feeling of being disconnected from her old self. [!CHARACTER]
    *   She is disturbed by the mint-green hair she sees on the TV screen, a reminder of her new, ten-year-old body. [!CHARACTER]
    *   She feels like an adult trapped in a child's body, with muscle memory that doesn't belong to her. [!CHARACTER]
*   Amber reflects on Dr. Fuji's actions, acknowledging his desperate attempt to bring his daughter back to life. [!CHARACTER]
    *   She feels like an accident, a consciousness that shouldn't exist in this form. [!CHARACTER]
    *   She grapples with the guilt of occupying a body that belongs to someone who is deceased. [!CHARACTER] [!THEME]
*   Amber contemplates the ignorance of the island's tourists regarding the hidden laboratory and its experiments. [!WORLD]
    *   She considers Mewtwo's existence and it

In [9]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
extract_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
    summary_so_far=COMPREHENSIVE_WIKI,
    recent_summaries=[PREV_SUMMARY],
)

# print(extract_messages[-1].content)
extract_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "r") as f:
            extract_response = StreamChunk.model_validate_json(f.read())
            
# if we don't have a cached response, generate one
if not extract_response:
    extract_response = await run_async(stream(
        "", 
        "", 
        extract_messages, 
        config=templates.entity.extract.default_config
        ))
    
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "w") as f:
    f.write(extract_response.model_dump_json(indent=2))

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


gpt-4o-mini:
{"entities":[{"description":"A ten-year-old girl who is the reincarnation of Dr. Fuji's deceased daughter, Amber struggles with her identity and the implications of her new life. She retains memories from her previous life as a Pokémon fan, which causes her existential dread and confusion.","narrative_role":"Protagonist navigating her new reality and grappling with her past life and current form.","significance_level":"Central","entity_type":"Character","identifier":"Amber (Dr. Fuji's Daughter)","aliases":["Amber","Protagonist"] ,"related_entities":["Dr. Fuji","Mewtwo"]},{"description":"A middle-aged scientist who cloned his daughter, Amber. He is determined to protect her and has deep emotional ties to her, reflecting on his past and the consequences of his scientific endeavors.","narrative_role":"Amber's father, who is trying to rebuild his family after losing his daughter.","significance_level":"Major","entity_type":"Character","identifier":"Dr. Fuji","aliases":["Dr. Fu

Database module not implemented. Skipping save.


prompt_tokens=6305 completion_tokens=1063


### Upsert Entities and Relationships

In [10]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl

ent_list = extracted_entities.filter_entities(EntitySigLvl.RELEVANT)
print(len(ent_list))
for entity in ent_list:
    print(entity.identifier)


10
Amber (Dr. Fuji's Daughter)
Dr. Fuji
Mewtwo
Dr. Fuji's Laboratory
The Pokémon World
Arrival of the Pidgeot
Identity and Existence
Team Rocket
Possible Futures
Reincarnation and Guilt


In [11]:
# TODO
# manually define the entities to find...

for i, entity in enumerate(UPSERT_ENTITIES.entities):
    print(f"{i}: {entity.identifier}")

0: Mewtwo
1: Amber (Dr. Fuji's daughter)
2: Dr. Fuji
3: Dr. Fuji's Lab
4: Truck-kun Incident
5: Chaotic Laboratory Environment (Pokemon: Ambertwo)


In [13]:
INDEXES = [0, 1, 2, 3, 4, 5]
existing_entities = [UPSERT_ENTITIES.entities[i].model_dump() for i in INDEXES]
type(existing_entities[0])

dict

In [14]:
# for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT):
#     for entity in batch:
#         print(entity)


In [29]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl

upsert_entities = UpsertEntitiesOutSchema(entities=[])

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "r") as f:
            upsert_entities = UpsertEntitiesOutSchema.model_validate_json(f.read())

if len(upsert_entities.entities) == 0:
    upsert_entities = UpsertEntitiesOutSchema(entities=[])
    total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
    templates.entity.upsert.reload()        

    for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT, chunk_size=5):
        upsert_messages: list[Message] = templates.entity.upsert.format( 
            current_chapter=CHAPTERS[CHAPTER_INDEX],
            entity_batch=batch,
            story_metadata=STORY_METADATA,
            chapter_summary=chapter_summary,
            
            existing_entities=existing_entities,
            summary_so_far=COMPREHENSIVE_WIKI,
            recent_summaries=[PREV_SUMMARY],
        )

        upsert_response = await run_async(stream(
            "", 
            "", 
            upsert_messages, 
            config=templates.entity.upsert.default_config
            ))
        upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
        total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
        total_usage.completion_tokens += upsert_response.usage.completion_tokens
        print(upsert_response.usage)

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "w") as f:
    f.write(upsert_entities.model_dump_json(indent=2))
    
print(total_usage)

TypeError: Object of type EntityFactType is not JSON serializable

In [None]:
# print(upsert_entities.model_dump_json(indent=2))

{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Mewtwo",
      "detailed_description": "*   A powerful psychic Pokémon created by humans.\n*   Known for its intelligence and strength.\n*   Depicted as a raw and primal force in this chapter.\n*   Wreaks havoc in Dr. Fuji's lab after escaping its containment.\n*   Makes eye contact with Amber (formerly Alexa), possibly showing recognition or memory.\n*   Attacked by an Arcanine with a stream of flames.\n*   Escapes the lab by blasting upward through the ceiling with psychic energy.\n*   Classified as the world's strongest Pokémon in the games.",
      "narrative_role": "Antagonist in the current chaotic situation, representing the consequences of human experimentation and the power of Pokémon. Serves as the catalyst for the events unfolding in the lab.",
      "facts": [
        {
          "fact": "Mewtwo is a raw and primal force.",
          "type": "Explicit"
        },
        {
          "fact": "Mewtwo e

### Story So Far Summary

In [82]:

from shuscribe.schemas.wikigen.story import WikiPage


templates.story.comprehensive_wiki.reload()
comprehensive_wiki_messages: list[Message] = templates.story.comprehensive_wiki.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    chapter_summary=chapter_summary,
    key_entities=upsert_entities,
    
    story_metadata=STORY_METADATA,
    # summary_so_far=summary_so_far,
    # recent_summaries=recent_summaries_prompt,
)

# print(comprehensive_summary_messages[-1].content)
comprehensive_wiki_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml", "r") as f:
            comprehensive_wiki_response = StreamChunk.model_validate_json(f.read())

if not comprehensive_wiki_response:
    comprehensive_wiki_response = await run_async(stream(
        "", 
        "", 
        comprehensive_wiki_messages, 
        config=templates.story.comprehensive_wiki.default_config
        ))

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml", "w") as f:
    f.write(comprehensive_wiki_response.model_dump_json(indent=2))

print(comprehensive_wiki_response.usage)
comprehensive_wiki = WikiPage.from_wiki_content("Comprehensive Wiki Page", comprehensive_wiki_response.accumulated_text)


claude-3-7-sonnet-20250219:
<ANTHROPIC_THINKING>I need to create a comprehensive wiki document that integrates the new information from the latest chapter with any existing narrative. The task requires me to:

1. Structure the wiki with clear markdown headings
2. Cross-reference entities using wiki-links
3. Focus on what happened rather than analysis
4. Balance detail with readability
5. Avoid chapter-by-chapter recaps

Since this is the first chapter, I'll be creating the initial wiki document. Let me identify the major sections needed:

- Overview/Introduction: Brief introduction to the story
- World: The Pokemon world setting
- Characters: Main characters and their roles
- Events: Major events that have occurred
- Locations: Key locations in the story

Now, let me organize the content from the chapter into these sections.</ANTHROPIC_THINKING><|STARTWIKI|>

## World Overview

In the world of "Pokemon: Ambertwo," a human from our world has been transported into the body of [[Amber (fo

Database module not implemented. Skipping save.


prompt_tokens=8457 completion_tokens=1645


In [83]:
# display markdown
Markdown(comprehensive_wiki.content)




## World Overview

In the world of "Pokemon: Ambertwo," a human from our world has been transported into the body of [[Amber (formerly Alexa)]], a genetically engineered clone created by [[Dr. Fuji]]. The story combines elements from various Pokemon media, presenting a darker, more realistic interpretation of the Pokemon universe where genetic experimentation and cloning play significant roles in shaping the narrative.

## Characters

### Amber

[[Amber (formerly Alexa)]] is the protagonist of the story, originally a college student named Alexa who was reincarnated into the body of Dr. Fuji's deceased daughter's clone. Before her reincarnation, she was an avid Pokemon player, participating in both classic Pokemon games through emulators and the augmented reality game Pokemon Go.

After being struck by a truck while rushing to participate in a [[Shadow Mewtwo Raid (Pokemon Go Event)]], Alexa awakened in a tank filled with amber fluid within [[Dr. Fuji's Lab]]. She quickly realized she had been reincarnated as Ambertwo, possessing the body of a young girl.

Despite her disorientation, Amber retained all memories of her previous life and recognized both Dr. Fuji and [[Mewtwo]] from Pokemon lore, giving her unique insight into the chaotic situation unfolding around her.

### Dr. Fuji

[[Dr. Fuji]] is a brilliant scientist responsible for creating both Mewtwo and Ambertwo through genetic engineering and cloning. The death of his daughter Amber appears to have driven him to create these clones, motivated by grief and a desire to restore what he lost.

Dr. Fuji shows immense emotional attachment to Amber, believing her to be a successful resurrection of his daughter. During [[Mewtwo's Escape]], he prioritizes Amber's safety above all else, enduring significant physical injury to protect her from falling debris and guide her to safety.

His parting words to Amber – "Everything will be different now. We can fix it all—our family, our life. Everything" – reveal his deep-seated hope that Amber's existence represents a second chance for his shattered family.

### Mewtwo

[[Mewtwo]] is a powerful Psychic-type Pokemon created through genetic engineering, known as the most powerful Pokemon in existence. Unlike its depiction in games, anime, or movies, this version of Mewtwo appears as a raw, primal force of nature with immense destructive capabilities.

During its escape from the laboratory, Mewtwo demonstrates devastating psychic abilities, shattering reinforced structures and telekinetically moving objects. Despite being attacked by security Pokemon, including an Arcanine, Mewtwo ultimately escapes through the ceiling of the laboratory.

The brief moment of eye contact between Mewtwo and Amber suggests a potential connection or recognition between the two engineered beings, though the nature of this connection remains unexplored.

## Locations

### Dr. Fuji's Laboratory

[[Dr. Fuji's Lab]] serves as the primary setting for the story's opening events. The facility appears to be a sophisticated research installation hidden beneath or within a mansion-like structure. The laboratory contains multiple containment tanks filled with amber fluid, used for housing and developing cloned entities.

The lab's interior features clinical white walls, exposed machinery, and monitoring equipment, contrasting sharply with the refined wood paneling and ornate fixtures of the mansion above. This juxtaposition highlights the secretive nature of the genetic experiments conducted within.

The laboratory also houses various other experiments besides Mewtwo and Amber, including a partially-formed Kadabra variant that briefly glitches into existence during Mewtwo's rampage. References to an "enhancement lab" and "evolution acceleration chamber" suggest extensive genetic manipulation of Pokemon is conducted at the facility.

## Major Events

### Alexa's Death and Reincarnation

The story begins with Alexa, a college student and Pokemon enthusiast, playing a permadeath version of Pokemon on an emulator. After receiving notification about a [[Shadow Mewtwo Raid (Pokemon Go Event)]] nearby, she rushes to participate but is struck by a truck while crossing the street, resulting in her death.

Instead of permanent death, Alexa's consciousness is somehow transferred to the body of Ambertwo, a clone created by Dr. Fuji. She awakens disoriented in a tank of amber fluid, struggling to reconcile her new physical form with her previous identity.

### Mewtwo's Escape

[[Mewtwo's Escape]] serves as the primary catalyst for the story's events. As Amber awakens in her new form, Mewtwo simultaneously breaks free from its containment, unleashing devastating psychic power throughout the facility.

The escape creates widespread chaos and destruction, with emergency systems failing, fires breaking out, and various experiments being compromised. Security personnel attempt to subdue Mewtwo using Pokemon, including an Arcanine, but their efforts prove futile as Mewtwo blasts through the ceiling to freedom.

During the chaos, Dr. Fuji prioritizes getting Amber to safety, carrying her through the damaged facility despite sustaining injuries from falling debris. Their escape is complicated by structural damage and spreading fires, highlighting the dangerous and unstable nature of the situation.

## Science and Technology

### Cloning and Genetic Engineering

The story presents a world where genetic engineering and cloning have advanced significantly beyond our current capabilities. Dr. Fuji's facility demonstrates the ability to not only clone Pokemon (creating Mewtwo from Mew's DNA) but also humans, as evidenced by Amber's existence.

The amber fluid contained in the tanks appears to serve as both a preservation medium and possibly a growth medium for the clones. The technology seems capable of not only replicating physical forms but potentially transferring or implanting consciousness, as suggested by Amber retaining Alexa's memories despite inhabiting a new body.

References to "containment fields," "enhancement labs," and "evolution acceleration chambers" suggest sophisticated technology designed to manipulate and control the development of Pokemon abilities and evolutionary processes.

### Security Systems

The laboratory employs both technological and Pokemon-based security. Security guards with Pokemon (notably an Arcanine) attempt to contain Mewtwo during its escape, demonstrating the integration of Pokemon into security operations.

The facility features various security measures including reinforced doors, containment fields, and monitoring systems, though these prove insufficient when faced with Mewtwo's overwhelming psychic power.

