# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the backend directory (resolved relative to the current working
# directory) at the front of sys.path so the `shuscribe` imports in the
# following cells can be found.
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from IPython.display import Markdown

# Load environment variables via python-dotenv before reading the API keys.
load_dotenv()
# Indexing os.environ (rather than .get) raises KeyError right here if a key
# is missing.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Test model id keyed by provider name.
TEST_MODELS ={
    "openai": "gpt-4o-mini",
    "anthropic": "claude-3-5-haiku-20241022",
    "gemini": "gemini-2.0-flash-001"
}

# Test model id of each provider's "thinking" variant, keyed by provider name.
TEST_THINKING_MODELS = {
    "openai": "o3-mini-2025-01-31",
    "anthropic": "claude-3-7-sonnet-20250219",
    "gemini": "gemini-2.0-flash-thinking-exp"
}

# Story fixture directory; story files are read from it and the per-chapter
# output folders are written under it. Relative to the working directory.
STORY_DIR = Path("../backend/tests/resources/pokemon_amber")

async def run_async(coro):
    """Await ``coro`` and return its result.

    Helper for running async code from notebook cells; it adds nothing beyond
    awaiting the awaitable it is given.
    """
    outcome = await coro
    return outcome

In [3]:
# Streaming response
from typing import Type
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.llm import ThinkingConfig
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str,
    model: str,
    messages: list[Message],
    response_schema: Type[BaseOutputSchema] | None = None,
    max_tokens: int | None = None,
    temp: float | None = None,
    thinking_config: ThinkingConfig | None = None,
    config: GenerationConfig | None = None
    ) -> StreamChunk | None:
    """Stream a generation to stdout and return its terminal chunk.

    Args:
        provider_name: Forwarded unchanged to ``session.generate_stream``.
        model: Forwarded unchanged to ``session.generate_stream``. When empty,
            ``config.model`` is printed as the header instead.
        messages: Conversation passed to the provider.
        response_schema: Used only when ``config`` is not supplied.
        max_tokens: Used only when ``config`` is not supplied.
        temp: Sampling temperature, used only when ``config`` is not supplied.
            ``None`` means the 0.7 default; an explicit ``0.0`` is honoured.
        thinking_config: Used only when ``config`` is not supplied.
        config: Ready-made generation config; takes precedence over the
            individual keyword arguments above.

    Returns:
        The last chunk received if its status is ``COMPLETE`` or ``ERROR``;
        ``None`` if the stream produced no chunks or ended in any other state.
    """
    # Pre-bind the result so an empty stream cannot raise UnboundLocalError
    # when it is inspected after the loop.
    last_chunk: StreamChunk | None = None

    async with LLMSession.session_scope() as session:
        # Create a streaming config
        if not config:
            config = GenerationConfig(
                # `temp or 0.7` would silently replace an explicit 0.0.
                temperature=0.7 if temp is None else temp,
                response_schema=response_schema,
                max_output_tokens=max_tokens,
                thinking_config=thinking_config
            )

        print(f"{model if model else config.model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)
            last_chunk = chunk

    # Only a terminal chunk is handed back to the caller.
    if last_chunk is not None and last_chunk.status in (StreamStatus.COMPLETE, StreamStatus.ERROR):
        return last_chunk
    return None


In [4]:
from shuscribe.schemas.wikigen.entity import TempEntityDB

# Entity store shared by the search, upsert and export cells further down.
entity_db = TempEntityDB()

!!!!!!!!!!!!megablocks not available, using torch.matmul instead
<All keys matched successfully>


In [5]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Story-level metadata. The encoding is explicit because the story text
# contains non-ASCII characters and the locale default is not UTF-8 everywhere.
with open(STORY_DIR / "story" / "_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)

STORY_METADATA = StoryMetadata(
    title=meta.get('story_title'),
    description=meta.get('story_description'),
    genres=meta.get('genres'),
    additional_tags=meta.get('additional_tags')
)

# One Chapter per file listed in the metadata, in listing order.
# `or []` keeps a missing/empty `chapters` key from raising TypeError.
CHAPTERS = []
for chapter in meta.get('chapters') or []:
    with open(STORY_DIR / "story" / chapter, "r", encoding="utf-8") as f:
        chapter_id = chapter.split('.')[0]
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            # Still best-effort, but say so: a silently skipped chapter shifts
            # which chapter CHAPTERS[i] refers to.
            print(f"Skipping chapter {chapter!r}: {exc!r}")
            continue


# Summarization Pipeline

## Chapter [0]

In [6]:
# Run configuration for this chapter.
# NOTE(review): PROVIDER_NAME is not passed to any stream() call visible in
# this notebook; those calls pass "" and rely on each template's
# default_config. Confirm whether it is still needed.
PROVIDER_NAME = "gemini"
# Index into CHAPTERS; also names the per-chapter output folder "<index>out".
CHAPTER_INDEX = 0

# When True, cells reuse a saved response from the output folder if one exists
# instead of calling the model again.
USE_CACHED_RESPONSES = True

### Chapter Summary

In [7]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = None

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json", "r") as f:
            summary_response = StreamChunk.model_validate_json(f.read())

# if we don't have a cached response, generate one
if not summary_response:
    summary_response = await run_async(stream(
        "", 
        "", 
        summary_messages, 
        config=templates.chapter.summary.default_config
        ))

print(summary_response.usage)
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json", "w") as f:
    f.write(summary_response.model_dump_json(indent=2))

chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)


gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## Isekai'd from Pokemon Emulator

*   The protagonist is playing a Pokemon ROM hack on their phone, struggling in the Pokemon Mansion.
    *   Their Gyarados is on low health against a Vulpix.
    *   The protagonist is worried about losing more Pokemon due to perma-death rules.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym in Pokemon GO.
*   The protagonist saves their game and rushes to the library for the raid.
*   While crossing the street, the protagonist is hit by a truck.
    *   The protagonist's last thought is about their Gyarados.

## Rebirth as Amber

*   The protagonist awakens in an amber fluid-filled container. [!WORLD]
    *   The protagonist feels the fluid pressing against them.
    *   The protagonist's lungs don't feel like they need air.
*   The container breaks, and the protagonist is caught by a middle-aged man, Dr. Fuji, who is overjoyed and calls her "Amber." [!CHARACTER]


Database module not implemented. Skipping save.


prompt_tokens=3791 completion_tokens=713


### Extract Entities

In [8]:
# Inspect the prompt rendering of the summary (what ChapterSummary.to_prompt() returns).
print(chapter_summary.to_prompt())

<Content>


## Isekai'd from Pokemon Emulator

*   The protagonist is playing a Pokemon ROM hack on their phone, struggling in the Pokemon Mansion.
    *   Their Gyarados is on low health against a Vulpix.
    *   The protagonist is worried about losing more Pokemon due to perma-death rules.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym in Pokemon GO.
*   The protagonist saves their game and rushes to the library for the raid.
*   While crossing the street, the protagonist is hit by a truck.
    *   The protagonist's last thought is about their Gyarados.

## Rebirth as Amber

*   The protagonist awakens in an amber fluid-filled container. [!WORLD]
    *   The protagonist feels the fluid pressing against them.
    *   The protagonist's lungs don't feel like they need air.
*   The container breaks, and the protagonist is caught by a middle-aged man, Dr. Fuji, who is overjoyed and calls her "Amber." [!CHARACTER]
    *   Dr. Fuji wraps a lab c

In [9]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
extract_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(extract_messages[-1].content)
extract_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json", "r") as f:
            extract_response = StreamChunk.model_validate_json(f.read())
            
# if we don't have a cached response, generate one
if not extract_response:
    extract_response = await run_async(stream(
        "", 
        "", 
        extract_messages, 
        config=templates.entity.extract.default_config
        ))
    
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json", "w") as f:
    f.write(extract_response.model_dump_json(indent=2))

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


gpt-4o-mini:
{"entities":[{"description":"The protagonist of the story, who is reincarnated as Dr. Fuji's daughter Amber. She is initially a gamer who plays Pokémon and becomes aware of her new identity and body after waking up in a lab.","narrative_role":"Central character who experiences the transition from gamer to a real-life character in the Pokémon world.","significance_level":"Central","entity_type":"Character","identifier":"Amber (Dr. Fuji's daughter)","aliases":["Protagonist","Reincarnated Amber"] ,"related_entities":[]},{"description":"A middle-aged scientist who is the protagonist's father in this new life. He is depicted as emotional and caring, having been deeply affected by the loss of his daughter.","narrative_role":"Supporting character who provides emotional depth and context to Amber's situation.","significance_level":"Major","entity_type":"Character","identifier":"Dr. Fuji","aliases":[],"related_entities":["Amber (Dr. Fuji's daughter)"]},{"description":"A powerful Ps

Database module not implemented. Skipping save.


prompt_tokens=4744 completion_tokens=764


### Search Entity DB



In [10]:
# TODO: placeholder query. Entities are only upserted into entity_db further
# down in the notebook, so at this point the search returns an empty list.
entity_db.search("What is the main character's name?")

[]

In [11]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl

# Apply the RELEVANT significance filter to the extracted entities, then print
# how many remain followed by each identifier.
ent_list = extracted_entities.filter_entities(EntitySigLvl.RELEVANT)
print(len(ent_list))
for entity in ent_list:
    print(entity.identifier)


7
Amber (Dr. Fuji's daughter)
Dr. Fuji
Mewtwo
Pokémon Mansion (in-game)
Dr. Fuji's Laboratory
Amber Fluid
Lab Chaos Environment


### Upsert Entities and Relationships

In [12]:
# for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT):
#     for entity in batch:
#         print(entity)


In [13]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl

upsert_entities = UpsertEntitiesOutSchema(entities=[])

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json", "r") as f:
            upsert_entities = UpsertEntitiesOutSchema.model_validate_json(f.read())

if len(upsert_entities.entities) == 0:
    upsert_entities = UpsertEntitiesOutSchema(entities=[])
    total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
    templates.entity.upsert.reload()        

    for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT, chunk_size=5):
        upsert_messages: list[Message] = templates.entity.upsert.format( 
            current_chapter=CHAPTERS[CHAPTER_INDEX],
            entity_batch=batch,
            story_metadata=STORY_METADATA,
            chapter_summary=chapter_summary,
        )

        upsert_response = await run_async(stream(
            "", 
            "", 
            upsert_messages, 
            config=templates.entity.upsert.default_config
            ))
        upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
        total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
        total_usage.completion_tokens += upsert_response.usage.completion_tokens
        print(upsert_response.usage)
    print(total_usage)

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json", "w") as f:
    f.write(upsert_entities.model_dump_json(indent=2))
    

gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": "Amber (Dr. Fuji's daughter)",
      "identifier": "Amber (Reincarnated)",
      "detailed_description": "*   Reincarnated protagonist of the story.\n*   Formerly a Pokemon fan who was hit by a truck while playing Pokemon GO.\n*   Woke up in an amber fluid-filled container in Dr. Fuji's lab.\n*   Currently inhabiting the body of Dr. Fuji's deceased daughter, Amber.\n*   Has the voice of a young girl.\n*   Is aware of her past life and knowledge of Pokemon.\n*   Expresses concern and caring towards Dr. Fuji despite the chaos.",
      "narrative_role": "Central character experiencing the transition from gamer to a real-life character in the Pokémon world, grappling with a new identity and the dangers surrounding her.",
      "facts": [
        {
          "fact": "Currently inhabiting the body of Dr. Fuji's deceased daughter, Amber.",
          "type": "Explicit"
        },
        {
          "fact": "Expresses concer

Database module not implemented. Skipping save.


prompt_tokens=5745 completion_tokens=1817
gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": "Amber Fluid",
      "identifier": "Amber Fluid (Reincarnation)",
      "detailed_description": "*   A viscous, orange-tinted fluid in which the protagonist is suspended after being reincarnated as Amber.\n*   Symbolizes rebirth, transformation, and the unknown aspects of Amber's new existence.\n*   Feels thicker than water, almost syrupy.\n*   Muffles sounds.\n*   Leaves a scent of antiseptic on Dr. Fuji's lab coat.",
      "narrative_role": "Symbolic element representing transformation and the unknown, also serves as a sensory detail enhancing the description of Amber's rebirth.",
      "facts": [],
      "removed_facts": [],
      "entity_types": [
        "Item",
        "Symbolism"
      ],
      "aliases": [
        "amber fluid"
      ],
      "related_entities": []
    },
    {
      "old_identifier": "Lab Chaos Environment",
      "identifier": "Dr. Fuji's Laboratory

Database module not implemented. Skipping save.


prompt_tokens=5276 completion_tokens=525
prompt_tokens=11021 completion_tokens=2342


In [None]:
# Print the full upsert result as indented JSON for inspection.
print(upsert_entities.model_dump_json(indent=2))

{
  "entities": [
    {
      "old_identifier": "Amber (Dr. Fuji's daughter)",
      "identifier": "Amber (Reincarnated)",
      "detailed_description": "*   Reincarnated protagonist of the story.\n*   Formerly a Pokemon fan who was hit by a truck while playing Pokemon GO.\n*   Woke up in an amber fluid-filled container in Dr. Fuji's lab.\n*   Currently inhabiting the body of Dr. Fuji's deceased daughter, Amber.\n*   Has the voice of a young girl.\n*   Is aware of her past life and knowledge of Pokemon.\n*   Expresses concern and caring towards Dr. Fuji despite the chaos.",
      "narrative_role": "Central character experiencing the transition from gamer to a real-life character in the Pokémon world, grappling with a new identity and the dangers surrounding her.",
      "facts": [
        {
          "fact": "Currently inhabiting the body of Dr. Fuji's deceased daughter, Amber.",
          "type": "Explicit"
        },
        {
          "fact": "Expresses concern and caring towards D

### Update Entity DB

In [15]:
# Upsert this chapter's entities into the entity DB; the trailing expression
# displays the resulting DB size as the cell output.
entity_db.upsert(upsert_entities.entities)
entity_db.size()

Upserted 0 entities, 7 new entities
entities_updated=[]
entities_new=[TempEntityRecord(identifier=Amber (Reincarnated)), TempEntityRecord(identifier=Dr. Fuji), TempEntityRecord(identifier=Mewtwo), TempEntityRecord(identifier=Pokémon Mansion (Emulator)), TempEntityRecord(identifier=Dr. Fuji's Laboratory), TempEntityRecord(identifier=Amber Fluid (Reincarnation)), TempEntityRecord(identifier=Dr. Fuji's Laboratory (Post-Mewtwo Rampage))]


7

In [16]:
# Serialize once and write the same snapshot to both the per-chapter folder
# and the "latest_out" folder.
entity_db_json = entity_db.entities_db.model_dump_json(indent=2)

for entity_db_out_dir in (STORY_DIR / f"{CHAPTER_INDEX}out", STORY_DIR / "latest_out"):
    # Create each folder here so this cell does not rely on an earlier cell
    # having already made the per-chapter directory.
    os.makedirs(entity_db_out_dir, exist_ok=True)
    with open(entity_db_out_dir / "entity_db.json", "w") as f:
        f.write(entity_db_json)

### Comprehensive Wiki (Story So Far)

In [None]:

from shuscribe.schemas.wikigen.wiki import WikiPage


templates.story.comprehensive_wiki.reload()
comprehensive_wiki_messages: list[Message] = templates.story.comprehensive_wiki.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    chapter_summary=chapter_summary,
    
    story_metadata=STORY_METADATA,
    # summary_so_far=summary_so_far,
    # recent_summaries=recent_summaries_prompt,
)

# print(comprehensive_summary_messages[-1].content)
comprehensive_wiki_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json", "r") as f:
            comprehensive_wiki_response = StreamChunk.model_validate_json(f.read())

if not comprehensive_wiki_response:
    comprehensive_wiki_response = await run_async(stream(
        "", 
        "", 
        comprehensive_wiki_messages, 
        config=templates.story.comprehensive_wiki.default_config
        ))

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json", "w") as f:
    f.write(comprehensive_wiki_response.model_dump_json(indent=2))

print(comprehensive_wiki_response.usage)
comprehensive_wiki = WikiPage.from_wiki_content("Comprehensive Wiki Page", comprehensive_wiki_response.accumulated_text)


claude-3-7-sonnet-20250219:
<ANTHROPIC_THINKING>I need to update the comprehensive wiki document by integrating the new information from the latest chapter. I'll analyze what I've been given and create a cohesive wiki document.

From the new chapter, I see this is a Pokemon fanfiction/isekai story where the protagonist was playing Pokemon on an emulator, got hit by a truck while rushing to participate in a Pokemon GO raid, and woke up in the Pokemon world as Dr. Fuji's daughter Amber. The chapter introduces key elements like Mewtwo's escape from the lab, Dr. Fuji's joy at seeing his "daughter" alive, and the protagonist's realization that she's been reincarnated in a new body.

Since this appears to be the first chapter of the story, I need to build a comprehensive wiki document that establishes the foundation of the narrative. I'll organize it with clear sections about the protagonist, the world, and key plot points.

Let me structure this logically:

1. Overview - brief summary of th

Database module not implemented. Skipping save.


prompt_tokens=7518 completion_tokens=1722


In [18]:
# Render the generated wiki content as Markdown in the cell output.
Markdown(comprehensive_wiki.content)




# Pokemon: Ambertwo

## Overview

*Pokemon: Ambertwo* follows the story of a Pokemon fan who is reincarnated into the Pokemon world as [[Amber (Reincarnated)]], the cloned daughter of [[Dr. Fuji]]. After being hit by a truck while rushing to a Pokemon GO raid, the protagonist awakens in an amber-filled tank in [[Dr. Fuji's Laboratory]], witnessing the escape of [[Mewtwo]] and beginning a new life in a world previously only experienced through games.

## Characters

### Main Characters

#### [[Amber (Reincarnated)]]

The protagonist and central character of the story, formerly a Pokemon fan in the real world who was killed in a traffic accident. She has been reincarnated as Dr. Fuji's daughter, Amber, appearing to be a successful clone in contrast to the original storyline where Amber's cloning failed.

* Has memories of her previous life as a Pokemon fan
* Currently inhabits the body of a young girl
* Recognizes elements of the Pokemon world from her knowledge of the games and media
* Shows concern for Dr. Fuji despite her confusion

#### [[Dr. Fuji]]

A brilliant but emotionally damaged scientist working on Pokemon cloning and genetic engineering. Father to the original Amber and now caretaker of the reincarnated protagonist.

* Middle-aged scientist overjoyed at Amber's apparent resurrection
* Deeply emotional and caring toward Amber
* Injured during Mewtwo's escape but remains protective
* Has been conducting experiments related to Pokemon cloning and genetic engineering
* Expresses hope that "everything will be different now" and that they can "fix their family"

#### [[Mewtwo]]

A powerful Psychic-type Pokemon created through genetic engineering in [[Dr. Fuji's Laboratory]]. Unlike its pixelated game counterpart, this Mewtwo is described as raw, primal, and terrifyingly real.

* Currently unstable in form, with movements carrying immense power
* Escaped from the lab by destroying equipment and blasting through the ceiling
* Made eye contact with Amber, showing a possible flicker of recognition
* Was attacked by an Arcanine but demonstrated resilience

### Minor Characters

* Security guards attempting to contain Mewtwo
* An Arcanine that attacked Mewtwo with fire attacks
* Various scientists in the laboratory during Mewtwo's escape
* A partially-formed Kadabra variant seen glitching in and out of existence in the lab

## Setting

### Real World

The story begins in the real world, where the protagonist is a college student playing Pokemon games on an emulator while living in a dorm room. The protagonist was playing with self-imposed perma-death rules before receiving a notification about a Shadow Mewtwo raid in Pokemon GO, which prompted the fatal rush across the street.

#### [[Pokémon Mansion (Emulator)]]

A location in the Pokemon ROM hack the protagonist was playing before death. Described as a maze of broken tiles and forgotten experiments, it was the setting for a difficult battle where the protagonist's Gyarados was on low health.

### Pokemon World

The Pokemon world presented in the story appears to be a mixture of elements from various Pokemon media, consistent with the story's description as a "mishmash of Pokemon media."

#### [[Dr. Fuji's Laboratory]]

The primary setting where Amber awakens. A sophisticated underground facility where Pokemon experiments, particularly cloning, are conducted.

* Contains tanks filled with [[Amber Fluid (Reincarnation)]] used in the cloning process
* Located within what appears to be a wealthy estate or mansion
* Features specialized areas like "Enhancement Lab Three" and an "evolution acceleration chamber"
* Destroyed during [[Mewtwo]]'s escape

## Plot

### Reincarnation

The protagonist, while playing a Pokemon ROM hack on their phone in the real world, receives a notification about a Shadow Mewtwo raid in Pokemon GO. While rushing to participate in the raid, they are struck by a truck and killed. Their consciousness then awakens in the body of Amber, Dr. Fuji's daughter, who appears to have been successfully cloned in a laboratory tank.

### Mewtwo's Escape

Shortly after Amber's awakening, Mewtwo breaks free from its containment. The legendary Pokemon makes eye contact with Amber, showing a possible flicker of recognition before being attacked by an Arcanine. Mewtwo demonstrates its immense power by destroying much of the laboratory and escaping through the ceiling.

### Escape from the Laboratory

Dr. Fuji, overjoyed at Amber's apparent resurrection, protects her during Mewtwo's destructive escape. Despite sustaining injuries from falling debris, he guides Amber through the chaotic laboratory as systems fail and fires begin to spread. During their escape, they witness strange phenomena, including a partially-formed Kadabra variant glitching in and out of existence.

## Technology and Science

### Cloning Technology

The story features advanced cloning technology capable of recreating both Pokemon (like [[Mewtwo]]) and humans (like [[Amber (Reincarnated)]]). The cloning process appears to involve suspension in [[Amber Fluid (Reincarnation)]], a viscous orange-tinted substance that serves as both a medium for growth and potentially as a stabilizing agent.

### Genetic Engineering

Evidence of genetic manipulation is apparent throughout [[Dr. Fuji's Laboratory]]:

* Mewtwo itself is a genetically engineered Pokemon
* Enhancement Lab Three is mentioned during the chaos
* An evolution acceleration chamber is described as overloading
* A partially-formed Kadabra variant was seen glitching in and out of existence

## Mysteries and Unresolved Questions

* Why the protagonist was specifically reincarnated as Amber
* The nature of the "flicker of recognition" in Mewtwo's eyes when it made eye contact with Amber
* The full extent of Dr. Fuji's experiments and their purpose
* What Dr. Fuji means when he says they can "fix it all—our family, our life"
* The relationship between the glitching Kadabra variant and the laboratory's experiments

