# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Path('../backend/shuscribe').resolve()
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from IPython.display import Markdown

load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

TEST_MODELS ={
    "openai": "gpt-4o-mini",
    "anthropic": "claude-3-5-haiku-20241022",
    "gemini": "gemini-2.0-flash-001"
}

TEST_THINKING_MODELS = {
    "openai": "o3-mini-2025-01-31",
    "anthropic": "claude-3-7-sonnet-20250219",
    "gemini": "gemini-2.0-flash-thinking-exp"
}

STORY_DIR = Path("../backend/tests/resources/pokemon_amber")

# Helper function to run async code in notebook
async def run_async(coro):
    return await coro

In [3]:
# Streaming response
from typing import Type
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.llm import ThinkingConfig
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str, 
    model: str,
    messages: list[Message], 
    response_schema: Type[BaseOutputSchema] | None = None, 
    max_tokens: int | None = None,
    temp: float | None = None,
    thinking_config: ThinkingConfig | None = None,
    config: GenerationConfig | None = None
    ) -> StreamChunk | None:
    async with LLMSession.session_scope() as session:
        # Create a streaming config
        if not config:
            config = GenerationConfig(
                temperature=temp or 0.7,
                response_schema=response_schema if response_schema else None,
                max_output_tokens=max_tokens,
                thinking_config=thinking_config
            )
        
        print(f"{model if model else config.model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)

    if chunk:
        if chunk.status in (StreamStatus.COMPLETE, StreamStatus.ERROR):
            return chunk
    else:
        return None


In [4]:
from shuscribe.schemas.wikigen.entity import TempEntityDB

entity_db = TempEntityDB()

!!!!!!!!!!!!megablocks not available, using torch.matmul instead
<All keys matched successfully>


In [5]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

with open(STORY_DIR / "story" / "_meta.yaml", "r") as f:
    meta = yaml.safe_load(f)
    STORY_METADATA = StoryMetadata(
        title=meta.get('story_title'),
        description=meta.get('story_description'),
        genres=meta.get('genres'),
        additional_tags=meta.get('additional_tags')
    )

CHAPTERS = []
for chapter in meta.get('chapters'):
    with open(STORY_DIR / "story" / chapter, "r") as f:
        chapter_id = chapter.split('.')[0]
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception:
            continue


# Summarization Pipeline

## Chapter [0]

In [6]:
# Set Provider Name
PROVIDER_NAME = "gemini"
CHAPTER_INDEX = 0

USE_CACHED_RESPONSES = True

### Chapter Summary

In [7]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = None

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json", "r") as f:
            summary_response = StreamChunk.model_validate_json(f.read())

# if we don't have a cached response, generate one
if not summary_response:
    summary_response = await run_async(stream(
        "", 
        "", 
        summary_messages, 
        config=templates.chapter.summary.default_config
        ))

print(summary_response.usage)
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.json", "w") as f:
    f.write(summary_response.model_dump_json(indent=2))

chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)


gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## The Gamer's Last Stand

*   The protagonist, a dedicated Pokémon player, is on the verge of losing a crucial battle in the Pokémon Mansion on an emulator.
    *   Her Gyarados is at critical health against a Vulpix.
    *   She contemplates switching Pokémon but faces unfavorable matchups.
*   A Discord notification alerts her to a Shadow Mewtwo raid at the library gym on campus.
    *   The event is happening with a limited time window.
    *   The protagonist decides to prioritize the raid over her emulator battle and saves her game.
*   Rushing to the library, she disregards traffic signals, focused on reaching the raid in time.
*   She is struck by a truck while crossing the street, her last thought being about her Gyarados.

## Rebirth in Amber

*   The protagonist awakens in a tank filled with amber fluid, disoriented and unable to breathe properly. [!WORLD]
    *   She experiences distorted sounds and vibrations.
    *   The tank shat

Database module not implemented. Skipping save.


prompt_tokens=3791 completion_tokens=603


### Extract Entities

In [8]:
print(chapter_summary.to_prompt())

<Content>


## The Gamer's Last Stand

*   The protagonist, a dedicated Pokémon player, is on the verge of losing a crucial battle in the Pokémon Mansion on an emulator.
    *   Her Gyarados is at critical health against a Vulpix.
    *   She contemplates switching Pokémon but faces unfavorable matchups.
*   A Discord notification alerts her to a Shadow Mewtwo raid at the library gym on campus.
    *   The event is happening with a limited time window.
    *   The protagonist decides to prioritize the raid over her emulator battle and saves her game.
*   Rushing to the library, she disregards traffic signals, focused on reaching the raid in time.
*   She is struck by a truck while crossing the street, her last thought being about her Gyarados.

## Rebirth in Amber

*   The protagonist awakens in a tank filled with amber fluid, disoriented and unable to breathe properly. [!WORLD]
    *   She experiences distorted sounds and vibrations.
    *   The tank shatters, and she is caught by a l

In [9]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
extract_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(extract_messages[-1].content)
extract_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json", "r") as f:
            extract_response = StreamChunk.model_validate_json(f.read())
            
# if we don't have a cached response, generate one
if not extract_response:
    extract_response = await run_async(stream(
        "", 
        "", 
        extract_messages, 
        config=templates.entity.extract.default_config
        ))
    
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.json", "w") as f:
    f.write(extract_response.model_dump_json(indent=2))

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


gpt-4o-mini:
{"entities":[{"description":"A powerful Psychic-type Pokémon known for its intelligence and abilities, Mewtwo is a central figure in the story, representing both a threat and a pivotal element of the protagonist's new reality.","narrative_role":"Mewtwo serves as a catalyst for the protagonist's awakening and the chaos that ensues in the laboratory, embodying the conflict between human ambition and the wild nature of Pokémon.","significance_level":"Central","entity_type":"Character","identifier":"Mewtwo","aliases":["Shadow Mewtwo"] ,"related_entities":[]},{"description":"The protagonist of the story, who has been reincarnated into the body of Dr. Fuji's deceased daughter, Amber. She is a dedicated Pokémon player who has now found herself in a new and dangerous reality.","narrative_role":"Amber's journey begins with her awakening in a lab, facing the consequences of her past life and navigating her new identity and powers.","significance_level":"Central","entity_type":"Chara

Database module not implemented. Skipping save.


prompt_tokens=4635 completion_tokens=902


### Search Entity DB



In [10]:
# TODO
entity_db.search("What is the main character's name?")

[]

In [11]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl

ent_list = extracted_entities.filter_entities(EntitySigLvl.RELEVANT)
print(len(ent_list))
for entity in ent_list:
    print(entity.identifier)


8
Mewtwo
Amber (formerly the protagonist)
Dr. Fuji
Dr. Fuji's Laboratory
Shadow Mewtwo Raid
Arcanine
Gyarados
Rebirth and Identity


### Upsert Entities and Relationships

In [12]:
# for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT):
#     for entity in batch:
#         print(entity)


In [13]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl

upsert_entities = UpsertEntitiesOutSchema(entities=[])

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json", "r") as f:
            upsert_entities = UpsertEntitiesOutSchema.model_validate_json(f.read())

if len(upsert_entities.entities) == 0:
    upsert_entities = UpsertEntitiesOutSchema(entities=[])
    total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
    templates.entity.upsert.reload()        

    for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT, chunk_size=5):
        upsert_messages: list[Message] = templates.entity.upsert.format( 
            current_chapter=CHAPTERS[CHAPTER_INDEX],
            entity_batch=batch,
            story_metadata=STORY_METADATA,
            chapter_summary=chapter_summary,
        )

        upsert_response = await run_async(stream(
            "", 
            "", 
            upsert_messages, 
            config=templates.entity.upsert.default_config
            ))
        upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
        total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
        total_usage.completion_tokens += upsert_response.usage.completion_tokens
        print(upsert_response.usage)
    print(total_usage)

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.json", "w") as f:
    f.write(upsert_entities.model_dump_json(indent=2))
    

gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": "Mewtwo",
      "identifier": "Mewtwo (Pokemon: Ambertwo)",
      "detailed_description": "*   A powerful Psychic-type Pokémon known for its intelligence and psychic abilities.\n*   Created in Dr. Fuji's laboratory.\n*   Breaks free from its container and causes significant damage to the lab.\n*   Encounters Amber (the protagonist in Amber's body) and seems to recognize her.\n*   Attacked by an Arcanine with flames.\n*   Escapes through the ceiling after blasting a hole with psychic energy.\n*   Its form is described as raw, primal, and terrifyingly real, existing outside the natural order.\n*   Classified as the world’s strongest Pokémon in the games.",
      "narrative_role": "Mewtwo serves as a catalyst for the protagonist's awakening and the chaos that ensues in the laboratory, embodying the conflict between human ambition and the wild nature of Pokémon. Its escape sets the stage for future conflicts and plot dev

Database module not implemented. Skipping save.


prompt_tokens=5715 completion_tokens=2120
gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": "Arcanine",
      "identifier": "Arcanine (Security Guard's)",
      "detailed_description": "*   A Pok\n\n    *   A Pok\n\n    *   A Pok\n\n    *   A Pok\n\n    *   A Pokémon species known for its loyalty and power.\n*   Employed by security at Dr. Fuji's lab.\n*   Used in an attempt to contain Mewtwo after its escape from its tank.\n*   Attacks Mewtwo with a stream of flames, briefly knocking it off balance.",
      "narrative_role": "Arcanine represents the human effort to contain Mewtwo's power and the dangers posed by legendary Pokémon.",
      "facts": [],
      "removed_facts": [],
      "entity_types": [
        "Character"
      ],
      "aliases": [
        "Arcanine"
      ],
      "related_entities": [
        {
          "target_entity_id": "Mewtwo",
          "description": "Arcanine attempts to battle Mewtwo.",
          "relationship_type": "Causal",
         

Database module not implemented. Skipping save.


prompt_tokens=5372 completion_tokens=973
prompt_tokens=11087 completion_tokens=3093


In [14]:
print(upsert_entities.model_dump_json(indent=2))

{
  "entities": [
    {
      "old_identifier": "Mewtwo",
      "identifier": "Mewtwo (Pokemon: Ambertwo)",
      "detailed_description": "*   A powerful Psychic-type Pokémon known for its intelligence and psychic abilities.\n*   Created in Dr. Fuji's laboratory.\n*   Breaks free from its container and causes significant damage to the lab.\n*   Encounters Amber (the protagonist in Amber's body) and seems to recognize her.\n*   Attacked by an Arcanine with flames.\n*   Escapes through the ceiling after blasting a hole with psychic energy.\n*   Its form is described as raw, primal, and terrifyingly real, existing outside the natural order.\n*   Classified as the world’s strongest Pokémon in the games.",
      "narrative_role": "Mewtwo serves as a catalyst for the protagonist's awakening and the chaos that ensues in the laboratory, embodying the conflict between human ambition and the wild nature of Pokémon. Its escape sets the stage for future conflicts and plot developments.",
      "fa

### Update Entity DB

In [15]:
entity_db.upsert(upsert_entities.entities)
entity_db.size()

8

In [16]:
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "entity_db.json", "w") as f:
    f.write(entity_db.entities_db.model_dump_json(indent=2))

os.makedirs(STORY_DIR / "latest_out", exist_ok=True)
with open(STORY_DIR / "latest_out" / "entity_db.json", "w") as f:
    f.write(entity_db.entities_db.model_dump_json(indent=2))

FileNotFoundError: [Errno 2] No such file or directory: '../backend/tests/resources/pokemon_amber/latest_out/entity_db.json'

### Story So Far Summary

In [22]:

from shuscribe.schemas.wikigen.wiki import WikiPage


templates.story.comprehensive_wiki.reload()
comprehensive_wiki_messages: list[Message] = templates.story.comprehensive_wiki.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    chapter_summary=chapter_summary,
    key_entities=upsert_entities,
    
    story_metadata=STORY_METADATA,
    # summary_so_far=summary_so_far,
    # recent_summaries=recent_summaries_prompt,
)

# print(comprehensive_summary_messages[-1].content)
comprehensive_wiki_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json", "r") as f:
            comprehensive_wiki_response = StreamChunk.model_validate_json(f.read())

if not comprehensive_wiki_response:
    comprehensive_wiki_response = await run_async(stream(
        "", 
        "", 
        comprehensive_wiki_messages, 
        config=templates.story.comprehensive_wiki.default_config
        ))

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.json", "w") as f:
    f.write(comprehensive_wiki_response.model_dump_json(indent=2))

print(comprehensive_wiki_response.usage)
comprehensive_wiki = WikiPage.from_wiki_content("Comprehensive Wiki Page", comprehensive_wiki_response.accumulated_text)


claude-3-7-sonnet-20250219:
<ANTHROPIC_THINKING>Let me begin by understanding what I already have and what I need to integrate:

1. I have new information from the latest chapter (Chapter 1: Truck-kun Strikes Again), which details how the protagonist was playing a Pokemon game, then died while crossing the street to participate in a Pokemon GO raid, and woke up in Dr. Fuji's lab as his daughter Amber, witnessing the escape of Mewtwo.

2. I need to create a comprehensive wiki document that integrates this information seamlessly, as if the wiki was written from scratch.

3. The document needs to have proper sections and hierarchical markdown headings, using wiki-links to reference key entities.

Since this is the first chapter, I'm essentially creating the wiki from scratch. I'll need to organize the information into logical sections that will set up the structure for future updates.

For this wiki, I think the following main sections make sense:
- Overview (brief introduction to the sto

Database module not implemented. Skipping save.


prompt_tokens=9023 completion_tokens=1853


In [25]:
# display markdown
Markdown(comprehensive_wiki.content)




## Overview

**Pokemon: Ambertwo** is a story that follows the journey of a Pokemon fan who is reincarnated into the Pokemon world as Dr. Fuji's daughter, Amber. After being struck by a truck while rushing to a Pokemon GO raid, the protagonist awakens in a laboratory tank, discovering she now inhabits the body of what appears to be a successfully cloned version of Amber Fuji. As she navigates this new reality—a world that combines elements from various Pokemon media—she must come to terms with her new identity and the chaos surrounding her rebirth, including the escape of the legendary [[Mewtwo (Legendary Pokémon)]].

## The World

### Reality and Fiction

The story takes place in a version of the Pokemon world that appears to be a composite of various Pokemon media, including games, anime, and possibly other sources. Unlike the games, Pokemon in this world are physically real and potentially dangerous, as evidenced by [[Mewtwo (Legendary Pokémon)]]'s raw, primal presence and destructive capabilities.

### Scientific Advancements

The world features advanced genetic engineering capabilities, allowing for the cloning of both humans and Pokemon. This technology appears to be experimental and dangerous, as seen in [[Dr. Fuji (Scientist)]]'s laboratory. The existence of partially-formed Pokemon variants, such as the glitching Kadabra seen in the lab, suggests ongoing experimentation with Pokemon evolution and creation.

## Characters

### Main Characters

#### [[Amber (Dr. Fuji's Daughter)]]

The protagonist of the story, a Pokemon fan reincarnated into the body of Dr. Fuji's deceased daughter. After dying in her original world, she awakens in a tank filled with amber fluid in Dr. Fuji's laboratory. She retains her memories and awareness from her previous life while adapting to her new identity and child's body. Her sudden return to life brings joy to Dr. Fuji, who believes his daughter has been successfully revived.

#### [[Dr. Fuji (Scientist)]]

A middle-aged scientist specializing in genetic engineering and cloning. He is the creator of [[Mewtwo (Legendary Pokémon)]] and the father of [[Amber (Dr. Fuji's Daughter)]]. Upon seeing Amber alive, he is overcome with emotion, suggesting a deep connection to his daughter and possibly indicating that his scientific pursuits were motivated by a desire to bring her back. Despite being injured during Mewtwo's escape, his primary concern remains Amber's safety.

### Other Characters

#### [[Mewtwo (Legendary Pokémon)]]

A powerful Psychic-type Pokemon created through genetic engineering in [[Dr. Fuji (Scientist)]]'s laboratory. Unlike its depiction in games, this Mewtwo appears raw and primal, with unbridled power that hasn't fully settled. During its escape from the lab, it makes brief eye contact with [[Amber (Dr. Fuji's Daughter)]], suggesting a possible connection or recognition. It demonstrates tremendous destructive capabilities, blasting through the reinforced concrete ceiling after being attacked by an [[Arcanine (Security Pokémon)]].

#### [[Arcanine (Security Pokémon)]]

A security Pokemon at [[Dr. Fuji (Scientist)]]'s laboratory. It attempts to defend against [[Mewtwo (Legendary Pokémon)]]'s escape by attacking it with Flamethrower.

## Plot Summary

### Isekai Event

The protagonist, a dedicated Pokemon player, is in the middle of a challenging battle in the [[Pokemon Mansion (Pokemon Emulator Game)]] on an emulator when she receives a notification about a Shadow Mewtwo raid happening at her campus library in Pokemon GO. After saving her game state—her level 40 [[Gyarados (Protagonist's Pokémon)]] on the verge of fainting against a Vulpix—she rushes to join the raid. While crossing a street, she is struck by a truck and killed, her final thoughts centered on her Gyarados waiting for a command that would never come.

### Awakening in the Pokemon World

The protagonist regains consciousness in a container filled with amber fluid, experiencing sensations filtered through the thick substance. When the container shatters, she is caught by a middle-aged man later identified as [[Dr. Fuji (Scientist)]]. She quickly realizes she is in the body of a young girl named Amber, Dr. Fuji's daughter, who appears to have been successfully cloned.

### Mewtwo's Escape

As the protagonist adjusts to her new reality, she witnesses [[Mewtwo (Legendary Pokémon)]] wreaking havoc in the laboratory. The legendary Pokemon, appearing far more intimidating than its game counterpart, makes eye contact with her before being attacked by a security guard's [[Arcanine (Security Pokémon)]]. After being struck by a Flamethrower, Mewtwo blasts through the ceiling and escapes, causing significant damage to the laboratory.

### Fleeing the Destruction

[[Dr. Fuji (Scientist)]] prioritizes [[Amber (Dr. Fuji's Daughter)]]'s safety, shielding her from falling debris despite sustaining injuries himself. As they navigate through the damaged laboratory, they witness other genetic experiments, including a partially-formed Kadabra variant that briefly glitches into existence. The laboratory is revealed to be part of a larger mansion complex, with scientists and security personnel in various states of panic. As fires begin to spread through the facility, Dr. Fuji tells Amber that everything will be different now and that they can "fix it all"—their family and their life.

## Locations

### [[Dr. Fuji's Lab (Ambertwo)]]

A sophisticated research facility dedicated to genetic engineering and cloning experiments. The laboratory contains specialized equipment for these purposes, including tanks filled with amber fluid used in the cloning process. The lab is connected to a mansion with a refined interior, featuring wood paneling and ornate light fixtures. The complex houses multiple specialized research areas, including Enhancement Lab Three and an evolution acceleration chamber. The facility is severely damaged during [[Mewtwo (Legendary Pokémon)]]'s escape.

### [[Pokemon Mansion (Pokemon Emulator Game)]]

A location in the Pokemon game the protagonist was playing before her death. Described as a maze of broken tiles and forgotten experiments, it was the site of her final battle, where her [[Gyarados (Protagonist's Pokémon)]] was about to be defeated by a Vulpix.

