# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Prepend the resolved ../backend directory (relative to the notebook's working
# directory) to sys.path so the `shuscribe.*` imports in later cells resolve
# against it — presumably where the `shuscribe` package lives.
# Path('../backend/shuscribe').resolve()
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from IPython.display import Markdown

# Load variables from a .env file (python-dotenv) before the keys are read.
load_dotenv()

# Indexing os.environ (instead of .get) fails fast with KeyError when a key is absent.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Model id per provider.
TEST_MODELS = dict(
    openai="gpt-4o-mini",
    anthropic="claude-3-5-haiku-20241022",
    gemini="gemini-2.0-flash-001",
)

# Model id per provider for thinking-mode runs (going by the constant's name).
TEST_THINKING_MODELS = dict(
    openai="o3-mini-2025-01-31",
    anthropic="claude-3-7-sonnet-20250219",
    gemini="gemini-2.0-flash-thinking-exp",
)

# Story fixture directory; cells below read _meta.yaml and chapter files from it
# and write their "<index>out" result folders into it.
STORY_DIR = Path("../backend/tests/resources/pokemon_amber")

# Thin awaitable wrapper used by the notebook cells around `stream(...)`.
async def run_async(coro):
    """Await ``coro`` and return whatever it produced."""
    outcome = await coro
    return outcome

In [3]:
# When True, the pipeline cells below first try to load a previously saved
# response from STORY_DIR / f"{CHAPTER_INDEX}out" instead of calling the model.
USE_CACHED_RESPONSES = False

In [4]:

# Streaming response
from typing import Type
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.llm import ThinkingConfig
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str, 
    model: str,
    messages: list[Message], 
    response_schema: Type[BaseOutputSchema] | None = None, 
    max_tokens: int | None = None,
    temp: float | None = None,
    thinking_config: ThinkingConfig | None = None,
    config: GenerationConfig | None = None
    ) -> StreamChunk | None:
    """Stream a generation to stdout and return the terminal chunk.

    Args:
        provider_name: Provider name passed through to ``session.generate_stream``.
        model: Model name passed through to ``session.generate_stream``; when it
            is empty, ``config.model`` is shown in the printed header instead.
        messages: Messages to send.
        response_schema: Optional schema set on the config built here.
        max_tokens: Optional ``max_output_tokens`` for the config built here.
        temp: Temperature for the config built here; ``None`` means 0.7.
        thinking_config: Optional thinking settings for the config built here.
        config: Ready-made generation config. When supplied, ``response_schema``,
            ``max_tokens``, ``temp`` and ``thinking_config`` are ignored.

    Returns:
        The last chunk received when its status is COMPLETE or ERROR; ``None``
        when the stream yielded no chunks or ended in any other status.
    """
    # Tracked explicitly: the loop variable is never bound if the stream is
    # empty, which previously raised UnboundLocalError after the loop.
    final_chunk: StreamChunk | None = None

    async with LLMSession.session_scope() as session:
        if config is None:
            config = GenerationConfig(
                # `is None` rather than `or`: an explicit temp of 0.0 must be kept.
                temperature=0.7 if temp is None else temp,
                response_schema=response_schema,
                max_output_tokens=max_tokens,
                thinking_config=thinking_config
            )

        print(f"{model if model else config.model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)
            final_chunk = chunk

    if final_chunk is not None and final_chunk.status in (StreamStatus.COMPLETE, StreamStatus.ERROR):
        return final_chunk
    return None


In [5]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Load story-level metadata. The encoding is explicit so the read does not
# depend on the platform's locale default.
with open(STORY_DIR / "_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)
    STORY_METADATA = StoryMetadata(
        title=meta.get('story_title'),
        description=meta.get('story_description'),
        genres=meta.get('genres'),
        additional_tags=meta.get('additional_tags')
    )

# Load every chapter file listed in the metadata, in order. A missing
# 'chapters' key is treated as an empty list.
CHAPTERS = []
for chapter in meta.get('chapters') or []:
    with open(STORY_DIR / chapter, "r", encoding="utf-8") as f:
        chapter_id = chapter.split('.')[0]
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            # Still best-effort, but no longer silent: a skipped chapter shifts
            # the positions in CHAPTERS that CHAPTER_INDEX refers to.
            print(f"Skipping chapter {chapter!r}: {type(exc).__name__}: {exc}")
            continue


# Summarization Pipeline

## Chapter [0]

In [6]:
# Set Provider Name
# NOTE(review): PROVIDER_NAME is not referenced by any cell visible here; the
# stream(...) calls below pass empty provider/model strings together with a
# template default_config — confirm whether this constant is still needed.
PROVIDER_NAME = "gemini"
# Position in CHAPTERS to process; also names the f"{CHAPTER_INDEX}out" output directory.
CHAPTER_INDEX = 0

# USE_CACHED_RESPONSES = False

### Chapter Summary

In [7]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = None

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "r") as f:
            summary_response = StreamChunk.model_validate_json(f.read())

# if we don't have a cached response, generate one
if not summary_response:
    summary_response = await run_async(stream(
        "", 
        "", 
        summary_messages, 
        config=templates.chapter.summary.default_config
        ))

print(summary_response.usage)
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "w") as f:
    f.write(summary_response.model_dump_json(indent=2))

chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)


gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## Isekai'd from Pokemon Go

*   The protagonist is playing a Pokemon game on their phone, struggling with a difficult battle in the Pokemon Mansion.
    *   Their Gyarados is on the verge of defeat against a Vulpix.
    *   The protagonist is worried about losing more Pokemon due to the game's perma-death rules.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym.
*   The protagonist saves their game and rushes to the library for the raid.
*   While crossing the street, the protagonist is hit by a truck. [!ALLUSION]
    *   Their last thought is about their Gyarados in the game.

## Rebirth in a Tank

*   The protagonist awakens in a tank filled with amber fluid. [!WORLD]
    *   They feel disoriented and struggle to breathe.
    *   They hear strange sounds and feel vibrations.
*   The tank shatters, and the protagonist is caught by a middle-aged man, who is revealed to be Dr. Fuji. [!CHARACTER]
    * 

Database module not implemented. Skipping save.


prompt_tokens=3791 completion_tokens=591


### Extract Entities

In [8]:
# Print the summary's prompt rendering (to_prompt()) to inspect it before entity extraction.
print(chapter_summary.to_prompt())

<Content>


## Isekai'd from Pokemon Go

*   The protagonist is playing a Pokemon game on their phone, struggling with a difficult battle in the Pokemon Mansion.
    *   Their Gyarados is on the verge of defeat against a Vulpix.
    *   The protagonist is worried about losing more Pokemon due to the game's perma-death rules.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym.
*   The protagonist saves their game and rushes to the library for the raid.
*   While crossing the street, the protagonist is hit by a truck. [!ALLUSION]
    *   Their last thought is about their Gyarados in the game.

## Rebirth in a Tank

*   The protagonist awakens in a tank filled with amber fluid. [!WORLD]
    *   They feel disoriented and struggle to breathe.
    *   They hear strange sounds and feel vibrations.
*   The tank shatters, and the protagonist is caught by a middle-aged man, who is revealed to be Dr. Fuji. [!CHARACTER]
    *   Dr. Fuji is overjoyed and ca

In [9]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
extract_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(extract_messages[-1].content)
extract_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "r") as f:
            extract_response = StreamChunk.model_validate_json(f.read())
            
# if we don't have a cached response, generate one
if not extract_response:
    extract_response = await run_async(stream(
        "", 
        "", 
        extract_messages, 
        config=templates.entity.extract.default_config
        ))
    
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "w") as f:
    f.write(extract_response.model_dump_json(indent=2))

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


gpt-4o-mini:
{"entities":[{"description":"A powerful Psychic-type Pokémon created through genetic engineering, known for its intelligence and abilities. In this chapter, it is depicted as raw and primal, wreaking havoc in Dr. Fuji's lab.","narrative_role":"Antagonist and a significant force of chaos in the laboratory, representing the consequences of Dr. Fuji's experiments.","significance_level":"Central","entity_type":"Character","identifier":"Mewtwo","aliases":["Shadow Mewtwo"] ,"related_entities":[]},{"description":"The protagonist, who has been reincarnated into the body of Dr. Fuji's deceased daughter, Amber. She retains her memories and experiences from her previous life but now navigates a new reality as a child.","narrative_role":"Protagonist of the story, experiencing the challenges of her new life and the implications of her reincarnation.","significance_level":"Central","entity_type":"Character","identifier":"Amber (Dr. Fuji's daughter)","aliases":["Protagonist","Reincarnate

Database module not implemented. Skipping save.


prompt_tokens=4623 completion_tokens=844


### Upsert Entities and Relationships

In [10]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl

# Select entities via filter_entities(EntitySigLvl.RELEVANT) — presumably those at
# or above the RELEVANT significance level; confirm against the schema — then
# print how many were kept, followed by each identifier.
ent_list = extracted_entities.filter_entities(EntitySigLvl.RELEVANT)
print(len(ent_list))
for entity in ent_list:
    print(entity.identifier)


6
Mewtwo
Amber (Dr. Fuji's daughter)
Dr. Fuji
Dr. Fuji's Lab
Truck-kun Incident
Chaotic Laboratory Environment


In [11]:
# for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT):
#     for entity in batch:
#         print(entity)


In [12]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl

upsert_entities = UpsertEntitiesOutSchema(entities=[])

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "r") as f:
            upsert_entities = UpsertEntitiesOutSchema.model_validate_json(f.read())

if len(upsert_entities.entities) == 0:
    upsert_entities = UpsertEntitiesOutSchema(entities=[])
    total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
    templates.entity.upsert.reload()        

    for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT, chunk_size=5):
        upsert_messages: list[Message] = templates.entity.upsert.format( 
            current_chapter=CHAPTERS[CHAPTER_INDEX],
            entity_batch=batch,
            story_metadata=STORY_METADATA,
            chapter_summary=chapter_summary,
        )

        upsert_response = await run_async(stream(
            "", 
            "", 
            upsert_messages, 
            config=templates.entity.upsert.default_config
            ))
        upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
        total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
        total_usage.completion_tokens += upsert_response.usage.completion_tokens
        print(upsert_response.usage)

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "w") as f:
    f.write(upsert_entities.model_dump_json(indent=2))
    
print(total_usage)

gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Mewtwo",
      "detailed_description": "*   A powerful Psychic-type Pokémon created through genetic engineering.\n*   Known for its intelligence and psychic abilities.\n*   Depicted as raw, primal, and terrifying in this chapter.\n*   Wreaks havoc in Dr. Fuji's lab after escaping its tank.\n*   Attacked by an Arcanine with flames.\n*   Briefly makes eye contact with the protagonist (Amber).\n*   Blasts through the ceiling to escape the lab.\n*   Its form is described as rippling with power that hasn't quite settled.\n*   Classified as the world’s strongest Pokémon in the games.",
      "narrative_role": "Antagonist and a significant force of chaos in the laboratory, representing the consequences of Dr. Fuji's experiments. Its escape and rampage drive the immediate conflict and danger.",
      "facts": [
        {
          "fact": "Mewtwo's appearance is raw and primal, rippling with unstab

Database module not implemented. Skipping save.


prompt_tokens=5374 completion_tokens=2090
gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Chaotic Laboratory Environment (Pokemon: Ambertwo)",
      "detailed_description": "*   The laboratory is in a state of chaos due to Mewtwo's rampage and the resulting damage.\n*   Alarms are blaring, and fires are spreading throughout the facility.\n*   Emergency lighting casts harsh red pulses, creating a tense and dangerous atmosphere.\n*   Equipment is destroyed, and debris litters the floor.\n*   The environment emphasizes the urgency of Dr. Fuji and Amber's escape and the high stakes of the situation.\n*   A partially-formed Kadabra variant glitches in and out of existence within the lab, adding to the surreal and chaotic nature of the scene.",
      "narrative_role": "Setting element that heightens the tension and stakes of the narrative, emphasizing the danger faced by the characters. It also showcases the destructive power of Mewtwo and the

Database module not implemented. Skipping save.


prompt_tokens=4961 completion_tokens=539
prompt_tokens=10335 completion_tokens=2629


In [13]:
# print(upsert_entities.model_dump_json(indent=2))

### Comprehensive Wiki (Story So Far)

In [14]:

from shuscribe.schemas.wikigen.story import WikiPage


templates.story.comprehensive_wiki.reload()
comprehensive_wiki_messages: list[Message] = templates.story.comprehensive_wiki.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    chapter_summary=chapter_summary,
    key_entities=upsert_entities,
    
    story_metadata=STORY_METADATA,
    # summary_so_far=summary_so_far,
    # recent_summaries=recent_summaries_prompt,
)

# print(comprehensive_summary_messages[-1].content)
comprehensive_wiki_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml", "r") as f:
            comprehensive_wiki_response = StreamChunk.model_validate_json(f.read())

if not comprehensive_wiki_response:
    comprehensive_wiki_response = await run_async(stream(
        "", 
        "", 
        comprehensive_wiki_messages, 
        config=templates.story.comprehensive_wiki.default_config
        ))

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml", "w") as f:
    f.write(comprehensive_wiki_response.model_dump_json(indent=2))

print(comprehensive_wiki_response.usage)
comprehensive_wiki = WikiPage.from_wiki_content("Comprehensive Wiki Page", comprehensive_wiki_response.accumulated_text)


claude-3-7-sonnet-20250219:
<ANTHROPIC_THINKING>I need to create a cohesive wiki document for the Pokemon: Ambertwo story, integrating the new information from Chapter 1 with any existing information. Since this appears to be the first chapter, I'll be creating the wiki document essentially from scratch.

From the new information, I understand that:

1. The protagonist was playing Pokemon games in the real world and got hit by a truck while rushing to a Shadow Mewtwo raid in Pokemon Go.
2. They've been reincarnated into the Pokemon world as Amber, the daughter of Dr. Fuji.
3. They've awakened in a laboratory where Mewtwo has just been created and is causing havoc.
4. Dr. Fuji is overjoyed to see "Amber" alive, indicating the real Amber may have died previously.

I'll structure the wiki with main sections for:
1. Overview/Setting
2. Main Characters
3. Plot Summary/Narrative
4. Key Locations
5. Significant Events

Let me craft this wiki document now, making sure to use the proper wiki-li

Database module not implemented. Skipping save.


prompt_tokens=7640 completion_tokens=1421


In [15]:
# Render the generated wiki page content as Markdown. Keep this as the last
# expression in the cell so the notebook displays it.
Markdown(comprehensive_wiki.content)




# Pokemon: Ambertwo

## Overview

Pokemon: Ambertwo follows the journey of a Pokemon fan who finds themselves reincarnated in the Pokemon world as [[Amber (Dr. Fuji's daughter)]]. After being struck by a truck while rushing to a Shadow Mewtwo raid in Pokemon Go, the protagonist awakens in [[Dr. Fuji's Lab]] to discover they've been reincarnated as the daughter of the renowned scientist. Their arrival coincides with the chaotic escape of [[Mewtwo]], setting in motion a series of events that will reshape their understanding of this new world.

## Main Characters

### Amber

[[Amber (Dr. Fuji's daughter)]] is the protagonist of the story, a former Pokemon fan who has been reincarnated into the body of Dr. Fuji's deceased daughter. Retaining memories from her previous life, Amber must navigate her new existence in a child's body while coming to terms with the implications of her reincarnation. Her first moments in this new world are marked by confusion and disorientation as she witnesses the chaotic escape of Mewtwo from the laboratory.

### Dr. Fuji

[[Dr. Fuji]] is a middle-aged scientist who has successfully cloned his deceased daughter, Amber. Overwhelmed with joy at seeing her alive, he immediately acts to protect her during Mewtwo's destructive escape from the laboratory. Despite suffering injuries from falling debris, his primary concern remains Amber's safety. His comments suggest a troubled past and a desire to rebuild his family, indicating deeper motivations behind his scientific work.

### Mewtwo

[[Mewtwo]] is a powerful Psychic-type Pokemon created through genetic engineering in Dr. Fuji's laboratory. Its awakening and subsequent escape serve as the catalyst for the initial chaos in the story. Described as raw, primal, and terrifyingly real, this Mewtwo transcends the pixelated versions familiar to the protagonist from games. Its brief moment of eye contact with Amber hints at a possible connection or recognition between them.

## Setting

### Dr. Fuji's Laboratory

[[Dr. Fuji's Lab]] is an advanced scientific facility dedicated to genetic experimentation, located beneath a mansion-like estate. The laboratory contains multiple tanks filled with amber fluid, including the one where Amber awakens. The facility houses various specialized areas, including an Enhancement Lab Three and an evolution acceleration chamber. Following Mewtwo's escape, the laboratory descends into [[Chaotic Laboratory Environment (Pokemon: Ambertwo)]], with blaring alarms, spreading fires, and damaged equipment.

### The Pokemon World

The story is set in a version of the Pokemon world that appears to combine elements from various Pokemon media. This world is notably different from the games the protagonist played in their previous life, as Pokemon are real, dangerous beings rather than digital entities confined to screens. The full extent and nature of this world remain to be explored as Amber begins her journey.

## Plot

### Reincarnation

The story begins in the real world with the protagonist playing a challenging Pokemon game on their phone. Upon receiving notification about a Shadow Mewtwo raid nearby, they save their game and rush toward the library. This haste leads to the [[Truck-kun Incident]], where they are struck by a truck while crossing the street, with their final thoughts being about their Gyarados in the game.

### Awakening

The protagonist regains consciousness in a tank filled with amber fluid within Dr. Fuji's laboratory. Disoriented and confused, they soon realize they've been reincarnated in the body of Dr. Fuji's daughter, Amber. Their awakening coincides with Mewtwo's violent escape from containment, creating immediate peril as the legendary Pokemon wreaks havoc throughout the facility.

### Escape

As the laboratory descends into chaos, Dr. Fuji carries Amber through the destruction, shielding her from falling debris despite sustaining injuries himself. They navigate through the facility's corridors, encountering panicked scientists and security personnel attempting to manage the crisis. Dr. Fuji's determination to protect Amber remains unwavering as they flee the spreading fires and destruction, with him expressing that "everything will be different now" and that they can "fix their family and life."

## Significant Events

### Mewtwo's Escape

Mewtwo's violent awakening and escape from the laboratory marks the first major event of the story. Its psychic powers cause extensive damage to the facility, shattering glass, destroying equipment, and eventually blasting through the ceiling to freedom. The brief moment of eye contact between Mewtwo and Amber suggests a possible connection that may be explored in future developments.

### Laboratory Destruction

The aftermath of Mewtwo's escape leaves the laboratory in ruins, with structural damage, fires, and failing systems. The chaotic environment is further unsettled by the appearance of a partially-formed Kadabra variant that glitches in and out of existence, hinting at the unstable nature of the genetic experiments conducted in the facility. The destruction forces Amber and Dr. Fuji to flee, setting them on their journey beyond the laboratory.

