# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the backend directory (resolved against the current working directory)
# first on sys.path so the `shuscribe` imports in later cells can be found.
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from IPython.display import Markdown

# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()
# os.environ[...] raises KeyError for a missing variable, so all three
# provider keys must be defined before this cell can run.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Default (non-reasoning) model id per provider name.
TEST_MODELS = dict(
    openai="gpt-4o-mini",
    anthropic="claude-3-5-haiku-20241022",
    gemini="gemini-2.0-flash-001",
)

# Reasoning / "thinking" model id per provider name.
TEST_THINKING_MODELS = dict(
    openai="o3-mini-2025-01-31",
    anthropic="claude-3-7-sonnet-20250219",
    gemini="gemini-2.0-flash-thinking-exp",
)

# Story fixture directory, relative to the notebook's working directory.
STORY_DIR = Path("..") / "backend" / "tests" / "resources" / "pokemon_amber"

# Thin awaiting wrapper used by the notebook cells around stream() calls
async def run_async(coro):
    """Await ``coro`` and hand back whatever it produced.

    Exceptions raised by the awaited object propagate unchanged.
    """
    outcome = await coro
    return outcome

In [3]:
# When True, the pipeline cells below first try to load a previously saved
# response from STORY_DIR / f"{CHAPTER_INDEX}out" and only call the LLM when
# nothing usable was loaded. When False, every cell calls the LLM again.
USE_CACHED_RESPONSES = False

In [4]:

# Streaming response
from typing import Type
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.llm import ThinkingConfig
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str, 
    model: str,
    messages: list[Message], 
    response_schema: Type[BaseOutputSchema] | None = None, 
    max_tokens: int | None = None,
    temp: float | None = None,
    thinking_config: ThinkingConfig | None = None,
    config: GenerationConfig | None = None
    ) -> StreamChunk | None:
    """Stream a generation to stdout and return the final chunk.

    Args:
        provider_name: Forwarded to ``session.generate_stream`` as ``provider_name``.
        model: Forwarded to ``session.generate_stream`` as ``model``. When empty,
            ``config.model`` is printed as the header instead.
        messages: Conversation to send.
        response_schema: Used only when ``config`` is not supplied.
        max_tokens: Used only when ``config`` is not supplied (``max_output_tokens``).
        temp: Used only when ``config`` is not supplied. ``None`` means 0.7;
            an explicit ``0.0`` is respected.
        thinking_config: Used only when ``config`` is not supplied.
        config: Ready-made generation config. When given, the four
            parameters above are ignored.

    Returns:
        The last chunk yielded by the stream if its status is ``COMPLETE`` or
        ``ERROR``; otherwise ``None`` (including when the stream yields nothing).
    """
    # Bound before the loop so an empty stream cannot leave the name undefined.
    last_chunk: StreamChunk | None = None

    async with LLMSession.session_scope() as session:
        if config is None:
            config = GenerationConfig(
                # `temp or 0.7` would silently replace a deliberate 0.0 with 0.7.
                temperature=0.7 if temp is None else temp,
                response_schema=response_schema,
                max_output_tokens=max_tokens,
                thinking_config=thinking_config
            )

        print(f"{model if model else config.model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)
            last_chunk = chunk

    # Only a terminal chunk is handed back to the caller.
    if last_chunk is not None and last_chunk.status in (StreamStatus.COMPLETE, StreamStatus.ERROR):
        return last_chunk
    return None


In [None]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Story fixtures contain non-ASCII text, so read them as UTF-8 explicitly
# instead of relying on the platform default encoding.
with open(STORY_DIR / "story" / "_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)

STORY_METADATA = StoryMetadata(
    title=meta.get('story_title'),
    description=meta.get('story_description'),
    genres=meta.get('genres'),
    additional_tags=meta.get('additional_tags')
)

CHAPTERS = []
# `or []` keeps a metadata file without a 'chapters' key from raising TypeError.
for chapter in meta.get('chapters') or []:
    # Chapter id is the file name up to its first dot.
    chapter_id = chapter.split('.')[0]
    with open(STORY_DIR / "story" / chapter, "r", encoding="utf-8") as f:
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            # Still best-effort, but report the skip: a silently dropped
            # chapter shifts what CHAPTERS[CHAPTER_INDEX] refers to.
            print(f"Skipping chapter {chapter!r}: {exc!r}")
            continue

if not CHAPTERS:
    print("Warning: no chapters were loaded from the story metadata.")


# Summarization Pipeline

## Chapter [0]

In [6]:
# Set Provider Name
# NOTE(review): PROVIDER_NAME is not referenced by the cells visible in this
# notebook; they call stream() with an empty provider name and rely on each
# template's default_config instead. Confirm whether it is still needed.
PROVIDER_NAME = "gemini"
# Index into CHAPTERS of the chapter being processed; also used to name the
# f"{CHAPTER_INDEX}out" output folder under STORY_DIR.
CHAPTER_INDEX = 0

# USE_CACHED_RESPONSES = False

### Chapter Summary

In [7]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = None

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "r") as f:
            summary_response = StreamChunk.model_validate_json(f.read())

# if we don't have a cached response, generate one
if not summary_response:
    summary_response = await run_async(stream(
        "", 
        "", 
        summary_messages, 
        config=templates.chapter.summary.default_config
        ))

print(summary_response.usage)
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "w") as f:
    f.write(summary_response.model_dump_json(indent=2))

chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)


gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## Isekai'd from Pokemon Go

*   The protagonist is playing a Pokemon game on their phone, struggling with a difficult battle in the Pokemon Mansion.
    *   Their Gyarados is on the verge of defeat against a Vulpix.
    *   The protagonist is worried about losing more Pokemon due to the game's perma-death rules.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym.
*   The protagonist saves their game and rushes to the library for the raid.
*   While crossing the street, the protagonist is hit by a truck. [!ALLUSION]
    *   Their last thought is about their Gyarados in the game.

## Rebirth in a Tank

*   The protagonist awakens in a tank filled with amber fluid. [!WORLD]
    *   They feel disoriented and struggle to breathe.
    *   They hear strange sounds and feel vibrations.
*   The tank shatters, and the protagonist is caught by a middle-aged man, who is revealed to be Dr. Fuji. [!CHARACTER]
    * 

Database module not implemented. Skipping save.


prompt_tokens=3791 completion_tokens=591


### Extract Entities

In [8]:
# Inspect what chapter_summary.to_prompt() renders for the parsed summary
# (presumably the form embedded in later prompts — confirm in ChapterSummary).
print(chapter_summary.to_prompt())

<Content>


## Isekai'd from Pokemon Go

*   The protagonist is playing a Pokemon game on their phone, struggling with a difficult battle in the Pokemon Mansion.
    *   Their Gyarados is on the verge of defeat against a Vulpix.
    *   The protagonist is worried about losing more Pokemon due to the game's perma-death rules.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym.
*   The protagonist saves their game and rushes to the library for the raid.
*   While crossing the street, the protagonist is hit by a truck. [!ALLUSION]
    *   Their last thought is about their Gyarados in the game.

## Rebirth in a Tank

*   The protagonist awakens in a tank filled with amber fluid. [!WORLD]
    *   They feel disoriented and struggle to breathe.
    *   They hear strange sounds and feel vibrations.
*   The tank shatters, and the protagonist is caught by a middle-aged man, who is revealed to be Dr. Fuji. [!CHARACTER]
    *   Dr. Fuji is overjoyed and ca

In [9]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
extract_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(extract_messages[-1].content)
extract_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "r") as f:
            extract_response = StreamChunk.model_validate_json(f.read())
            
# if we don't have a cached response, generate one
if not extract_response:
    extract_response = await run_async(stream(
        "", 
        "", 
        extract_messages, 
        config=templates.entity.extract.default_config
        ))
    
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "w") as f:
    f.write(extract_response.model_dump_json(indent=2))

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


gpt-4o-mini:
{"entities":[{"description":"A powerful Psychic-type Pokémon created through genetic engineering, known for its intelligence and abilities. In this chapter, it is depicted as raw and primal, wreaking havoc in Dr. Fuji's lab.","narrative_role":"Antagonist and a significant force of chaos in the laboratory, representing the consequences of Dr. Fuji's experiments.","significance_level":"Central","entity_type":"Character","identifier":"Mewtwo","aliases":["Shadow Mewtwo"] ,"related_entities":[]},{"description":"The protagonist, who has been reincarnated into the body of Dr. Fuji's deceased daughter, Amber. She retains her memories and experiences from her previous life but now navigates a new reality as a child.","narrative_role":"Protagonist of the story, experiencing the challenges of her new life and the implications of her reincarnation.","significance_level":"Central","entity_type":"Character","identifier":"Amber (Dr. Fuji's daughter)","aliases":["Protagonist","Reincarnate

Database module not implemented. Skipping save.


prompt_tokens=4623 completion_tokens=844


### Upsert Entities and Relationships

In [10]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl

# Entities kept by the RELEVANT filter: show the count, then one identifier per line.
ent_list = extracted_entities.filter_entities(EntitySigLvl.RELEVANT)
print(len(ent_list))
identifiers = (ent.identifier for ent in ent_list)
for identifier in identifiers:
    print(identifier)


6
Mewtwo
Amber (Dr. Fuji's daughter)
Dr. Fuji
Dr. Fuji's Lab
Truck-kun Incident
Chaotic Laboratory Environment


In [11]:
# for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT):
#     for entity in batch:
#         print(entity)


In [16]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl

upsert_entities = UpsertEntitiesOutSchema(entities=[])

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "r") as f:
            upsert_entities = UpsertEntitiesOutSchema.model_validate_json(f.read())

if len(upsert_entities.entities) == 0:
    upsert_entities = UpsertEntitiesOutSchema(entities=[])
    total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
    templates.entity.upsert.reload()        

    for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT, chunk_size=5):
        upsert_messages: list[Message] = templates.entity.upsert.format( 
            current_chapter=CHAPTERS[CHAPTER_INDEX],
            entity_batch=batch,
            story_metadata=STORY_METADATA,
            chapter_summary=chapter_summary,
        )

        upsert_response = await run_async(stream(
            "", 
            "", 
            upsert_messages, 
            config=templates.entity.upsert.default_config
            ))
        upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
        total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
        total_usage.completion_tokens += upsert_response.usage.completion_tokens
        print(upsert_response.usage)

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "w") as f:
    f.write(upsert_entities.model_dump_json(indent=2))
    
print(total_usage)

gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Mewtwo",
      "detailed_description": "*   A powerful Psychic-type Pokémon created through genetic engineering.\n*   Known for its immense psychic abilities and intelligence.\n*   Depicted as raw, primal, and terrifying in its initial appearance after being released from its tank.\n*   Wreaks havoc in Dr. Fuji's lab upon awakening.\n*   Classified as the world's strongest Pokémon in the games.\n*   Exhibits a moment of possible recognition or memory when making eye contact with the protagonist.\n*   Escapes the lab by blasting through the ceiling.",
      "narrative_role": "Antagonist and a significant force of chaos in the laboratory, representing the consequences of Dr. Fuji's experiments. Its escape sets the stage for future conflicts.",
      "facts": [
        {
          "fact": "Possesses a psychic shield that can materialize like heat shimmer.",
          "type": "Explicit"
       

Database module not implemented. Skipping save.


prompt_tokens=5437 completion_tokens=1873
gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Chaotic Laboratory Environment (Pokemon: Ambertwo)",
      "detailed_description": "*   A laboratory setting characterized by alarms, fires, broken equipment, and structural damage.\n*   It is the immediate environment where Amber awakens after being reborn.\n*   The environment is a direct result of Mewtwo's escape and rampage.\n*   The lab contains broken tanks filled with amber fluid.\n*   The environment heightens the tension and stakes of the narrative, emphasizing the danger faced by the characters.\n*   The lab transitions into a mansion-like interior, indicating a hidden or integrated facility within a larger estate.",
      "narrative_role": "Setting element that heightens the tension and stakes of the narrative, emphasizing the danger faced by the characters. It also serves as a backdrop for Amber's rebirth and escape.",
      "facts": [
 

Database module not implemented. Skipping save.


prompt_tokens=5024 completion_tokens=497
prompt_tokens=10461 completion_tokens=2370


In [13]:
# print(upsert_entities.model_dump_json(indent=2))

### Story So Far: Comprehensive Wiki

In [19]:

from shuscribe.schemas.wikigen.story import WikiPage


templates.story.comprehensive_wiki.reload()
comprehensive_wiki_messages: list[Message] = templates.story.comprehensive_wiki.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    chapter_summary=chapter_summary,
    key_entities=upsert_entities,
    
    story_metadata=STORY_METADATA,
    # summary_so_far=summary_so_far,
    # recent_summaries=recent_summaries_prompt,
)

# print(comprehensive_summary_messages[-1].content)
comprehensive_wiki_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml", "r") as f:
            comprehensive_wiki_response = StreamChunk.model_validate_json(f.read())

if not comprehensive_wiki_response:
    comprehensive_wiki_response = await run_async(stream(
        "", 
        "", 
        comprehensive_wiki_messages, 
        config=templates.story.comprehensive_wiki.default_config
        ))

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_wiki.yaml", "w") as f:
    f.write(comprehensive_wiki_response.model_dump_json(indent=2))

print(comprehensive_wiki_response.usage)
comprehensive_wiki = WikiPage.from_wiki_content("Comprehensive Wiki Page", comprehensive_wiki_response.accumulated_text)


claude-3-7-sonnet-20250219:
<ANTHROPIC_THINKING>I need to update a comprehensive wiki document for Pokemon: Ambertwo, incorporating the new content from Chapter 1. This is an isekai story about a Pokemon fan who has been reincarnated into the Pokemon world as Dr. Fuji's daughter, Amber.

Let me analyze the provided information and plan how to structure the wiki:

1. Key plot elements from Chapter 1:
   - Protagonist dies in the real world playing Pokemon Go
   - Reincarnated as Amber, Dr. Fuji's daughter (clone)
   - Witnesses Mewtwo's escape from the lab
   - Dr. Fuji is caring for Amber
   - Lab is being destroyed as they escape

2. Key entities to include:
   - Amber (Reincarnated) - the protagonist
   - Dr. Fuji - scientist, father figure
   - Mewtwo - powerful Pokemon that escapes
   - Dr. Fuji's Lab - setting
   - Truck-kun Incident - event causing reincarnation
   - Chaotic Laboratory Environment - setting element

Since this seems to be the first chapter, I'll need to create a 

Database module not implemented. Skipping save.


prompt_tokens=7338 completion_tokens=1642


In [20]:
# Render the wiki page content as Markdown; as the cell's last expression it
# is shown as rich output rather than plain text.
Markdown(comprehensive_wiki.content)




# Pokemon: Ambertwo

## Introduction

Pokemon: Ambertwo is a story following a Pokemon fan who is unexpectedly reincarnated into the Pokemon world as [[Amber (Reincarnated)]], the daughter of scientist [[Dr. Fuji]]. After a fatal accident in the real world, the protagonist awakens in the body of Amber clone within [[Dr. Fuji's Lab]], just as the genetically engineered [[Mewtwo]] makes its dramatic escape. The narrative explores the protagonist's journey as she navigates her new existence in this familiar yet dangerous world, blending elements from various Pokemon media.

## Characters

### Main Characters

#### [[Amber (Reincarnated)]]

The protagonist of the story, formerly a dedicated Pokemon Go player from the real world who died in the [[Truck-kun Incident]]. She awakens in the body of Dr. Fuji's deceased daughter, Amber, retaining all memories from her previous life. Initially disoriented by her new smaller body and higher-pitched voice, she quickly grasps the dangerous situation around her. Despite her confusion, she exhibits concern for Dr. Fuji's well-being during their escape from the lab.

#### [[Dr. Fuji]]

A middle-aged scientist responsible for creating Mewtwo and apparently successfully cloning his deceased daughter, Amber. He shows immense emotional attachment to Amber, wrapping her in his lab coat and protecting her from falling debris despite sustaining injuries himself. Dr. Fuji expresses that "everything will be different now" and they can "fix it all—our family, our life," suggesting deep personal motivations behind his scientific work.

### Other Characters

#### [[Mewtwo]]

A powerful Psychic-type Pokémon created through genetic engineering in [[Dr. Fuji's Lab]]. Its appearance is described as raw, primal, and terrifyingly real, with violet energy trails hanging in the air behind its movements. Upon awakening, Mewtwo makes brief eye contact with Amber, possibly showing some form of recognition before being attacked by a security guard's Arcanine. Despite its immense power, Mewtwo chooses to escape through the ceiling rather than engage in prolonged combat.

## Settings

### [[Dr. Fuji's Lab]]

A sophisticated underground laboratory dedicated to genetic engineering and cloning experiments. The lab contains multiple tanks filled with amber fluid (one of which housed Amber), advanced equipment including evolution acceleration chambers and containment fields, and appears to be the birthplace of Mewtwo. The lab is located beneath what appears to be a wealthy estate or mansion, with clinical white walls and exposed machinery transitioning to wood paneling and ornate light fixtures in other areas.

### [[Chaotic Laboratory Environment (Pokemon: Ambertwo)]]

Following Mewtwo's escape, the laboratory devolves into chaos with emergency lights cutting through steam clouds, sparks raining from destroyed equipment, and alarms wailing in competing frequencies. The destruction includes shattered glass tanks, broken concrete, and spreading fires. During the chaos, a partially-formed Kadabra variant is seen glitching in and out of existence, leaving trails of psychic energy in the air, suggesting other experiments were underway in the facility.

## Plot

### Origin and Reincarnation

The story begins with the protagonist playing a difficult Pokemon game on their phone, specifically navigating the Pokemon Mansion with a team that includes a Gyarados, Charizard, Kadabra, Dugtrio, Nidoking, and a newly caught Growlithe. When the protagonist receives notifications about a Shadow Mewtwo raid at a nearby location, they decide to pause their game and rush to participate. While crossing a street, the protagonist is struck by a truck in the [[Truck-kun Incident]], with their final thoughts being about their Gyarados left waiting in the paused game.

### Awakening and Escape

Consciousness returns as the protagonist finds themselves floating in an amber-filled tank in [[Dr. Fuji's Lab]]. When the container shatters, they are caught by Dr. Fuji, who joyfully proclaims "Amber, you're alive." The protagonist quickly realizes they are now inhabiting the body of Dr. Fuji's deceased daughter, while simultaneously witnessing the awakening and escape of [[Mewtwo]].

As Mewtwo wreaks havoc throughout the facility, Dr. Fuji carries Amber through the chaotic laboratory, protecting her from falling debris at the cost of his own safety. They navigate through the damaged lab, passing panicked scientists, security guards, and various Pokemon. Throughout their escape, they witness the true extent of Dr. Fuji's experiments, including glimpses of other tanks and a partially-formed Kadabra variant glitching in and out of existence.

The chapter concludes with Dr. Fuji and Amber making their way through the burning facility, with Dr. Fuji expressing that everything will be different now and that they can fix their family and life, hinting at his deeper motivations and setting the stage for their future together in this new reality.

## Key Events

### [[Truck-kun Incident]]

The catalyst event where the protagonist, while rushing to a Shadow Mewtwo raid in Pokemon Go, is struck by a truck and killed in the real world. This accident leads to their consciousness being transferred into the body of Amber in the Pokemon world, setting the primary narrative in motion.

### Mewtwo's Escape

A pivotal moment where Mewtwo awakens and breaks free from containment, creating massive destruction throughout [[Dr. Fuji's Lab]]. During its escape, Mewtwo makes brief eye contact with Amber, suggesting a possible connection or recognition, before blasting through the ceiling and disappearing into the night after a brief confrontation with a security guard's Arcanine.

