# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the resolved ../backend directory first on the module search path so the
# `shuscribe` imports in the following cells resolve against the local checkout.
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from IPython.display import Markdown

# Read a local .env file (if present) into the process environment before the
# keys below are looked up.
load_dotenv()
# os.environ[...] (rather than .get) raises KeyError right here when a key is absent.
# NOTE(review): all three keys are required even though the cells below select a
# single provider via PROVIDER_NAME — confirm that is intended.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Provider name -> model id used for the regular (non-thinking) calls.
TEST_MODELS ={
    "openai": "gpt-4o-mini",
    "anthropic": "claude-3-5-haiku-20241022",
    "gemini": "gemini-2.0-flash-001"
}

# Provider name -> model id used for the calls that pass a ThinkingConfig.
TEST_THINKING_MODELS = {
    "openai": "o3-mini-2025-01-31",
    "anthropic": "claude-3-7-sonnet-20250219",
    "gemini": "gemini-2.0-flash-thinking-exp"
}

# Directory holding the story's _meta.yaml and chapter files; the per-chapter
# "<index>out" cache directories are also created under it.
STORY_DIR = Path("../backend/tests/resources/pokemon_amber")

async def run_async(coro):
    """Await ``coro`` and hand back whatever it produced.

    Thin pass-through used by the notebook cells, which call it as
    ``await run_async(...)``.
    """
    outcome = await coro
    return outcome

In [3]:
# When True, the pipeline cells below first try to load a previously saved
# response from STORY_DIR / "<CHAPTER_INDEX>out" and only call the model if no
# saved file exists. When False, every cell calls the model.
USE_CACHED_RESPONSES = False

In [4]:

# Streaming response
from typing import Type
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.llm import ThinkingConfig
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str,
    model: str,
    messages: list[Message],
    response_schema: Type[BaseOutputSchema] | None = None,
    max_tokens: int | None = None,
    temp: float | None = None,
    thinking_config: ThinkingConfig | None = None
    ) -> StreamChunk | None:
    """Stream one generation, echoing chunk text to stdout as it arrives.

    Args:
        provider_name: Provider key passed to ``session.generate_stream``.
        model: Model identifier; also printed as a header before the output.
        messages: Conversation messages to send.
        response_schema: Optional output schema forwarded in the config.
        max_tokens: Optional cap forwarded as ``max_output_tokens``.
        temp: Sampling temperature. ``None`` selects the default of 0.7; an
            explicit ``0.0`` is honoured.
        thinking_config: Optional thinking configuration forwarded in the config.

    Returns:
        The last chunk received if its status is ``COMPLETE`` or ``ERROR``;
        ``None`` if the stream produced no chunks or ended in any other status.
    """
    # `temp or 0.7` would silently replace an explicit 0.0 with 0.7.
    temperature = 0.7 if temp is None else temp

    # Tracked explicitly: the loop variable is never bound when the stream
    # yields nothing, which previously raised UnboundLocalError after the loop.
    last_chunk: StreamChunk | None = None

    async with LLMSession.session_scope() as session:
        config = GenerationConfig(
            temperature=temperature,
            response_schema=response_schema,
            max_output_tokens=max_tokens,
            thinking_config=thinking_config
        )

        print(f"{model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)
            last_chunk = chunk

    # Only a terminal chunk is handed back to the caller.
    if last_chunk is not None and last_chunk.status in (StreamStatus.COMPLETE, StreamStatus.ERROR):
        return last_chunk
    return None


In [5]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Load the story-level metadata. The encoding is explicit because the story
# text contains non-ASCII characters and open()'s default is locale-dependent.
with open(STORY_DIR / "_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)
    STORY_METADATA = StoryMetadata(
        title=meta.get('story_title'),
        description=meta.get('story_description'),
        genres=meta.get('genres'),
        additional_tags=meta.get('additional_tags')
    )

# Load every chapter file listed in the metadata, in listed order.
CHAPTERS = []
for chapter in meta.get('chapters'):
    with open(STORY_DIR / chapter, "r", encoding="utf-8") as f:
        # Chapter id is the file name up to its first dot.
        chapter_id = chapter.split('.')[0]
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            # Still best-effort, but report the skip: a silently dropped chapter
            # shifts every later position in CHAPTERS, so CHAPTERS[CHAPTER_INDEX]
            # would point at a different chapter than expected.
            print(f"Skipping chapter file {chapter!r}: {exc!r}")
            continue


# Summarization Pipeline

## Chapter [0]

In [6]:
# Provider used by the cells below; it is the lookup key into TEST_MODELS and
# TEST_THINKING_MODELS.
PROVIDER_NAME = "gemini"
# Position in CHAPTERS of the chapter to process; also names the per-chapter
# "<CHAPTER_INDEX>out" cache directory under STORY_DIR.
CHAPTER_INDEX = 0

# USE_CACHED_RESPONSES = False

### Chapter Summary

In [7]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = None

if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "r") as f:
            summary_response = StreamChunk.model_validate_json(f.read())

# if we don't have a cached response, generate one
if not summary_response:
    summary_response = await run_async(stream(PROVIDER_NAME, TEST_MODELS[PROVIDER_NAME], summary_messages, temp=0.4))

print(summary_response.usage)
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "chapter_summary.yaml", "w") as f:
    f.write(summary_response.model_dump_json(indent=2))

chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)


gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## The Gamer's Last Stand

*   The protagonist, a dedicated Pokémon player, is on the verge of losing a crucial battle in Pokémon Mansion on an emulator.
    *   Her Gyarados is near defeat against a Vulpix.
    *   She contemplates switching to other Pokémon but acknowledges their weaknesses.
*   A Discord notification alerts her to a Shadow Mewtwo raid at the library gym.
    *   The event is time-sensitive, with only a 10-minute window.
*   She saves her game state and rushes to the library, disregarding traffic signals.
*   While crossing the street, she is struck by a vehicle.
    *   Her last thought is about her Gyarados.

## Rebirth

*   The protagonist regains consciousness in an amber fluid-filled container.
    *   She experiences distorted sensations and muffled sounds.
    *   The container shatters, and she is caught by a middle-aged man.
*   The man, Dr. Fuji, expresses relief and calls her "Amber." [!CHARACTER]
    *   He wraps 

Database module not implemented. Skipping save.


prompt_tokens=3791 completion_tokens=599


### Extract Entities

In [8]:
# Show the chapter summary in the form returned by its to_prompt() method.
print(chapter_summary.to_prompt())

<Content>


## The Gamer's Last Stand

*   The protagonist, a dedicated Pokémon player, is on the verge of losing a crucial battle in Pokémon Mansion on an emulator.
    *   Her Gyarados is near defeat against a Vulpix.
    *   She contemplates switching to other Pokémon but acknowledges their weaknesses.
*   A Discord notification alerts her to a Shadow Mewtwo raid at the library gym.
    *   The event is time-sensitive, with only a 10-minute window.
*   She saves her game state and rushes to the library, disregarding traffic signals.
*   While crossing the street, she is struck by a vehicle.
    *   Her last thought is about her Gyarados.

## Rebirth

*   The protagonist regains consciousness in an amber fluid-filled container.
    *   She experiences distorted sensations and muffled sounds.
    *   The container shatters, and she is caught by a middle-aged man.
*   The man, Dr. Fuji, expresses relief and calls her "Amber." [!CHARACTER]
    *   He wraps her in his lab coat.
*   The p

In [10]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
upsert_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(extract_messages[-1].content)
extract_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "r") as f:
            extract_response = StreamChunk.model_validate_json(f.read())
            
# if we don't have a cached response, generate one
if not extract_response:
    extract_response = await run_async(stream(
        PROVIDER_NAME, 
        TEST_THINKING_MODELS[PROVIDER_NAME], 
        upsert_messages, 
        ExtractEntitiesOutSchema, 
        temp=0.4,
        thinking_config=ThinkingConfig(enabled=True)
        ))
    
os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "extract_entities.yaml", "w") as f:
    f.write(extract_response.model_dump_json(indent=2))

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


gemini-2.0-flash-thinking-exp:
```json
{
  "entities": [
    {
      "identifier": "Protagonist (Amber)",
      "aliases": [
        "I",
        "AlexaTheGreat",
        "Amber"
      ],
      "entity_type": "Character",
      "significance_level": "Central",
      "narrative_role": "Reincarnated Pokemon Gamer in a child's body, experiencing the Pokemon world firsthand, specifically in the role of Dr. Fuji's cloned daughter. Initially a Pokemon trainer in the real world, now thrust into a dangerous and unfamiliar situation.",
      "description": "A female Pokemon gamer who is reincarnated into the body of Amber, Dr. Fuji's deceased daughter, in the Pokemon world. She retains her memories and knowledge of the Pokemon games, but is now physically a child in a perilous situation.",
      "related_entities": [
        "Gyarados",
        "Charizard",
        "Dr. Fuji",
        "Mewtwo",
        "Amber (daughter)"
      ]
    },
    {
      "identifier": "Dr. Fuji",
      "aliases": [
  

Database module not implemented. Skipping save.


prompt_tokens=4914 completion_tokens=2482


In [11]:
# Display the full StreamChunk from the extraction step for inspection.
extract_response

StreamChunk(status=<StreamStatus.COMPLETE: 'complete'>, session_id='bfcddf38-9992-4620-a5d4-0bf24e35841e', text='', accumulated_text='```json\n{\n  "entities": [\n    {\n      "identifier": "Protagonist (Amber)",\n      "aliases": [\n        "I",\n        "AlexaTheGreat",\n        "Amber"\n      ],\n      "entity_type": "Character",\n      "significance_level": "Central",\n      "narrative_role": "Reincarnated Pokemon Gamer in a child\'s body, experiencing the Pokemon world firsthand, specifically in the role of Dr. Fuji\'s cloned daughter. Initially a Pokemon trainer in the real world, now thrust into a dangerous and unfamiliar situation.",\n      "description": "A female Pokemon gamer who is reincarnated into the body of Amber, Dr. Fuji\'s deceased daughter, in the Pokemon world. She retains her memories and knowledge of the Pokemon games, but is now physically a child in a perilous situation.",\n      "related_entities": [\n        "Gyarados",\n        "Charizard",\n        "Dr. Fuj

### Upsert Entities and Relationships

In [13]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl


upsert_entities = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "r") as f:
            upsert_entities = UpsertEntitiesOutSchema.model_validate_json(f.read())

if not upsert_entities:
    upsert_entities = UpsertEntitiesOutSchema(entities=[])
    total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
    templates.entity.upsert.reload()        

    for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT):
        upsert_messages: list[Message] = templates.entity.upsert.format( 
            current_chapter=CHAPTERS[CHAPTER_INDEX],
            entity_batch=batch,
            story_metadata=STORY_METADATA,
            chapter_summary=chapter_summary,
        )
        templates.entity.upsert.response_schema

        upsert_response = await run_async(stream(
            PROVIDER_NAME, 
            TEST_MODELS[PROVIDER_NAME], 
            upsert_messages, 
            templates.entity.upsert.response_schema, 
            temp=0.4,
            thinking_config=ThinkingConfig(enabled=True)
            ))
        upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
        total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
        total_usage.completion_tokens += upsert_response.usage.completion_tokens

upsert_entities: UpsertEntitiesOutSchema

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "upsert_entities.yaml", "w") as f:
    f.write(upsert_entities.model_dump_json(indent=2))
    
print(total_usage)

gemini-2.0-flash-thinking-exp:
```json
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Protagonist (Amber)",
      "detailed_description": "* Reincarnated female Pokemon gamer.\n* Currently inhabiting the body of Amber, Dr. Fuji's deceased daughter.\n* Retains memories and knowledge from her previous life as a Pokemon gamer.\n* Initially confused and overwhelmed by her new reality.\n* Shows concern and care for Dr. Fuji despite her situation.\n* Alias in her previous life was 'AlexaTheGreat'.",
      "narrative_role": "Reincarnated Pokemon Gamer in a child's body, experiencing the Pokemon world firsthand, specifically in the role of Dr. Fuji's cloned daughter. Thrust into a dangerous and unfamiliar situation after dying in her original world.",
      "facts": [
        {
          "fact": "Reincarnated from a Pokemon gamer.",
          "type": "Explicit"
        },
        {
          "fact": "Is currently in the body of Amber, Dr. Fuji's deceased daughter.",

Database module not implemented. Skipping save.


gemini-2.0-flash-thinking-exp:
```json
{
  "properties": {
    "entities": {
      "description": "entities to upsert",
      "items": {
        "properties": {
          "old_identifier": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "old identifier of the entity if you are updating an existing entity",
            "title": "Old Identifier"
          },
          "identifier": {
            "description": "main identifier of the entity. you may change the identifier if you think it is no longer unique",
            "title": "Identifier",
            "type": "string"
          },
          "detailed_description": {
            "description": "detailed markdown bullet point description of the entity",
            "title": "Detailed Description",
            "type": "string"
          },
          "narrative_role": {
            "descriptio

Database module not implemented. Skipping save.


gemini-2.0-flash-thinking-exp:
```json
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Emulator (Pokemon Game)",
      "detailed_description": "* A software program running on the protagonist's phone.\n* Used to play a Pokemon game, specifically set in the Pokemon Mansion location.\n* Represents the protagonist's connection to the game world before her isekai.\n* Serves as a contrast to the real Pokemon world she is now experiencing.",
      "narrative_role": "Represents the protagonist's past life and connection to the Pokemon game world, contrasting with the real Pokemon world she is now in.",
      "facts": [
        {
          "fact": "Runs on the protagonist's phone.",
          "type": "Explicit"
        },
        {
          "fact": "Used to play Pokemon games, specifically in Pokemon Mansion.",
          "type": "Explicit"
        },
        {
          "fact": "Represents the protagonist's connection to her previous life and the game world.",
     

Database module not implemented. Skipping save.


prompt_tokens=19013 completion_tokens=7091


### Story So Far Summary

In [14]:

templates.story.comprehensive_summary.reload()
comprehensive_summary_messages: list[Message] = templates.story.comprehensive_summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    chapter_summary=chapter_summary,
    key_entities=upsert_entities,
    
    story_metadata=STORY_METADATA,
    # summary_so_far=summary_so_far,
    # recent_summaries=recent_summaries_prompt,
)

# print(comprehensive_summary_messages[-1].content)
comprehensive_summary_response = None
if USE_CACHED_RESPONSES:
    # if exists, load from file
    if (STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_summary.yaml").exists():
        with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_summary.yaml", "r") as f:
            comprehensive_summary_response = StreamChunk.model_validate_json(f.read())

if not comprehensive_summary_response:
    comprehensive_summary_response = await run_async(stream(
        PROVIDER_NAME, 
        TEST_THINKING_MODELS[PROVIDER_NAME], 
        comprehensive_summary_messages, 
        temp=0.4,
        thinking_config=ThinkingConfig(enabled=True)
        ))

os.makedirs(STORY_DIR / f"{CHAPTER_INDEX}out", exist_ok=True)
with open(STORY_DIR / f"{CHAPTER_INDEX}out" / "comprehensive_summary.yaml", "w") as f:
    f.write(comprehensive_summary_response.model_dump_json(indent=2))

print(comprehensive_summary_response.usage)
# comprehensive_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, comprehensive_summary_response.accumulated_text)


gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

# Pokemon: Ambertwo - Comprehensive Summary

## Premise

A dedicated [[Protagonist (Amber)]], known as AlexaTheGreat in her past life, is unexpectedly reincarnated into the Pokemon world after a fatal accident. She finds herself in the body of [[Amber Fuji]], the deceased daughter of [[Dr. Fuji]], the scientist behind the creation of [[Mewtwo]]. Thrust into a world of Pokemon she only knew through a screen, she must navigate the dangers and mysteries that await her, starting with the escape of Mewtwo from Dr. Fuji's laboratory.

## The Gamer's Demise and Rebirth

The story begins with the protagonist engrossed in a Pokemon game on an [[Emulator (Pokemon Game)]], battling within the [[Pokemon Mansion (Game)]]. Her prized [[Gyarados (Protagonist's Game)]] is on the verge of defeat, leading to a moment of intense concentration. A notification about a [[Shadow Mewtwo Raid (Pokemon GO)]] pulls her away, and she rushes out, eager to participate in th

Database module not implemented. Skipping save.


prompt_tokens=10116 completion_tokens=516


In [None]:
# Parse the streamed text with the same ChapterSummary constructor used for the
# per-chapter summary.
# NOTE(review): presumably the comprehensive summary shares that output format — confirm.
comprehensive_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, comprehensive_summary_response.accumulated_text)

# Render the parsed summary as Markdown (last expression of the cell).
Markdown(comprehensive_summary.summary)


## Synopsis
A Pokemon fan is [[Protagonist|isekai'd]] into the Pokemon world after a fatal truck accident. Reborn as [[Amber (Clone)|Amber]], the deceased daughter of [[Dr. Fuji]], she finds herself in a laboratory where the genetically engineered [[Mewtwo]] is escaping. As chaos erupts, [[Dr. Fuji]] rescues her, promising a chance to rebuild their family.
## The Protagonist's Previous Life
Before her reincarnation, the [[Protagonist]] was an avid player of a Pokemon ROM hack. While exploring the Pokemon Mansion, she faced a challenging battle with her [[Gyarados]] on the verge of defeat. A notification about a [[Shadow Mewtwo Raid]] prompted her to leave her game and rush to the library. Tragically, she was struck by a [[Truck Accident|truck]] while crossing the street, leading to her death.
## Rebirth and Initial Encounter with Mewtwo
The [[Protagonist]] awakens suspended in [[Amber Fluid Suspension|amber fluid]] within a laboratory. Upon breaking free, she is greeted by [[Dr. Fuji]], who identifies her as [[Amber (Clone)|Amber]]. The lab is in disarray as [[Mewtwo]] escapes from its container, wreaking havoc. The [[Protagonist]] witnesses [[Mewtwo]]'s immense power and feels primal terror upon making eye contact.
## Escape from the Laboratory
As [[Mewtwo]] rampages through the lab, [[Dr. Fuji]] carries the [[Protagonist]] to safety. They navigate through the damaged facility, witnessing the aftermath of [[Mewtwo]]'s escape and other failed experiments. They overhear snippets of conversations hinting at a security breach, containment failures, and an overloading evolution acceleration chamber within [[Dr. Fuji's Lab]]. [[Mewtwo]] blasts through the ceiling, and [[Dr. Fuji]] shields the [[Protagonist]] from falling debris. They eventually escape into a more refined area resembling a wealthy estate. Despite being injured, [[Dr. Fuji]] assures the [[Protagonist]] that they can fix their family and life.