# Setup

In [11]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the backend directory (resolved against the current working directory)
# at the front of sys.path; presumably this is what makes the `shuscribe`
# package imported by later cells resolvable — verify the notebook is launched
# from a sibling directory of `backend`.
sys.path.insert(0, str(Path('../backend').resolve()))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from shuscribe.schemas.llm import MessageRole

# Pull variables from a .env file (python-dotenv) into the process environment
# before the keys are read.
load_dotenv()
# Indexing os.environ (instead of .get) fails fast with KeyError when a key is
# missing, so all three provider keys must be set for this cell to run.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Model id used for each provider name; `stream` looks its model up here.
TEST_MODELS = dict(
    openai="gpt-4o-mini",
    anthropic="claude-3-5-haiku-20241022",
    gemini="gemini-2.0-flash-001",
)

# Per-provider "thinking" model ids (not referenced by the cells shown here).
TEST_THINKING_MODELS = dict(
    openai="o3-mini-2025-01-31",
    anthropic="claude-3-7-sonnet-20250219",
    gemini="gemini-2.0-flash-thinking-exp",
)

async def run_async(coro):
    """Await ``coro`` and hand back whatever it produces.

    A thin pass-through helper for running async code from notebook cells.
    """
    result = await coro
    return result

In [16]:
# Streaming response
from typing import Type
from pydantic import BaseModel
from shuscribe.services.llm.streaming import StreamStatus


async def stream(provider_name: str, messages: list[Message], response_schema: Type[BaseModel] | None = None) -> str | None:
    """Stream a generation from the provider's test model, echoing text as it arrives.

    Args:
        provider_name: Key into ``TEST_MODELS``; also passed to the session as
            the provider to call.
        messages: Messages to send to the model.
        response_schema: Optional pydantic model class forwarded to
            ``GenerationConfig`` as ``response_schema``.

    Returns:
        The last chunk's ``accumulated_text`` when that chunk's status is
        ``StreamStatus.COMPLETE``; otherwise ``None`` — including when the
        stream produced no chunks at all.

    Raises:
        KeyError: If ``provider_name`` has no entry in ``TEST_MODELS``.
    """
    # Resolve the model once, before a session is opened, so an unknown
    # provider fails without touching the session.
    model = TEST_MODELS[provider_name]

    # Tracked explicitly: the loop variable is never bound when the stream
    # yields nothing, which previously raised UnboundLocalError after the loop.
    last_chunk = None

    async with LLMSession.session_scope() as session:
        config = GenerationConfig(
            temperature=0.7,
            response_schema=response_schema,
        )

        print(f"{model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            # Echo each piece immediately so progress is visible in the cell.
            print(chunk.event.text, end="", flush=True)
            last_chunk = chunk

    if last_chunk is not None and last_chunk.status == StreamStatus.COMPLETE:
        return last_chunk.accumulated_text
    return None


In [4]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Directory holding the sample story's metadata and chapter YAML files.
STORY_DIR = "../backend/tests/resources/pokemon_amber"

# Read as UTF-8 explicitly: the story text contains non-ASCII characters and
# the platform default encoding is not UTF-8 everywhere.
with open(f"{STORY_DIR}/_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)

STORY_METADATA = StoryMetadata(
    title=meta.get('story_title'),
    description=meta.get('story_description'),
    genres=meta.get('genres'),
    additional_tags=meta.get('additional_tags')
)

CHAPTERS = []
for chapter in meta.get('chapters'):
    # The chapter id is the file name up to its first dot.
    chapter_id = chapter.split('.')[0]
    with open(f"{STORY_DIR}/{chapter}", "r", encoding="utf-8") as f:
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            # Loading stays best-effort (a bad chapter does not stop the run),
            # but the skip is reported so CHAPTERS indices are not silently
            # shifted by a missing entry.
            print(f"Skipping chapter file {chapter!r}: {exc!r}")
            continue


# Summarization Pipeline

## Chapter Summary

In [5]:
# Provider passed to `stream` by the pipeline cells below (a TEST_MODELS key).
PROVIDER_NAME: str = "gemini"
# Position in CHAPTERS of the chapter the pipeline cells operate on.
CHAPTER_INDEX: int = 0

In [6]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = await run_async(stream(PROVIDER_NAME, messages))
chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response)

gemini-2.0-flash-001:
<|STARTOFSUMMARY|>
# Chapter Summary

### Gamer's Last Stand & Isekai

*   The protagonist, a dedicated Pokémon player, is on the verge of losing a crucial battle in Pokémon Mansion on an emulator.
    *   Her Gyarados is near defeat against a Vulpix, threatening weeks of progress in a permadeath run.
    *   She considers her limited options, weighing the risks of losing other valuable team members.
*   A Discord notification alerts her to a Shadow Mewtwo raid at the library gym on campus.
    *   Despite the ongoing battle, she saves her emulator state and rushes to participate in the raid.
*   While crossing the street, distracted by the Pokémon Go raid on her phone, she is struck by a vehicle.
    *   Her final thoughts are about her Gyarados, highlighting her dedication to the game.
    *   The experience is described as a sudden cessation of input, like a game cartridge being removed.

### Rebirth in Amber

*   The protagonist awakens in a tank filled with a

Database module not implemented. Skipping save.


## Extract Entities

In [7]:
# Inspect the string that to_prompt() builds from the parsed chapter summary.
chapter_summary.to_prompt()

'<Content>\n\n# Chapter Summary\n\n### Gamer\'s Last Stand & Isekai\n\n*   The protagonist, a dedicated Pokémon player, is on the verge of losing a crucial battle in Pokémon Mansion on an emulator.\n    *   Her Gyarados is near defeat against a Vulpix, threatening weeks of progress in a permadeath run.\n    *   She considers her limited options, weighing the risks of losing other valuable team members.\n*   A Discord notification alerts her to a Shadow Mewtwo raid at the library gym on campus.\n    *   Despite the ongoing battle, she saves her emulator state and rushes to participate in the raid.\n*   While crossing the street, distracted by the Pokémon Go raid on her phone, she is struck by a vehicle.\n    *   Her final thoughts are about her Gyarados, highlighting her dedication to the game.\n    *   The experience is described as a sudden cessation of input, like a game cartridge being removed.\n\n### Rebirth in Amber\n\n*   The protagonist awakens in a tank filled with amber fluid,

In [8]:


from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(messages[-1].content)

streaming_response = await run_async(stream(PROVIDER_NAME, messages, ExtractEntitiesOutSchema))

gemini-2.0-flash-001:
{
  "entities": [
    {
      "description": "The protagonist of the story, a female Pokemon fan who is reincarnated into the Pokemon world as a clone of Dr. Fuji's deceased daughter, Amber. She retains her memories from her previous life.",
      "narrative_significance": "The central character whose experiences and decisions drive the plot. Her unique perspective as an isekai protagonist shapes her interactions with the Pokemon world and the unfolding events.",
      "significance_level": "central",
      "entity_type": "character",
      "identifier": "Protagonist (Amber)",
      "aliases": [
        "AlexaTheGreat",
        "Amber",
        "Dr. Fuji's daughter",
        "The girl in the tank"
      ],
      "related_entities": [
        "Dr. Fuji",
        "Mewtwo",
        "Gyarados"
      ]
    },
    {
      "description": "A powerful genetically engineered Pokemon created by Dr. Fuji. He is initially contained in the lab but breaks free, causing destructi

Database module not implemented. Skipping save.


In [9]:
# Parse the streamed JSON text into an ExtractEntitiesOutSchema instance.
# NOTE(review): stream() returns None when the stream did not finish with a
# COMPLETE status; confirm that a validation error is the desired failure here.

extracted_entities = ExtractEntitiesOutSchema.model_validate_json(streaming_response)
# Debug aid: print(extracted_entities.model_dump_json(indent=2))

In [10]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl


# Filter the extraction with MAJOR as the significance cut-off; the printed
# result below contains both "central" and "major" entries.
important_entities = extracted_entities.filter_entities(EntitySigLvl.MAJOR)

# Print one upsert line per kept entity. The filtered list is passed back in —
# presumably so each "related" list only names entities that survived the
# filter (verify in to_upsert_str).
for entity in important_entities:
    print(entity.to_upsert_str(important_entities))


character: Protagonist (Amber) - central (related: Mewtwo, Dr. Fuji)
character: Mewtwo - major (related: Protagonist (Amber), Dr. Fuji)
character: Dr. Fuji - major (related: Protagonist (Amber), Mewtwo)
location: Dr. Fuji's Laboratory - major (related: Protagonist (Amber), Mewtwo, Dr. Fuji)
event: Isekai Truck Incident - major (related: Protagonist (Amber))
event: Rebirth in Amber - major (related: Protagonist (Amber), Mewtwo, Dr. Fuji)
event: Mewtwo's Escape - major (related: Protagonist (Amber), Mewtwo, Dr. Fuji)
concept: Reincarnation - major (related: Protagonist (Amber))
concept: Genetic Engineering - major (related: Protagonist (Amber), Mewtwo, Dr. Fuji)
theme: Scientific Hubris - major (related: Mewtwo, Dr. Fuji, Genetic Engineering)
