# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the backend directory at the front of the import path so the
# `shuscribe` imports in the following cells resolve. The relative path is
# resolved against the kernel's current working directory.
sys.path.insert(0, str(Path('../backend').resolve()))

In [2]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from shuscribe.schemas.llm import MessageRole

# Load variables from a .env file into the process environment.
load_dotenv()
# Indexing os.environ (instead of .get) raises KeyError right here when a key
# is missing, so all three keys must be set even if only one provider is used.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Default model per provider; stream() looks the model up here by provider name.
TEST_MODELS = dict(
    openai="gpt-4o-mini",
    anthropic="claude-3-5-haiku-20241022",
    gemini="gemini-2.0-flash-001",
)

# Reasoning-oriented counterparts, keyed by the same provider names.
TEST_THINKING_MODELS = dict(
    openai="o3-mini-2025-01-31",
    anthropic="claude-3-7-sonnet-20250219",
    gemini="gemini-2.0-flash-thinking-exp",
)

# Helper function to run async code in notebook
async def run_async(coro):
    """Await ``coro`` and return whatever it produces.

    A thin passthrough: any exception raised by ``coro`` propagates unchanged.
    """
    result = await coro
    return result

In [3]:
# Streaming response
from typing import Type
from pydantic import BaseModel
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str,
    messages: list[Message],
    response_schema: Type[BaseModel] | None = None,
    max_tokens: int | None = None,
    temp: float | None = None
    ) -> StreamChunk | None:
    """Stream a completion to stdout and return the final chunk.

    Args:
        provider_name: Key into ``TEST_MODELS`` selecting both the provider
            and the model to call.
        messages: Conversation to send to the model.
        response_schema: Optional pydantic model forwarded to the generation
            config as ``response_schema``.
        max_tokens: Optional value forwarded as ``max_output_tokens``.
        temp: Sampling temperature. ``None`` selects the default of 0.7; an
            explicit ``0.0`` is honoured.

    Returns:
        The last chunk yielded by the stream when its status is
        ``StreamStatus.COMPLETE``; ``None`` if the stream yielded nothing or
        ended with any other status.

    Raises:
        KeyError: If ``provider_name`` is not a key of ``TEST_MODELS``.
    """
    # Resolve the model first so an unknown provider fails before a session is opened.
    model = TEST_MODELS[provider_name]

    config = GenerationConfig(
        # `temp or 0.7` would silently turn an explicit 0.0 into 0.7.
        temperature=0.7 if temp is None else temp,
        response_schema=response_schema,
        max_output_tokens=max_tokens
    )

    # Tracked separately from the loop variable: if the stream yields nothing,
    # the loop variable would never be bound.
    last_chunk: StreamChunk | None = None

    async with LLMSession.session_scope() as session:
        print(f"{model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)
            last_chunk = chunk

    if last_chunk is not None and last_chunk.status == StreamStatus.COMPLETE:
        return last_chunk
    return None


In [4]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Directory holding the story fixture: one metadata file plus one YAML file per chapter.
STORY_DIR = Path("../backend/tests/resources/pokemon_amber")

# Read explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open(STORY_DIR / "_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)

STORY_METADATA = StoryMetadata(
    title=meta.get('story_title'),
    description=meta.get('story_description'),
    genres=meta.get('genres'),
    additional_tags=meta.get('additional_tags')
)

# Load every chapter listed in the metadata, in listed order. Loading is
# best-effort: a chapter that fails to parse or validate is skipped, but the
# failure is reported instead of being swallowed silently.
CHAPTERS = []
for chapter in meta.get('chapters'):
    with open(STORY_DIR / chapter, "r", encoding="utf-8") as f:
        # Chapter id is the file name up to its first dot.
        chapter_id = chapter.split('.')[0]
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            print(f"Skipping chapter {chapter!r}: {exc!r}")
            continue


# Summarization Pipeline

## Chapter Summary

In [5]:
# Provider used by the pipeline cells below. stream() uses this value as the
# key into TEST_MODELS, so it must be "openai", "anthropic" or "gemini".
PROVIDER_NAME = "gemini"
# Zero-based position in CHAPTERS of the chapter to summarize and extract entities from.
CHAPTER_INDEX = 0

In [6]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = await run_async(stream(PROVIDER_NAME, messages, temp=0.4))
chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)

gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## Isekai'd from Pokemon Go

*   The protagonist is playing a Pokemon game on their phone, struggling with a difficult battle in the Pokemon Mansion.
    *   Their Gyarados is on the verge of defeat against a Vulpix.
    *   The protagonist is worried about losing more Pokemon due to the game's perma-death rules.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym.
*   The protagonist saves their game and rushes to the library for the raid event.
*   While crossing the street, the protagonist is hit by a truck. [!ALLUSION]
    *   Their last thought is about their Gyarados in the game.
*   The protagonist experiences a sudden cessation of consciousness.

## Rebirth as Amber

*   The protagonist regains awareness while suspended in amber fluid inside a container.
    *   The fluid is thick and syrupy.
    *   The protagonist's lungs don't feel the need to breathe.
*   The container breaks open, and the pr

Database module not implemented. Skipping save.


## Extract Entities

In [7]:
# Show the prompt rendering of the summary; as the cell's last expression,
# the returned string is displayed as the cell output.
chapter_summary.to_prompt()

'<Content>\n\n\n## Isekai\'d from Pokemon Go\n\n*   The protagonist is playing a Pokemon game on their phone, struggling with a difficult battle in the Pokemon Mansion.\n    *   Their Gyarados is on the verge of defeat against a Vulpix.\n    *   The protagonist is worried about losing more Pokemon due to the game\'s perma-death rules.\n*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid at the library gym.\n*   The protagonist saves their game and rushes to the library for the raid event.\n*   While crossing the street, the protagonist is hit by a truck. [!ALLUSION]\n    *   Their last thought is about their Gyarados in the game.\n*   The protagonist experiences a sudden cessation of consciousness.\n\n## Rebirth as Amber\n\n*   The protagonist regains awareness while suspended in amber fluid inside a container.\n    *   The fluid is thick and syrupy.\n    *   The protagonist\'s lungs don\'t feel the need to breathe.\n*   The container breaks open, and the protagon

In [11]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(messages[-1].content)

streaming_response = await run_async(stream(PROVIDER_NAME, messages, ExtractEntitiesOutSchema, temp=0.4))
extracted_entities = ExtractEntitiesOutSchema.model_validate_json(streaming_response.accumulated_text)


gemini-2.0-flash-001:
{
  "entities": [
    {
      "description": "The protagonist, a Pokemon fan who is hit by a truck while playing Pokemon Go and reincarnated into the Pokemon world as Amber, Dr. Fuji's clone daughter.",
      "narrative_significance": "The central character of the story, whose experiences and choices will drive the plot. Her past life as a Pokemon fan and her new identity as Amber will likely be major sources of conflict and development.",
      "significance_level": "Central",
      "entity_type": "Character",
      "identifier": "Protagonist (Amber)",
      "aliases": [
        "I",
        "AlexaTheGreat",
        "Amber",
        "Dr. Fuji's clone daughter"
      ],
      "related_entities": [
        "Gyarados",
        "Pokemon Mansion",
        "Shadow Mewtwo Raid",
        "Dr. Fuji",
        "Mewtwo"
      ]
    },
    {
      "description": "A level 40 Gyarados owned by the protagonist in the Pokemon game. It is described as carefully trained and a survi

Database module not implemented. Skipping save.


In [12]:
# Spot-check the parsed result: dump the first extracted entity as indented JSON.
print(extracted_entities.entities[0].model_dump_json(indent=2))

{
  "description": "The protagonist, a Pokemon fan who is hit by a truck while playing Pokemon Go and reincarnated into the Pokemon world as Amber, Dr. Fuji's clone daughter.",
  "narrative_significance": "The central character of the story, whose experiences and choices will drive the plot. Her past life as a Pokemon fan and her new identity as Amber will likely be major sources of conflict and development.",
  "significance_level": "Central",
  "entity_type": "Character",
  "identifier": "Protagonist (Amber)",
  "aliases": [
    "I",
    "AlexaTheGreat",
    "Amber",
    "Dr. Fuji's clone daughter"
  ],
  "related_entities": [
    "Gyarados",
    "Pokemon Mansion",
    "Shadow Mewtwo Raid",
    "Dr. Fuji",
    "Mewtwo"
  ]
}


In [13]:
from shuscribe.schemas.wikigen.entity import EntitySigLvl

# Keep only the more significant entities. NOTE(review): judging by the output
# below, the MAJOR threshold retains both "Major" and "Central" entities - confirm
# against filter_entities.
important_entities = extracted_entities.filter_entities(EntitySigLvl.MAJOR)

# Print one line per retained entity. NOTE(review): passing the filtered list
# appears to restrict each "related:" list to entities that survived the
# filter - confirm against to_upsert_str.
for entity in important_entities:
    print(entity.to_upsert_str(important_entities))


[Central] [Character] "Protagonist (Amber)" (related: Dr. Fuji, Mewtwo)
[Major] [Event] "Truck Accident" (related: Protagonist (Amber))
[Major] [Character] "Dr. Fuji" (related: Protagonist (Amber), Amber (original), Mewtwo, Laboratory)
[Major] [Character] "Amber (original)" (related: Protagonist (Amber), Dr. Fuji)
[Major] [Character] "Mewtwo" (related: Protagonist (Amber), Dr. Fuji, Laboratory)
[Major] [Location] "Laboratory" (related: Protagonist (Amber), Dr. Fuji, Amber (original), Mewtwo)
[Major] [Concept] "Reincarnation" (related: Protagonist (Amber))
[Major] [Concept] "Genetic Engineering/Cloning" (related: Dr. Fuji, Amber (original), Mewtwo)
[Major] [Other] "Pokemon" (related: Protagonist (Amber), Dr. Fuji, Mewtwo)
