# Setup

In [6]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the backend directory at the front of sys.path so that the
# `shuscribe` imports in the following cells can be resolved.
# The relative path is resolved against the current working directory.
sys.path.insert(0, str(Path('../backend').resolve()))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from shuscribe.schemas.llm import MessageRole

# Load environment variables via python-dotenv before reading the API keys.
load_dotenv()
# Direct indexing (rather than .get) makes a missing key fail here with KeyError.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Provider name -> model identifier; `stream` looks up its model here.
TEST_MODELS ={
    "openai": "gpt-4o-mini",
    "anthropic": "claude-3-5-haiku-20241022",
    "gemini": "gemini-2.0-flash-001"
}

# Provider name -> model identifier for the "thinking" variants.
# NOTE(review): not referenced by any of the visible cells.
TEST_THINKING_MODELS = {
    "openai": "o3-mini-2025-01-31",
    "anthropic": "claude-3-7-sonnet-20250219",
    "gemini": "gemini-2.0-flash-thinking-exp"
}

# Notebook convenience wrapper around an awaitable
async def run_async(coro):
    """Await ``coro`` and return whatever it evaluates to.

    Exceptions raised by ``coro`` propagate to the caller unchanged.
    """
    outcome = await coro
    return outcome

In [8]:
# Streaming response
from typing import Type
from pydantic import BaseModel
from shuscribe.schemas.base import BaseOutputSchema
from shuscribe.schemas.streaming import StreamChunk
from shuscribe.services.llm.streaming import StreamStatus


async def stream(
    provider_name: str,
    messages: list[Message],
    response_schema: Type[BaseOutputSchema] | None = None,
    max_tokens: int | None = None,
    temp: float | None = None
    ) -> StreamChunk | None:
    """Stream a generation from a provider, echoing the text as it arrives.

    Args:
        provider_name: Key into ``TEST_MODELS``; also passed to the session as
            the provider to use. An unknown name raises ``KeyError``.
        messages: Conversation to send to the model.
        response_schema: Optional output schema forwarded to ``GenerationConfig``.
        max_tokens: Optional value for ``max_output_tokens``.
        temp: Sampling temperature. ``None`` selects the default of 0.7; an
            explicit ``0.0`` is honoured.

    Returns:
        The last chunk yielded by the stream when its status is
        ``StreamStatus.COMPLETE``; ``None`` when the stream yielded nothing or
        its last chunk has any other status.
    """
    # Resolve the model first so an unknown provider fails before a session is opened.
    model = TEST_MODELS[provider_name]

    config = GenerationConfig(
        # `temp or 0.7` would silently turn an explicit 0.0 into 0.7.
        temperature=0.7 if temp is None else temp,
        response_schema=response_schema,
        max_output_tokens=max_tokens
    )

    # Tracked explicitly: the loop variable is unbound if the stream is empty.
    last_chunk: StreamChunk | None = None

    async with LLMSession.session_scope() as session:
        print(f"{model}:")

        async for chunk in session.generate_stream(
            messages=messages,
            provider_name=provider_name,
            model=model,
            config=config
        ):
            print(chunk.text, end="", flush=True)
            last_chunk = chunk

    if last_chunk is not None and last_chunk.status == StreamStatus.COMPLETE:
        return last_chunk
    return None


In [9]:
import yaml

from shuscribe.schemas.pipeline import Chapter, StoryMetadata

# Directory holding the story's `_meta.yaml` and the chapter files it lists.
STORY_DIR = Path("../backend/tests/resources/pokemon_amber")

# Read the story-level metadata. The encoding is explicit because the text
# contains non-ASCII characters and the platform default is not always UTF-8.
with open(STORY_DIR / "_meta.yaml", "r", encoding="utf-8") as f:
    meta = yaml.safe_load(f)

STORY_METADATA = StoryMetadata(
    title=meta.get('story_title'),
    description=meta.get('story_description'),
    genres=meta.get('genres'),
    additional_tags=meta.get('additional_tags')
)

# Load every chapter listed in the metadata, in listed order.
CHAPTERS = []
for chapter in meta.get('chapters'):
    # Chapter id is the file name up to its first dot.
    chapter_id = chapter.split('.')[0]
    with open(STORY_DIR / chapter, "r", encoding="utf-8") as f:
        try:
            chapter_content = yaml.safe_load(f)
            CHAPTERS.append(Chapter(id=chapter_id, title=chapter_content.get('title'), content=chapter_content.get('content')))
        except Exception as exc:
            # Still best-effort, but report the skip: a dropped chapter shifts
            # the positions that CHAPTERS indices refer to.
            print(f"Skipping chapter {chapter!r}: {exc!r}")
            continue


# Summarization Pipeline

## Chapter Summary

In [10]:
# Provider used by the pipeline cells below; passed to `stream` as `provider_name`.
PROVIDER_NAME = "gemini"
# Position in CHAPTERS of the chapter the pipeline cells operate on.
CHAPTER_INDEX = 0

In [11]:
from shuscribe.schemas.wikigen.summary import ChapterSummary
from shuscribe.services.llm.prompts import templates

templates.chapter.summary.reload()
summary_messages: list[Message] = templates.chapter.summary.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    story_metadata=STORY_METADATA,
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter.")
)

# print(messages[-1].content)

summary_response = await run_async(stream(PROVIDER_NAME, summary_messages, temp=0.4))
chapter_summary = ChapterSummary.from_chapter_summary(CHAPTER_INDEX, summary_response.accumulated_text)

gemini-2.0-flash-001:
<|STARTOFSUMMARY|>

## The Gamer's Last Stand

*   The protagonist, a dedicated Pokémon fan, is engrossed in a Pokémon game on their phone, facing a difficult battle in the Pokémon Mansion.
    *   Their Gyarados is on the verge of defeat against a Vulpix, with the protagonist already having lost two Pokémon.
    *   The protagonist contemplates their limited options, hesitant to risk their stronger Pokémon.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid event at the library gym on campus.
    *   The protagonist debates whether to continue the game or participate in the raid.
    *   The urgency of the notification and the allure of Shadow Mewtwo prompt the protagonist to save their game and head to the library.
*   As the protagonist rushes to the library, they disregard traffic signals, focused on reaching the raid in time.
*   Tragically, the protagonist is struck by a vehicle while crossing the street, resulting in their death.
    

Database module not implemented. Skipping save.


## Extract Entities

In [13]:
# Show the text that `chapter_summary.to_prompt()` produces.
print(chapter_summary.to_prompt())

<Content>


## The Gamer's Last Stand

*   The protagonist, a dedicated Pokémon fan, is engrossed in a Pokémon game on their phone, facing a difficult battle in the Pokémon Mansion.
    *   Their Gyarados is on the verge of defeat against a Vulpix, with the protagonist already having lost two Pokémon.
    *   The protagonist contemplates their limited options, hesitant to risk their stronger Pokémon.
*   A Discord notification alerts the protagonist to a Shadow Mewtwo raid event at the library gym on campus.
    *   The protagonist debates whether to continue the game or participate in the raid.
    *   The urgency of the notification and the allure of Shadow Mewtwo prompt the protagonist to save their game and head to the library.
*   As the protagonist rushes to the library, they disregard traffic signals, focused on reaching the raid in time.
*   Tragically, the protagonist is struck by a vehicle while crossing the street, resulting in their death.
    *   The protagonist's final th

In [18]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema
# Show the output-schema string exposed by ExtractEntitiesOutSchema.
print(ExtractEntitiesOutSchema.to_output_schema_str())

{
  "properties": {
    "entities": {
      "description": "important entities found in the chapter that carry narrative significance",
      "items": {
        "properties": {
          "description": {
            "description": "The description of the entity (what the entity is, what it does, etc)",
            "title": "Description",
            "type": "string"
          },
          "narrative_role": {
            "description": "The narrative role of the entity in the context of this chapter and the story as a whole",
            "title": "Narrative Role",
            "type": "string"
          },
          "key_facts": {
            "description": "List of key facts about the entity introduced in this chapter",
            "items": {
              "type": "string"
            },
            "title": "Key Facts",
            "type": "array"
          },
          "significance_level": {
            "enum": [
              "Central",
              "Major",
              "Relevant

In [33]:
from shuscribe.schemas.wikigen.entity import ExtractEntitiesOutSchema


templates.entity.extract.reload()
upsert_messages: list[Message] = templates.entity.extract.format( 
    current_chapter=CHAPTERS[CHAPTER_INDEX],
    # current_chapter=Chapter(id=1, title="Chapter 1", content="This is a test chapter."),
    story_metadata=STORY_METADATA,
    chapter_summary=chapter_summary,
)

# print(extract_messages[-1].content)

extract_response = await run_async(stream(PROVIDER_NAME, upsert_messages, ExtractEntitiesOutSchema, temp=0.4))
extracted_entities = ExtractEntitiesOutSchema.model_validate_json(extract_response.accumulated_text)
print(extract_response.usage)


gemini-2.0-flash-001:
{
  "entities": [
    {
      "description": "The protagonist, a Pokémon fan who is reincarnated into the Pokémon world as Amber, Dr. Fuji's clone daughter.",
      "narrative_role": "Central character who experiences death and rebirth, setting the stage for the story's exploration of identity, family, and the blending of game and reality.",
      "significance_level": "Central",
      "entity_type": "Character",
      "identifier": "Protagonist/Amber",
      "aliases": [
        "I",
        "AlexaTheGreat",
        "Amber"
      ],
      "related_entities": [
        "Gyarados",
        "Shadow Mewtwo",
        "Dr. Fuji",
        "Mewtwo"
      ]
    },
    {
      "description": "A level 40 Gyarados owned by the protagonist in the Pokémon game. It is a survivor of many battles and is currently in a precarious situation.",
      "narrative_role": "Represents the protagonist's dedication to the game and serves as a symbol of their past life. Its fate is the prot

Database module not implemented. Skipping save.


prompt_tokens=4633 completion_tokens=1784


## Upsert Entities and Relationships

In [None]:
from shuscribe.schemas.provider import LLMUsage
from shuscribe.schemas.wikigen.entity import UpsertEntitiesOutSchema
from shuscribe.schemas.wikigen.entity import EntitySigLvl

upsert_entities = UpsertEntitiesOutSchema(entities=[])
total_usage = LLMUsage(prompt_tokens=0, completion_tokens=0)
templates.entity.upsert.reload()
for batch in extracted_entities.batch_for_upsert(EntitySigLvl.RELEVANT):
    upsert_messages: list[Message] = templates.entity.upsert.format( 
        current_chapter=CHAPTERS[CHAPTER_INDEX],
        entity_batch=batch,
        story_metadata=STORY_METADATA,
        chapter_summary=chapter_summary,
    )

    upsert_response = await run_async(stream(PROVIDER_NAME, upsert_messages, UpsertEntitiesOutSchema, temp=0.4))
    upsert_entities.entities.extend(UpsertEntitiesOutSchema.model_validate_json(upsert_response.accumulated_text).entities)
    total_usage.prompt_tokens += upsert_response.usage.prompt_tokens
    total_usage.completion_tokens += upsert_response.usage.completion_tokens

print(total_usage)

gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Protagonist/Amber",
      "detailed_description": "*   The protagonist is a Pokémon fan who dies after being hit by a truck while rushing to a Shadow Mewtwo raid event.\n*   She is reincarnated into the Pokémon world as Amber, Dr. Fuji's clone daughter.\n*   She retains memories of her previous life.\n*   She is initially disoriented and confused by her new body and surroundings.\n*   She is referred to as \"Amber\" by Dr. Fuji.\n*   She is able to speak in her new body, though her voice is different.\n*   She urges Dr. Fuji to escape the laboratory after Mewtwo's rampage.\n*   She recognizes Dr. Fuji and understands the situation, including her new identity as his deceased daughter's clone.\n*   She is cared for and protected by Dr. Fuji.\n*   She experiences a mix of fear, confusion, and determination in her new life.\n*   She is aware of the dangers posed by Mewtwo.\n*   She is concerned

Database module not implemented. Skipping save.


gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Dr. Fuji",
      "detailed_description": "*   A middle-aged scientist deeply involved in cloning and genetic experiments.\n*   He revives the protagonist in the body of his deceased daughter, Amber.\n*   He shows immense relief and joy at Amber's (the protagonist's) revival, indicating a strong emotional attachment.\n*   He is injured during Mewtwo's escape but prioritizes the protagonist's safety.\n*   He expresses hope for the future and a desire to fix their family and life, hinting at past regrets or failures.\n*   He is associated with a hidden laboratory beneath a wealthy estate.\n*   He is referred to as having created a \"legendary clone\".",
      "narrative_role": "Plays a crucial role in the protagonist's reincarnation and is a key figure in the story's exploration of ethics, family, and the consequences of scientific ambition. He serves as a protector and guide for the protagoni

Database module not implemented. Skipping save.


gemini-2.0-flash-001:
{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Genetic Engineering",
      "detailed_description": "*   The manipulation of genes and DNA to create new organisms or modify existing ones.\n*   In this story, it's primarily practiced by Dr. Fuji.\n*   It is used to create Mewtwo and, seemingly, to revive Amber.\n*   Raises ethical questions about the boundaries of science and the morality of 'playing God'.\n*   Implies the potential for both scientific advancement and unforeseen consequences.\n*   The evolution acceleration chamber is mentioned, suggesting further applications of genetic engineering within the story's world.",
      "narrative_role": "Drives the plot, introduces ethical questions, and establishes the setting's technological capabilities. It is the core element behind the creation of Mewtwo and the protagonist's rebirth, shaping the central conflict and the protagonist's identity.",
      "facts": [],
      "removed_facts"

Database module not implemented. Skipping save.


prompt_tokens=16243 completion_tokens=4788


In [43]:
# Dump the accumulated upsert result as indented JSON for inspection.
print(upsert_entities.model_dump_json(indent=2))

{
  "entities": [
    {
      "old_identifier": null,
      "identifier": "Protagonist/Amber",
      "detailed_description": "*   The protagonist is a Pokémon fan who dies after being hit by a truck while rushing to a Shadow Mewtwo raid event.\n*   She is reincarnated into the Pokémon world as Amber, Dr. Fuji's clone daughter.\n*   She retains memories of her previous life.\n*   She is initially disoriented and confused by her new body and surroundings.\n*   She is referred to as \"Amber\" by Dr. Fuji.\n*   She is able to speak in her new body, though her voice is different.\n*   She urges Dr. Fuji to escape the laboratory after Mewtwo's rampage.\n*   She recognizes Dr. Fuji and understands the situation, including her new identity as his deceased daughter's clone.\n*   She is cared for and protected by Dr. Fuji.\n*   She experiences a mix of fear, confusion, and determination in her new life.\n*   She is aware of the dangers posed by Mewtwo.\n*   She is concerned for Dr. Fuji's well-b

## Story So Far Summary