In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Make the backend package importable from this notebook.
# The path is relative, so it is resolved against the kernel's working directory.
lib_path = Path('../../novelinsights/backend')
# Guard the append so re-running this cell does not pile up duplicate sys.path entries.
if str(lib_path) not in sys.path:
    sys.path.append(str(lib_path))

In [2]:
from google import genai
import anthropic

from dotenv import load_dotenv
import os

# Read the backend's .env file, then pick up the API keys from the environment.
# A key that is not set falls back to the empty string.
load_dotenv(lib_path / '.env')
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")

from novelinsights.services.ai.llmclient import AnthropicClient, GoogleGeminiClient

# Wrap each vendor SDK client in the project's LLM client adapter.
gemini_sdk_client = genai.Client(api_key=GEMINI_API_KEY)
G_CLIENT = GoogleGeminiClient(gemini_sdk_client)

anthropic_sdk_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
A_CLIENT = AnthropicClient(anthropic_sdk_client)

# When True, cells below load a previously saved LLM response from disk
# (if one exists) instead of issuing a new request.
USE_CACHED_RESPONSES = True

In [3]:
import json
from dataclasses import asdict
from typing import NamedTuple

# Story-level metadata used to fill the prompt templates below.
# JSON is read as UTF-8 explicitly: without `encoding`, open() uses the platform's
# locale encoding, which can mis-decode non-ASCII text.
with open('../../novelinsights/backend/tests/resources/pokemon_amber/_metadata.json', 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# The file is closed at this point; only the parsed dict is needed.
STORY_TITLE = metadata['story_title']
GENRES = metadata['genres']
ADDITIONAL_TAGS = metadata['additional_tags']
STORY_DESCRIPTION = metadata['story_description']


class Chapter(NamedTuple):
    """One chapter of the story, as loaded from its JSON resource file."""
    # Value of the 'chapter_title' key in the chapter file.
    chapter_title: str
    # Value of the 'chapter_content' key in the chapter file.
    chapter_content: str
    # 1-based chapter index, taken from the number in the file name.
    chapter_number: int

# All chapters of the story, in reading order (index 0 holds chapter 1).
CHAPTERS = []

# Number of chapterN.json files available in the resource directory.
num_chapters = 12

for i in range(1, num_chapters + 1):
    # Read as UTF-8 explicitly; the locale default can mis-decode non-ASCII chapter text.
    with open(f'../../novelinsights/backend/tests/resources/pokemon_amber/chapter{i}.json', 'r', encoding='utf-8') as f:
        chapter = json.load(f)

    # The file is already closed here; only the parsed dict is used.
    CHAPTER_TITLE = chapter['chapter_title']
    CHAPTER_CONTENT = chapter['chapter_content']
    CHAPTERS.append(Chapter(chapter_title=CHAPTER_TITLE, chapter_content=CHAPTER_CONTENT, chapter_number=i))
        

In [4]:
from novelinsights.services.ai.llmclient import LLMResponse
from novelinsights.services.ai.prompts.narrative.chapterbychapter.summarize import SummarizeChapterTemplate, SummarizeChapterPrompt

# Prompt for summarizing chapter 1: story-level metadata plus the first chapter's
# title and text. No structured output schema is requested for the summary.
first_chapter = CHAPTERS[0]

summarize_prompt_template = SummarizeChapterTemplate(
    structured_output_schema=None,
    chapter_title=first_chapter.chapter_title,
    chapter_content=first_chapter.chapter_content,
    story_title=STORY_TITLE,
    story_description=STORY_DESCRIPTION,
    genres=GENRES,
    additional_tags=ADDITIONAL_TAGS,
)
summarize_prompt = SummarizeChapterPrompt(prompt_template=summarize_prompt_template)

# Chapter-1 summary: reuse the response saved on disk when caching is enabled and a
# saved file exists; otherwise query the LLM and persist the result for later runs.
summarize_cache_path = Path('../../novelinsights/backend/tests/resources/pokemon_amber/chapter1_out/summarize.json')

summarize_response: LLMResponse = None # type: ignore

if USE_CACHED_RESPONSES and summarize_cache_path.exists():
    with open(summarize_cache_path, 'r', encoding='utf-8') as f:
        json_dict = json.load(f)
    summarize_response = LLMResponse(response=json_dict.get("response"), usage_metadata=json_dict.get("usage_metadata"))

if not summarize_response:
    # Create the output directory BEFORE the request: otherwise a missing directory
    # would make the write fail after the LLM call, and the response would be lost.
    summarize_cache_path.parent.mkdir(parents=True, exist_ok=True)

    summarize_response = summarize_prompt.generate(client=A_CLIENT)

    with open(summarize_cache_path, 'w', encoding='utf-8') as f:
        json.dump({
            "response": summarize_response.get("response"),
            "usage_metadata": summarize_response.get("usage_metadata"),
            "model_config": asdict(summarize_prompt.model_config),
        }, f, indent=2)

# print(summarize_prompt.render())

In [5]:
from novelinsights.services.ai.prompts import FindEntitiesTemplate, FindEntitiesPrompt
from novelinsights.schemas import FindEntitiesOutputSchema

# Prompt for extracting entities from chapter 1; the LLM is asked to answer in the
# shape of FindEntitiesOutputSchema.
first_chapter = CHAPTERS[0]

find_entities_template = FindEntitiesTemplate(
    structured_output_schema=FindEntitiesOutputSchema,
    chapter_title=first_chapter.chapter_title,
    chapter_content=first_chapter.chapter_content,
    story_title=STORY_TITLE,
    story_description=STORY_DESCRIPTION,
    genres=GENRES,
    additional_tags=ADDITIONAL_TAGS,
)

find_entities_prompt = FindEntitiesPrompt(prompt_template=find_entities_template)

# Chapter-1 entity extraction: reuse the structured response saved on disk when caching
# is enabled and a saved file exists; otherwise query the LLM and persist the result.
find_entities_cache_path = Path('../../novelinsights/backend/tests/resources/pokemon_amber/chapter1_out/find_entities.json')

find_entities_response: LLMResponse = None # type: ignore

if USE_CACHED_RESPONSES and find_entities_cache_path.exists():
    with open(find_entities_cache_path, 'r', encoding='utf-8') as f:
        json_dict = json.load(f)
    # Rebuild a schema instance from the saved JSON so the cached response supports
    # the same attribute access (e.g. `.entities`) as a freshly generated one.
    validated_response = FindEntitiesOutputSchema.model_validate(json_dict.get("response"))
    find_entities_response = LLMResponse(response=validated_response, usage_metadata=json_dict.get("usage_metadata"))

if not find_entities_response:
    # Create the output directory BEFORE the request: otherwise a missing directory
    # would make the write fail after the LLM call, and the response would be lost.
    find_entities_cache_path.parent.mkdir(parents=True, exist_ok=True)

    find_entities_response = find_entities_prompt.generate_structured(client=G_CLIENT)

    with open(find_entities_cache_path, 'w', encoding='utf-8') as f:
        json.dump({
            "response": find_entities_response.get("response").model_dump(mode="json"),
            "usage_metadata": find_entities_response.get("usage_metadata"),
            "model_config": asdict(find_entities_prompt.model_config),
        }, f, indent=2)


In [6]:
# print(entities.keys())

from novelinsights.schemas.prompt_responses.narrative.chapterbychapter.find_entities import FoundEntity
from novelinsights.types.knowledge import EntitySignificanceLevel

# Iterate over the found entities in chunks of 5, skipping those that are not significant, so that the LLM's output context window is not exceeded.
class KeyEntities:
    """Entities found in a chapter, indexed so they can be batched into upsert prompts.

    Keeps the entities in their original order, plus a map from each entity's
    identifier to the entities it names in ``related_entities`` that are also
    present in the same list. References to identifiers that were not found are
    dropped.
    """

    def __init__(self, entities: list[FoundEntity]):
        """Store ``entities`` and resolve their related-entity identifiers.

        Args:
            entities: Found entities; each is read for ``identifier`` and
                ``related_entities`` (an iterable of identifiers).
        """
        self.entities = entities
        entities_by_identifier = {entity.identifier: entity for entity in entities}

        # entity identifier -> related entities that are themselves in `entities`
        self.related_entities_by_identifier: dict[str, list[FoundEntity]] = {}
        for entity in entities:
            for related_entity_id in entity.related_entities:
                related_entity = entities_by_identifier.get(related_entity_id)
                if related_entity is not None:
                    self.related_entities_by_identifier.setdefault(entity.identifier, []).append(related_entity)

    def get_sig_related_entities_for_upsert(self, entity_id: str, min_sig_level: EntitySignificanceLevel = EntitySignificanceLevel.SUPPORTING) -> list[str]:
        """Return identifiers of the entities related to ``entity_id`` that meet ``min_sig_level``.

        Args:
            entity_id: Identifier of the entity whose relations are wanted.
            min_sig_level: Lowest significance level to keep (compared with ``>=``).

        Returns:
            Identifiers of the qualifying related entities, in the order they were
            listed; an empty list when ``entity_id`` has no resolved relations.
        """
        return [entity.identifier for entity in self.related_entities_by_identifier.get(entity_id, []) if entity.significance_level >= min_sig_level]

    def yield_for_upsert(self, min_sig_level: EntitySignificanceLevel = EntitySignificanceLevel.SUPPORTING, chunk_size: int = 5):
        """Yield the significant entities as upsert strings, in chunks.

        Entities below ``min_sig_level`` are skipped. Each remaining entity is
        rendered with ``to_upsert_str``, passing along its significant related
        entities.

        Args:
            min_sig_level: Lowest significance level to include (compared with ``>=``).
            chunk_size: Maximum number of entities per yielded chunk; must be >= 1.

        Yields:
            Lists of at most ``chunk_size`` upsert strings. The final list may be
            shorter, and no empty list is ever yielded.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1. Because this is a
                generator, the error surfaces when iteration starts.
        """
        # A non-positive size used to produce empty chunks (0) or disable chunking
        # altogether (negative); reject it instead of silently misbehaving.
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")

        significant_entities = []
        for entity in self.entities:
            if entity.significance_level >= min_sig_level:
                significant_entities.append(entity.to_upsert_str(sig_related_entities=self.get_sig_related_entities_for_upsert(entity.identifier, min_sig_level)))
                # The chunk can only become full right after an append.
                if len(significant_entities) == chunk_size:
                    yield significant_entities
                    significant_entities = []

        # Flush the trailing, partially filled chunk.
        if significant_entities:
            yield significant_entities

    def __repr__(self):
        """Return a debug representation listing all wrapped entities."""
        return f"KeyEntities(entities={self.entities})"
    
# Index the entities found for chapter 1.
found_entities = find_entities_response.get("response").entities
key_entities = KeyEntities(found_entities)

# Preview the chunks of upsert strings that will be sent to the LLM.
for i, upsert_chunk in enumerate(
    key_entities.yield_for_upsert(
        chunk_size=5,
        min_sig_level=EntitySignificanceLevel.SUPPORTING,
    )
):
    print(upsert_chunk)

['character: AlexaTheGreat/Amber - central (related: Dr. Fuji, Mewtwo, Amber (clone))', 'character: Dr. Fuji - major (related: AlexaTheGreat/Amber, Mewtwo, Amber (clone), Pokemon Mansion)', 'character: Mewtwo - major (related: Dr. Fuji, AlexaTheGreat/Amber, Pokemon Mansion)', 'location: Pokemon Mansion - major (related: Dr. Fuji, Mewtwo, AlexaTheGreat/Amber)', 'character: Amber (clone) - major (related: Dr. Fuji, AlexaTheGreat/Amber, Mewtwo)']


In [8]:
from novelinsights.schemas.prompt_responses.narrative.chapterbychapter.upsert_entities import UpsertEntitiesOutputSchema
from novelinsights.services.ai.prompts.narrative.chapterbychapter.upsert_entities import UpsertEntitiesTemplate, UpsertEntitiesPrompt

# Prompt for upserting entities of chapter 1. `new_entities` starts empty and is
# filled in per chunk via `update_prompt_template` before each request.
first_chapter = CHAPTERS[0]

upsert_entities_template = UpsertEntitiesTemplate(
    structured_output_schema=UpsertEntitiesOutputSchema,
    new_entities=[],
    chapter_title=first_chapter.chapter_title,
    chapter_content=first_chapter.chapter_content,
    story_title=STORY_TITLE,
    story_description=STORY_DESCRIPTION,
    genres=GENRES,
    additional_tags=ADDITIONAL_TAGS,
)
upsert_entities_prompt = UpsertEntitiesPrompt(prompt_template=upsert_entities_template)

# One structured upsert response per chunk of significant entities, in chunk order.
upsert_entities_response_list = []

# Directory holding the saved chapter-1 responses (one file per upsert chunk).
upsert_cache_dir = Path('../../novelinsights/backend/tests/resources/pokemon_amber/chapter1_out')

for i, upsert_chunk in enumerate(key_entities.yield_for_upsert(min_sig_level=EntitySignificanceLevel.SUPPORTING, chunk_size=5)):
    upsert_entities_prompt.update_prompt_template(new_entities=upsert_chunk)

    # NOTE: cache files are keyed only by the 1-based chunk position, so they become
    # stale if the found entities, the significance threshold, or the chunk size change.
    upsert_cache_path = upsert_cache_dir / f'upsert_entities{i+1}.json'

    upsert_entities_response: LLMResponse = None # type: ignore

    if USE_CACHED_RESPONSES and upsert_cache_path.exists():
        with open(upsert_cache_path, 'r', encoding='utf-8') as f:
            json_dict = json.load(f)
        # Rebuild a schema instance from the saved JSON so cached and fresh
        # responses have the same type.
        validated_response = UpsertEntitiesOutputSchema.model_validate(json_dict.get("response"))
        upsert_entities_response = LLMResponse(response=validated_response, usage_metadata=json_dict.get("usage_metadata"))

    if not upsert_entities_response:
        # Create the output directory BEFORE the request: otherwise a missing directory
        # would make the write fail after the LLM call, and the response would be lost.
        upsert_cache_dir.mkdir(parents=True, exist_ok=True)

        upsert_entities_response = upsert_entities_prompt.generate_structured(client=G_CLIENT)

        with open(upsert_cache_path, 'w', encoding='utf-8') as f:
            json.dump({
                "response": upsert_entities_response.get("response").model_dump(mode="json"),
                "usage_metadata": upsert_entities_response.get("usage_metadata"),
                "model_config": asdict(upsert_entities_prompt.model_config),
            }, f, indent=2)

    upsert_entities_response_list.append(upsert_entities_response)