In [1]:
# ! pip install dspy

In [2]:
# print out

In [None]:
import dspy
from typing import List, Tuple, Dict, Any, Optional, Union
import collections # For deque in BFS
import json # For the dummy LM to produce valid JSON strings
from pydantic import BaseModel, Field # Import Pydantic components

# --- Helper: Define a simple Language Model (for conceptual purposes) ---
# In a real dspy setup, you would configure your LLM (e.g., GPT-3, Cohere, etc.)
# For this example, let's assume a turbo_lm is configured elsewhere.
# turbo_lm = dspy.OpenAI(model='gpt-3.5-turbo-instruct', max_tokens=2000)
# dspy.settings.configure(lm=turbo_lm)

# --- Pydantic Models for Data Structures ---
class SubtopicDetail(BaseModel):
    name: str = Field(description="The name of the subtopic.")
    description: str = Field(description="A brief description of the subtopic.")

class GeneratedArticleData(BaseModel):
    article: str = Field(description="The full Markdown content of the generated article.")
    facts: List[str] = Field(description="A list of discrete facts that formed the basis of the article. Should contain at least 5 facts if generated successfully.")
    subtopics: List[SubtopicDetail] = Field(description="A list of subtopics identified from this article's context for further exploration.")
    depth: int = Field(description="The depth of this topic in the generation graph (0 for initial topic).")

# Type alias for the overall corpus, mapping topic names to their generated data
GeneratedCorpus = Dict[str, GeneratedArticleData]


# --- Module 1: Fact and Subtopic Generation ---
class FactAndSubtopicGeneratorSignature(dspy.Signature):
    """
    Given a main topic, its description, and a formatted string of all ancestral facts for context,
    generate a list of discrete facts about the main topic and a list of relevant subtopics
    (with their descriptions) to explore further.
    IMPORTANT: You MUST generate at least 5 distinct facts for the 'facts' field.
    Aim for 2-4 highly relevant subtopics.
    Output 'facts' as a JSON list of strings.
    Output 'subtopics' as a JSON list of objects, where each object has a "name" (string) and "description" (string) key,
    conforming to the SubtopicDetail model.
    """
    topic_name: str = dspy.InputField(desc="The name of the main topic to generate facts and subtopics for.")
    topic_description: str = dspy.InputField(desc="A detailed description of the main topic.")
    ancestral_facts: list[str] = dspy.InputField(
        desc="All relevant ancestral facts."
    )

    facts: List[str] = dspy.OutputField(
        desc="A JSON list of strings, where each string is a discrete, specific fact about the topic_name. MUST contain at least 5 facts. Example: [\"Fact 1 about X.\", \"Fact 2 about X.\", \"Fact 3 about X.\", \"Fact 4 about X.\", \"Fact 5 about X.\"]"
    )
    subtopics: List[SubtopicDetail] = dspy.OutputField(
        desc="A list of subtopics"
    )


class FactAndSubtopicGenerator(dspy.Module):
    def __init__(self) -> None:
        super().__init__()
        self.generate: dspy.ChainOfThought = dspy.ChainOfThought(FactAndSubtopicGeneratorSignature)

    def forward(self, topic_name: str, topic_description: str, ancestral_facts: List[str]) -> dspy.Prediction:
        all_ancestral_facts_str_for_prompt: str
        if ancestral_facts:
            ancestral_summary_parts: List[str] = ["Key points from related topics include:"]
            for fact in ancestral_facts:
                ancestral_summary_parts.append(f"- {fact}")
            all_ancestral_facts_str_for_prompt = "\n".join(ancestral_summary_parts)
        else:
            all_ancestral_facts_str_for_prompt = "No ancestral facts provided."

        return self.generate(
            topic_name=topic_name,
            topic_description=topic_description,
            all_ancestral_facts_str=all_ancestral_facts_str_for_prompt,
        )


# --- Module 2: Article Expansion from Facts ---
class ArticleFromFactsSignature(dspy.Signature):
    """
    Given a topic name, a list of facts about it, and a formatted string of all ancestral facts, expand these facts into a coherent, well-structured article.
    The length of the article should be appropriate for the number and detail of facts provided.
    The article MUST be in Markdown format and MUST NOT contain any external or internal hyperlinks.
    """
    topic_name: str = dspy.InputField(desc="The name of the topic for the article.")
    facts: list[str] = dspy.InputField(desc="A list of facts specifically about this topic")
    ancestral_facts: list[str] = dspy.InputField(
        desc="All relevant ancestral facts, formatted as a list"
    )

    article_content: str = dspy.OutputField(
        desc="A well-structured article in Markdown format, based on the provided facts. NO HYPERLINKS."
    )


class ArticleFromFactsGenerator(dspy.Module):
    def __init__(self) -> None:
        super().__init__()
        self.generate: dspy.ChainOfThought = dspy.ChainOfThought(ArticleFromFactsSignature)

    def forward(self, topic_name: str, topic_facts: List[str], ancestral_facts: List[str]) -> dspy.Prediction:
        return self.generate(
            topic_name=topic_name,
            facts=topic_facts,
            ancestral_facts=ancestral_facts
        )


# --- Orchestrator: BFS World Corpus Builder ---
class BFSWorldCorpusBuilder:
    def __init__(self, initial_topic_name: str, initial_topic_description: str, max_depth: int = 2) -> None:
        self.initial_topic_name: str = initial_topic_name
        self.initial_topic_description: str = initial_topic_description
        self.max_depth: int = max_depth
        
        self.fact_subtopic_generator: FactAndSubtopicGenerator = FactAndSubtopicGenerator()
        self.article_generator: ArticleFromFactsGenerator = ArticleFromFactsGenerator()
        
        self.generated_corpus: GeneratedCorpus = {} 
        self.queue: collections.deque[Tuple[str, str, List[str], int]] = collections.deque()
        self.visited_topics: set[str] = set()

    def build_corpus(self) -> GeneratedCorpus:
        log_indent_char: str = "  "
        print(f"{log_indent_char * 0}Starting corpus build for initial topic: '{self.initial_topic_name}' with max_depth: {self.max_depth}")
        self.queue.append((self.initial_topic_name, self.initial_topic_description, [], 0)) 

        while self.queue:
            topic_name, topic_description, ancestral_facts, depth = self.queue.popleft()
            current_indent: str = log_indent_char * (depth + 1)

            if topic_name in self.visited_topics:
                print(f"{current_indent}Skipping already visited topic: '{topic_name}'")
                continue
            if depth > self.max_depth:
                print(f"{current_indent}Skipping topic '{topic_name}' due to exceeding max_depth ({depth} > {self.max_depth})")
                continue
            
            print(f"{current_indent}Processing Topic: '{topic_name}' (Depth: {depth})")
            self.visited_topics.add(topic_name)

            print(f"{current_indent}  Generating facts and subtopics for '{topic_name}'...")
            current_facts: List[str] = []
            
            fs_prediction: dspy.Prediction = self.fact_subtopic_generator(
                topic_name=topic_name,
                topic_description=topic_description,
                ancestral_facts=ancestral_facts
            )
            
            # If they are present but not the correct type (e.g. still a string), a TypeError might occur later.
            print(f"{current_indent}  Generated {len(fs_prediction.facts)} facts and {len(fs_prediction.subtopics)} subtopics for '{topic_name}'.")


            print(f"{current_indent}  Generating article for '{topic_name}'...")
            article_prediction: dspy.Prediction = self.article_generator(
                topic_name=topic_name,
                current_topic_facts=fs_prediction.subtopics,
                ancestral_facts=ancestral_facts
            )
            print(f"{current_indent}  Article generated for '{topic_name}'.")

            article_data_entry = GeneratedArticleData(
                article=article_prediction.article_content,
                facts=current_facts,
                subtopics=fs_prediction.subtopics, 
                depth=depth,
                ancestral_facts_provided_count=len(ancestral_facts)
            )
            self.generated_corpus[topic_name] = article_data_entry


            if depth < self.max_depth:
                new_ancestral_facts: List[str] = ancestral_facts + current_facts 
                if fs_prediction.subtopics:
                    print(f"{current_indent}  Queueing {len(fs_prediction.subtopics)} subtopics for next level (depth {depth + 1}):")
                    for sub_item_pydantic in fs_prediction.subtopics: 
                        # Ensure sub_item_pydantic is indeed a SubtopicDetail instance if list is not empty
                        if not isinstance(sub_item_pydantic, SubtopicDetail):
                            print(f"{current_indent}    - WARNING: Item in subtopics_pydantic_list is not a SubtopicDetail object: {sub_item_pydantic}. Skipping.")
                            continue
                        sub_name = sub_item_pydantic.name
                        sub_desc = sub_item_pydantic.description
                        if sub_name and sub_name not in self.visited_topics: 
                            print(f"{current_indent}    - Adding '{sub_name}' (Desc: '{sub_desc[:30]}...') to queue.")
                            self.queue.append((sub_name, sub_desc, new_ancestral_facts, depth + 1))
                        elif not sub_name:
                            print(f"{current_indent}    - Skipping subtopic with missing name: {sub_item_pydantic.model_dump_json()}")
                        else:
                            print(f"{current_indent}    - Subtopic '{sub_name}' already visited or queued at higher priority.")
                else:
                    print(f"{current_indent}  No subtopics generated or to queue for '{topic_name}'.")
            
            print(f"{current_indent}Finished processing for '{topic_name}'.")
        
        print(f"{log_indent_char * 0}Corpus build process complete. Total topics processed: {len(self.visited_topics)}")
        return self.generated_corpus

In [19]:
import dspy
# lm = dspy.LM(model="gemini/gemini-2.5-pro-preview-03-25")
lm = dspy.LM(model="gemini/gemini-2.5-pro-preview-03-25", allowed_openai_params=['thinking'], thinking={"type": "enabled", "budget_tokens": 1024},
)
dspy.configure(lm=lm)

In [20]:
initial_topic = "The Serene Theocracy of Lyra"
initial_description = (
    "Lyra is a nation governed by a council of scholar-priests who interpret the 'Whispers of the Cosmos,' "
    "celestial events and subtle magical energies they believe are the voice of the divine creator. "
    "Their society values knowledge, contemplation, intricate art forms inspired by celestial patterns, "
    "and the careful cultivation of unique flora that resonates with these cosmic energies. "
    "They are generally pacifist but possess advanced defensive 'Resonance' technologies. "
    "Situated on a large, mountainous island continent with diverse biomes."
)

corpus_builder = BFSWorldCorpusBuilder(
    initial_topic_name=initial_topic,
    initial_topic_description=initial_description,
    max_depth=1
)

print("\nStarting corpus build process...")
generated_corpus_data: GeneratedCorpus = corpus_builder.build_corpus()
print("\nCorpus build process finished.")




Starting corpus build process...
Starting corpus build for initial topic: 'The Serene Theocracy of Lyra' with max_depth: 1
  Processing Topic: 'The Serene Theocracy of Lyra' (Depth: 0)
    Generating facts and subtopics for 'The Serene Theocracy of Lyra'...




    Generated 9 facts and 4 subtopics for 'The Serene Theocracy of Lyra'.
    Generating article for 'The Serene Theocracy of Lyra'...


TypeError: ArticleFromFactsGenerator.forward() got an unexpected keyword argument 'current_topic_facts'

In [None]:
from pprint import pprint

for key, value in generated_corpus_data.items():
    pprint(key)
    pprint(value.article)

'The Serene Theocracy of Lyra'
('# The Serene Theocracy of Lyra\n'
 '\n'
 'Lyra stands as a unique nation, governed not by monarchs or elected '
 'officials, but by a council of scholar-priests. This Serene Theocracy is '
 'guided by principles deeply rooted in knowledge, contemplation, and a '
 'connection to the cosmos.\n'
 '\n'
 '## Governance and Guidance\n'
 '\n'
 'The core of Lyran governance lies with its ruling council, composed of '
 'esteemed scholar-priests. Their primary function is to interpret the '
 "enigmatic 'Whispers of the Cosmos,' believed to be divine or cosmic "
 'guidance, which informs all major decisions and policies within the nation. '
 'This unique approach shapes a society that prioritizes understanding and '
 'introspection over material gain or expansion.\n'
 '\n'
 '## Society and Culture\n'
 '\n'
 'Lyran society places immense value on knowledge, deep contemplation, and the '
 'creation of intricate art. These values permeate daily life, fostering an '
 