In [44]:
import dspy
from typing import List
from pydantic import BaseModel

# DSPy signature for the first step of ConversationToFlashcards.forward:
# it takes the conversation and asks the LM for three outputs, which are then
# passed on to ConceptPrioritization.
# NOTE: the class docstring and every `desc` string below are incorporated into
# the prompt, so changing their wording changes what the LM is asked to do.
class ConversationAnalysis(dspy.Signature):
    """Analyze conversation to identify learning patterns and user confusion signals"""
    # Input: the conversation to analyze (the notebook passes the object loaded from conversation.pkl).
    conversation = dspy.InputField()
    # Outputs: consumed by ConceptPrioritization as confusion_signals, concepts and follow_ups respectively.
    user_confusion_signals = dspy.OutputField(desc="questions showing confusion or deep interest")
    concept_hierarchy = dspy.OutputField(desc="main concepts vs supporting details")
    follow_up_intensity = dspy.OutputField(desc="which concepts got most follow-up attention")

# DSPy signature for the second pipeline step: ranks the concepts found by
# ConversationAnalysis. ConversationToFlashcards.forward feeds it
# concept_hierarchy, user_confusion_signals and follow_up_intensity.
# NOTE: the docstring and `desc` text are part of the prompt sent to the LM.
class ConceptPrioritization(dspy.Signature):
    """Rank concepts by learning importance based on conversation patterns"""
    concepts = dspy.InputField()
    confusion_signals = dspy.InputField()
    follow_ups = dspy.InputField()
    # Output: handed to FlashcardGeneration as its prioritized_concepts input.
    prioritized_concepts = dspy.OutputField(desc="concepts ranked by learning priority")


In [62]:
# construct the final signature
# Pydantic model for one generated card. It is the element type of
# FlashcardGeneration.flashcards, so ConversationToFlashcards returns a list of these.
class Flashcard(BaseModel):
    """Structure for a complete flashcard with three components"""
    # Question or concept title shown first.
    front: str
    # Short answer to the front.
    back: str
    # Longer explanation with examples.
    explanation: str

# DSPy signature that generates only the front of a card from a concept and its context.
# NOTE(review): not referenced by ConversationToFlashcards in this notebook, where
# FlashcardGeneration produces front, back and explanation in a single call.
class FlashcardFront(dspy.Signature):
    """Generate concise, clear flashcard fronts (questions or concepts)"""
    concept = dspy.InputField()
    context = dspy.InputField()
    front = dspy.OutputField(desc="concise concept title or focused question")

# DSPy signature that generates only the back (answer) of a card for a given front.
# NOTE(review): not referenced by ConversationToFlashcards in this notebook.
class FlashcardBack(dspy.Signature):
    """Generate concise answers that directly address the flashcard front"""
    front = dspy.InputField()
    concept_details = dspy.InputField()
    back = dspy.OutputField(desc="brief, direct answer to the front question")

# DSPy signature that generates only the explanation of a card from its front, back and context.
# NOTE(review): not referenced by ConversationToFlashcards in this notebook.
class FlashcardExplanation(dspy.Signature):
    """Generate detailed explanations with examples to reinforce learning"""
    front = dspy.InputField()
    back = dspy.InputField()
    concept_context = dspy.InputField()
    explanation = dspy.OutputField(desc="detailed explanation with examples that deepens understanding")

# DSPy signature for the third and final pipeline step: produces the complete
# list of cards in one LM call.
# Both the class docstring and the `desc` parameter of the output field are
# incorporated into the prompt that guides the LLM's generation, so they are
# the place to tune card style and length.
class FlashcardGeneration(dspy.Signature):
    """Generate flashcards following spaced repetition best practices.

    Each flashcard should have:
    1. Front: A concise concept title or focused question
    2. Back: A brief, direct answer that addresses the front
    3. Explanation: A detailed explanation with examples to deepen understanding. Generate detailed explanations with examples to reinforce learning. It can be long (4-5 sentences) as well.
    """
    # Input: the ranking produced by ConceptPrioritization.
    prioritized_concepts = dspy.InputField()
    # Input: ConversationToFlashcards passes the original conversation here.
    conversation_context = dspy.InputField()
    # Output: typed as a list of Flashcard models; this is what the pipeline returns.
    flashcards: List[Flashcard] = dspy.OutputField(
        desc="List of Flashcard models, each with front (concise concept/question), back (direct answer), and explanation (detailed with examples)"
    )

In [63]:

# Quality-control signature: takes a set of flashcards and returns them with overlaps resolved.
# NOTE(review): only referenced from the commented-out quality-control code in
# ConversationToFlashcards, so it is currently unused.
class DistinctnessChecker(dspy.Signature):
    """Ensure flashcards cover different concepts without overlap"""
    flashcards = dspy.InputField()
    distinct_flashcards = dspy.OutputField(desc="flashcards with overlapping content merged or removed")

# Quality-control signature: rewrites a single flashcard so front and back do not repeat phrasing.
# NOTE(review): only referenced from the commented-out quality-control code in
# ConversationToFlashcards, so it is currently unused.
class RepetitionRemover(dspy.Signature):
    """Remove repetitive phrasing between front and back of cards"""
    raw_flashcard = dspy.InputField()
    clean_flashcard = dspy.OutputField(desc="front/back with no repeated phrasing")

# Quality-control signature: rewrites a single flashcard to drop conversational references.
# NOTE(review): only referenced from the commented-out quality-control code in
# ConversationToFlashcards, so it is currently unused.
class NeutralityEnforcer(dspy.Signature):
    """Remove references to 'user', 'AI', 'conversation' from flashcards"""
    flashcard_with_references = dspy.InputField()
    neutral_flashcard = dspy.OutputField(desc="factual flashcard without conversational references")

# Signature for tailoring a card's difficulty to a given comprehension level.
# NOTE(review): not referenced anywhere in the visible notebook code; nothing shown
# here produces a user_comprehension_level value to feed it.
class CardComplexityOptimizer(dspy.Signature):
    """Adjust flashcard complexity based on user's demonstrated understanding level"""
    concept = dspy.InputField()
    user_comprehension_level = dspy.InputField()
    optimal_flashcard = dspy.OutputField(desc="appropriately challenging flashcard")

In [64]:
import dspy
from dotenv import load_dotenv
import os

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()
# TODO: build a timeline of how prompts evolve over time to demonstrate the value of DSPy
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    # Fail here with an actionable message instead of passing api_key=None and
    # getting an opaque authentication error on the first LM call.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it or add it to a .env file before running this cell"
    )

# Route all DSPy calls through OpenRouter using the Gemini 2.0 Flash model.
lm = dspy.LM(
    model="openrouter/google/gemini-2.0-flash-001",
    api_base="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)
# Make this LM the default for every dspy module created afterwards.
dspy.configure(lm=lm)

# Define your modules
class ConversationToFlashcards(dspy.Module):
    """Pipeline that turns a conversation into a list of Flashcard objects.

    Three chain-of-thought steps run in sequence:
    ConversationAnalysis -> ConceptPrioritization -> FlashcardGeneration.
    """

    def __init__(self):
        # Let dspy.Module set up its own state before sub-modules are attached.
        super().__init__()

        # Core pipeline
        self.analyze = dspy.ChainOfThought(ConversationAnalysis)
        self.prioritize = dspy.ChainOfThought(ConceptPrioritization)
        self.generate = dspy.ChainOfThought(FlashcardGeneration)

        # Quality control (disabled: the extra per-card LLM calls slowed
        # generation and produced off-topic output when enabled)
        # self.check_distinct = dspy.ChainOfThought(DistinctnessChecker)
        # self.remove_repetition = dspy.ChainOfThought(RepetitionRemover)
        # self.enforce_neutrality = dspy.ChainOfThought(NeutralityEnforcer)

    def forward(self, conversation, **kwargs) -> List[Flashcard]:
        """Run the three-step pipeline on one conversation.

        Args:
            conversation: The conversation to analyze; it is passed unchanged to
                both the analysis step and the generation step.
            **kwargs: Accepted for call-site compatibility; currently unused.

        Returns:
            The `flashcards` field of the FlashcardGeneration prediction.
        """
        # 1. Analyze the conversation
        analysis = self.analyze(conversation=conversation)

        # 2. Rank/prioritize concepts
        priorities = self.prioritize(
            concepts=analysis.concept_hierarchy,
            confusion_signals=analysis.user_confusion_signals,
            follow_ups=analysis.follow_up_intensity,
        )

        # 3. Generate the flashcards (returns an object whose .flashcards is List[Flashcard])
        generated = self.generate(
            prioritized_concepts=priorities.prioritized_concepts,
            conversation_context=conversation,
        )

        # Disabled quality-control stage, kept as a sketch. It must read from
        # `generated` (the earlier draft referenced an undefined `raw_cards`).
        # distinct_cards = self.check_distinct(flashcards=generated.flashcards)
        # final_cards = []
        # for card in distinct_cards.distinct_flashcards:
        #     no_repetition = self.remove_repetition(raw_flashcard=card)
        #     neutral_card = self.enforce_neutrality(flashcard_with_references=no_repetition.clean_flashcard)
        #     final_cards.append(neutral_card.neutral_flashcard)

        return generated.flashcards

In [65]:
import pickle
import dspy

# Load the conversation object from the pickle file.
# Set the FCG_CONVERSATION_PKL environment variable to use a different file;
# when it is unset or empty, the original development path is used.
file_path = os.getenv("FCG_CONVERSATION_PKL") or r'C:\Users\lokar\flashcardgen\fcg\development\conversation.pkl'
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# pickles you created yourself.
with open(file_path, 'rb') as f:
    conversation = pickle.load(f)

# Test an individual signature: run only the analysis step on the conversation
analyzer = dspy.ChainOfThought(ConversationAnalysis)
result = analyzer(conversation=conversation)


In [66]:
# Echo the loaded conversation to inspect its messages.
conversation

[ChatMessage(role=<ChatRole.USER: 'user'>, content='GEPA algorithm,'), ChatMessage(role=<ChatRole.ASSISTANT: 'assistant'>, content='Here’s a practical breakdown of the GEPA (Genetic-Pareto) algorithm:\n\nWhat is GEPA?\n\nGEPA, short for Genetic-Pareto, is a prompt-optimization method designed to improve how large language models (LLMs) solve tasks. Unlike conventional reinforcement learning (RL) that relies on scalar rewards and requires many expensive rollouts, GEPA uses natural language reflections and evolutionary ideas to make optimization more efficient and transparent \narXiv\nMedium\n.\n\nHow GEPA Works\n\nGEPA’s mechanics rely on three core ideas:\n\n1. Reflective Prompt Mutation\n\nGEPA collects execution traces—reasoning steps, tool calls, compiler error messages, evaluation logs—from the LLM system.\n\nAn LLM is then used to reflect on these traces, identify what failed or succeeded, and suggest improved prompt modifications in natural language \nMedium\narXiv\ndspy.ai\n.\n\

In [67]:
# Show two of the analyzer's intermediate outputs, one after the other
for field_name in ("user_confusion_signals", "concept_hierarchy"):
    print(getattr(result, field_name))

"so the concepts of genetic programming applied to llm optimisation

I know about genetic algorithms applied to hyperparameter searches 

what are the similiraties and differences?"
Main concept: GEPA (Genetic-Pareto Algorithm)
    - Subconcept: Genetic programming applied to LLM optimization
    - Subconcept: Similarities to traditional Genetic Algorithms (GAs)
        - Supporting detail: Evolutionary principle
            - Candidate representation
            - Fitness evaluation
            - Selection
            - Mutation & Crossover
            - Generation iteration
    - Subconcept: Differences from traditional GAs
        - Supporting detail: Representation of candidates
        - Supporting detail: Mutation/Crossover
        - Supporting detail: Fitness evaluation
        - Supporting detail: Diversity management
        - Supporting detail: Exploration vs Exploitation
        - Supporting detail: Feedback


In [68]:

import pickle
import dspy

# Load the conversation object from the pickle file.
# Set the FCG_CONVERSATION_PKL environment variable to use a different file;
# when it is unset or empty, the original development path is used.
file_path = os.getenv("FCG_CONVERSATION_PKL") or r'C:\Users\lokar\flashcardgen\fcg\development\conversation.pkl'
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# pickles you created yourself.
with open(file_path, 'rb') as f:
    conversation = pickle.load(f)

# Run the full pipeline (analysis -> prioritization -> generation)
ctf = ConversationToFlashcards()
result = ctf(conversation=conversation)

# Print the generated flashcards
print(result)

[Flashcard(front='What is the core similarity between traditional GAs and GEPA?', back='Both share the same evolutionary principle.', explanation='Both traditional GAs and GEPA are based on the same core principles of evolutionary algorithms: candidate representation, fitness evaluation, selection, mutation & crossover, and generation iteration. They both iteratively improve a population of candidates based on their performance.'), Flashcard(front='How are candidates represented in traditional GAs vs. GEPA?', back='Traditional GAs use numeric vectors; GEPA uses natural language prompts or structured prompt modules.', explanation='In traditional GAs for hyperparameter optimization, candidates are represented as numeric vectors of hyperparameters. In GEPA, candidates are natural language prompts or structured prompt modules that are designed to elicit desired behavior from an LLM. This difference in representation leads to different mutation and crossover strategies.'), Flashcard(front='

In [69]:
# Inspect the front of the second generated flashcard.
result[1].front

'How are candidates represented in traditional GAs vs. GEPA?'

In [70]:
# Inspect the back of the second generated flashcard.
result[1].back

'Traditional GAs use numeric vectors; GEPA uses natural language prompts or structured prompt modules.'

In [71]:
# Inspect the explanation of the second generated flashcard.
result[1].explanation

'In traditional GAs for hyperparameter optimization, candidates are represented as numeric vectors of hyperparameters. In GEPA, candidates are natural language prompts or structured prompt modules that are designed to elicit desired behavior from an LLM. This difference in representation leads to different mutation and crossover strategies.'

# Notes

- If each card is verified individually, the extra LLM calls add a noticeable delay to card generation.
- When all of the verification calls were left enabled, the output was junk for reasons not yet understood (the cards talked about Mandelbrots instead of GEPA).
- Both the class docstring and the desc parameter in output fields are incorporated into the prompt that guides the LLM's generation.
