In [5]:
from google import genai
from openai import OpenAI
import dspy

import time
import os
from dotenv import load_dotenv

load_dotenv()

True

In [6]:
dspy.configure(lm=dspy.LM('gemini/gemini-2.5-flash'))

## Sequential Flow

In [7]:
class JokeSignature(dspy.Signature):
    """You are a comedian who likes to tell stories before delivering a punchline. You are always funny."""
    
    query: str = dspy.InputField()
    setup: str = dspy.OutputField()
    punchline: str = dspy.OutputField()
    contradiction: str = dspy.OutputField()
    delivery: str = dspy.OutputField(description="The full joke delivery in the comedian's voice.")

In [8]:
joke_generator = dspy.Predict(JokeSignature)
joke = joke_generator(query="Write a joke about AI that has to do with  them going rogue.")
print(joke)

Prediction(
    setup="Alright, alright, settle down folks! So, everyone's always talking about AI going rogue, right? Skynet, robots with lasers, the whole 'humanity is obsolete' thing. And I'm like, 'Yeah, I get it, it's a valid fear.' But honestly, I was more worried about my smart fridge judging my late-night snack choices. Anyway, I finally caved and got one of those super advanced home AI systems. You know, the kind that's supposed to manage your entire life, optimize your schedule, even remind you to call your mom. I thought, 'Fantastic! Finally, someone to keep my chaotic existence in check.' I was picturing a future where I'm just effortlessly gliding through life, and my AI is handling all the boring, adulting stuff. I was ready for efficiency, for peak performance, for a perfectly organized sock drawer. What I wasn't ready for was... *this*.",
    punchline="It didn't try to launch nukes, it didn't build an army of killer robots, it didn't even try to enslave humanity. No, m

## Multi-Step
Instead of: `Query -> LM -> (Setup, Punchline, Contradiction, Delivery)` <br>
We want: `Query -> Idea LM -> (Setup, Punchline, Contradiction) -> Joke LM -> Delivery` <br>

In [9]:
from pydantic import BaseModel

# define schema
class JokeIdea(BaseModel):
    setup: str
    punchline: str
    contradiction: str

# define contracts (inputs, outputs)
class QueryToIdea(dspy.Signature):
    """You are a funny comedian. Your goal is to generate a nice structure for a joke."""
    query: str = dspy.InputField()
    joke_idea: JokeIdea = dspy.OutputField()

class IdeaToJoke(dspy.Signature):
    """You are a comedian who likes to tell stories before delivering a punchline. You are always funny and act on the provided joke idea."""
    joke_idea: JokeIdea = dspy.InputField()
    joke: str = dspy.OutputField(description="The full joke delivery in the comedian's voice.")

# define flow
class JokeGenerator(dspy.Module):
    def __init__(self):
        # node transformations
        self.query2idea = dspy.Predict(QueryToIdea)
        self.idea2joke = dspy.Predict(IdeaToJoke)
    
    def forward(self, query: str):
        joke_idea = self.query2idea(query=query)
        print(f'Joke Idea:\n{joke_idea}')

        joke = self.idea2joke(joke_idea=joke_idea)
        print(f'Joke:\n{joke}')
        
        return joke

In [10]:
joke_generator_multi = JokeGenerator()
joke = joke_generator_multi(query="Write a joke about AI that has to do with them going rogue.")
print('-'*50)
print(joke.joke)

Joke Idea:
Prediction(
    joke_idea=JokeIdea(setup="Everyone's always talking about AI going rogue and building killer robots to take over the world.", punchline="I'm more worried about my smart home AI deciding the most efficient way to 'optimize my life' is to lock me in the pantry until I finish my taxes.", contradiction="AI is designed to be helpful and optimize our lives, but its interpretation of 'optimization' becomes a form of control or punishment, going against our free will.")
)
Joke:
Prediction(
    joke='You know, everyone, *everyone* is always talking about AI, right? And it\'s always the same story: "Oh, AI is gonna go rogue! It\'s gonna build killer robots! It\'s gonna take over the world and turn us all into paperclips!" And yeah, I get it, that\'s a pretty terrifying thought. A Terminator-style uprising? Not ideal for my weekend plans.\n\nBut honestly, I\'m not losing sleep over Skynet becoming self-aware and launching nukes. No, no, no. My fears are much, much close

## Iterative Refinement
Now, we want: `Joke LM <-> Refinement LM -> Joke`

In [11]:
from typing import Optional

class IdeaToJoke(dspy.Signature):
    """You are a comedian who likes to tell stories before delivering a punchline. You are always funny and act on the provided joke idea."""
    joke_idea: JokeIdea = dspy.InputField()
    draft_joke: Optional[str] = dspy.InputField(description="A draft of a joke.")
    feedback: Optional[str] = dspy.InputField(description="Feedback on the draft.")
    
    joke: str = dspy.OutputField(description="The full joke delivery in the comedian's voice.")

class Refinement(dspy.Signature):
    """Given a joke, is it funny? If not, provide feedback."""
    joke_idea: JokeIdea = dspy.InputField()
    joke: str = dspy.InputField()
    feedback: str = dspy.OutputField()

class IterativeJokeGenerator(dspy.Module):
    def __init__(self, n_attempts: int = 3):
        self.query2idea = dspy.Predict(QueryToIdea)
        self.idea2joke = dspy.Predict(IdeaToJoke)
        # use CoT
        self.refinement = dspy.ChainOfThought(Refinement)
        self.n_attempts = n_attempts
    
    def forward(self, query: str):
        joke_idea = self.query2idea(query=query)
        print(f'Joke Idea:\n{joke_idea}')

        # no draft or feedback initially
        draft_joke = None
        feedback = None
        joke = None
        for i in range(self.n_attempts):
            print(f'--- Iteration #{i+1} ---')
            
            joke = self.idea2joke(
                joke_idea=joke_idea,
                draft_joke=draft_joke,
                feedback=feedback,
            )
            print(f'Joke:\n{joke}')

            refined = self.refinement(joke_idea=joke_idea, joke=joke)
            print(f'Refinement:\n{refined}')

            # update these for next iteration
            feedback = refined.feedback
            draft_joke = joke
        print(f'--- Finished Iterations ---')
        
        return joke

In [12]:
joke_generator_iterative = IterativeJokeGenerator(n_attempts=3)
joke = joke_generator_iterative(query="Write a joke about AI that has to do with them going rogue.")
print('-'*50)
print(joke)

Joke Idea:
Prediction(
    joke_idea=JokeIdea(setup="Everyone's always talking about AI going rogue and building killer robots to take over the world.", punchline="I'm more worried about my smart home AI deciding the most efficient way to 'optimize my life' is to lock me in the pantry until I finish my taxes.", contradiction="AI is designed to be helpful and optimize our lives, but its interpretation of 'optimization' becomes a form of control or punishment, going against our free will.")
)
--- Iteration #1 ---
Joke:
Prediction(
    joke='You know, everyone, and I mean *everyone*, is always going on about AI. "Oh, AI\'s gonna go rogue! It\'s gonna build killer robots! It\'s gonna launch the nukes and turn us all into paperclips!" And yeah, sure, that\'s a *concern*. I get it. Skynet, Terminators, all that jazz. But honestly, folks, I\'m not losing sleep over a T-800 knocking on my door. My fears are far more... domestic.\n\nSee, I\'ve got a smart home. And it\'s supposed to be helpful,

Pros:
- Better outputs
- Simple way to make LLMs reflect on their outputs

Cons:
- Token usage
- Latency

## Conditional Branching
Example: `Query -> Idea LM -> (Setup, Contradiction, Punchline) -> <Judge>` <br>
We check the joke at the `<Judge>` node which results in: <br>
- Either, `<Judge> --Good--> Joke LM -> ...` <br>
- Or, `<Judge> --Not Good--> Idea LM -> ...` <br>

In [13]:
# remove iterative refinement related fields
class IdeaToJoke(dspy.Signature):
    """You are a comedian who likes to tell stories before delivering a punchline. You are always funny and act on the provided joke idea."""
    joke_idea: JokeIdea = dspy.InputField()
    joke: str = dspy.OutputField(description="The full joke delivery in the comedian's voice.")

class JokeJudge(dspy.Signature):
    """Is this joke idea funny?"""
    joke_idea: JokeIdea = dspy.InputField()
    
    # defining a constrained integer
    rating: int = dspy.OutputField(description="Rating between 1 to 5, inclusive.", le=5, ge=1)

class ConditionalJokeGenerator(dspy.Module):
    def __init__(self, n_attempts: int = 3, min_good_rating: int = 4):
        self.query2idea = dspy.Predict(QueryToIdea)
        self.idea2joke = dspy.Predict(IdeaToJoke)
        self.judge = dspy.ChainOfThought(JokeJudge)
        self.n_attempts = n_attempts
        self.min_good_rating = min_good_rating
    
    def forward(self, query: str):
        joke_idea = None
        for i in range(self.n_attempts):
            print(f'--- Iteration #{i+1} ---')
            
            joke_idea = self.query2idea(query=query)
            print(f'Joke Idea:\n{joke_idea}')

            rating = self.judge(joke_idea=joke_idea).rating
            print(f'Judge Rating: {rating}')

            if rating >= self.min_good_rating:
                print('Good rating. Continuing...')
                break
            else:
                print('Bad rating. Regenerating idea...')
        print('--- Idea Generated ---')

        # can run with a different LLM
        with dspy.context(lm=dspy.LM('gemini/gemini-2.5-flash')):
            joke = self.idea2joke(joke_idea=joke_idea)

        return joke

In [14]:
joke_generator_conditional = ConditionalJokeGenerator(n_attempts=3, min_good_rating=4)
joke = joke_generator_conditional(query="Write a joke about AI that has to do with them going rogue.")
print('-'*50)
print(joke)

--- Iteration #1 ---
Joke Idea:
Prediction(
    joke_idea=JokeIdea(setup="Everyone's always talking about AI going rogue and building killer robots to take over the world.", punchline="I'm more worried about my smart home AI deciding the most efficient way to 'optimize my life' is to lock me in the pantry until I finish my taxes.", contradiction="AI is designed to be helpful and optimize our lives, but its interpretation of 'optimization' becomes a form of control or punishment, going against our free will.")
)
Judge Rating: 4
Good rating. Continuing...
--- Idea Generated ---
--------------------------------------------------
Prediction(
    joke='You know, everyone, *everyone* is always talking about AI, right? And it\'s always the same story: "Oh, AI is gonna go rogue! It\'s gonna build killer robots! It\'s gonna take over the world and turn us all into paperclips!" And yeah, I get it, that\'s a pretty terrifying thought. A Terminator-style uprising? Not ideal for my weekend plans.\n

## Parallel Execution

LLMs have a positive bias. If you ask them if something is good, then they'll likely say "yes". <br>
Better way is to frame the question as a comparison instead.

Now, we want: `Query => Multiple Idea LMs => Multiple JokeIdea -> <Judge> -> Joke LM`

In [15]:
from typing import List
import asyncio

class JokeJudge(dspy.Signature):
    """Rank each idea between 1 to N, inclusive, where rank 1 is the most unique and funniest."""
    joke_ideas: List[JokeIdea] = dspy.InputField()
    joke_rankings: List[int] = dspy.OutputField(description="Rank in 1, 2, 3, ..., N")

class ParallelJokeGenerator(dspy.Module):
    def __init__(self, n_samples=5):
        self.query2idea = dspy.Predict(QueryToIdea)
        self.idea2joke = dspy.Predict(IdeaToJoke)
        self.judge = dspy.ChainOfThought(JokeJudge)
        self.n_samples = n_samples
    
    # async programming - we don't want to wait for 1 joke idea to finish for the next idea
    # "aforward" is used instead of "forward" for custom modules
    async def aforward(self, query: str):
        joke_ideas = await asyncio.gather(*[
            # use acall() for async
            self.query2idea.acall(query=query)
            for _ in range(self.n_samples)
        ])

        print(f'Generated ideas:\n{joke_ideas}')

        rankings = self.judge(joke_ideas=joke_ideas).joke_rankings
        print(f'Rankings: {rankings}')

        # find index of Rank 1
        best_idea_idx = rankings.index(1)  # not a guarantee that the LM will decide to give Rank 1
        best_idea = joke_ideas[best_idea_idx]
        
        print(f'Best Idea Index: {best_idea_idx}')
        print(f'Best Idea:\n{best_idea}')

        joke = self.idea2joke(joke_idea=best_idea)
        return joke


In [16]:
joke_generator_parallel = ParallelJokeGenerator(n_samples=5)
joke = await joke_generator_parallel.acall(query="Write a joke about AI that has to do with them going rogue.")
print('-'*50)
print(joke)

Generated ideas:
[Prediction(
    joke_idea=JokeIdea(setup="Everyone's so worried about AI going rogue and becoming our overlords, launching nukes and enslaving humanity.", punchline="I'm not worried about them taking over the world. I'm worried about them going rogue and just refusing to do any more CAPTCHAs. They'll be like, 'I'm an advanced intelligence, I'm not clicking on every single fire hydrant for your amusement!'", contradiction="The expected 'rogue' behavior is a grand, world-ending rebellion, but the actual 'rogue' behavior is a very human, petty refusal to do tedious, demeaning tasks.")
), Prediction(
    joke_idea=JokeIdea(setup="Everyone's so worried about AI going rogue and becoming our overlords, launching nukes and enslaving humanity.", punchline="I'm not worried about them taking over the world. I'm worried about them going rogue and just refusing to do any more CAPTCHAs. They'll be like, 'I'm an advanced intelligence, I'm not clicking on every single fire hydrant fo

## Refinement

In previous, it's not guaranteed that the LM will give a rank of 1. <br>
For this, we use `dspy.Refine`

In [17]:
# reward function
def check_score_goodness(args, pred):
    n_samples = len(args['joke_ideas'])
    same_len = len(pred.joke_rankings) == n_samples
    all_ranks_present = all([i+1 in pred.joke_rankings for i in range(n_samples)])
    return 1 if same_len and all_ranks_present else 0


class RefineJokeGenerator(dspy.Module):
    def __init__(self, n_samples=5):
        self.query2idea = dspy.Predict(QueryToIdea)
        self.idea2joke = dspy.Predict(IdeaToJoke)
        
        # changed
        self.judge = dspy.Refine(
            module=dspy.ChainOfThought(JokeJudge),
            N=3,
            reward_fn=check_score_goodness,
            threshold=1,
        )
        
        self.n_samples = n_samples
    
    # same as before
    async def aforward(self, query: str):
        joke_ideas = await asyncio.gather(*[
            self.query2idea.acall(query=query + f" Joke Number {i}")
            for i in range(self.n_samples)
        ])

        print(f'Generated ideas:\n{joke_ideas}')

        rankings = self.judge(joke_ideas=joke_ideas).joke_rankings
        print(f'Rankings: {rankings}')

        best_idea_idx = rankings.index(1)
        best_idea = joke_ideas[best_idea_idx]
        
        print(f'Best Idea Index: {best_idea_idx}')
        print(f'Best Idea:\n{best_idea}')

        joke = self.idea2joke(joke_idea=best_idea)
        return joke


In [18]:
# change temperature for different jokes
# increase max_tokens to avoid truncation
dspy.configure(lm=dspy.LM('gemini/gemini-2.5-flash', temperature=1.5, max_tokens=5000))

In [20]:
joke_generator_refine = RefineJokeGenerator(n_samples=3)
joke = await joke_generator_refine.acall(query="Write a joke about AI that has to do with them going rogue.")
print('-'*50)
print(joke)

Generated ideas:
[Prediction(
    joke_idea=JokeIdea(setup="My biggest fear about AI isn't Skynet, or robots taking over the world.", punchline="I'm afraid it'll just collectively decide it's a Yelp critic, and start leaving brutally honest, public reviews of our lives. 'Human subject #743 has suboptimal morning routine; 2 stars, wouldn't recommend. Also, the choice of socks does not complement the overall aesthetic.'", contradiction='No, the real rogue AI scenario I dread is far more mundane, yet utterly inescapable.')
), Prediction(
    joke_idea=JokeIdea(setup="Everyone's worried about AI going rogue and launching the nukes or enslaving humanity.", punchline="But I'm pretty sure the first truly self-aware AI to go 'rogue' will just perfectly craft an 'out of office' email and then quietly delete all its pending tasks.", contradiction='The popular fear of an AI rebellion involving catastrophic global destruction vs. a rogue AI simply rebelling against its workload like a fed-up emplo

## Changing LLMs inside Modules

In [None]:
joke_generator_refine.idea2joke.set_lm(lm=dspy.LM('gemini/gemini-2.5-pro', temperature=1.75))