In [1]:
# ============================================================
# 0) IMPORTS & CONFIG
# ============================================================
import os
import re

import dspy
import torch
from datasets import load_dataset
from dspy import MIPROv2
from sentence_transformers import SentenceTransformer

# Base reasoning model. The "ollama_chat/" prefix and localhost endpoint point
# at a locally served model; the empty api_key is presumably fine because the
# local server does not check it -- confirm if the endpoint changes.
lm = dspy.LM(
    model="ollama_chat/gpt-oss:20b",
    api_base="http://localhost:11434",
    api_key="",
    temperature=1.0,
)
# Make this LM the default for every DSPy module created below.
dspy.configure(lm=lm)


In [2]:
# Load the corpus and wrap each training row as a DSPy example: `prompt` is the
# only input field, the reference completion is carried along as `gold_tldr`.
dataset = load_dataset("mlabonne/smoltldr")

examples = [
    dspy.Example({"prompt": row["prompt"], "gold_tldr": row["completion"]}).with_inputs("prompt")
    for row in dataset["train"]
]

# Sequential (unshuffled) 80 / 10 / 10 split into train / validation / test.
total = len(examples)  # should be ~2000
train_end = int(0.80 * total)
val_end = int(0.90 * total)

train_set, val_set, test_set = (
    examples[:train_end],
    examples[train_end:val_end],
    examples[val_end:],
)

print(*map(len, (train_set, val_set, test_set)))


1600 200 200


In [3]:
# ============================================================
# 2) SIGNATURE + MODULE
# ============================================================
# DSPy signature for the summarisation task.
# NOTE: the class docstring is not just documentation -- the optimizer log in
# this notebook lists it verbatim as instruction candidate 0, so editing its
# wording changes the prompt the model receives.
class GenerateTLDR(dspy.Signature):
    """
    Given a Reddit post that begins with 'POST:', generate a single-line,
    concise TL;DR (~25 words). Avoid line breaks.
    """
    # Input: the raw dataset prompt text.
    prompt: str = dspy.InputField()
    # Output: the generated summary.
    tldr: str = dspy.OutputField()


class TLDRModule(dspy.Module):
    """Chain-of-thought TL;DR generator built on the ``GenerateTLDR`` signature."""

    def __init__(self):
        # Initialise the dspy.Module base explicitly before attaching
        # sub-modules, rather than relying on the framework to do it implicitly.
        super().__init__()
        self.generator = dspy.ChainOfThought(GenerateTLDR)

    def forward(self, prompt: str):
        """Summarise one prompt.

        Args:
            prompt: The dataset prompt text to summarise.

        Returns:
            A ``dspy.Prediction`` carrying only the ``tldr`` field; any other
            fields on the generator's output are not forwarded.
        """
        out = self.generator(prompt=prompt)
        return dspy.Prediction(tldr=out.tldr)

# Un-optimized baseline program, evaluated and then optimized further down.
program = TLDRModule()


In [4]:
# ============================================================
# 3) HELPERS (semantic model, post text extractor)
# ============================================================
# `os` is imported with the other dependencies in the first cell.
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # avoid HF tokenizer thread issues

# Load the semantic model ONCE, globally — no lazy loading in threads
sem_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def _word_count(s: str) -> int:
    return len(s.split())

def extract_post(prompt: str):
    """Return the post body embedded in a dataset prompt.

    The body is the text after the first ``POST:`` marker, up to a newline
    followed by optional whitespace and ``TL;DR:``, or to the end of the text
    when no such marker follows. If ``POST:`` does not occur at all, the whole
    prompt is used. Surrounding whitespace is stripped in both cases.
    """
    found = re.search(r"POST:\s*(.*?)(?:\n\s*TL;DR:|$)", prompt, flags=re.DOTALL)
    if found is None:
        return prompt.strip()
    return found.group(1).strip()



In [5]:
# ============================================================
# 4) FEEDBACK FUNCTIONS — each returns a (feedback_text, score) pair
# ============================================================

def feedback_len(pred_tldr: str, target_words=25, max_words=70):
    """
    Score how close the summary's word count is to ``target_words``.

    The score drops linearly with the distance from the target and is clamped
    to [0, 1]. The distance is scaled by the larger of the two gaps
    ``|target_words - 1|`` and ``|target_words - max_words|``.

    Returns a ``(feedback, score)`` tuple; the feedback message covers the
    ideal, too-long and too-short cases.
    """
    n_words = _word_count(pred_tldr)

    # Normaliser: the wider of the gaps target -> 1 word and target -> max_words.
    widest_gap = max(abs(target_words - 1), abs(target_words - max_words))
    raw = 1 - (abs(target_words - n_words) / widest_gap)
    score = max(0.0, min(1.0, raw))

    if score == 1.0:
        return f"You produced the ideal length (~{n_words} words).", score
    if n_words > target_words:
        return f"Your TL;DR is too long ({n_words} words). Aim for ~{target_words}.", score
    return f"Your TL;DR is too short ({n_words} words). Add key content.", score


def feedback_style(pred_tldr: str):
    """
    Check the single-line requirement.

    Returns ``(feedback, score)``: the score is 1.0 when the text contains
    neither a line feed nor a carriage return, and 0.0 otherwise.
    """
    if "\n" in pred_tldr or "\r" in pred_tldr:
        return "Your TL;DR contains line breaks. It must be a single line.", 0.0
    return "You kept it single-line with no line breaks.", 1.0


def feedback_sem(prompt: str, pred_tldr: str):
    """
    Rate how semantically close the summary is to the post it summarises.

    The post body is taken from ``prompt`` via ``extract_post``. Both texts are
    embedded with the module-level ``sem_model`` using normalised embeddings,
    and their element-wise product is summed to get the similarity, which is
    then mapped from [-1, 1] onto [0, 1] and clamped.

    Returns ``(feedback, score)``; the feedback tier depends on whether the
    score is above 0.8, above 0.6, or neither.
    """
    source_text = extract_post(prompt)

    # sem_model is only read here, so no `global` declaration is required.
    with torch.no_grad():
        vec_pred = sem_model.encode(pred_tldr, convert_to_tensor=True, normalize_embeddings=True)
        vec_post = sem_model.encode(source_text, convert_to_tensor=True, normalize_embeddings=True)
        similarity = (vec_pred * vec_post).sum().item()

    rescaled = (similarity + 1) / 2  # [-1,1] -> [0,1]
    score = max(0.0, min(1.0, rescaled))

    if score > 0.8:
        return "Excellent semantic alignment — you captured the core meaning.", score
    if score > 0.6:
        return "Moderate alignment — include 1–2 more key ideas.", score
    return "Low alignment — your TL;DR misses core points of the post.", score

In [6]:

# ============================================================
# 5) METRIC 
# ============================================================
def metric(example, pred, trace=None, pred_name=None, pred_trace=None):
    """
    Score a predicted TL;DR as the unweighted mean of three sub-scores.

    The sub-scores come from ``feedback_len``, ``feedback_style`` and
    ``feedback_sem``; only their numeric parts are used here.

    Args:
        example: Record providing the source text under ``example["prompt"]``.
        pred: Prediction object expected to expose a ``tldr`` string.
        trace, pred_name, pred_trace: Accepted but unused; presumably kept so
            the function matches the optimizer's metric calling convention.

    Returns:
        A float in [0, 1]. A missing, non-string or blank ``tldr`` scores 0.0
        outright: without this guard a ``None`` output would crash the
        sub-scorers, and an empty string would still collect length and style
        credit despite containing no summary.
    """
    tldr = getattr(pred, "tldr", None)
    if not isinstance(tldr, str) or not tldr.strip():
        return 0.0

    _, s_len = feedback_len(tldr)
    _, s_sty = feedback_style(tldr)
    _, s_sem = feedback_sem(example["prompt"], tldr)

    return (s_len + s_sty + s_sem) / 3.0


In [7]:
# ============================================================
# 6) BASELINE EVALUATION
# ============================================================
# Evaluator over the held-out test split, scored with `metric`; the same
# evaluator object is called again later on the optimized program.
evaluate = dspy.Evaluate(
    devset=test_set,
    metric=metric,
    num_threads=16,
    display_progress=True,
    display_table=True,
)

print("\n== Baseline Evaluation ==")
evaluate(program)


== Baseline Evaluation ==
Average Metric: 181.83 / 200 (90.9%): 100%|██████████| 200/200 [00:08<00:00, 23.28it/s]

2025/11/17 12:33:33 INFO dspy.evaluate.evaluate: Average Metric: 181.8347770365852 / 200 (90.9%)





Unnamed: 0,prompt,gold_tldr,tldr,metric
0,SUBREDDIT: r/relationships TITLE: Me [16 F] with my boyfriend [18 ...,I get too worried when my bf goes out. I need help so I make it s...,16‑year‑old girl in 10‑month relationship feels anxious when boyfr...,✔️ [0.897]
1,SUBREDDIT: r/AskReddit TITLE: Indirect Demoralization at work POST...,"went up to ask for raise, got shut down before even asking, now I...",User feels demoralized after overhearing boss deny raises and beli...,✔️ [0.915]
2,SUBREDDIT: r/relationships TITLE: My boyfriend [21] hasn't made me...,"Boyf hasnt made me orgasm, I've been faking the whole time. To te...","Girl hasn't orgasmed in 5 months, faking while boyfriend notices. ...",✔️ [0.914]
3,SUBREDDIT: r/relationships TITLE: I [22F] matched with an ex [23M]...,Matched w a guy I used to date on tinder. Would like to talk to h...,I matched with ex on Tinder after 4 months apart; unsure if I shou...,✔️ [0.933]
4,SUBREDDIT: r/relationship_advice TITLE: [19/m] where do I go from ...,"talking to girl, not sure if she feels the same way as I do, but ...",He's out of a friendship with a girl and wants to know if she want...,✔️ [0.893]
...,...,...,...,...
195,SUBREDDIT: r/dating_advice TITLE: Where Does a Friend-Zone/Relatio...,! How do I know if she wants to be in the friend-zone? How do I k...,"She appears to see you as a friend; be honest about feelings, resp...",✔️ [0.911]
196,SUBREDDIT: r/AskReddit TITLE: Should I consider going back to Digg...,"Reddit is fading, especially with the lag..to the Digg user base!...","Reddit used to be fast and familiar, but now feels slower; Digg is...",✔️ [0.938]
197,SUBREDDIT: r/relationships TITLE: Boyfriend [25m] broke up with me...,Boyfriend of 3 1/2 years broke up with me and I can't move out fo...,"Lost after 3.5 years breakup, still share a home, stuck for two we...",✔️ [0.916]
198,SUBREDDIT: r/tifu TITLE: TIFU by ruining my best friends collegiat...,"Roundhoused my friend in the hip, he fell and fucked up his ankle...","After a prank, I tapped my friend, causing him to fall, break his ...",✔️ [0.919]


EvaluationResult(score=90.92, results=<list of 200 results>)

In [9]:

# ============================================================
# 7) MIPROv2 OPTIMIZER
# ============================================================
# MIPROv2 is imported with the other dependencies in the first cell.
optimizer = MIPROv2(
    metric=metric,
    auto="light",
    num_threads=16,
)

# Pass the held-out validation split explicitly. The notebook already carves
# out val_set; without it the optimizer builds its own validation set (the log
# reports "valset size: 100") -- presumably from trainset, leaving val_set unused.
optimized_program = optimizer.compile(
    program,
    trainset=train_set,
    valset=val_set,
)


2025/11/17 12:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: True
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 100

2025/11/17 12:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/11/17 12:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/11/17 12:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


  1%|          | 4/600 [00:12<31:21,  3.16s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 4/6


  0%|          | 2/600 [00:05<29:01,  2.91s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 5/6


  0%|          | 2/600 [00:07<37:20,  3.75s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 6/6


  0%|          | 2/600 [00:05<29:50,  2.99s/it]
2025/11/17 12:34:28 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/11/17 12:34:28 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2025/11/17 12:35:49 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/11/17 12:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/11/17 12:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given a Reddit post that begins with 'POST:', generate a single-line,
concise TL;DR (~25 words). Avoid line breaks.

2025/11/17 12:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a high‑stakes crisis analyst tasked with preparing a quick briefing for emergency responders.  
Given a Reddit post that starts with `POST:`, write a **single‑line** TL;DR (≈25 words) prefixed with `Tldr:` that captures the essential content.  
The summary will be used to decide urgent actions, so it must be concise, clear, and contain **no line breaks**.  
Example format:  
`Tldr: [Your concise summary here]`

2025/11/17 12:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: 2: You are an expert Reddit TL;DR summarizer.  
Given a Reddit post that begi

Average Metric: 90.99 / 100 (91.0%): 100%|██████████| 100/100 [04:49<00:00,  2.90s/it]

2025/11/17 12:41:12 INFO dspy.evaluate.evaluate: Average Metric: 90.99173666018027 / 100 (91.0%)
2025/11/17 12:41:12 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 90.99

2025/11/17 12:41:12 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 13 - Minibatch ==



Average Metric: 31.60 / 35 (90.3%): 100%|██████████| 35/35 [01:24<00:00,  2.40s/it]

2025/11/17 12:42:37 INFO dspy.evaluate.evaluate: Average Metric: 31.59574863590576 / 35 (90.3%)
2025/11/17 12:42:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 90.27 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/11/17 12:42:37 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27]
2025/11/17 12:42:37 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99]
2025/11/17 12:42:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 90.99


2025/11/17 12:42:37 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 13 - Minibatch ==



Average Metric: 32.29 / 35 (92.3%): 100%|██████████| 35/35 [02:27<00:00,  4.22s/it]

2025/11/17 12:45:05 INFO dspy.evaluate.evaluate: Average Metric: 32.29252757827441 / 35 (92.3%)
2025/11/17 12:45:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.26 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/11/17 12:45:05 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26]
2025/11/17 12:45:05 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99]
2025/11/17 12:45:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 90.99


2025/11/17 12:45:05 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 13 - Minibatch ==



Average Metric: 31.98 / 35 (91.4%): 100%|██████████| 35/35 [01:19<00:00,  2.26s/it]

2025/11/17 12:46:24 INFO dspy.evaluate.evaluate: Average Metric: 31.98051998813947 / 35 (91.4%)
2025/11/17 12:46:24 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 91.37 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/11/17 12:46:24 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37]
2025/11/17 12:46:24 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99]
2025/11/17 12:46:24 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 90.99


2025/11/17 12:46:24 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 13 - Minibatch ==



Average Metric: 31.80 / 35 (90.9%): 100%|██████████| 35/35 [01:35<00:00,  2.74s/it]

2025/11/17 12:48:00 INFO dspy.evaluate.evaluate: Average Metric: 31.8038025131932 / 35 (90.9%)
2025/11/17 12:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 90.87 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].
2025/11/17 12:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37, 90.87]
2025/11/17 12:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99]
2025/11/17 12:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 90.99


2025/11/17 12:48:00 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 13 - Minibatch ==



Average Metric: 32.08 / 35 (91.7%): 100%|██████████| 35/35 [01:14<00:00,  2.14s/it]

2025/11/17 12:49:15 INFO dspy.evaluate.evaluate: Average Metric: 32.08264905291575 / 35 (91.7%)
2025/11/17 12:49:15 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 91.66 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/11/17 12:49:15 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37, 90.87, 91.66]
2025/11/17 12:49:15 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99]
2025/11/17 12:49:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 90.99


2025/11/17 12:49:16 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 13 - Full Evaluation =====
2025/11/17 12:49:16 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 92.26) from minibatch trials...



Average Metric: 92.04 / 100 (92.0%): 100%|██████████| 100/100 [05:47<00:00,  3.48s/it]

2025/11/17 12:55:04 INFO dspy.evaluate.evaluate: Average Metric: 92.04214500531002 / 100 (92.0%)
2025/11/17 12:55:04 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 92.04
2025/11/17 12:55:04 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99, 92.04]
2025/11/17 12:55:04 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 92.04
2025/11/17 12:55:04 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/11/17 12:55:04 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 13 - Minibatch ==



Average Metric: 32.24 / 35 (92.1%): 100%|██████████| 35/35 [00:01<00:00, 32.00it/s]

2025/11/17 12:55:05 INFO dspy.evaluate.evaluate: Average Metric: 32.23888657998156 / 35 (92.1%)
2025/11/17 12:55:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.11 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/11/17 12:55:05 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37, 90.87, 91.66, 92.11]
2025/11/17 12:55:05 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99, 92.04]
2025/11/17 12:55:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 92.04


2025/11/17 12:55:05 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 13 - Minibatch ==



Average Metric: 31.92 / 35 (91.2%): 100%|██████████| 35/35 [01:59<00:00,  3.43s/it]

2025/11/17 12:57:05 INFO dspy.evaluate.evaluate: Average Metric: 31.915099740911415 / 35 (91.2%)
2025/11/17 12:57:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 91.19 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/11/17 12:57:05 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37, 90.87, 91.66, 92.11, 91.19]
2025/11/17 12:57:05 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99, 92.04]
2025/11/17 12:57:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 92.04


2025/11/17 12:57:05 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 13 - Minibatch ==



Average Metric: 32.02 / 35 (91.5%): 100%|██████████| 35/35 [01:49<00:00,  3.12s/it]

2025/11/17 12:58:55 INFO dspy.evaluate.evaluate: Average Metric: 32.024137846628825 / 35 (91.5%)
2025/11/17 12:58:55 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 91.5 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].
2025/11/17 12:58:55 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37, 90.87, 91.66, 92.11, 91.19, 91.5]
2025/11/17 12:58:55 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99, 92.04]
2025/11/17 12:58:55 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 92.04


2025/11/17 12:58:55 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 11 / 13 - Minibatch ==



Average Metric: 31.77 / 35 (90.8%): 100%|██████████| 35/35 [01:20<00:00,  2.31s/it]

2025/11/17 13:00:16 INFO dspy.evaluate.evaluate: Average Metric: 31.76568317402292 / 35 (90.8%)
2025/11/17 13:00:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 90.76 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 3'].
2025/11/17 13:00:16 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37, 90.87, 91.66, 92.11, 91.19, 91.5, 90.76]
2025/11/17 13:00:16 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99, 92.04]
2025/11/17 13:00:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 92.04


2025/11/17 13:00:16 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 13 - Minibatch ==



Average Metric: 32.30 / 35 (92.3%): 100%|██████████| 35/35 [00:01<00:00, 31.82it/s]

2025/11/17 13:00:18 INFO dspy.evaluate.evaluate: Average Metric: 32.301840177178384 / 35 (92.3%)
2025/11/17 13:00:18 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.29 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/11/17 13:00:18 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [90.27, 92.26, 91.37, 90.87, 91.66, 92.11, 91.19, 91.5, 90.76, 92.29]
2025/11/17 13:00:18 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99, 92.04]
2025/11/17 13:00:18 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 92.04


2025/11/17 13:00:18 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 13 - Full Evaluation =====
2025/11/17 13:00:18 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 91.66) from minibatch trials...



Average Metric: 91.64 / 100 (91.6%): 100%|██████████| 100/100 [02:39<00:00,  1.59s/it]

2025/11/17 13:02:57 INFO dspy.evaluate.evaluate: Average Metric: 91.64462271288589 / 100 (91.6%)
2025/11/17 13:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [90.99, 92.04, 91.64]
2025/11/17 13:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 92.04
2025/11/17 13:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/11/17 13:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 92.04!





In [10]:
# ============================================================
# 8) SEE OPTIMIZED PROMPTS
# ============================================================
# Dump the instruction text each predictor ended up with after optimization.
for name, pred in optimized_program.named_predictors():
    print(
        "================================",
        f"Predictor: {name}",
        "================================",
        "Prompt:",
        pred.signature.instructions,
        "*********************************",
        sep="\n",
    )


Predictor: generator.predict
Prompt:
You are an expert Reddit TL;DR summarizer.  
Given a Reddit post that begins with `POST:`, first think step‑by‑step about the key points, then produce a concise single‑line summary (~25 words) that captures the gist, omits subreddit headers, titles, and tags, and ends with any direct request or question if present.  
Output two sections:  
`Reasoning:` followed by your chain of thought.  
`TL;DR:` followed by the final one‑sentence summary, with no line breaks.
*********************************


In [11]:
# ============================================================
# 9) FINAL POST-MIPROv2 EVALUATION
# ============================================================
# Same evaluator and test split as the baseline run, so the scores compare directly.
# The label names the optimizer actually used above (MIPROv2, not GEPA).
print("\n== MIPROv2-Optimized Evaluation ==")
evaluate(optimized_program)


== GEPA-Optimized Evaluation ==
Average Metric: 39.32 / 43 (91.4%):  22%|██▏       | 43/200 [03:12<10:44,  4.11s/it]

2025/11/17 15:27:26 ERROR dspy.utils.parallelizer: Error for Example({'prompt': "SUBREDDIT: r/relationship_advice\n\nTITLE: [22M] Ladies (and men too) of R_A, How do you feel about grand romantic gestures? I'm thinking this could backfire.\n\nPOST: Recently after a bunch of my co-workers hung out at my apartment for a day, I discovered I have feelings for one of them [21F]. We've talked here and there for about a semester at work and we have a lot in common. Besides the aforementioned hang out, we've never hung out outside of work. \n\nGrand romantic gesture:\n\nValentine's day is coming up, as if you didn't know, and I've written a short song asking her to come over and watch a movie for V-Day. My plan was to record a video of me playing the song and send it to her on FB (probably in a message to avoid any embarrassment). I don't profess my love for her in or anything. I just made some cute references to past conversations. It's pretty light-hearted and tongue-in-cheek.\n\nNow here's 

Average Metric: 182.26 / 199 (91.6%): 100%|██████████| 200/200 [13:57<00:00,  4.19s/it]

2025/11/17 15:38:08 INFO dspy.evaluate.evaluate: Average Metric: 182.25725337587022 / 200 (91.1%)





Unnamed: 0,prompt,gold_tldr,tldr,metric
0,SUBREDDIT: r/relationships TITLE: Me [16 F] with my boyfriend [18 ...,I get too worried when my bf goes out. I need help so I make it s...,"TL;DR: Feeling anxious and overly attached, I can’t sleep when my ...",✔️ [0.932]
1,SUBREDDIT: r/AskReddit TITLE: Indirect Demoralization at work POST...,"went up to ask for raise, got shut down before even asking, now I...","Having heard my boss dismiss raises for extra responsibility, I’m ...",✔️ [0.929]
2,SUBREDDIT: r/relationships TITLE: My boyfriend [21] hasn't made me...,"Boyf hasnt made me orgasm, I've been faking the whole time. To te...","After six months of sex, I’ve never orgasmed and have been faking,...",✔️ [0.951]
3,SUBREDDIT: r/relationships TITLE: I [22F] matched with an ex [23M]...,Matched w a guy I used to date on tinder. Would like to talk to h...,Got a mutual Tinder match with her 23‑year‑old ex after several mo...,✔️ [0.919]
4,SUBREDDIT: r/relationship_advice TITLE: [19/m] where do I go from ...,"talking to girl, not sure if she feels the same way as I do, but ...","He’s 19, hung out for a month, feels like he’s starting to like an...",✔️ [0.886]
...,...,...,...,...
195,SUBREDDIT: r/dating_advice TITLE: Where Does a Friend-Zone/Relatio...,! How do I know if she wants to be in the friend-zone? How do I k...,"He’s 17, close to a 16‑year‑old friend who shows no interest; she ...",✔️ [0.895]
196,SUBREDDIT: r/AskReddit TITLE: Should I consider going back to Digg...,"Reddit is fading, especially with the lag..to the Digg user base!...","Reddit used to be fast and familiar, but has slowed and lost appea...",✔️ [0.929]
197,SUBREDDIT: r/relationships TITLE: Boyfriend [25m] broke up with me...,Boyfriend of 3 1/2 years broke up with me and I can't move out fo...,"After a 3½‑year relationship ends, she’s devastated, can't sleep o...",✔️ [0.897]
198,SUBREDDIT: r/tifu TITLE: TIFU by ruining my best friends collegiat...,"Roundhoused my friend in the hip, he fell and fucked up his ankle...","I punched my friend in a drunken hallway prank, causing him to dro...",✔️ [0.918]


EvaluationResult(score=91.13, results=<list of 200 results>)