In [None]:
from dataclasses import dataclass
import os
from dotenv import load_dotenv

# Read a local .env file (if one exists) into the process environment so the
# os.environ lookup used to build `config` below can find the API key.
load_dotenv()

@dataclass
class Config:
    # model configuration
    # base_model: str = "ollama_chat/qwen2.5:1.5b-instruct-q8_0"
    # base_model: str = "ollama_chat/llama3.2:1b-instruct-q8_0"
    # base_model: str = "ollama_chat/exaone3.5:2.4b-instruct-q8_0"
    base_model: str = "ollama_chat/granite3.1-dense:2b-instruct-q8_0"
    # base_model: str = "ollama_chat/granite3.1-moe:3b-instruct-q8_0"
    temperature: float = 0.8
    # teacher_model: str = "openrouter/deepseek/deepseek-chat"
    # teacher_model: str = "openrouter/meta-llama/Llama-3.3-70B-Instruct-Turbo"
    teacher_model: str = "openrouter/qwen/qwen-2.5-72b-instruct"
    # teacher_model: str = "openrouter/qwen/qwq-32b-preview"
    teacher_temperature: float = 0.8

    reward_model: str = "RLHFlow/Llama3.1-8B-PRM-Deepseek-Data"

    # dataset
    dataset: str = "HuggingFaceH4/MATH-500"

    # APIKEY (if using api for teacher)
    api_key: str | None = None


# Build the shared settings object. The key is read with a plain subscript, so a
# missing OPENROUTER_APIKEY raises KeyError right here instead of failing later.
config = Config(api_key=os.environ["OPENROUTER_APIKEY"])

In [None]:
import dspy

# small, locally hosted base model
# The sampling temperature comes from the config; previously the configured value
# was never passed, so dspy.LM silently used its own default instead.
lm = dspy.LM(
    config.base_model,
    api_base='http://localhost:11434',
    api_key='',
    temperature=config.temperature,
)
dspy.configure(lm=lm)

# teacher model for instruction proposal (same fix: honour the configured temperature)
teacher_lm = dspy.LM(
    config.teacher_model,
    api_key=config.api_key,
    temperature=config.teacher_temperature,
)

# Reward Model

In [None]:
from src.prm import RewardEvaluator as RewardModel

# Instantiate the process reward model named in the config; `rm` is reused by
# the scoring helpers further down.
rm = RewardModel(model_name=config.reward_model)

In [None]:
from functools import partial
from latex2sympy2 import latex2sympy
from sympy import latex, simplify

from src.utils.math import memoized_canonical_form
from src.utils.qwen_math_parser import extract_answer
from src.utils.qwen_grader import math_equal


def is_valid_latex(expression: str) -> bool:
    """Report whether ``expression`` can be parsed by ``latex2sympy``.

    Args:
        expression: Candidate LaTeX string, e.g. a model-produced answer.

    Returns:
        True when ``latex2sympy(expression)`` completes without raising,
        False when it raises any ``Exception``.
    """
    try:
        latex2sympy(expression)
    except Exception:
        # Any parser failure means "not valid". Catching Exception rather than
        # using a bare except lets KeyboardInterrupt / SystemExit propagate, so
        # interrupting the kernel is not misreported as invalid LaTeX.
        return False
    return True


def check_answer(example_answer, pred_answer) -> int:
    """Compare a reference answer with a predicted answer.

    Both answers are first passed through ``memoized_canonical_form``. If the
    canonical forms are identical the result is 1; otherwise the decision is
    delegated to ``math_equal`` and its result is converted to ``int``.

    Args:
        example_answer: Reference answer from the dataset example.
        pred_answer: Answer produced by the model.

    Returns:
        1 for a match, otherwise ``int(math_equal(...))`` on the canonical forms.
    """
    expected = memoized_canonical_form(example_answer)
    predicted = memoized_canonical_form(pred_answer)
    if expected != predicted:
        # Canonical forms differ; let the grader decide.
        return int(math_equal(expected, predicted))
    return 1

def _score(reward_model, example, pred, trace=None):
    """Score a prediction as the mean of its process score and answer score.

    Args:
        reward_model: Object whose ``evaluate(problem, pred)`` returns an
            indexable sequence of scores; only the last entry is used.
        example: Reference example; ``example.problem`` and ``example.answer``
            are read.
        pred: Model prediction; ``pred.answer`` is read and ``pred`` itself is
            handed to the reward model.
        trace: Accepted but not used by this function.
            NOTE(review): DSPy optimizers may pass a non-None trace while
            bootstrapping; confirm that returning a float is intended there.

    Returns:
        ``(last process score + answer score) / 2``, where the answer score is
        the 0/1 result of ``check_answer``.
    """
    # Debug output: reference vs. predicted answer, as text and as repr.
    print(f"The answer: {example.answer} -> {pred.answer}")
    print(f"The answer (repr): {example.answer!r} -> {pred.answer!r}")

    step_scores = reward_model.evaluate(example.problem, pred)
    final_step_score = step_scores[-1]
    answer_score = check_answer(example.answer, pred.answer)

    print(f"Process: {final_step_score:.2f}, Answer {pred.answer!r}: {answer_score}")
    return (final_step_score + answer_score) / 2

# Bind the reward model as the first argument so `score(example, pred, trace=None)`
# can be passed directly as a metric callable.
score = partial(_score, rm)

In [None]:
# Smoke test for the reward model: a hand-written prediction with two steps
# and a final answer, evaluated against its own problem statement.
pred = dspy.Prediction(
    problem="What is 2 + 3?",
    steps=[
        "Step 1: Identify the numbers to add: 2 and 3.",
        "Step 2: Add the numbers together: 2 + 3 = 5."
    ],
    answer="5."
)
# Last expression of the cell, so the returned scores are displayed.
rm.evaluate(pred.problem, pred)

# Data

In [None]:
from datasets import load_dataset, DatasetDict

# Load the dataset named in the config instead of repeating the identifier here,
# so changing `Config.dataset` actually changes what is loaded.
dataset = load_dataset(config.dataset)

# Split the dataset's "test" split into optimization / held-out parts.
# test_size=0.8 holds out 80% of the rows, leaving 20% as the training portion;
# the fixed seed keeps the split reproducible across runs.
# train_test_split already returns a DatasetDict keyed by "train" and "test",
# so no manual re-wrapping is needed.
split_dataset = dataset['test'].train_test_split(test_size=0.8, seed=42)
train_dataset = split_dataset['train']
test_dataset = split_dataset['test']

# Inspect
split_dataset

In [None]:
# Peek at the first training record to see which fields each row carries.
split_dataset["train"][0]

In [None]:
from dspy.evaluate import Evaluate

# Wrap every training row in a dspy.Example and mark "problem" as its input field.
trainset = [
    dspy.Example(row).with_inputs("problem")
    for row in split_dataset["train"]
]

# Best of N

## Prompts

In [None]:
# Instruction text for the solver; it is assigned as the docstring of the
# GenerateAnswerWithSteps signature below.
# NOTE(review): the two bullet lines name a case ("simple" / "complex") but give
# no instruction for it — confirm whether text was dropped or this is intentional.
BEST_OF_N_PROMPT = """
Solve the following math problem efficiently and clearly:

- For simple problems (2 steps or fewer)
- For complex problems (3 steps or more)

Steps should be very concise.
Answer should be given in latex format for automatic evaluation using sympy.
"""

## Signature

In [None]:
import dspy

class GenerateAnswerWithSteps(dspy.Signature):
    # The signature's docstring is set from the shared prompt constant instead of
    # a literal docstring, so the prompt text lives in one place.
    __doc__ = BEST_OF_N_PROMPT

    # Input field: the problem statement.
    problem: str = dspy.InputField(desc="A math problem to solve")
    # Output fields: ordered solution steps, a written solution, and the final answer.
    steps: list[str] = dspy.OutputField(desc="An ordered list of steps that solve the problem.")
    solution: str = dspy.OutputField(desc="The solution to the problem.")
    # `answer` is the field checked by is_valid_latex and compared in check_answer.
    answer: str = dspy.OutputField(desc="Only the final answer in latex, without extraneous parentheses.")

## Module

In [None]:
class GenerateAnswer(dspy.Module):
    """Solve a math problem with a chain-of-thought predictor and check the answer format."""

    def __init__(self):
        """Create the underlying ChainOfThought predictor for GenerateAnswerWithSteps."""
        super().__init__()
        self.predictor = dspy.ChainOfThought(GenerateAnswerWithSteps)

    def forward(self, problem: str):
        """Run the predictor on ``problem`` and return its prediction.

        A ``dspy.Suggest`` is issued with the result of ``is_valid_latex`` on
        the predicted answer, together with a message describing the expected format.
        """
        prediction = self.predictor(problem=problem)
        answer_is_latex = is_valid_latex(prediction.answer)
        dspy.Suggest(
            answer_is_latex,
            "`answer` should be valid latex, and only the final answer.",
        )
        return prediction

In [None]:
from dspy.teleprompt import MIPROv2

# Prompt optimizer: scores candidates with `score` and uses the teacher LM
# through `teacher_settings`.
teleprompter = MIPROv2(
    metric=score,
    auto="light",
    teacher_settings=dict(lm=teacher_lm),
    num_threads=2
)

# predictor = dspy.ChainOfThought(GenerateAnswerWithSteps)
predictor = GenerateAnswer().activate_assertions()
# Student and teacher each get their own deep copy of the program.
optimized_program = teleprompter.compile(
    student=predictor.deepcopy(),
    teacher=predictor.deepcopy(),
    trainset=trainset,
    max_bootstrapped_demos=3,
    max_labeled_demos=0,
    requires_permission_to_run=False,
)

# The model identifier contains "/" and ":" (e.g. "ollama_chat/...:2b-..."). Used
# verbatim in a file name, that points into a non-existent sub-directory and the
# save fails only after the expensive optimization has finished. Flatten those
# characters and use an explicit .json suffix for the saved program state.
safe_model_name = config.base_model.replace("/", "_").replace(":", "_")
optimized_program.save(f"mipro_optimized_{safe_model_name}.json")