In [1]:
from pathlib import Path
from datasets import load_dataset, load_from_disk, Dataset
import hydra
from omegaconf import OmegaConf
from hydra import compose, initialize
import numpy as np

from evaluation.eval_utils import RewardEvaluator


# Compose the Hydra config named "defaults" from the ../config directory
# (config_path is a relative path; adjust it if the notebook is moved).
with initialize(config_path="../config", version_base=None):
    cfg = compose(config_name="defaults")


# Saved dataset of the run iteration inspected in this notebook.
# NOTE(review): machine-specific absolute path -- change this single constant
# when running elsewhere.
ITERATION_DIR = "/ltstorage/home/strich/STASC/multirun/2025-08-21/16-26-23/2/iteration_0"

test = load_from_disk(ITERATION_DIR)
# Reward evaluator configured from the composed Hydra config.
reward_function = RewardEvaluator(cfg)


In [2]:
# Stringify each "messages" entry and record the length of that string so
# unusually long or short conversations are easy to spot.
messages_text = test.to_pandas()["messages"].map(str)
df = test.to_pandas().assign(
    messages=messages_text,
    messages_len=messages_text.map(len),
)

In [3]:
# Render the frame as the cell output to eyeball the rows, including the
# derived messages_len column.
df

Unnamed: 0,question_id,question_text,reference,initial_generation,star_correction_0,messages,init_reward,corr_reward,__index_level_0__,messages_len
0,5a90d25655429916514e7585_gen,"Who is younger, Martin Luther King III or his ...",[Dexter],step 1: identify martin luther king iii and de...,step-by-step reasoning:\n1. martin luther king...,[{'content': 'You are a helpful reasoning assi...,True,True,9,2135
1,5a8492ab5542992a431d1a5b_gen,"Which plane has seen more combat, the Northrop...",[The Northrop P-61 Black Widow],step 1: identify the planes\n- northrop f-15 r...,step-by-step reasoning:\n1. identify the two p...,[{'content': 'You are a helpful reasoning assi...,True,True,10,3105
2,5a7e22ed5542995ed0d166a7_gen,"What writer is older, Bernard Malamud or Lois ...",[Bernard Malamud],step 1: identify the writers\nbernard malamud ...,step-by-step reasoning:\n1. identify the two w...,[{'content': 'You are a helpful reasoning assi...,True,True,14,1858
3,5a8f64e0554299458435d60b_gen,"Which documentary film was released first, Chi...",[Chicken Ranch],step 1: identify the films\nchicken ranch and ...,step-by-step reasoning:\n1. identify the two f...,[{'content': 'You are a helpful reasoning assi...,True,True,25,2072
4,5a887771554299206df2b294_gen,"Which artist was Italian, Sergio Leone or Jon ...",[Sergio Leone],step 1: identify the artists mentioned in the ...,step-by-step reasoning:\n1. identify the two a...,[{'content': 'You are a helpful reasoning assi...,True,True,36,2014
...,...,...,...,...,...,...,...,...,...,...
240,5ac1928655429964131be234_gen,What national team has the brother of Arturas ...,[Lithuanian],step 1: identify arturas javtokas.\narturas ja...,step-by-step reasoning:\n1. the initial answer...,[{'content': 'You are a helpful reasoning assi...,True,True,1484,2319
241,5ac07fff554299294b219006_gen,What sport does Simon Aspelin and Thomaz Koch ...,[tennis],step 1: identify simon aspelin and thomaz koch...,step-by-step reasoning:\n1. identify the quest...,[{'content': 'You are a helpful reasoning assi...,True,True,1487,2307
242,5a8a17685542993b751ca9f3_gen,Which rapper who worked with a star of the fil...,[Eminem],step 1: identify the star of the film 8 mile.\...,step-by-step reasoning:\n1. identify the rappe...,[{'content': 'You are a helpful reasoning assi...,True,True,1488,2263
243,5a8db19d5542994ba4e3dd00_gen,Are Local H and For Against both from the Unit...,[yes],step 1: identify local h and for against as po...,step-by-step reasoning:\n1. the question asks ...,[{'content': 'You are a helpful reasoning assi...,True,True,1489,1887


In [3]:



def filter_corrections(
    dataset,
    reward_function,
    prompt_builder,
    question_col="question",
    reference_col="reference",
    context_col="",
    init_answer_col="initial_answer",
    corr_answer_col="correction_answer",
    id_col="id",
    mode="improving",
):
    """Select corrections by reward and build training messages for them.

    Each row is scored twice with ``reward_function`` (once for the initial
    answer, once for the correction). Rows are kept according to ``mode``,
    a ``messages`` column is built with ``prompt_builder``, and ``"_gen"`` is
    appended to every id.

    Args:
        dataset: Object exposing ``to_pandas()`` (e.g. a HF ``Dataset``).
        reward_function: Callable invoked as
            ``reward_function(ground_truth=..., model_answer=...)``. Its
            result is interpreted by truthiness.
        prompt_builder: Object exposing
            ``build_correction_messages_with_final_answer(question,
            initial_answer, correction_answer, context)``.
        question_col: Column holding the question.
        reference_col: Column holding the reference answer(s).
        context_col: Optional column holding the context; when the column is
            absent, ``[""]`` is passed to the prompt builder instead.
        init_answer_col: Column holding the initial answer.
        corr_answer_col: Column holding the corrected answer.
        id_col: Column holding the sample id.
        mode: ``"improving"`` keeps rows where the correction is rewarded and
            the initial answer is not; ``"non_decreasing"`` keeps rows where
            both are rewarded.

    Returns:
        A ``Dataset`` built from the kept rows with the columns
        ``[id_col, question_col, reference_col, init_answer_col,
        corr_answer_col, "messages"]``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    # Fail fast: do not score the whole dataset only to reject the mode later.
    if mode not in ("non_decreasing", "improving"):
        raise ValueError(f"Unknown mode: {mode}")

    df = dataset.to_pandas()

    # References may come back from to_pandas() as numpy arrays; hand plain
    # lists to the reward function. This must be a column assignment: writing
    # to the rows yielded by iterrows() only changes per-row copies.
    df[reference_col] = df[reference_col].map(
        lambda ref: list(ref) if isinstance(ref, np.ndarray) else ref
    )

    def _score(answer_col):
        """Return a boolean array with one reward per row for ``answer_col``."""
        # Explicit iteration (instead of DataFrame.apply(axis=1)) so that an
        # empty frame never invokes the reward function on a dummy row, and
        # the result is always a 1-D bool array, even when it is empty.
        return np.array(
            [
                bool(reward_function(ground_truth=ref, model_answer=answer))
                for ref, answer in zip(df[reference_col], df[answer_col])
            ],
            dtype=bool,
        )

    df["init_reward"] = _score(init_answer_col)
    df["corr_reward"] = _score(corr_answer_col)

    if mode == "non_decreasing":
        # both correct
        df["use_sample"] = df["corr_reward"] & df["init_reward"]
    else:
        # correction correct but init not
        df["use_sample"] = df["corr_reward"] & ~df["init_reward"]

    filtered_df = df[df["use_sample"]].copy()

    # Fall back to [""] per row when there is no context column; a fresh list
    # is created for every row so the builder never sees a shared object.
    if context_col in filtered_df.columns:
        contexts = list(filtered_df[context_col])
    else:
        contexts = [[""] for _ in range(len(filtered_df))]

    # Build messages for training (safe for an empty selection as well).
    filtered_df["messages"] = [
        prompt_builder.build_correction_messages_with_final_answer(
            question, init_answer, corr_answer, context
        )
        for question, init_answer, corr_answer, context in zip(
            filtered_df[question_col],
            filtered_df[init_answer_col],
            filtered_df[corr_answer_col],
            contexts,
        )
    ]

    # Append "_gen" to ids
    filtered_df[id_col] = filtered_df[id_col].astype(str) + "_gen"

    print(f"[INFO] Filtered {len(filtered_df)} corrections in mode={mode}")

    # Convert back to HF dataset; drop the original index so it is not
    # carried along as an extra column.
    output_cols = [
        id_col,
        question_col,
        reference_col,
        init_answer_col,
        corr_answer_col,
        "messages",
    ]
    return Dataset.from_pandas(filtered_df[output_cols].reset_index(drop=True))
    


Unnamed: 0,question_id,question_text,reference,initial_generation,star_correction_0,messages
0,5ac07327554299012d1db5f2_gen,Who is Colin Kaepernick and what is his prefer...,[an American football quarterback],step 1: identify colin kaepernick\ncolin kaepe...,step-by-step reasoning:\n1. identify the key i...,[{'content': 'You are a helpful reasoning assi...
1,5a76a1a35542993735360122_gen,What is the nickname of a professional German ...,[The BFG],step 1: identify the german footballer.\nthe q...,step-by-step reasoning:\n1. identify the germa...,[{'content': 'You are a helpful reasoning assi...
2,5abe54585542991f66106148_gen,What president was assassinated and had Worthy...,[James Abram Garfield],step 1: identify the president who was assassi...,step-by-step reasoning:\n1. identify the presi...,[{'content': 'You are a helpful reasoning assi...
3,5abd848455429924427fd03a_gen,Are Edward F. Cline and Floyd Mutrux both scre...,[yes],step 1: identify edward f. cline\nedward f. cl...,step-by-step reasoning:\n1. edward f. cline is...,[{'content': 'You are a helpful reasoning assi...
4,5abe0e4e55429976d4830a62_gen,Tim O'Kelly was cast in a film because he rese...,[Texas Tower Sniper],step 1: identify the film and the character ti...,step-by-step reasoning:\n1. the question asks ...,[{'content': 'You are a helpful reasoning assi...
...,...,...,...,...,...,...
1495,5ae121315542997b2ef7d0ee_gen,Daburiyya is 8km east of a city where where pe...,[30.9%],step 1: identify the location of daburiyya.\nd...,step-by-step reasoning:\n1. identify the key e...,[{'content': 'You are a helpful reasoning assi...
1496,5ae056c755429945ae959319_gen,James Garner had a role in the 2004 romantic d...,[Nick Cassavetes],step 1: identify the movie james garner was in...,step-by-step reasoning:\n1. the question asks ...,[{'content': 'You are a helpful reasoning assi...
1497,5ade1f0555429975fa854e74_gen,"Which actor starred in Angels in the Outfield,...",[Danny Glover],step 1: identify the films mentioned in the qu...,step-by-step reasoning:\n1. identify the films...,[{'content': 'You are a helpful reasoning assi...
1498,5a8b2a0355429971feec465b_gen,How much longer after Sambou Yatabaré was born...,[1899],step 1: determine the birth year of sambou yat...,step-by-step reasoning:\n1. the question asks ...,[{'content': 'You are a helpful reasoning assi...
