In [1]:
# Run identifiers passed to the summary loader below to select which saved results to read.
# NOTE(review): presumably `timestamp_random_forest` points at a separate run that holds
# the random-forest results — confirm against the results directory.
timestamp = "2025-09-13_1725"
timestamp_random_forest = "2025-09-14_1008"

In [2]:
from src.model.reg import load_all_summaries_with_pvalues
from src.config import REGRESSION_TARGETS, DATASETS,REGRESSION_RESULTS_DIR
import pandas as pd
# Load the regression summaries (metric: MAE) for the run identified by `timestamp`.
summaries = load_all_summaries_with_pvalues(REGRESSION_RESULTS_DIR, timestamp, REGRESSION_TARGETS, DATASETS,
                                                task="regression", metric="mae")
# Same load, with identical arguments, for the run identified by `timestamp_random_forest`.
summaries_rf = load_all_summaries_with_pvalues(REGRESSION_RESULTS_DIR, timestamp_random_forest, REGRESSION_TARGETS, DATASETS,
                                                task="regression", metric="mae")


In [3]:
def combine_summaries(dict1: dict, dict2: dict) -> dict:
    """
    Combine two nested result dictionaries (target -> dataset -> DataFrame).

    ``dict1`` is the reference: for every target/dataset pair it contains, the
    DataFrame found under the same keys in ``dict2`` is appended below the one
    from ``dict1`` and the row index is renumbered from 0. Targets or datasets
    that appear only in ``dict2`` are not part of the result.

    Targets and datasets are emitted in the insertion order of ``dict1``, so
    the result is deterministic across runs.

    Args:
        dict1 (dict): First results dictionary; defines which targets and
            datasets are combined.
        dict2 (dict): Second results dictionary; must provide a DataFrame for
            every target/dataset pair present in ``dict1``.

    Returns:
        dict: ``{target: {dataset: DataFrame}}`` with the rows of both inputs.

    Raises:
        ValueError: If a target of ``dict1`` is missing (or ``None``) in either
            dictionary, or if the DataFrame of a dataset is missing (or
            ``None``) in either dictionary.
    """
    combined = {}

    for target, datasets1 in dict1.items():
        datasets2 = dict2.get(target)
        # A missing target used to surface as an AttributeError on ``None.get``;
        # report it with the same exception type used for missing datasets.
        if datasets1 is None or datasets2 is None:
            raise ValueError(f"Target {target} is missing in one of the dictionaries.")

        combined[target] = {}
        for dataset, df1 in datasets1.items():
            df2 = datasets2.get(dataset)
            if df1 is None or df2 is None:
                raise ValueError(f"Dataset {dataset} for target {target} is missing in one of the dictionaries.")
            combined[target][dataset] = pd.concat([df1, df2], ignore_index=True)

    return combined

In [4]:
# Merge the two loaded result sets so each target/dataset table holds the rows of both runs.
all_results = combine_summaries(summaries, summaries_rf)
all_results

{'backward_digit_span_raw': {'non_digital_tests':                    model         r2        mse       mae  \
  0         DummyRegressor  -0.027585   0.678739  0.606812   
  1       LinearRegression -15.385735  10.823083  0.597732   
  2                  Ridge -15.699682  11.030451  0.734318   
  3                  Lasso -13.375795   9.495481  0.718206   
  4           XGBRegressor   0.787385   0.140436  0.190235   
  5             ElasticNet -13.504312   9.580368  0.748298   
  6                    SVR  -2.282455   2.168123  0.401358   
  7  RandomForestRegressor   0.586710   0.272986  0.392331   
  
                                                y_true  \
  0  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  1  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  2  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  3  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  4  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  5  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.

In [5]:
def filter_by_datasets(results: dict, dataset_names: list) -> dict:
    """
    Restrict a nested results dictionary to a chosen set of datasets.

    Args:
        results (dict): Results as ``{target: {dataset: DataFrame}}``.
        dataset_names (list[str]): Names of the datasets to keep, in the order
            they should appear under each target.

    Returns:
        dict: ``{target: {dataset: DataFrame}}`` containing only the requested
        datasets whose entry exists and is not ``None``. Targets left with no
        dataset at all are omitted.
    """
    def _pick(available: dict) -> dict:
        # Keep a requested dataset only when it is present and holds a value.
        return {
            name: available[name]
            for name in dataset_names
            if name in available and available[name] is not None
        }

    selected = ((target, _pick(per_dataset)) for target, per_dataset in results.items())
    # Drop targets for which none of the requested datasets survived.
    return {target: kept for target, kept in selected if kept}

In [6]:
# Keep only the "demographic+digital" and "digital_test" datasets for every target.
all_results_filtered = filter_by_datasets(all_results, ["demographic+digital", "digital_test"])
all_results_filtered

{'backward_digit_span_raw': {'demographic+digital':                    model        r2       mse       mae  \
  0         DummyRegressor -0.027585  0.678739  0.606812   
  1       LinearRegression -0.044133  0.689669  0.688983   
  2                  Ridge  0.038620  0.635010  0.644920   
  3                  Lasso -0.107976  0.731839  0.684075   
  4           XGBRegressor  0.133847  0.572110  0.625514   
  5             ElasticNet -0.023555  0.676077  0.669066   
  6                    SVR -0.010139  0.667216  0.624503   
  7  RandomForestRegressor  0.163640  0.552431  0.606109   
  
                                                y_true  \
  0  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  1  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  2  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  3  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  4  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
  5  [5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...   
 

In [7]:
# Inspect a single filtered table: target "mmse", dataset "digital_test".
all_results_filtered.get("mmse").get("digital_test")

Unnamed: 0,model,r2,mse,mae,y_true,y_pred,dataset,permutation_test_p_value
0,DummyRegressor,-0.027585,3.835616,1.596446,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.671232876712327, 27.671232876712327, 27.71...",digital_test,1.0
1,LinearRegression,-0.497065,5.588024,1.840739,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[28.423861702130058, 28.47881409535952, 27.122...",digital_test,0.317682
2,Ridge,-0.131366,4.222995,1.645542,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.663847283662584, 27.664647033582153, 27.68...",digital_test,0.998002
3,Lasso,-0.027585,3.835616,1.596446,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.671232876712327, 27.671232876712327, 27.71...",digital_test,1.0
4,XGBRegressor,-0.143099,4.26679,1.578685,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.69255256652832, 29.303041458129883, 26.725...",digital_test,0.04995
5,ElasticNet,-0.077602,4.022314,1.631149,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.671232876712327, 27.671232876712327, 27.71...",digital_test,0.999001
6,SVR,-0.087197,4.058127,1.569455,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.9843662495782, 28.002583807261733, 27.9830...",digital_test,0.798202
7,RandomForestRegressor,-0.078381,4.02522,1.570883,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.16240636339065, 28.24875636705529, 27.1007...",digital_test,0.183816


In [8]:
import pandas as pd
from typing import Dict, Any, Tuple, Optional

def _infer_higher_is_better(metric: str) -> bool:
    """
    Heurística para decidir si 'más alto es mejor' según el nombre de la métrica.
    Ajustá esta lista a tus nombres reales de columnas.
    """
    metric_lower = metric.lower()
    higher_metrics = ["r2", "roc_auc", "auc", "accuracy", "balanced_accuracy",
                      "f1", "f1_macro", "f1_weighted", "average_precision", "ap"]
    lower_metrics  = ["mae", "mse", "rmse", "mape", "logloss", "log_loss"]

    if any(k in metric_lower for k in higher_metrics):
        return True
    if any(k in metric_lower for k in lower_metrics):
        return False
    # Por defecto, asumimos que más alto es mejor
    return True

def best_models_by_target(
    results: Dict[str, Dict[str, pd.DataFrame]],
    metric: str,
    higher_is_better: Optional[bool] = None,
    model_col: str = "model",
) -> Tuple[pd.DataFrame, Dict[str, Dict[str, Any]]]:
    """
    Select, for every target, the single best row across all of its datasets.

    Args:
        results: Nested dict ``{target: {dataset: DataFrame}}``.
        metric: Name of the metric column used for ranking (e.g. ``'mae'``,
            ``'rmse'``, ``'r2'``, ``'roc_auc'``).
        higher_is_better: Ranking direction; when ``None`` it is inferred from
            the metric name.
        model_col: Name of the column identifying the model (default ``'model'``).

    Returns:
        ``(best_df, best_dict)`` where ``best_df`` has one row per target with
        the columns target, dataset, model and metric moved to the front, and
        ``best_dict`` maps ``target -> {'dataset': ..., 'model': ...,
        'row': dict_with_all_columns}``. Both are empty when no target has a
        usable row.
    """
    maximize = _infer_higher_is_better(metric) if higher_is_better is None else higher_is_better

    winners = []
    for target_name, per_dataset in results.items():
        # Tag every usable summary with its origin; entries that are not
        # non-empty DataFrames, or that lack the metric column, are skipped.
        candidates = [
            summary.assign(target=target_name, dataset=dataset_name)
            for dataset_name, summary in per_dataset.items()
            if isinstance(summary, pd.DataFrame)
            and not summary.empty
            and metric in summary.columns
        ]
        if not candidates:
            continue

        # Rows without a metric value cannot be ranked.
        scored = pd.concat(candidates, ignore_index=True).dropna(subset=[metric])
        if scored.empty:
            continue

        scores = scored[metric]
        winner_label = scores.idxmax() if maximize else scores.idxmin()
        winners.append(scored.loc[winner_label])

    if not winners:
        # Nothing usable was found for any target.
        return pd.DataFrame(), {}

    best_df = pd.DataFrame(winners).reset_index(drop=True)

    # Per-target lookup, built before the columns are reordered.
    best_dict: Dict[str, Dict[str, Any]] = {
        row["target"]: {
            "dataset": row["dataset"],
            "model": row.get(model_col, None),
            "row": row.to_dict(),
        }
        for _, row in best_df.iterrows()
    }

    # Move the key columns to the front, keeping only those that exist.
    leading = [col for col in ["target", "dataset", model_col, metric] if col in best_df.columns]
    trailing = [col for col in best_df.columns if col not in leading]

    return best_df[leading + trailing], best_dict

In [9]:
# For each target, pick the row with the lowest MAE across the filtered datasets
# ("mae" is inferred as lower-is-better). The per-target dict is not needed here.
best_df , _ = best_models_by_target(all_results_filtered, metric="mae")

best_df


Unnamed: 0,target,dataset,model,mae,r2,mse,y_true,y_pred,permutation_test_p_value
0,backward_digit_span_raw,digital_test,SVR,0.587023,-0.003764,0.663005,"[5.0, 4.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, ...","[4.000794145887611, 3.9997757054130822, 4.0002...",0.418581
1,mmse,demographic+digital,RandomForestRegressor,1.454819,0.07172,3.464945,"[28.0, 28.0, 25.0, 29.0, 26.0, 30.0, 30.0, 27....","[27.360607142857145, 28.192060019602938, 26.57...",0.003996
2,digit_symbol_raw,demographic+digital,Lasso,9.472263,0.177604,151.237433,"[36.0, 45.0, 32.0, 54.0, 65.0, 86.0, 78.0, 32....","[36.907681420295816, 45.7631190883819, 36.9862...",0.000999
3,forward_digit_span_raw,digital_test,Ridge,0.834254,-0.028182,1.116806,"[6.0, 5.0, 5.0, 7.0, 6.0, 5.0, 6.0, 5.0, 5.0, ...","[5.528183025296401, 5.613868881627409, 5.07314...",0.026973
4,tmt_a_raw,digital_test,SVR,11.844269,-0.011014,296.321005,"[47.0, 65.0, 63.0, 35.87, 50.0, 30.05, 51.0, 7...","[53.48980063620952, 44.720817379833505, 70.909...",0.000999
5,clock_drawing_test,demographic+digital,SVR,0.28999,-0.261457,0.358672,"[3.0, 3.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, ...","[2.9818266572299943, 2.98985475401055, 2.98568...",0.837163
6,tmt_b_raw,demographic+digital,SVR,43.089897,-0.090146,4405.337457,"[82.0, 107.0, 180.09, 49.91, 65.0, 300.0, 61.2...","[120.44119803195954, 97.2912653563694, 115.908...",0.000999


In [10]:
# Compact view: identifying columns plus the MAE and the permutation-test p-value.
best_df_display = best_df[["target", "dataset", "model", "mae", "permutation_test_p_value"]]
best_df_display

Unnamed: 0,target,dataset,model,mae,permutation_test_p_value
0,backward_digit_span_raw,digital_test,SVR,0.587023,0.418581
1,mmse,demographic+digital,RandomForestRegressor,1.454819,0.003996
2,digit_symbol_raw,demographic+digital,Lasso,9.472263,0.000999
3,forward_digit_span_raw,digital_test,Ridge,0.834254,0.026973
4,tmt_a_raw,digital_test,SVR,11.844269,0.000999
5,clock_drawing_test,demographic+digital,SVR,0.28999,0.837163
6,tmt_b_raw,demographic+digital,SVR,43.089897,0.000999
