In [24]:
import pandas as pd
import ast

In [63]:
def clean_optimizer_name(row):
    """Return the display name of the optimizer described by ``row``.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must provide ``row["optimizer"]``. ``row["friction"]`` is read only
        when the optimizer is ``"gd"``.

    Returns
    -------
    str
        ``"Adam"``, ``"Newton"``, ``"GD"`` (``"gd"`` with friction equal to 0)
        or ``"Nesterov"`` (``"gd"`` with any other friction value).

    Raises
    ------
    ValueError
        If ``row["optimizer"]`` is not ``"adam"``, ``"newton"`` or ``"gd"``.
    """
    optimizer = row["optimizer"]

    if optimizer == "adam":
        return "Adam"
    if optimizer == "newton":
        return "Newton"
    if optimizer == "gd":
        # Only an exact 0 counts as plain gradient descent. Note that a
        # missing (NaN) friction compares unequal to 0 and is therefore
        # labelled "Nesterov" as well.
        return "GD" if row["friction"] == 0 else "Nesterov"

    raise ValueError(f"Unknown optimizer: {optimizer}")

# Raw result-column name -> human-readable label used for the table headers.
metric_map = dict(
    mean_fit_time="Fit Time (s)",
    mean_test_score="Accuracy",
)

In [74]:
# Experiment title -> CSV file holding that experiment's search results.
experiments = {
    "Handwritten Digits": "handwritten_digits_results.csv",
    "Titanic": "titanic_results.csv",
}


def _parse_optimizer_kw_args(value):
    """Turn one raw ``optimizer_kw_args`` cell into a dict.

    Strings are parsed with ``ast.literal_eval``; a missing value (NaN/None)
    becomes an empty dict; an existing dict is returned unchanged.
    """
    if isinstance(value, str):
        value = ast.literal_eval(value)
    if isinstance(value, dict):
        return value
    return {} if pd.isna(value) else value


# Per-experiment summary tables, keyed by experiment title.
summary_tables = {}

for experiment_name, fname in experiments.items():

    df = pd.read_csv(fname)
    # The prefix is a literal string, not a regular expression.
    df.columns = df.columns.str.replace("param_model__", "", regex=False)

    # Expand the keyword-argument dicts into one column per key; rows whose
    # dict lacks a key get NaN. Passing index=df.index keeps the new columns
    # aligned with their source rows in the concat below.
    kw_args = df["optimizer_kw_args"].apply(_parse_optimizer_kw_args)
    expanded_df = pd.DataFrame(kw_args.tolist(), index=df.index)

    df = pd.concat([df.drop(columns=["optimizer_kw_args"]), expanded_df], axis=1)

    df["optimizer"] = df.apply(clean_optimizer_name, axis=1)

    # Mean and standard deviation of each reported metric per
    # (optimizer, max_epochs) group. `sort` is a boolean flag: groups are
    # ordered by their keys, not by any metric.
    grouped = (
        df.groupby(["optimizer", "max_epochs"], sort=True)[list(metric_map)]
        .agg(["mean", "std"])
    )

    # Flatten the (metric, stat) column MultiIndex into labels such as
    # "Accuracy (mean)".
    grouped.columns = [
        f"{metric_map[metric]} ({stat})" for metric, stat in grouped.columns
    ]

    # TODO: index levels keep their raw names ("optimizer", "max_epochs") in
    # the exported table; rename them here if prettier headers are wanted.
    grouped.to_latex(
        f"{experiment_name}_table.tex",
        index=True,
        float_format="%.2f",
        longtable=True,
        caption=f"{experiment_name} Results",
        label=f"tab:{experiment_name.lower().replace(' ', '_')}_results",
    )

    summary_tables[experiment_name] = grouped

# A bare expression inside a loop body is never rendered by the notebook, so
# show all experiments as one frame with the experiment title as the outer
# index level.
pd.concat(summary_tables, names=["Experiment"])
    