In [1]:
import pandas as pd

# 1) Load the per-trial results table (one row per trial / fold).
df = pd.read_csv("ray_results_search_hyperparameters.csv")

# 2) Identify the columns that define a unique configuration.
#    Every column under the "config/train_loop_config/" prefix is treated as
#    a hyperparameter, except the fold column: rows that differ only by fold
#    belong to the same configuration and must land in the same group.
#    Adjust the prefix / fold column name as appropriate for your CSV.
fold_col = "config/train_loop_config/fold"

all_config_cols = [
    c
    for c in df.columns
    if c.startswith("config/train_loop_config/") and c != fold_col
]

# Fail early with a readable message instead of letting groupby() raise a
# less obvious error when there is nothing to group by.
if not all_config_cols:
    raise ValueError(
        "No 'config/train_loop_config/' columns found besides the fold "
        "column; nothing to group by."
    )

# 3) Group by everything except the fold column.
#    dropna=False keeps rows whose hyperparameters contain NaN as their own
#    groups instead of silently dropping them.
grouped = df.groupby(all_config_cols, dropna=False)

# 4) Metrics to summarise (mean and std across folds) for each configuration.
#    Replace them with whichever metric columns you actually want to aggregate.
metrics_of_interest = ["val_loss", "train_loss"]  # <-- replace with your real metric columns

# Add a function to concatenate trial_ids
def concatenate_trial_ids(trial_ids):
    """Return the ids in *trial_ids* as one comma-separated string.

    The values are converted to strings with ``astype(str)`` first, so the
    input may hold non-string ids. Intended for use as a groupby aggregator.
    """
    ids_as_text = trial_ids.astype(str)
    separator = ','
    return separator.join(ids_as_text)

# Aggregate across folds: mean and std for every metric of interest, plus a
# comma-separated list of the trial ids that make up each configuration.
agg_df = grouped.agg(
    {
        **{metric: ["mean", "std"] for metric in metrics_of_interest},
        "trial_id": concatenate_trial_ids,
    }
)

# 5) agg_df now has a hierarchical column index of (column, aggregation)
#    pairs, e.g. ("val_loss", "mean"), ("val_loss", "std"), ..., and the
#    trial_id column is expected to be labelled with the aggregator's name,
#    i.e. ("trial_id", "concatenate_trial_ids").
#    The grouping keys live in the row index; move them back into ordinary
#    columns so they are written to the CSV.
agg_df = agg_df.reset_index()

# 6) Flatten the two-level column labels into single strings such as
#    "val_loss_mean". The former index columns carry an empty second level,
#    so empty levels are skipped rather than stripping trailing underscores,
#    which would also mangle a column whose real name ends in "_".
agg_df.columns = [
    "_".join(level for level in col if level)
    for col in agg_df.columns.to_flat_index()
]

# 7) Print or save your results. For example:
print(agg_df.head())

# 8) Optionally write out to a new CSV
agg_df.to_csv("kfold_mean_std_results.csv", index=False)

   config/train_loop_config/seed  config/train_loop_config/batch_size  \
0                             42                                    8   
1                             42                                    8   
2                             42                                    8   
3                             42                                    8   
4                             42                                    8   

  config/train_loop_config/lr_scheduler  config/train_loop_config/dropout  \
0                     CosineAnnealingLR                               0.1   
1                     CosineAnnealingLR                               0.1   
2                     CosineAnnealingLR                               0.1   
3                     CosineAnnealingLR                               0.1   
4                     CosineAnnealingLR                               0.1   

  config/train_loop_config/act  config/train_loop_config/num_nodes  \
0                          e