# In [None]:
import os
import pandas as pd
import pickle

def _iter_subdirs(parent, prefix=""):
    """Yield ``(name, path)`` for each sub-directory of *parent*, sorted by name.

    Only entries whose name starts with *prefix* are considered, and entries
    that are not directories (e.g. stray files) are skipped.
    """
    for name in sorted(os.listdir(parent)):
        if not name.startswith(prefix):
            continue
        path = os.path.join(parent, name)
        if os.path.isdir(path):
            yield name, path


def _iter_nested_dirs(root, prefixes, names=()):
    """Yield ``(names, path)`` for every directory ``len(prefixes)`` levels below *root*.

    ``prefixes[i]`` is the name prefix required at depth ``i``; ``names`` is the
    tuple of directory names walked so far.
    """
    if not prefixes:
        yield names, root
        return
    for name, path in _iter_subdirs(root, prefixes[0]):
        yield from _iter_nested_dirs(path, prefixes[1:], names + (name,))


def save_csvs_to_pickle(base_dir: str, output_pickle_path: str) -> None:
    """
    Traverse directories to load all CSVs and save them into a pickle file.

    Args:
    - base_dir (str): Base directory containing experiment folders.
    - output_pickle_path (str): Path to save the pickle file.

    The expected layout is
    ``base_dir/<experiment>/<dataset>/<model>/class_*/replicate_*/*.csv``.
    Entries that are not directories are ignored at every level, so stray
    files (even ones named ``class_...`` or ``replicate_...``) do not abort
    the traversal. Listings are sorted so the resulting dictionary has a
    reproducible key order.

    The resulting pickle will store data as a dictionary of DataFrames with
    keys indicating:
    experiment/dataset/model/class/replicate/csv_filename.

    CSVs that cannot be read are reported on stdout and skipped. Errors while
    listing directories or writing the pickle propagate to the caller.
    """
    raw_scores = {}

    # experiment / dataset / model accept any directory name; the two deepest
    # levels must carry their respective prefix.
    level_prefixes = ("", "", "", "class_", "replicate_")

    for names, replicate_path in _iter_nested_dirs(base_dir, level_prefixes):
        for csv_file in sorted(os.listdir(replicate_path)):
            if not csv_file.endswith(".csv"):
                continue

            csv_path = os.path.join(replicate_path, csv_file)
            try:
                df = pd.read_csv(csv_path)
            except Exception as e:
                # Deliberate best effort: one unreadable CSV must not discard
                # everything else that was collected.
                print(f"Failed to process {csv_path}: {e}")
                continue

            raw_scores["/".join(names + (csv_file,))] = df

    # Save the collected data into a pickle file
    with open(output_pickle_path, "wb") as pickle_file:
        pickle.dump(raw_scores, pickle_file)

    print(f"All CSV data has been saved to {output_pickle_path}.")

# Root of the generated experiment data and destination of the combined pickle
base_dir = "/media/ankit-gupta/546B-6466/data_generated"
output_pickle_path = "/media/ankit-gupta/546B-6466/raw_scores.pkl"

# Collect every CSV found under base_dir into the single pickle file
save_csvs_to_pickle(base_dir=base_dir, output_pickle_path=output_pickle_path)
