In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def _sorted_subdirs(parent, prefix=""):
    """Return the sorted names of the sub-directories of ``parent`` starting with ``prefix``."""
    return sorted(
        name
        for name in os.listdir(parent)
        if name.startswith(prefix) and os.path.isdir(os.path.join(parent, name))
    )


def load_and_aggregate_all_data(base_dir, experiments, datasets, class_prefix="class_"):
    """
    Load and aggregate scores for all combinations of experiments and datasets.

    Walks ``<base_dir>/<experiment>/<dataset>/<model>/<class_prefix>*/replicate_*/*.csv``.
    Every CSV that has a "Generation" column contributes:

    * its rows whose "Generation" equals the file's maximum (final generation), and
    * one row holding the mean of each numeric column over the whole file.

    Both are tagged with "Model", "Class" and "Replicate" columns taken from the
    directory names. CSVs without a "Generation" column are skipped.

    Args:
        base_dir (str): Base directory where data is stored.
        experiments (list): List of experiment names.
        datasets (list): List of dataset names.
        class_prefix (str): Name prefix identifying class directories inside a
            model directory. Defaults to "class_".

    Returns:
        tuple: ``(aggregated_results, aggregated_avg_results)``. Both are dicts
        keyed by "<experiment>_<dataset>"; the first maps to the concatenated
        final-generation rows, the second to the concatenated per-file means.
        A combination with no usable CSV maps to an empty DataFrame.

    Raises:
        OSError: If a directory in the tree cannot be listed, e.g.
            FileNotFoundError when ``<base_dir>/<experiment>/<dataset>`` is missing.
    """
    aggregated_results = {}
    aggregated_avg_results = {}

    for experiment in experiments:
        for dataset in datasets:
            final_gen_frames = []
            average_frames = []

            experiment_path = os.path.join(base_dir, experiment, dataset)
            # Sorted listings make the row order of the result reproducible;
            # os.listdir alone returns entries in arbitrary order.
            models = _sorted_subdirs(experiment_path)

            print(f"Processing: {experiment}/{dataset}")
            print(f"Models found: {models}")

            for model in models:
                model_path = os.path.join(experiment_path, model)

                for cls in _sorted_subdirs(model_path, class_prefix):
                    class_path = os.path.join(model_path, cls)

                    for replicate in _sorted_subdirs(class_path, "replicate_"):
                        replicate_path = os.path.join(class_path, replicate)
                        csv_files = sorted(
                            f for f in os.listdir(replicate_path) if f.endswith(".csv")
                        )

                        for csv_file in csv_files:
                            df = pd.read_csv(os.path.join(replicate_path, csv_file))
                            if "Generation" not in df.columns:
                                continue

                            labels = {"Model": model, "Class": cls, "Replicate": replicate}

                            # Rows belonging to the last generation recorded in this file.
                            is_final = df["Generation"] == df["Generation"].max()
                            final_gen_frames.append(df[is_final].assign(**labels))

                            # numeric_only=True: text columns cannot be averaged and
                            # would raise a TypeError on pandas >= 2.
                            averages = df.mean(numeric_only=True).to_frame().T
                            average_frames.append(averages.assign(**labels))

            combined_data = (
                pd.concat(final_gen_frames, ignore_index=True) if final_gen_frames else pd.DataFrame()
            )
            combined_avg_data = (
                pd.concat(average_frames, ignore_index=True) if average_frames else pd.DataFrame()
            )
            key = f"{experiment}_{dataset}"
            aggregated_results[key] = combined_data
            aggregated_avg_results[key] = combined_avg_data

            print(f"Combined data shape for {key}: {combined_data.shape}")
            print(f"Combined average data shape for {key}: {combined_avg_data.shape}")

    return aggregated_results, aggregated_avg_results


def save_aggregated_data(aggregated_results, output_dir):
    """
    Write each aggregated DataFrame to ``<output_dir>/<key>_aggregated.csv``.

    Args:
        aggregated_results (dict): Maps a name (used as the file-name stem) to
            the DataFrame to write.
        output_dir (str): Destination directory; created if it does not exist.

    Returns:
        None
    """
    # Missing parent directories are created too; an existing directory is fine.
    os.makedirs(output_dir, exist_ok=True)

    for key in aggregated_results:
        frame = aggregated_results[key]
        output_path = os.path.join(output_dir, f"{key}_aggregated.csv")
        # index=False keeps the row index out of the file.
        frame.to_csv(output_path, index=False)
        print(f"Saved aggregated data for {key} to {output_path}")



# Run configuration for this cell: where the raw experiment output lives, which
# experiment/dataset combinations to aggregate, and where the CSVs are written.
# NOTE(review): both paths are absolute and machine-specific (they look like a
# removable-media mount point) — confirm before running elsewhere.
base_dir = "/media/ankit-gupta/546B-6466/data_generated"
experiments = ["Exp_1", "Exp_2_1a", "Exp_2_2"]
datasets = ["mnistDigits", "sklearnDigits"]
output_dir = "/media/ankit-gupta/546B-6466/aggregated_data"
# One sub-directory per output kind: final-generation rows and per-file means.
final_gen_dir = os.path.join(output_dir, "final_gen_data")
avg_metric_dir = os.path.join(output_dir, "avg_metric_data")

# Load every experiment x dataset combination (3 x 2 = 6 result keys here).
aggregated_results, aggregated_avg_results = load_and_aggregate_all_data(base_dir, experiments, datasets)

# Save the aggregated data for further analysis; each call writes one
# "<experiment>_<dataset>_aggregated.csv" per key into its directory.
save_aggregated_data(aggregated_results, final_gen_dir)
save_aggregated_data(aggregated_avg_results, avg_metric_dir)

# List the "<experiment>_<dataset>" keys now available for analysis.
# data_keys is a dict view, so it prints as dict_keys([...]).
data_keys = aggregated_results.keys()
print("Available datasets for analysis:", data_keys)



Processing: Exp_1/mnistDigits
Models found: ['XGB', 'RF', 'CNN', 'MLP', 'SVM', 'RNN']
Combined data shape for Exp_1_mnistDigits: (1800, 21)
Combined average data shape for Exp_1_mnistDigits: (1800, 21)
Processing: Exp_1/sklearnDigits
Models found: ['XGB', 'RF', 'CNN', 'MLP', 'SVM', 'RNN']
Combined data shape for Exp_1_sklearnDigits: (1800, 21)
Combined average data shape for Exp_1_sklearnDigits: (1800, 21)
Processing: Exp_2_1a/mnistDigits
Models found: ['XGB', 'RF', 'CNN', 'MLP', 'SVM', 'RNN']
Combined data shape for Exp_2_1a_mnistDigits: (1800, 21)
Combined average data shape for Exp_2_1a_mnistDigits: (1800, 21)
Processing: Exp_2_1a/sklearnDigits
Models found: ['XGB', 'RF', 'CNN', 'MLP', 'SVM', 'RNN']
Combined data shape for Exp_2_1a_sklearnDigits: (1800, 21)
Combined average data shape for Exp_2_1a_sklearnDigits: (1800, 21)
Processing: Exp_2_2/mnistDigits
Models found: ['XGB', 'RF', 'CNN', 'MLP', 'SVM', 'RNN']
Combined data shape for Exp_2_2_mnistDigits: (1800, 21)
Combined average d

In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def _sorted_subdirs(parent, prefix=""):
    """Return the sorted names of the sub-directories of ``parent`` starting with ``prefix``."""
    return sorted(
        name
        for name in os.listdir(parent)
        if name.startswith(prefix) and os.path.isdir(os.path.join(parent, name))
    )


def load_and_aggregate_all_data(base_dir, experiments, datasets, class_prefix="class_"):
    """
    Load and aggregate scores for all combinations of experiments and datasets.

    Walks ``<base_dir>/<experiment>/<dataset>/<model>/<class_prefix>*/replicate_*/*.csv``.
    Every CSV that has a "Generation" column contributes:

    * its rows whose "Generation" equals the file's maximum (final generation), and
    * one row holding the mean of each numeric column over the whole file.

    Both are tagged with "Model", "Class" and "Replicate" columns taken from the
    directory names. CSVs without a "Generation" column are skipped.

    Args:
        base_dir (str): Base directory where data is stored.
        experiments (list): List of experiment names.
        datasets (list): List of dataset names.
        class_prefix (str): Name prefix identifying class directories inside a
            model directory. Defaults to "class_".

    Returns:
        tuple: ``(aggregated_results, aggregated_avg_results)``. Both are dicts
        keyed by "<experiment>_<dataset>"; the first maps to the concatenated
        final-generation rows, the second to the concatenated per-file means.
        A combination with no usable CSV maps to an empty DataFrame.

    Raises:
        OSError: If a directory in the tree cannot be listed, e.g.
            FileNotFoundError when ``<base_dir>/<experiment>/<dataset>`` is missing.
    """
    aggregated_results = {}
    aggregated_avg_results = {}

    for experiment in experiments:
        for dataset in datasets:
            final_gen_frames = []
            average_frames = []

            experiment_path = os.path.join(base_dir, experiment, dataset)
            # Sorted listings make the row order of the result reproducible;
            # os.listdir alone returns entries in arbitrary order.
            models = _sorted_subdirs(experiment_path)

            print(f"Processing: {experiment}/{dataset}")
            print(f"Models found: {models}")

            for model in models:
                model_path = os.path.join(experiment_path, model)

                for cls in _sorted_subdirs(model_path, class_prefix):
                    class_path = os.path.join(model_path, cls)

                    for replicate in _sorted_subdirs(class_path, "replicate_"):
                        replicate_path = os.path.join(class_path, replicate)
                        csv_files = sorted(
                            f for f in os.listdir(replicate_path) if f.endswith(".csv")
                        )

                        for csv_file in csv_files:
                            df = pd.read_csv(os.path.join(replicate_path, csv_file))
                            if "Generation" not in df.columns:
                                continue

                            labels = {"Model": model, "Class": cls, "Replicate": replicate}

                            # Rows belonging to the last generation recorded in this file.
                            is_final = df["Generation"] == df["Generation"].max()
                            final_gen_frames.append(df[is_final].assign(**labels))

                            # numeric_only=True: text columns cannot be averaged and
                            # would raise a TypeError on pandas >= 2.
                            averages = df.mean(numeric_only=True).to_frame().T
                            average_frames.append(averages.assign(**labels))

            combined_data = (
                pd.concat(final_gen_frames, ignore_index=True) if final_gen_frames else pd.DataFrame()
            )
            combined_avg_data = (
                pd.concat(average_frames, ignore_index=True) if average_frames else pd.DataFrame()
            )
            key = f"{experiment}_{dataset}"
            aggregated_results[key] = combined_data
            aggregated_avg_results[key] = combined_avg_data

            print(f"Combined data shape for {key}: {combined_data.shape}")
            print(f"Combined average data shape for {key}: {combined_avg_data.shape}")

    return aggregated_results, aggregated_avg_results


def save_aggregated_data(aggregated_results, output_dir):
    """
    Write each aggregated DataFrame to ``<output_dir>/<key>_aggregated.csv``.

    Args:
        aggregated_results (dict): Maps a name (used as the file-name stem) to
            the DataFrame to write.
        output_dir (str): Destination directory; created if it does not exist.

    Returns:
        None
    """
    # Missing parent directories are created too; an existing directory is fine.
    os.makedirs(output_dir, exist_ok=True)

    for key in aggregated_results:
        frame = aggregated_results[key]
        output_path = os.path.join(output_dir, f"{key}_aggregated.csv")
        # index=False keeps the row index out of the file.
        frame.to_csv(output_path, index=False)
        print(f"Saved aggregated data for {key} to {output_path}")



# Run configuration for this cell: where the raw experiment output lives, which
# experiment/dataset combinations to aggregate, and where the CSVs are written.
# NOTE(review): both paths are absolute and machine-specific — confirm before
# running elsewhere.
base_dir = "/media/ankit-gupta/546B-6466/data_generated"
experiments = ["Exp_1", "Exp_2_2"]
datasets = ["mnistFashion"]
output_dir = "/media/ankit-gupta/546B-6466/aggregated_data"
# One sub-directory per output kind: final-generation rows and per-file means.
final_gen_dir = os.path.join(output_dir, "final_gen_data")
avg_metric_dir = os.path.join(output_dir, "avg_metric_data")

# Load every experiment x dataset combination (2 x 1 = 2 result keys here).
aggregated_results, aggregated_avg_results = load_and_aggregate_all_data(base_dir, experiments, datasets)

# Save the aggregated data for further analysis; each call writes one
# "<experiment>_<dataset>_aggregated.csv" per key into its directory.
# NOTE(review): another cell in this notebook (the one reading from
# ".../Downloads/EPIC_Fool_Files/data_generated") also writes
# Exp_2_2_mnistFashion_aggregated.csv into these same directories, so
# whichever cell runs last overwrites the other — confirm which source is
# the intended one.
save_aggregated_data(aggregated_results, final_gen_dir)
save_aggregated_data(aggregated_avg_results, avg_metric_dir)

# List the "<experiment>_<dataset>" keys now available for analysis.
# data_keys is a dict view, so it prints as dict_keys([...]).
data_keys = aggregated_results.keys()
print("Available datasets for analysis:", data_keys)



Processing: Exp_1/mnistFashion
Models found: ['CNN', 'MLP', 'RF', 'RNN', 'SVM', 'XGB']
Combined data shape for Exp_1_mnistFashion: (1800, 21)
Combined average data shape for Exp_1_mnistFashion: (1800, 21)
Processing: Exp_2_2/mnistFashion
Models found: ['CNN', 'MLP', 'RF', 'RNN', 'SVM', 'XGB']
Combined data shape for Exp_2_2_mnistFashion: (1200, 21)
Combined average data shape for Exp_2_2_mnistFashion: (1200, 21)
Saved aggregated data for Exp_1_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/final_gen_data/Exp_1_mnistFashion_aggregated.csv
Saved aggregated data for Exp_2_2_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/final_gen_data/Exp_2_2_mnistFashion_aggregated.csv
Saved aggregated data for Exp_1_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/avg_metric_data/Exp_1_mnistFashion_aggregated.csv
Saved aggregated data for Exp_2_2_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/avg_metric_data/Exp_2_2_mnistFashion_aggregated.csv
Availa

In [5]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def _sorted_subdirs(parent, prefix=""):
    """Return the sorted names of the sub-directories of ``parent`` starting with ``prefix``."""
    return sorted(
        name
        for name in os.listdir(parent)
        if name.startswith(prefix) and os.path.isdir(os.path.join(parent, name))
    )


def load_and_aggregate_all_data(base_dir, experiments, datasets, class_prefix="class_"):
    """
    Load and aggregate scores for all combinations of experiments and datasets.

    Walks ``<base_dir>/<experiment>/<dataset>/<model>/<class_prefix>*/replicate_*/*.csv``.
    Every CSV that has a "Generation" column contributes:

    * its rows whose "Generation" equals the file's maximum (final generation), and
    * one row holding the mean of each numeric column over the whole file.

    Both are tagged with "Model", "Class" and "Replicate" columns taken from the
    directory names. CSVs without a "Generation" column are skipped.

    Args:
        base_dir (str): Base directory where data is stored.
        experiments (list): List of experiment names.
        datasets (list): List of dataset names.
        class_prefix (str): Name prefix identifying class directories inside a
            model directory. Defaults to "class_".

    Returns:
        tuple: ``(aggregated_results, aggregated_avg_results)``. Both are dicts
        keyed by "<experiment>_<dataset>"; the first maps to the concatenated
        final-generation rows, the second to the concatenated per-file means.
        A combination with no usable CSV maps to an empty DataFrame.

    Raises:
        OSError: If a directory in the tree cannot be listed, e.g.
            FileNotFoundError when ``<base_dir>/<experiment>/<dataset>`` is missing.
    """
    aggregated_results = {}
    aggregated_avg_results = {}

    for experiment in experiments:
        for dataset in datasets:
            final_gen_frames = []
            average_frames = []

            experiment_path = os.path.join(base_dir, experiment, dataset)
            # Sorted listings make the row order of the result reproducible;
            # os.listdir alone returns entries in arbitrary order.
            models = _sorted_subdirs(experiment_path)

            print(f"Processing: {experiment}/{dataset}")
            print(f"Models found: {models}")

            for model in models:
                model_path = os.path.join(experiment_path, model)

                for cls in _sorted_subdirs(model_path, class_prefix):
                    class_path = os.path.join(model_path, cls)

                    for replicate in _sorted_subdirs(class_path, "replicate_"):
                        replicate_path = os.path.join(class_path, replicate)
                        csv_files = sorted(
                            f for f in os.listdir(replicate_path) if f.endswith(".csv")
                        )

                        for csv_file in csv_files:
                            df = pd.read_csv(os.path.join(replicate_path, csv_file))
                            if "Generation" not in df.columns:
                                continue

                            labels = {"Model": model, "Class": cls, "Replicate": replicate}

                            # Rows belonging to the last generation recorded in this file.
                            is_final = df["Generation"] == df["Generation"].max()
                            final_gen_frames.append(df[is_final].assign(**labels))

                            # numeric_only=True: text columns cannot be averaged and
                            # would raise a TypeError on pandas >= 2.
                            averages = df.mean(numeric_only=True).to_frame().T
                            average_frames.append(averages.assign(**labels))

            combined_data = (
                pd.concat(final_gen_frames, ignore_index=True) if final_gen_frames else pd.DataFrame()
            )
            combined_avg_data = (
                pd.concat(average_frames, ignore_index=True) if average_frames else pd.DataFrame()
            )
            key = f"{experiment}_{dataset}"
            aggregated_results[key] = combined_data
            aggregated_avg_results[key] = combined_avg_data

            print(f"Combined data shape for {key}: {combined_data.shape}")
            print(f"Combined average data shape for {key}: {combined_avg_data.shape}")

    return aggregated_results, aggregated_avg_results


def save_aggregated_data(aggregated_results, output_dir):
    """
    Write each aggregated DataFrame to ``<output_dir>/<key>_aggregated.csv``.

    Args:
        aggregated_results (dict): Maps a name (used as the file-name stem) to
            the DataFrame to write.
        output_dir (str): Destination directory; created if it does not exist.

    Returns:
        None
    """
    # Missing parent directories are created too; an existing directory is fine.
    os.makedirs(output_dir, exist_ok=True)

    for key in aggregated_results:
        frame = aggregated_results[key]
        output_path = os.path.join(output_dir, f"{key}_aggregated.csv")
        # index=False keeps the row index out of the file.
        frame.to_csv(output_path, index=False)
        print(f"Saved aggregated data for {key} to {output_path}")



# Run configuration for this cell: where the raw experiment output lives, which
# experiment/dataset combinations to aggregate, and where the CSVs are written.
# NOTE(review): the input is read from a home-directory Downloads folder while
# the output goes to the "/media/..." location; both paths are absolute and
# machine-specific — confirm before running elsewhere.
base_dir = "/home/ankit-gupta/Downloads/EPIC_Fool_Files/data_generated"
experiments = ["Exp_2_1a", "Exp_2_2"]
datasets = ["mnistFashion"]
output_dir = "/media/ankit-gupta/546B-6466/aggregated_data"
# One sub-directory per output kind: final-generation rows and per-file means.
final_gen_dir = os.path.join(output_dir, "final_gen_data")
avg_metric_dir = os.path.join(output_dir, "avg_metric_data")

# Load every experiment x dataset combination (2 x 1 = 2 result keys here).
aggregated_results, aggregated_avg_results = load_and_aggregate_all_data(base_dir, experiments, datasets)

# Save the aggregated data for further analysis; each call writes one
# "<experiment>_<dataset>_aggregated.csv" per key into its directory.
# NOTE(review): another cell in this notebook (the one reading from
# "/media/ankit-gupta/546B-6466/data_generated" with experiments
# ["Exp_1", "Exp_2_2"]) also writes Exp_2_2_mnistFashion_aggregated.csv into
# these same directories, so whichever cell runs last overwrites the other —
# confirm which source is the intended one.
save_aggregated_data(aggregated_results, final_gen_dir)
save_aggregated_data(aggregated_avg_results, avg_metric_dir)

# List the "<experiment>_<dataset>" keys now available for analysis.
# data_keys is a dict view, so it prints as dict_keys([...]).
data_keys = aggregated_results.keys()
print("Available datasets for analysis:", data_keys)



Processing: Exp_2_1a/mnistFashion
Models found: ['XGB', 'SVM', 'CNN', 'RNN', 'MLP', 'RF']
Combined data shape for Exp_2_1a_mnistFashion: (1800, 21)
Combined average data shape for Exp_2_1a_mnistFashion: (1800, 21)
Processing: Exp_2_2/mnistFashion
Models found: ['XGB', 'SVM', 'CNN', 'RNN', 'MLP', 'RF']
Combined data shape for Exp_2_2_mnistFashion: (1800, 21)
Combined average data shape for Exp_2_2_mnistFashion: (1800, 21)
Saved aggregated data for Exp_2_1a_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/final_gen_data/Exp_2_1a_mnistFashion_aggregated.csv
Saved aggregated data for Exp_2_2_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/final_gen_data/Exp_2_2_mnistFashion_aggregated.csv
Saved aggregated data for Exp_2_1a_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/avg_metric_data/Exp_2_1a_mnistFashion_aggregated.csv
Saved aggregated data for Exp_2_2_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/avg_metric_data/Exp_2_2_mnistFashion_

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def _sorted_subdirs(parent, prefix=""):
    """Return the sorted names of the sub-directories of ``parent`` starting with ``prefix``."""
    return sorted(
        name
        for name in os.listdir(parent)
        if name.startswith(prefix) and os.path.isdir(os.path.join(parent, name))
    )


def load_and_aggregate_all_data(base_dir, experiments, datasets, class_prefix="target_class_"):
    """
    Load and aggregate scores for all combinations of experiments and datasets.

    Walks ``<base_dir>/<experiment>/<dataset>/<model>/<class_prefix>*/replicate_*/*.csv``.
    Every CSV that has a "Generation" column contributes:

    * its rows whose "Generation" equals the file's maximum (final generation), and
    * one row holding the mean of each numeric column over the whole file.

    Both are tagged with "Model", "Class" and "Replicate" columns taken from the
    directory names. CSVs without a "Generation" column are skipped.

    Args:
        base_dir (str): Base directory where data is stored.
        experiments (list): List of experiment names.
        datasets (list): List of dataset names.
        class_prefix (str): Name prefix identifying class directories inside a
            model directory. Defaults to "target_class_".

    Returns:
        tuple: ``(aggregated_results, aggregated_avg_results)``. Both are dicts
        keyed by "<experiment>_<dataset>"; the first maps to the concatenated
        final-generation rows, the second to the concatenated per-file means.
        A combination with no usable CSV maps to an empty DataFrame.

    Raises:
        OSError: If a directory in the tree cannot be listed, e.g.
            FileNotFoundError when ``<base_dir>/<experiment>/<dataset>`` is missing.
    """
    aggregated_results = {}
    aggregated_avg_results = {}

    for experiment in experiments:
        for dataset in datasets:
            final_gen_frames = []
            average_frames = []

            experiment_path = os.path.join(base_dir, experiment, dataset)
            # Sorted listings make the row order of the result reproducible;
            # os.listdir alone returns entries in arbitrary order.
            models = _sorted_subdirs(experiment_path)

            print(f"Processing: {experiment}/{dataset}")
            print(f"Models found: {models}")

            for model in models:
                model_path = os.path.join(experiment_path, model)

                for cls in _sorted_subdirs(model_path, class_prefix):
                    class_path = os.path.join(model_path, cls)

                    for replicate in _sorted_subdirs(class_path, "replicate_"):
                        replicate_path = os.path.join(class_path, replicate)
                        csv_files = sorted(
                            f for f in os.listdir(replicate_path) if f.endswith(".csv")
                        )

                        for csv_file in csv_files:
                            df = pd.read_csv(os.path.join(replicate_path, csv_file))
                            if "Generation" not in df.columns:
                                continue

                            labels = {"Model": model, "Class": cls, "Replicate": replicate}

                            # Rows belonging to the last generation recorded in this file.
                            is_final = df["Generation"] == df["Generation"].max()
                            final_gen_frames.append(df[is_final].assign(**labels))

                            # numeric_only=True: text columns cannot be averaged and
                            # would raise a TypeError on pandas >= 2.
                            averages = df.mean(numeric_only=True).to_frame().T
                            average_frames.append(averages.assign(**labels))

            combined_data = (
                pd.concat(final_gen_frames, ignore_index=True) if final_gen_frames else pd.DataFrame()
            )
            combined_avg_data = (
                pd.concat(average_frames, ignore_index=True) if average_frames else pd.DataFrame()
            )
            key = f"{experiment}_{dataset}"
            aggregated_results[key] = combined_data
            aggregated_avg_results[key] = combined_avg_data

            print(f"Combined data shape for {key}: {combined_data.shape}")
            print(f"Combined average data shape for {key}: {combined_avg_data.shape}")

    return aggregated_results, aggregated_avg_results


def save_aggregated_data(aggregated_results, output_dir):
    """
    Write each aggregated DataFrame to ``<output_dir>/<key>_aggregated.csv``.

    Args:
        aggregated_results (dict): Maps a name (used as the file-name stem) to
            the DataFrame to write.
        output_dir (str): Destination directory; created if it does not exist.

    Returns:
        None
    """
    # Missing parent directories are created too; an existing directory is fine.
    os.makedirs(output_dir, exist_ok=True)

    for key in aggregated_results:
        frame = aggregated_results[key]
        output_path = os.path.join(output_dir, f"{key}_aggregated.csv")
        # index=False keeps the row index out of the file.
        frame.to_csv(output_path, index=False)
        print(f"Saved aggregated data for {key} to {output_path}")



# Run configuration for this cell: where the raw experiment output lives, which
# experiment/dataset combinations to aggregate, and where the CSVs are written.
# NOTE(review): both paths are absolute and machine-specific — confirm before
# running elsewhere.
base_dir = "/media/ankit-gupta/546B-6466/data_generated"
experiments = ["Exp_2_1b"]
datasets = ["sklearnDigits"]
output_dir = "/media/ankit-gupta/546B-6466/aggregated_data"
# One sub-directory per output kind: final-generation rows and per-file means.
final_gen_dir = os.path.join(output_dir, "final_gen_data")
avg_metric_dir = os.path.join(output_dir, "avg_metric_data")

# Load the single experiment x dataset combination configured above.
aggregated_results, aggregated_avg_results = load_and_aggregate_all_data(base_dir, experiments, datasets)

# Save the aggregated data for further analysis; each call writes one
# "<experiment>_<dataset>_aggregated.csv" per key into its directory.
save_aggregated_data(aggregated_results, final_gen_dir)
save_aggregated_data(aggregated_avg_results, avg_metric_dir)

# List the "<experiment>_<dataset>" keys now available for analysis.
# data_keys is a dict view, so it prints as dict_keys([...]).
data_keys = aggregated_results.keys()
print("Available datasets for analysis:", data_keys)



Processing: Exp_2_1b/sklearnDigits
Models found: ['XGB', 'CNN', 'MLP', 'SVM', 'RF', 'RNN']
Combined data shape for Exp_2_1b_sklearnDigits: (1800, 21)
Combined average data shape for Exp_2_1b_sklearnDigits: (1800, 21)
Saved aggregated data for Exp_2_1b_sklearnDigits to /media/ankit-gupta/546B-6466/aggregated_data/final_gen_data/Exp_2_1b_sklearnDigits_aggregated.csv
Saved aggregated data for Exp_2_1b_sklearnDigits to /media/ankit-gupta/546B-6466/aggregated_data/avg_metric_data/Exp_2_1b_sklearnDigits_aggregated.csv
Available datasets for analysis: dict_keys(['Exp_2_1b_sklearnDigits'])


In [3]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def _sorted_subdirs(parent, prefix=""):
    """Return the sorted names of the sub-directories of ``parent`` starting with ``prefix``."""
    return sorted(
        name
        for name in os.listdir(parent)
        if name.startswith(prefix) and os.path.isdir(os.path.join(parent, name))
    )


def load_and_aggregate_all_data(base_dir, experiments, datasets, class_prefix="target_class_"):
    """
    Load and aggregate scores for all combinations of experiments and datasets.

    Walks ``<base_dir>/<experiment>/<dataset>/<model>/<class_prefix>*/replicate_*/*.csv``.
    Every CSV that has a "Generation" column contributes:

    * its rows whose "Generation" equals the file's maximum (final generation), and
    * one row holding the mean of each numeric column over the whole file.

    Both are tagged with "Model", "Class" and "Replicate" columns taken from the
    directory names. CSVs without a "Generation" column are skipped.

    Args:
        base_dir (str): Base directory where data is stored.
        experiments (list): List of experiment names.
        datasets (list): List of dataset names.
        class_prefix (str): Name prefix identifying class directories inside a
            model directory. Defaults to "target_class_".

    Returns:
        tuple: ``(aggregated_results, aggregated_avg_results)``. Both are dicts
        keyed by "<experiment>_<dataset>"; the first maps to the concatenated
        final-generation rows, the second to the concatenated per-file means.
        A combination with no usable CSV maps to an empty DataFrame.

    Raises:
        OSError: If a directory in the tree cannot be listed, e.g.
            FileNotFoundError when ``<base_dir>/<experiment>/<dataset>`` is missing.
    """
    aggregated_results = {}
    aggregated_avg_results = {}

    for experiment in experiments:
        for dataset in datasets:
            final_gen_frames = []
            average_frames = []

            experiment_path = os.path.join(base_dir, experiment, dataset)
            # Sorted listings make the row order of the result reproducible;
            # os.listdir alone returns entries in arbitrary order.
            models = _sorted_subdirs(experiment_path)

            print(f"Processing: {experiment}/{dataset}")
            print(f"Models found: {models}")

            for model in models:
                model_path = os.path.join(experiment_path, model)

                for cls in _sorted_subdirs(model_path, class_prefix):
                    class_path = os.path.join(model_path, cls)

                    for replicate in _sorted_subdirs(class_path, "replicate_"):
                        replicate_path = os.path.join(class_path, replicate)
                        csv_files = sorted(
                            f for f in os.listdir(replicate_path) if f.endswith(".csv")
                        )

                        for csv_file in csv_files:
                            df = pd.read_csv(os.path.join(replicate_path, csv_file))
                            if "Generation" not in df.columns:
                                continue

                            labels = {"Model": model, "Class": cls, "Replicate": replicate}

                            # Rows belonging to the last generation recorded in this file.
                            is_final = df["Generation"] == df["Generation"].max()
                            final_gen_frames.append(df[is_final].assign(**labels))

                            # numeric_only=True: text columns cannot be averaged and
                            # would raise a TypeError on pandas >= 2.
                            averages = df.mean(numeric_only=True).to_frame().T
                            average_frames.append(averages.assign(**labels))

            combined_data = (
                pd.concat(final_gen_frames, ignore_index=True) if final_gen_frames else pd.DataFrame()
            )
            combined_avg_data = (
                pd.concat(average_frames, ignore_index=True) if average_frames else pd.DataFrame()
            )
            key = f"{experiment}_{dataset}"
            aggregated_results[key] = combined_data
            aggregated_avg_results[key] = combined_avg_data

            print(f"Combined data shape for {key}: {combined_data.shape}")
            print(f"Combined average data shape for {key}: {combined_avg_data.shape}")

    return aggregated_results, aggregated_avg_results


def save_aggregated_data(aggregated_results, output_dir):
    """
    Write each aggregated DataFrame to ``<output_dir>/<key>_aggregated.csv``.

    Args:
        aggregated_results (dict): Maps a name (used as the file-name stem) to
            the DataFrame to write.
        output_dir (str): Destination directory; created if it does not exist.

    Returns:
        None
    """
    # Missing parent directories are created too; an existing directory is fine.
    os.makedirs(output_dir, exist_ok=True)

    for key in aggregated_results:
        frame = aggregated_results[key]
        output_path = os.path.join(output_dir, f"{key}_aggregated.csv")
        # index=False keeps the row index out of the file.
        frame.to_csv(output_path, index=False)
        print(f"Saved aggregated data for {key} to {output_path}")



# Run configuration for this cell: where the raw experiment output lives, which
# experiment/dataset combinations to aggregate, and where the CSVs are written.
# NOTE(review): the input is read from a home-directory Downloads folder while
# the output goes to the "/media/..." location; both paths are absolute and
# machine-specific — confirm before running elsewhere.
base_dir = "/home/ankit-gupta/Downloads/EPIC_Fool_Files/data_generated/"
experiments = ["Exp_2_1b"]
datasets = ["mnistDigits"]
output_dir = "/media/ankit-gupta/546B-6466/aggregated_data"
# One sub-directory per output kind: final-generation rows and per-file means.
final_gen_dir = os.path.join(output_dir, "final_gen_data")
avg_metric_dir = os.path.join(output_dir, "avg_metric_data")

# Load the single experiment x dataset combination configured above.
aggregated_results, aggregated_avg_results = load_and_aggregate_all_data(base_dir, experiments, datasets)

# Save the aggregated data for further analysis; each call writes one
# "<experiment>_<dataset>_aggregated.csv" per key into its directory.
save_aggregated_data(aggregated_results, final_gen_dir)
save_aggregated_data(aggregated_avg_results, avg_metric_dir)

# List the "<experiment>_<dataset>" keys now available for analysis.
# data_keys is a dict view, so it prints as dict_keys([...]).
data_keys = aggregated_results.keys()
print("Available datasets for analysis:", data_keys)



Processing: Exp_2_1b/mnistDigits
Models found: ['XGB', 'SVM', 'MLP', 'RF']
Combined data shape for Exp_2_1b_mnistDigits: (1200, 21)
Combined average data shape for Exp_2_1b_mnistDigits: (1200, 21)
Saved aggregated data for Exp_2_1b_mnistDigits to /media/ankit-gupta/546B-6466/aggregated_data/final_gen_data/Exp_2_1b_mnistDigits_aggregated.csv
Saved aggregated data for Exp_2_1b_mnistDigits to /media/ankit-gupta/546B-6466/aggregated_data/avg_metric_data/Exp_2_1b_mnistDigits_aggregated.csv
Available datasets for analysis: dict_keys(['Exp_2_1b_mnistDigits'])


In [4]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def _sorted_subdirs(parent, prefix=""):
    """Return the sorted names of the sub-directories of ``parent`` starting with ``prefix``."""
    return sorted(
        name
        for name in os.listdir(parent)
        if name.startswith(prefix) and os.path.isdir(os.path.join(parent, name))
    )


def load_and_aggregate_all_data(base_dir, experiments, datasets, class_prefix="target_class_"):
    """
    Load and aggregate scores for all combinations of experiments and datasets.

    Walks ``<base_dir>/<experiment>/<dataset>/<model>/<class_prefix>*/replicate_*/*.csv``.
    Every CSV that has a "Generation" column contributes:

    * its rows whose "Generation" equals the file's maximum (final generation), and
    * one row holding the mean of each numeric column over the whole file.

    Both are tagged with "Model", "Class" and "Replicate" columns taken from the
    directory names. CSVs without a "Generation" column are skipped.

    Args:
        base_dir (str): Base directory where data is stored.
        experiments (list): List of experiment names.
        datasets (list): List of dataset names.
        class_prefix (str): Name prefix identifying class directories inside a
            model directory. Defaults to "target_class_".

    Returns:
        tuple: ``(aggregated_results, aggregated_avg_results)``. Both are dicts
        keyed by "<experiment>_<dataset>"; the first maps to the concatenated
        final-generation rows, the second to the concatenated per-file means.
        A combination with no usable CSV maps to an empty DataFrame.

    Raises:
        OSError: If a directory in the tree cannot be listed, e.g.
            FileNotFoundError when ``<base_dir>/<experiment>/<dataset>`` is missing.
    """
    aggregated_results = {}
    aggregated_avg_results = {}

    for experiment in experiments:
        for dataset in datasets:
            final_gen_frames = []
            average_frames = []

            experiment_path = os.path.join(base_dir, experiment, dataset)
            # Sorted listings make the row order of the result reproducible;
            # os.listdir alone returns entries in arbitrary order.
            models = _sorted_subdirs(experiment_path)

            print(f"Processing: {experiment}/{dataset}")
            print(f"Models found: {models}")

            for model in models:
                model_path = os.path.join(experiment_path, model)

                for cls in _sorted_subdirs(model_path, class_prefix):
                    class_path = os.path.join(model_path, cls)

                    for replicate in _sorted_subdirs(class_path, "replicate_"):
                        replicate_path = os.path.join(class_path, replicate)
                        csv_files = sorted(
                            f for f in os.listdir(replicate_path) if f.endswith(".csv")
                        )

                        for csv_file in csv_files:
                            df = pd.read_csv(os.path.join(replicate_path, csv_file))
                            if "Generation" not in df.columns:
                                continue

                            labels = {"Model": model, "Class": cls, "Replicate": replicate}

                            # Rows belonging to the last generation recorded in this file.
                            is_final = df["Generation"] == df["Generation"].max()
                            final_gen_frames.append(df[is_final].assign(**labels))

                            # numeric_only=True: text columns cannot be averaged and
                            # would raise a TypeError on pandas >= 2.
                            averages = df.mean(numeric_only=True).to_frame().T
                            average_frames.append(averages.assign(**labels))

            combined_data = (
                pd.concat(final_gen_frames, ignore_index=True) if final_gen_frames else pd.DataFrame()
            )
            combined_avg_data = (
                pd.concat(average_frames, ignore_index=True) if average_frames else pd.DataFrame()
            )
            key = f"{experiment}_{dataset}"
            aggregated_results[key] = combined_data
            aggregated_avg_results[key] = combined_avg_data

            print(f"Combined data shape for {key}: {combined_data.shape}")
            print(f"Combined average data shape for {key}: {combined_avg_data.shape}")

    return aggregated_results, aggregated_avg_results


def save_aggregated_data(aggregated_results, output_dir):
    """
    Write each aggregated DataFrame to ``<output_dir>/<key>_aggregated.csv``.

    Args:
        aggregated_results (dict): Maps a name (used as the file-name stem) to
            the DataFrame to write.
        output_dir (str): Destination directory; created if it does not exist.

    Returns:
        None
    """
    # Missing parent directories are created too; an existing directory is fine.
    os.makedirs(output_dir, exist_ok=True)

    for key in aggregated_results:
        frame = aggregated_results[key]
        output_path = os.path.join(output_dir, f"{key}_aggregated.csv")
        # index=False keeps the row index out of the file.
        frame.to_csv(output_path, index=False)
        print(f"Saved aggregated data for {key} to {output_path}")



# Run configuration for this cell: where the raw experiment output lives, which
# experiment/dataset combinations to aggregate, and where the CSVs are written.
# NOTE(review): the input is read from a home-directory Downloads folder while
# the output goes to the "/media/..." location; both paths are absolute and
# machine-specific — confirm before running elsewhere.
base_dir = "/home/ankit-gupta/Downloads/EPIC_Fool_Files/data_generated/"
experiments = ["Exp_2_1b"]
datasets = ["mnistFashion"]
output_dir = "/media/ankit-gupta/546B-6466/aggregated_data"
# One sub-directory per output kind: final-generation rows and per-file means.
final_gen_dir = os.path.join(output_dir, "final_gen_data")
avg_metric_dir = os.path.join(output_dir, "avg_metric_data")

# Load the single experiment x dataset combination configured above.
aggregated_results, aggregated_avg_results = load_and_aggregate_all_data(base_dir, experiments, datasets)

# Save the aggregated data for further analysis; each call writes one
# "<experiment>_<dataset>_aggregated.csv" per key into its directory.
save_aggregated_data(aggregated_results, final_gen_dir)
save_aggregated_data(aggregated_avg_results, avg_metric_dir)

# List the "<experiment>_<dataset>" keys now available for analysis.
# data_keys is a dict view, so it prints as dict_keys([...]).
data_keys = aggregated_results.keys()
print("Available datasets for analysis:", data_keys)



Processing: Exp_2_1b/mnistFashion
Models found: ['XGB', 'SVM', 'MLP', 'RF']
Combined data shape for Exp_2_1b_mnistFashion: (1200, 21)
Combined average data shape for Exp_2_1b_mnistFashion: (1200, 21)
Saved aggregated data for Exp_2_1b_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/final_gen_data/Exp_2_1b_mnistFashion_aggregated.csv
Saved aggregated data for Exp_2_1b_mnistFashion to /media/ankit-gupta/546B-6466/aggregated_data/avg_metric_data/Exp_2_1b_mnistFashion_aggregated.csv
Available datasets for analysis: dict_keys(['Exp_2_1b_mnistFashion'])
