In [8]:
import os
import pandas as pd
import matplotlib.pyplot as plt


def plot_combined_boxplot(folder_path, column, output_file="combined_boxplot.png"):
    """
    Reads CSV files in a folder, extracts category-wise results, 
    and creates a combined boxplot for all categories across all files.

    Parameters:
        folder_path (str): Path to the folder containing the CSV files.
        output_file (str): Path to save the generated boxplot. Default is 'combined_boxplot.png'.
    """
    # List all CSV files in the folder
    csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]

    if not csv_files:
        print("No CSV files found in the folder.")
        return

    combined_data = {}

    for file in csv_files:
        file_path = os.path.join(folder_path, file)

        try:
            # Read the CSV file
            df = pd.read_csv(file_path, sep=";")

            # Extract the first column (categories) and the third column (results)
            categories = df["name"]
            results = df[column]

            # Convert results to numeric
            results = pd.to_numeric(results, errors='coerce')  # Convert to numeric, invalid entries to NaN

            for category, result in zip(categories, results):
                if pd.notna(result):  # Skip NaN values
                    if category not in combined_data:
                        combined_data[category] = []
                    combined_data[category].append(result)
        except Exception as e:
            print(f"Error processing {file}: {e}")

    if not combined_data:
        print("No data to plot.")
        return

    # Prepare data for boxplot
    categories = list(combined_data.keys())
    data = [combined_data[category] for category in categories]

    # Plot the boxplot
    plt.figure(figsize=(12, 8))
    plt.boxplot(data, labels=categories,
                patch_artist=True,
                boxprops=dict(facecolor='skyblue', color='black'),
                medianprops=dict(color='red'),
                whiskerprops=dict(color='black'),
                capprops=dict(color='black')
                )
    plt.title(folder_path+" Distribution of Results", fontsize=16)
    # plt.xlabel("Categories", fontsize=14)
    plt.ylabel("Results", fontsize=14)
    plt.xticks(rotation=90, ha='right')
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Save the plot as an image
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()

    print(f"Combined boxplot saved: {output_file}")


In [10]:
folder_path = [
    "MGAB_add_point_outlier_0.02", 
    "MGAB_add_point_outlier_0.05", 
    "MGAB_add_random_walk_trend_0.1",
    "MGAB_add_random_walk_trend_0.2", 
    "MGAB_add_white_noise_0.5",
    "MGAB_add_white_noise_1.0",
    "MGAB_filter_fft_11",
    "MGAB_filter_fft_21"
]
output_mmd_value = [
    file+'_mmd_value.png' for file in folder_path
]
output_dtw_value = [
    file+'_dtw_value.png' for file in folder_path
]
column = 'mmd_value'  # dwt_value mmd_value
for folder, file in zip(folder_path, output_mmd_value):
    plot_combined_boxplot(folder, column, file)

  plt.boxplot(data, labels=categories,
  plt.boxplot(data, labels=categories,


Combined boxplot saved: MGAB_add_point_outlier_0.02_mmd_value.png
Combined boxplot saved: MGAB_add_point_outlier_0.05_mmd_value.png


  plt.boxplot(data, labels=categories,
  plt.boxplot(data, labels=categories,


Combined boxplot saved: MGAB_add_random_walk_trend_0.1_mmd_value.png
Combined boxplot saved: MGAB_add_random_walk_trend_0.2_mmd_value.png


  plt.boxplot(data, labels=categories,


Combined boxplot saved: MGAB_add_white_noise_0.5_mmd_value.png
Combined boxplot saved: MGAB_add_white_noise_1.0_mmd_value.png


  plt.boxplot(data, labels=categories,
  plt.boxplot(data, labels=categories,


Combined boxplot saved: MGAB_filter_fft_11_mmd_value.png
Combined boxplot saved: MGAB_filter_fft_21_mmd_value.png


  plt.boxplot(data, labels=categories,


Skipping MGAB_add_point_outlier_002_01.test.out_20250108_215112.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_02.test.out_20250108_215113.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_03.test.out_20250108_215113.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_04.test.out_20250108_215112.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_05.test.out_20250108_215112.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_06.test.out_20250108_215113.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_07.test.out_20250108_215112.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_08.test.out_20250108_215113.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_09.test.out_20250108_215112.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_10.test.out_20250108_215112.csv: Not enough columns.
Skipping MGAB_add_point_outlier_002_v2_01.test.out_20250108_215113.csv: Not enough columns.
Skipping MGAB_add_