In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:

def plot_combined_boxplot(folder_path, column, output_file="combined_boxplot.jpg"):
    """
    Pool per-category values from every CSV file in a folder and save them
    as a single boxplot image (one box per category).

    Every ``*.csv`` file directly inside ``folder_path`` is read with ``;`` as
    the separator. For each row, the ``name`` column gives the category and
    ``column`` gives the value. Values that cannot be converted to a number
    are dropped. A file that cannot be processed (unreadable, missing column,
    ...) is reported on stdout and skipped; the remaining files are still used.

    Parameters:
        folder_path (str): Path to the folder containing the CSV files.
        column (str): Name of the CSV column holding the values to plot.
        output_file (str): Path to save the generated boxplot. Default is 'combined_boxplot.jpg'.

    Returns:
        None. Prints a message and returns early when the folder holds no CSV
        files or when no numeric value could be collected.
    """
    # Sorted so that the order of the boxes does not depend on the arbitrary
    # order in which os.listdir happens to return the files.
    csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))

    if not csv_files:
        print("No CSV files found in the folder.")
        return

    # category -> list of numeric values gathered across all files
    combined_data = {}

    for file in csv_files:
        file_path = os.path.join(folder_path, file)

        try:
            df = pd.read_csv(file_path, sep=";")

            # Columns are selected by name: "name" is the category, `column` the value.
            categories = df["name"]
            results = pd.to_numeric(df[column], errors='coerce')  # invalid entries become NaN

            for category, result in zip(categories, results):
                if pd.notna(result):  # Skip NaN values
                    combined_data.setdefault(category, []).append(result)
        except Exception as e:
            # Best effort: one bad file must not prevent plotting the others.
            print(f"Error processing {file}: {e}")

    if not combined_data:
        print("No data to plot.")
        return

    # Prepare data for boxplot (dict order = order in which categories were first seen)
    categories = list(combined_data)
    data = [combined_data[category] for category in categories]

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        ax.boxplot(data,
                   patch_artist=True,
                   boxprops=dict(facecolor='skyblue', color='black'),
                   medianprops=dict(color='red'),
                   whiskerprops=dict(color='black'),
                   capprops=dict(color='black')
                   )
        # Tick labels are set explicitly instead of through boxplot's `labels=`
        # argument, which newer matplotlib releases deprecate in favour of
        # `tick_labels`. Boxes are drawn at positions 1..N by default.
        ax.set_xticks(list(range(1, len(categories) + 1)))
        ax.set_xticklabels([str(category) for category in categories], rotation=90, ha='right')
        ax.set_title(folder_path+" Distribution of Results", fontsize=16)
        ax.set_ylabel("Results", fontsize=14)
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Save the plot as an image
        fig.tight_layout()
        fig.savefig(output_file)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

    print(f"Combined boxplot saved: {output_file}")


In [5]:
# Input folders; one boxplot image is written per folder.
folder_path = [
    # "MGAB_add_point_outlier_0.02",
    # "MGAB_add_point_outlier_0.05",
    "MGAB_add_random_walk_trend_0.1",
    "MGAB_add_white_noise_1.0",
]

# Output image names derived from the folder names, one list per metric.
output_mmd_value = [f"{folder}_mmd_value.jpg" for folder in folder_path]
output_dtw_value = [f"{folder}_dtw_value.jpg" for folder in folder_path]

# CSV column to plot.
column = 'dwt_value'  # dwt_value mmd_value

for folder, file in zip(folder_path, output_dtw_value):
    plot_combined_boxplot(folder_path=folder, column=column, output_file=file)

  plt.boxplot(data, labels=categories,
  plt.boxplot(data, labels=categories,


Combined boxplot saved: MGAB_add_random_walk_trend_0.1_dtw_value.jpg
Combined boxplot saved: MGAB_add_white_noise_1.0_dtw_value.jpg


In [2]:
def plot_combined_heatmap(folder_path, category_column, value_column, output_file="combined_heatmap.jpg",
                          title="Heatmap of Average DTW Values by Time Series"):
    """
    Average a value column per category over every CSV file in a folder and
    save the averages as a one-column heatmap image.

    Every ``*.csv`` file directly inside ``folder_path`` is read with ``;`` as
    the separator. Files lacking either required column are reported and
    skipped, as are files that raise while being processed. Rows whose
    category is missing or whose value cannot be converted to a number are
    dropped before averaging.

    Parameters:
        folder_path (str): Path to the folder containing the CSV files.
        category_column (str): Name of the column containing the categories.
        value_column (str): Name of the column containing the values to aggregate.
        output_file (str): Path to save the generated heatmap. Default is 'combined_heatmap.jpg'.
        title (str): Title drawn above the heatmap. The default keeps the
            original DTW-specific wording; pass another title when plotting a
            different value column.

    Returns:
        None. Prints a message and returns early when the folder holds no CSV
        files or when no valid row could be collected.
    """
    # Sorted so that processing order does not depend on os.listdir's arbitrary order.
    csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))

    if not csv_files:
        print("No CSV files found in the folder.")
        return

    # One cleaned two-column frame ("Category", "Value") per usable file
    combined_data = []

    for file in csv_files:
        file_path = os.path.join(folder_path, file)

        try:
            df = pd.read_csv(file_path, sep=';')

            if category_column not in df.columns or value_column not in df.columns:
                print(f"Skipping {file}: Required columns '{category_column}' or '{value_column}' not found.")
                continue

            values = pd.to_numeric(df[value_column], errors='coerce')  # invalid entries become NaN
            valid_data = pd.DataFrame({"Category": df[category_column], "Value": values}).dropna()

            # Keep only frames that still contain rows; otherwise the
            # "No data to plot." check below would be bypassed and an empty
            # table would be handed to the plotting code.
            if not valid_data.empty:
                combined_data.append(valid_data)

        except Exception as e:
            # Best effort: one bad file must not prevent plotting the others.
            print(f"Error processing {file}: {e}")

    if not combined_data:
        print("No data to plot.")
        return

    # Combine all data into a single DataFrame
    combined_df = pd.concat(combined_data, ignore_index=True)

    # One row per category holding the mean of its values
    heatmap_data = combined_df.pivot_table(index="Category", values="Value", aggfunc="mean")

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        sns.heatmap(heatmap_data, annot=True, cmap="coolwarm", fmt=".2f",
                    cbar_kws={'label': 'Average Value'}, ax=ax)
        ax.set_title(title, fontsize=16)
        ax.set_ylabel("", fontsize=14)

        # Save the plot as an image
        fig.tight_layout()
        fig.savefig(output_file)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

    print(f"Combined heatmap saved: {output_file}")


In [None]:
# Input folders; one heatmap image is written per folder.
folder_path = [
    # "MGAB_add_point_outlier_0.02",
    # "MGAB_add_point_outlier_0.05",
    "MGAB_add_random_walk_trend_0.1",
    "MGAB_add_random_walk_trend_0.2",
    "MGAB_add_white_noise_0.5",
    "MGAB_add_white_noise_1.0",
    "MGAB_filter_fft_11",
    "MGAB_filter_fft_21",
]

# Output image names derived from the folder names, one list per metric.
output_mmd_value = [f"{folder}_mmd_value.jpg" for folder in folder_path]
output_dtw_value = [f"{folder}_heatmap_dtw_value.jpg" for folder in folder_path]

# CSV column whose per-category average is drawn.
column = 'dwt_value'  # dwt_value mmd_value

for folder, file in zip(folder_path, output_dtw_value):
    plot_combined_heatmap(
        folder_path=folder,
        category_column="name",
        value_column=column,
        output_file=file,
    )

In [14]:
# Load a single result file (semicolon-separated).
file_path = "MGAB_add_random_walk_trend_0.1/MGAB_add_random_walk_trend_0.1_10.test.out.csv"
df = pd.read_csv(file_path, delimiter=';')

# Make both metric columns numeric; anything unparsable becomes NaN.
# Every "." is removed from dwt_value before conversion.
# NOTE(review): presumably the dots are thousands separators - confirm against the CSV format.
df = df.assign(
    mmd_value=pd.to_numeric(df["mmd_value"], errors="coerce"),
    dwt_value=pd.to_numeric(
        df["dwt_value"].str.replace(".", "", regex=False),
        errors="coerce",
    ),
)

# Pairwise correlation matrix of the two metrics (DataFrame.corr defaults to Pearson).
metric_columns = ["mmd_value", "dwt_value"]
correlation = df[metric_columns].corr()
correlation

Unnamed: 0,mmd_value,dwt_value
mmd_value,1.0,-0.038146
dwt_value,-0.038146,1.0


In [6]:
from PIL import Image

def combine_images(columns, space, images, output_file='combined_image.png'):
    """
    Paste several image files onto one white canvas laid out as a grid and
    save the result.

    The grid is filled row by row with ``columns`` cells per row. Every cell
    has the size of the widest / tallest input image, and each image is
    centred inside its cell. Cells are separated by ``space`` pixels; no
    margin is added around the outside of the grid.

    Parameters:
        columns (int): Number of images per row; must be at least 1.
        space (int): Gap in pixels between neighbouring cells.
        images (list[str]): Paths of the images to combine, in grid order.
        output_file (str): Path of the combined image. Default is 'combined_image.png'.

    Raises:
        ValueError: If ``columns`` is smaller than 1 or ``images`` is empty.
    """
    if columns < 1:
        raise ValueError("columns must be at least 1")
    images = list(images)
    if not images:
        raise ValueError("images must contain at least one image path")

    # First pass: collect the sizes. Each file is opened in a context manager
    # so its handle is released immediately instead of staying open.
    sizes = []
    for path in images:
        with Image.open(path) as img:
            sizes.append((img.width, img.height))

    rows = -(-len(images) // columns)  # ceiling division
    width_max = max(width for width, _ in sizes)
    height_max = max(height for _, height in sizes)

    background_width = width_max * columns + space * (columns - 1)
    background_height = height_max * rows + space * (rows - 1)
    background = Image.new('RGBA', (background_width, background_height), (255, 255, 255, 255))

    # Second pass: paste every image centred in its grid cell.
    for index, (path, (width, height)) in enumerate(zip(images, sizes)):
        row, col = divmod(index, columns)
        x = col * (width_max + space) + (width_max - width) // 2
        y = row * (height_max + space) + (height_max - height) // 2
        with Image.open(path) as img:
            background.paste(img, (x, y))

    background.save(output_file)

In [10]:
# Boxplot images to stack into a single column, 20 px apart.
list_im = [
    'MGAB_add_random_walk_trend_0.1_dtw_value.jpg',
    'MGAB_add_white_noise_1.0_dtw_value.jpg',
]
combine_images(1, 20, list_im)