# Paddy MLP GRID Search Heatmap

In [1]:
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
# Cell 1: Import Libraries and Define Functions

# Import necessary libraries
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def plot_heatmap(df, output_path, title):
    """
    Render an annotated heatmap of 'Best_F1_Score' and save it as a PNG.

    The DataFrame is pivoted so that 'YT' values become rows and 'Qmax'
    values become columns, both sorted ascending. Each cell is annotated
    with its score formatted to three decimals.

    Parameters:
        df (pandas.DataFrame): DataFrame containing 'Qmax', 'YT', and 'Best_F1_Score'.
            Each ('YT', 'Qmax') pair must be unique; pandas' pivot raises
            ValueError on duplicated pairs.
        output_path (str): Path to save the generated heatmap PNG.
        title (str): Title of the heatmap. Accepted for interface
            compatibility but currently NOT drawn: the set_title call
            below is disabled.

    Side effects:
        Applies the seaborn "white" style globally via sns.set, writes the
        PNG to output_path, and prints a confirmation line on success.
    """
    # Pivot the long-format table into a YT x Qmax matrix, then sort both
    # axes so the grid reads in ascending order.
    heatmap_data = df.pivot(index='YT', columns='Qmax', values='Best_F1_Score')
    heatmap_data = heatmap_data.sort_index().sort_index(axis=1)

    # Keep a handle on the figure so it can be closed even when plotting or
    # saving fails; otherwise a caller that catches the exception and moves
    # on to the next file would accumulate open figures.
    fig = plt.figure(figsize=(12, 10))
    try:
        sns.set(style="white")

        # With no explicit ax, seaborn draws on the current axes, i.e. on
        # the figure created just above.
        ax = sns.heatmap(
            heatmap_data,
            annot=True,
            fmt=".3f",
            cmap="viridis",
            cbar_kws={'label': 'Best F1 Score'},
            linewidths=.5,
            linecolor='gray'
        )

        # NOTE: the title is not rendered. Re-enable the line below to
        # draw it.
        # ax.set_title(title, fontsize=20, pad=20)

        # Axis labels and tick labels with larger font sizes
        ax.set_xlabel('Qmax', fontsize=20)
        ax.set_ylabel('YT', fontsize=20)
        ax.tick_params(axis='x', labelsize=20)
        ax.tick_params(axis='y', labelsize=20)

        # The colorbar hangs off the mesh that heatmap() added to the axes;
        # enlarge its tick labels and restate its label at the larger size.
        cbar = ax.collections[0].colorbar
        cbar.ax.tick_params(labelsize=20)
        cbar.ax.set_ylabel('Best F1 Score', fontsize=20, labelpad=20)

        # Adjust layout for better fit, then write the PNG
        fig.tight_layout()
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    print(f"Heatmap saved to '{output_path}'.")

def process_csv_file(csv_file, output_dir, title=None):
    """
    Load one summary CSV and turn it into a heatmap PNG via plot_heatmap.

    The function is best-effort: any exception raised while reading,
    validating or plotting is caught and reported on stdout instead of
    being propagated. It always returns None.

    Parameters:
        csv_file (str): Path to the CSV file.
        output_dir (str): Directory to save the heatmap PNG. The file is
            named 'MLP_GRID_<csv basename without extension>.png'.
        title (str, optional): Custom title, forwarded unchanged to
            plot_heatmap. Defaults to None.
    """
    try:
        frame = pd.read_csv(csv_file)
        print(f"Successfully read '{csv_file}'.")

        # Echo the header row to make schema mismatches easy to diagnose
        print("Columns in the DataFrame:")
        print(frame.columns.tolist())

        # Anything left after removing the available columns is missing
        absent = {'Qmax', 'YT', 'Best_F1_Score'} - set(frame.columns)
        if absent:
            print(f"Skipping '{csv_file}': Missing required columns: {absent}")
            return

        # Keep only the first row for every (Qmax, YT) pair so the grid
        # has at most one value per cell
        deduped = frame.drop_duplicates(subset=['Qmax', 'YT'])

        # Missing scores are only reported, not filled or dropped
        if deduped['Best_F1_Score'].isna().any():
            print(f"Warning: Missing Best_F1_Score values in '{csv_file}'. These will be shown as empty in the heatmap.")

        # Derive the PNG name from the CSV's base name
        stem, _ = os.path.splitext(os.path.basename(csv_file))
        output_path = os.path.join(output_dir, f"MLP_GRID_{stem}.png")

        plot_heatmap(deduped, output_path, title)

    except Exception as e:
        print(f"An error occurred while processing '{csv_file}': {e}")


In [3]:
# Input CSV files to render. Entries are toggled by (un)commenting them;
# keep this list aligned position-by-position with custom_titles below.
input_csv_files = [
    # 'data/results/SUMMARY_PS.csv',
    # 'data/results/SUMMARY_PD.csv',
    'data/results/SUMMARY_GD.csv',
    # 'data/results/SUMMARY_GS.csv'
]

# Custom titles, one per entry in input_csv_files (same order)
custom_titles = [
    # "MLP - Population Scaled",
    # "MLP - Population Default",
    "MLP - Generational Default",
    # "MLP - Generational Scaled"
]

# zip() silently stops at the shorter list, so a file enabled without its
# title (or the reverse) would be skipped without notice. Fail loudly.
if len(input_csv_files) != len(custom_titles):
    raise ValueError(
        f"input_csv_files has {len(input_csv_files)} entries but "
        f"custom_titles has {len(custom_titles)}; they must match one-to-one."
    )

# Define the output directory for heatmaps
output_directory = 'heatmaps'

# Create the output directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

# Process each CSV file together with its title. The title is passed
# through so the call matches what the progress message announces.
for csv_file, title in zip(input_csv_files, custom_titles):
    print(f"Processing '{csv_file}' with title '{title}'...")
    process_csv_file(csv_file, output_directory, title=title)


Processing 'data/results/SUMMARY_GD.csv' with title 'MLP - Generational Default'...
Successfully read 'data/results/SUMMARY_GD.csv'.
Columns in the DataFrame:
['Filename', 'Qmax', 'YT', 'Best_F1_Score', 'Worst_F1_Score', 'Avg_F1_Score', 'StdDev_F1_Score']
Heatmap saved to 'heatmaps/MLP_GRID_SUMMARY_GD.png'.
