# GRAMACY LEE BEST MSE HEATMAP

## Runs were done separately, so they need to be combined first before running here

In [1]:
# Cell: Generate Heatmap Using Minimum Best MSE

import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def generate_min_mse_heatmap(csv_file, output_dir, title=None):
    """
    Generates and saves a heatmap using the minimum best_MSE for each Qmax and YT.

    The CSV is grouped by (Qmax, YT), the smallest best_MSE of each group is
    kept, and the result is pivoted into a YT-by-Qmax grid that is drawn with
    seaborn and written as a PNG inside ``output_dir``.

    Parameters:
        csv_file (str): Path to the input CSV file. It must contain the
            columns 'Qmax', 'YT' and 'best_MSE'.
        output_dir (str): Directory to save the heatmap PNG (created if missing).
        title (str, optional): Custom title for the heatmap. Defaults to None,
            in which case no title is drawn.

    Returns:
        None. Problems are reported with print() instead of being raised.
    """
    fig = None  # only set once a figure exists, so `finally` knows what to close
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file)
        print(f"Loaded CSV file: {csv_file}")

        # Check if required columns exist
        required_columns = {'Qmax', 'YT', 'best_MSE'}
        if not required_columns.issubset(df.columns):
            print(f"Error: CSV file '{csv_file}' is missing required columns: {required_columns - set(df.columns)}")
            return

        # Group by Qmax and YT, then select the minimum best_MSE for each group
        df_min = df.groupby(['Qmax', 'YT'], as_index=False)['best_MSE'].min()
        print("Computed minimum best_MSE for each Qmax and YT.")

        # Pivot to a YT (rows) x Qmax (columns) matrix, sorted on both axes
        heatmap_data = df_min.pivot(index='YT', columns='Qmax', values='best_MSE')
        heatmap_data = heatmap_data.sort_index().sort_index(axis=1)

        # Nothing to draw: report it clearly instead of failing inside the plot call
        if heatmap_data.empty:
            print(f"Error: CSV file '{csv_file}' contains no rows to plot.")
            return

        # Create the heatmap
        fig = plt.figure(figsize=(12, 10))
        sns.set(style="white")

        ax = sns.heatmap(
            heatmap_data,
            annot=True,
            fmt=".3f",
            cmap="viridis_r",
            cbar_kws={'label': 'MSE'},
            linewidths=.5,
            linecolor='gray'
        )

        # Honour the documented `title` argument; no title is drawn by default
        if title is not None:
            ax.set_title(title, fontsize=22, pad=20)

        # Set the x and y labels with larger font sizes
        ax.set_xlabel('Qmax', fontsize=20)
        ax.set_ylabel('YT', fontsize=20)

        # Set the tick labels with larger font sizes
        ax.tick_params(axis='x', labelsize=18)
        ax.tick_params(axis='y', labelsize=18)

        # Adjust colorbar (legend) font sizes
        cbar = ax.collections[0].colorbar
        cbar.ax.tick_params(labelsize=18)  # Colorbar tick labels
        cbar.ax.set_ylabel('MSE', fontsize=20, labelpad=15)  # Colorbar label

        # Adjust layout for better fit
        plt.tight_layout()

        # Ensure the output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Define the output path for the heatmap PNG
        png_filename = f"Gramacy Lee GRID {os.path.splitext(os.path.basename(csv_file))[0]}_min_heatmap.png"
        output_path = os.path.join(output_dir, png_filename)

        # Save the figure
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Heatmap saved to '{output_path}'.")

    except FileNotFoundError:
        print(f"Error: The file '{csv_file}' does not exist.")
    except pd.errors.EmptyDataError:
        print(f"Error: The file '{csv_file}' is empty.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        # Release the figure even when plotting or saving failed
        if fig is not None:
            plt.close(fig)

# ----------------------------
# Usage Example
# ----------------------------

# Compiled CSV inputs; uncomment an entry to include that run in the batch
compiled_csv_files = [
    # 'data/combined/combined_PS.csv',
    # 'data/combined/combined_PD.csv',
    'data/combined/combined_GD.csv',
    # 'data/combined/combined_GS.csv'
]

# (Optional) Custom titles keyed by CSV base name.
# NOTE: the loop below does not pass these on to generate_min_mse_heatmap.
heatmap_titles = {
    # 'combined_PS.csv': 'Gramacy Lee - Population Scaled',
    # 'combined_PD.csv': 'Gramacy Lee - Population Default',
    'combined_GD.csv': 'Gramacy Lee - Generational Default',
    # 'combined_GS.csv': 'Gramacy Lee - Generational Scaled'
}

# Destination folder for the PNGs, created up front
heatmap_output_directory = 'output_heatmaps/'
os.makedirs(heatmap_output_directory, exist_ok=True)

# Produce one heatmap per CSV that is present on disk
for csv_file in compiled_csv_files:
    if not os.path.isfile(csv_file):
        print(f"CSV file '{csv_file}' does not exist. Skipping.")
        continue
    base_filename = os.path.basename(csv_file)
    generate_min_mse_heatmap(csv_file, heatmap_output_directory)

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


Loaded CSV file: data/combined/combined_GD.csv
Computed minimum best_MSE for each Qmax and YT.
Heatmap saved to 'output_heatmaps/Gramacy Lee GRID combined_GD_min_heatmap.png'.


# Update: axis labels with subscripts

In [2]:
# Cell: Generate Heatmap Using Minimum Best MSE

import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def generate_min_mse_heatmap(csv_file, output_dir, title=None):
    """
    Generates and saves a heatmap using the minimum best_MSE for each Qmax and YT.

    The CSV is grouped by (Qmax, YT), the smallest best_MSE of each group is
    kept, and the result is pivoted into a YT-by-Qmax grid that is drawn with
    seaborn and written as a PNG inside ``output_dir``. Axis labels are
    rendered as mathtext with subscripts.

    Parameters:
        csv_file (str): Path to the input CSV file. It must contain the
            columns 'Qmax', 'YT' and 'best_MSE'.
        output_dir (str): Directory to save the heatmap PNG (created if missing).
        title (str, optional): Custom title for the heatmap. Defaults to None,
            in which case no title is drawn.

    Returns:
        None. Problems are reported with print() instead of being raised.
    """
    fig = None  # only set once a figure exists, so `finally` knows what to close
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file)
        print(f"Loaded CSV file: {csv_file}")

        # Check if required columns exist
        required_columns = {'Qmax', 'YT', 'best_MSE'}
        if not required_columns.issubset(df.columns):
            print(f"Error: CSV file '{csv_file}' is missing required columns: {required_columns - set(df.columns)}")
            return

        # Group by Qmax and YT, then select the minimum best_MSE for each group
        df_min = df.groupby(['Qmax', 'YT'], as_index=False)['best_MSE'].min()
        print("Computed minimum best_MSE for each Qmax and YT.")

        # Pivot to a YT (rows) x Qmax (columns) matrix, sorted on both axes
        heatmap_data = df_min.pivot(index='YT', columns='Qmax', values='best_MSE')
        heatmap_data = heatmap_data.sort_index().sort_index(axis=1)

        # Nothing to draw: report it clearly instead of failing inside the plot call
        if heatmap_data.empty:
            print(f"Error: CSV file '{csv_file}' contains no rows to plot.")
            return

        # Create the heatmap
        fig = plt.figure(figsize=(12, 10))
        sns.set(style="white")

        ax = sns.heatmap(
            heatmap_data,
            annot=True,
            fmt=".3f",
            cmap="viridis_r",
            cbar_kws={'label': 'MSE'},
            linewidths=.5,
            linecolor='gray'
        )

        # Honour the documented `title` argument; no title is drawn by default
        if title is not None:
            ax.set_title(title, fontsize=22, pad=20)

        # Axis labels with subscripts (matplotlib mathtext)
        ax.set_xlabel(r'$Q_{\mathrm{max}}$', fontsize=20)  # X-axis label with subscript
        ax.set_ylabel(r'$y_t$', fontsize=20)                # Y-axis label with subscript

        # Set the tick labels with larger font sizes
        ax.tick_params(axis='x', labelsize=18)
        ax.tick_params(axis='y', labelsize=18)

        # Adjust colorbar (legend) font sizes
        cbar = ax.collections[0].colorbar
        cbar.ax.tick_params(labelsize=18)  # Colorbar tick labels
        cbar.ax.set_ylabel('MSE', fontsize=20, labelpad=15)  # Colorbar label

        # Adjust layout for better fit
        plt.tight_layout()

        # Ensure the output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Define the output path for the heatmap PNG
        png_filename = f"Gramacy Lee GRID {os.path.splitext(os.path.basename(csv_file))[0]}_min_heatmap.png"
        output_path = os.path.join(output_dir, png_filename)

        # Save the figure
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Heatmap saved to '{output_path}'.")

    except FileNotFoundError:
        print(f"Error: The file '{csv_file}' does not exist.")
    except pd.errors.EmptyDataError:
        print(f"Error: The file '{csv_file}' is empty.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        # Release the figure even when plotting or saving failed
        if fig is not None:
            plt.close(fig)

# ----------------------------
# Usage Example
# ----------------------------

# Compiled CSV inputs; uncomment an entry to include that run in the batch
compiled_csv_files = [
    # 'data/combined/combined_PS.csv',
    # 'data/combined/combined_PD.csv',
    'data/combined/combined_GD.csv',
    # 'data/combined/combined_GS.csv'
]

# (Optional) Custom titles keyed by CSV base name.
# NOTE: the loop below does not pass these on to generate_min_mse_heatmap.
heatmap_titles = {
    # 'combined_PS.csv': 'Gramacy Lee - Population Scaled',
    # 'combined_PD.csv': 'Gramacy Lee - Population Default',
    'combined_GD.csv': 'Gramacy Lee - Generational Default',
    # 'combined_GS.csv': 'Gramacy Lee - Generational Scaled'
}

# Destination folder for the PNGs, created up front
heatmap_output_directory = 'output_heatmaps/'
os.makedirs(heatmap_output_directory, exist_ok=True)

# Produce one heatmap per CSV that is present on disk
for csv_file in compiled_csv_files:
    if not os.path.isfile(csv_file):
        print(f"CSV file '{csv_file}' does not exist. Skipping.")
        continue
    base_filename = os.path.basename(csv_file)
    generate_min_mse_heatmap(csv_file, heatmap_output_directory)


Loaded CSV file: data/combined/combined_GD.csv
Computed minimum best_MSE for each Qmax and YT.
Heatmap saved to 'output_heatmaps/Gramacy Lee GRID combined_GD_min_heatmap.png'.
