In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
def plot_single_csv(folder_path, csv_file):
    '''
    Draws the validation-loss curve stored in one CSV file.

    The seed, learning rate and batch size found in the row labelled 0 are
    put into the axes title; the test loss of that row becomes the figure
    suptitle.

        Parameters:
                folder_path (string): Folder that holds the CSV file
                csv_file (string): File name of the CSV inside folder_path
    '''
    run = pd.read_csv(os.path.join(folder_path, csv_file))

    # Curve data as plain numpy arrays
    x_epochs = run['Epoch'].values
    y_validation_loss = run['Validation_Loss'].values

    # Run-level metadata is repeated on every row, so the first row is enough
    seed, learning_rate, batch_size, test_loss = (
        run.loc[0, column]
        for column in ("SEED", "AE_LEARNING_RATE", "AE_BATCH_SIZE", "Test_Loss")
    )

    plt.plot(x_epochs, y_validation_loss)
    plt.xlabel('Epoch')
    plt.ylabel('Validation Loss')
    plt.title(f'Validation Loss - {seed} - {learning_rate} - {batch_size}')
    plt.suptitle(f'Test Loss: {test_loss}')
    plt.grid(True)
    plt.show()

In [None]:
def plot_specific_combination(folder_path, specific_lr, specific_bs):
    '''
    Plots the validation loss per seed & average for a combination of hyperparameters

    Every CSV file in folder_path whose first row carries the requested
    learning rate and batch size contributes one curve (labelled with its
    seed). The "Average" curve is the per-epoch mean over all runs that
    reached that epoch, so runs of different length are supported.
    If no run matches, a message is printed and nothing is plotted.

        Parameters:
                folder_path (string): Path to the folder containing the CSV files
                specific_lr (float): learning rate
                specific_bs (int): batch size
    '''
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

    # (seed, validation-loss array) for every run with the requested hyperparameters
    matching_runs = []
    dataset_name = None
    for file in csv_files:
        data = pd.read_csv(os.path.join(folder_path, file))
        if data.empty:
            # Header-only file: there is no first row to read the metadata from
            continue
        if data["AE_LEARNING_RATE"].iloc[0] != specific_lr or data["AE_BATCH_SIZE"].iloc[0] != specific_bs:
            continue
        if dataset_name is None:
            # Take the name from a run that is actually plotted
            dataset_name = data["Dataset"].iloc[0]
        matching_runs.append((data["SEED"].iloc[0], data["Validation_Loss"].to_numpy(dtype=float)))

    if not matching_runs:
        print(f"No runs found in {folder_path} for learning rate {specific_lr} and batch size {specific_bs}.")
        return

    matching_runs.sort(key=lambda run: run[0])

    fig, ax = plt.subplots()

    # One curve per seed
    for seed, validation_loss in matching_runs:
        ax.plot(validation_loss, label=f"Seed {seed}")

    # Average over all seeds: pad shorter runs with NaN so that runs of
    # unequal length can be stacked, then ignore the padding in the mean.
    longest_run = max(len(validation_loss) for _, validation_loss in matching_runs)
    stacked_losses = np.full((len(matching_runs), longest_run), np.nan)
    for row, (_, validation_loss) in enumerate(matching_runs):
        stacked_losses[row, :len(validation_loss)] = validation_loss
    ax.plot(np.nanmean(stacked_losses, axis=0), label="Average")

    # Add labels and title
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Validation Loss')
    ax.set_title(f'{dataset_name}: Validation Loss - {specific_lr} - {specific_bs}')
    ax.legend()

    # Display the graph
    plt.show()

In [None]:
def best_combination(folder_path, round_numbers, learning_rates=None, batch_sizes=None, expected_seeds=5):
    '''
    Returns the best combination of hyperparameters

    Each CSV file in folder_path is read once and grouped by the learning
    rate / batch size found in its first row. For every grid combination that
    has runs, the mean test loss, its standard error and the 95% confidence
    interval (Student t) are printed. The combination with the lowest mean
    test loss wins.

        Parameters:
                folder_path (string): Path to the folder containing the CSV files
                round_numbers (bool): If True, the mean test loss and the confidence
                        interval half-width printed for the best combination are
                        rounded to 4 decimal places (the return value is not affected)
                learning_rates (list of float, optional): Learning rates of the grid.
                        Defaults to [0.1, 0.01, 0.001, 0.0001, 0.0005]
                batch_sizes (list of int, optional): Batch sizes of the grid.
                        Defaults to [32, 64, 128, 256]
                expected_seeds (int, optional): Number of runs (with distinct seeds)
                        every evaluated combination must have. Defaults to 5
        Returns:
                best_learning_rate (float): Best learning rate
                best_batch_size (int): Best batch size
        Raises:
                AssertionError: If a combination does not have exactly expected_seeds
                        runs with distinct seeds
                ValueError: If no CSV file in folder_path matches the grid
    '''
    if learning_rates is None:
        learning_rates = [0.1, 0.01, 0.001, 0.0001, 0.0005]
    if batch_sizes is None:
        batch_sizes = [32, 64, 128, 256]

    # Sorted so that the floating-point results do not depend on directory order
    csv_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.csv'))

    # Single pass over the files: (learning rate, batch size) -> [(seed, test loss), ...]
    runs_by_combination = {}
    AE_dataset_name = None
    for file in csv_files:
        data = pd.read_csv(os.path.join(folder_path, file))
        if data.empty:
            # Header-only file: there is no first row to read the metadata from
            continue
        combination = (float(data["AE_LEARNING_RATE"].iloc[0]), float(data["AE_BATCH_SIZE"].iloc[0]))
        runs_by_combination.setdefault(combination, []).append((data["SEED"].iloc[0], data["Test_Loss"].iloc[0]))
        if AE_dataset_name is None:
            AE_dataset_name = data["Dataset"].iloc[0]

    # Store results as tuples in list
    results = []

    # Iterate through the combinations (group by learning_rate and batch_size)
    for lr in learning_rates:
        for bs in batch_sizes:
            runs = runs_by_combination.get((float(lr), float(bs)))
            if not runs:
                continue

            seeds = [seed for seed, _ in runs]
            test_losses = [test_loss for _, test_loss in runs]
            assert len(set(seeds)) == expected_seeds, f"Number of unique seeds is not equal to {expected_seeds} -> ERROR."
            assert len(test_losses) == expected_seeds, f"Number of test losses is not equal to {expected_seeds} -> ERROR."

            mean_test_loss = np.mean(test_losses)
            se_test_loss = stats.sem(test_losses)

            # Calculate the 95% confidence interval for the mean test loss
            confidence_interval_loss = stats.t.interval(0.95, len(test_losses) - 1, loc=mean_test_loss, scale=se_test_loss)
            conf_interval_half_loss = abs(confidence_interval_loss[1] - confidence_interval_loss[0]) / 2

            # Tolerant comparison: two summation orders may differ in the last bits
            assert np.isclose(mean_test_loss, sum(test_losses) / len(test_losses)), "Mean test loss is not equal to sum of test losses divided by number of test losses -> ERROR."
            print(f"Learning Rate: {lr}, Batch Size: {bs} || Mean Test Loss: {mean_test_loss}, SE Test Loss: {se_test_loss}, 95% Confidence Interval: {confidence_interval_loss}, 95% Confidence Interval Half: {conf_interval_half_loss}")
            results.append((lr, bs, mean_test_loss, conf_interval_half_loss, confidence_interval_loss))

    if not results:
        raise ValueError(f"No CSV results matching the hyperparameter grid were found in {folder_path}")

    # sort results by mean test loss (stable: ties keep grid order)
    sorted_results = sorted(results, key=lambda x: x[2])
    print(f"len(sorted_results) = {len(sorted_results)}")
    print(f"len(csv_files) = {len(csv_files)}")
    print("Sorted List:")
    print(sorted_results)
    print(f"Best Combination for {AE_dataset_name}:")
    best_learning_rate, best_batch_size, best_mean_test_loss, best_conf_interval_half_loss = sorted_results[0][:4]

    if round_numbers:
        # round to 4 decimal places
        best_mean_test_loss = round(best_mean_test_loss, 4)
        best_conf_interval_half_loss = round(best_conf_interval_half_loss, 4)
    print(f"Learning Rate: {best_learning_rate}, Batch Size: {best_batch_size}, Mean Test Loss: {best_mean_test_loss}, 95% Confidence Interval Half: {best_conf_interval_half_loss}")
    print(100*"*")

    return best_learning_rate, best_batch_size


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc

import itertools

# Typeset all figure text through LaTeX with a Computer Modern serif font.
# (matplotlib's usetex mode needs a working LaTeX installation.)
rc("font", **{"family": "serif", "serif": ["Computer Modern"]})
rc("text", usetex=True)

# Define the color palette for the models
model_color_palette = sns.color_palette("colorblind")
relevant_model_names = ["VQC_angle_list", "VQC_amplitude_list", "dressed_quantum_list", "sequent_quantum_list", "NN_with_compressed_input_list", "NN_with_original_input_list", "DQC_classical", "Sequent_classical"]

# Create a custom color dictionary to map each model name to its corresponding color
model_colors = dict(zip(relevant_model_names, model_color_palette))
print(model_colors)

# One theme call only: sns.set() re-applies the whole seaborn theme and fills
# every argument that is not passed with its default, so stacking several
# sns.set()/set_style() calls keeps just the settings of the last one.
sns.set(style="whitegrid", font="serif", font_scale=2)



def plot_specific_combination_2(folder_path, specific_lr, specific_bs):
    '''
    Plots the mean validation loss per epoch with a +/- one standard deviation band

    The statistics are taken over all runs (CSV files) in folder_path whose
    first row carries the requested learning rate and batch size. Epochs are
    numbered from 1 in file order. If no run matches, a message is printed
    and nothing is plotted.

        Parameters:
                folder_path (string): Path to the folder containing the CSV files
                specific_lr (float): learning rate
                specific_bs (int): batch size
    '''
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

    # One long-format frame (seed, epoch, validation loss) per matching run
    per_run_frames = []
    for file in csv_files:
        data = pd.read_csv(os.path.join(folder_path, file))
        if data.empty:
            # Header-only file: there is no first row to read the metadata from
            continue
        if data["AE_LEARNING_RATE"].iloc[0] == specific_lr and data["AE_BATCH_SIZE"].iloc[0] == specific_bs:
            validation_loss = data["Validation_Loss"].to_numpy()
            per_run_frames.append(pd.DataFrame({
                "seed": data["SEED"].iloc[0],
                "epoch": range(1, len(validation_loss) + 1),
                "validation loss": validation_loss,
            }))

    if not per_run_frames:
        print(f"No runs found in {folder_path} for learning rate {specific_lr} and batch size {specific_bs}.")
        return

    df = pd.concat(per_run_frames, ignore_index=True)

    # Calculate the average validation loss and standard deviation for each epoch across all seeds
    avg_and_std_per_epoch = df.groupby('epoch')['validation loss'].agg(['mean', 'std']).reset_index()

    # Create a line plot for the average validation loss with error band (standard deviation)
    plt.figure(figsize=(10, 6))  # Adjust the figure size as needed
    sns.lineplot(data=avg_and_std_per_epoch, x='epoch', y='mean', linewidth=2)
    plt.fill_between(avg_and_std_per_epoch['epoch'], avg_and_std_per_epoch['mean'] - avg_and_std_per_epoch['std'],
                     avg_and_std_per_epoch['mean'] + avg_and_std_per_epoch['std'], alpha=0.3)

    # Optional fixed axis limits (disabled):
    #   plt.xlim(left=0, right=500)
    #   plt.ylim(bottom=0, top=0.06)
    plt.xlabel('Epoch')
    plt.ylabel('Validation Loss')
    plt.grid(True)
    plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def plot_specific_combination_smooth(folder_path, specific_lr, specific_bs, ema_alpha=0.6):
    '''
    Plots the smoothed mean validation loss per epoch with a standard deviation band

    Mean and standard deviation are taken per epoch over all runs (CSV files)
    in folder_path whose first row carries the requested learning rate and
    batch size. Only the mean is smoothed (exponentially weighted mean); the
    band is the raw per-epoch standard deviation drawn around the smoothed
    mean. If no run matches, a message is printed and nothing is plotted.

        Parameters:
                folder_path (string): Path to the folder containing the CSV files
                specific_lr (float): learning rate
                specific_bs (int): batch size
                ema_alpha (float, optional): Smoothing factor passed to
                        pandas ewm(alpha=...). Defaults to 0.6
    '''
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

    # One long-format frame (seed, epoch, validation loss) per matching run
    per_run_frames = []
    for file in csv_files:
        data = pd.read_csv(os.path.join(folder_path, file))
        if data.empty:
            # Header-only file: there is no first row to read the metadata from
            continue
        if data["AE_LEARNING_RATE"].iloc[0] == specific_lr and data["AE_BATCH_SIZE"].iloc[0] == specific_bs:
            validation_loss = data["Validation_Loss"].to_numpy()
            per_run_frames.append(pd.DataFrame({
                "seed": data["SEED"].iloc[0],
                "epoch": range(1, len(validation_loss) + 1),
                "validation loss": validation_loss,
            }))

    if not per_run_frames:
        print(f"No runs found in {folder_path} for learning rate {specific_lr} and batch size {specific_bs}.")
        return

    df = pd.concat(per_run_frames, ignore_index=True)
    avg_and_std_per_epoch = df.groupby('epoch')['validation loss'].agg(['mean', 'std']).reset_index()

    # Calculate smoothed validation loss using exponential moving average (EMA)
    avg_and_std_per_epoch['smoothed_mean'] = avg_and_std_per_epoch['mean'].ewm(alpha=ema_alpha).mean()

    # Create the plot
    plt.figure(figsize=(10, 6))  # Adjust the figure size as needed

    # Plot the smoothed average validation loss with error band (standard deviation)
    sns.lineplot(x='epoch', y='smoothed_mean', data=avg_and_std_per_epoch, linewidth=2)
    plt.fill_between(avg_and_std_per_epoch['epoch'],
                     avg_and_std_per_epoch['smoothed_mean'] - avg_and_std_per_epoch['std'],
                     avg_and_std_per_epoch['smoothed_mean'] + avg_and_std_per_epoch['std'], alpha=0.3)

    plt.xlim(left=1)  # Start the x-axis from epoch 1
    plt.xlabel('Epoch')
    plt.ylabel('Average Validation Loss')
    plt.title('Smoothed Average Validation Loss with Standard Deviation over Epochs')
    plt.grid(True)
    plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# LaTeX text rendering with a Computer Modern serif font for every figure
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Computer Modern"],
})

# Colour-blind-friendly palette used for the model plots
model_color_palette = sns.color_palette("colorblind")

# Seaborn theme: white grid, Computer Modern font, doubled font scale
sns.set(style="whitegrid", font="Computer Modern", font_scale=2)


def plot_specific_combination_smooth_mean_std(folder_path, specific_lr, specific_bs, ema_alpha=0.6, max_epoch=500):
    '''
    Plots the smoothed mean validation loss per epoch with a smoothed standard deviation band

    Mean and standard deviation are taken per epoch over all runs (CSV files)
    in folder_path whose first row carries the requested learning rate and
    batch size. Both series are smoothed with an exponentially weighted mean
    before plotting. Line and band use the first colour of the module-level
    model_color_palette. If no run matches, a message is printed and nothing
    is plotted.

        Parameters:
                folder_path (string): Path to the folder containing the CSV files
                specific_lr (float): learning rate
                specific_bs (int): batch size
                ema_alpha (float, optional): Smoothing factor passed to
                        pandas ewm(alpha=...). Defaults to 0.6
                max_epoch (int, optional): Right limit of the x-axis; None keeps
                        matplotlib's automatic limit. Defaults to 500
    '''
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

    # One long-format frame (seed, epoch, validation loss) per matching run
    per_run_frames = []
    for file in csv_files:
        data = pd.read_csv(os.path.join(folder_path, file))
        if data.empty:
            # Header-only file: there is no first row to read the metadata from
            continue
        if data["AE_LEARNING_RATE"].iloc[0] == specific_lr and data["AE_BATCH_SIZE"].iloc[0] == specific_bs:
            validation_loss = data["Validation_Loss"].to_numpy()
            per_run_frames.append(pd.DataFrame({
                "seed": data["SEED"].iloc[0],
                "epoch": range(1, len(validation_loss) + 1),
                "validation loss": validation_loss,
            }))

    if not per_run_frames:
        print(f"No runs found in {folder_path} for learning rate {specific_lr} and batch size {specific_bs}.")
        return

    df = pd.concat(per_run_frames, ignore_index=True)
    avg_and_std_per_epoch = df.groupby('epoch')['validation loss'].agg(['mean', 'std']).reset_index()

    # Calculate smoothed validation loss and standard deviation using exponential moving average (EMA)
    avg_and_std_per_epoch['smoothed_mean'] = avg_and_std_per_epoch['mean'].ewm(alpha=ema_alpha).mean()
    avg_and_std_per_epoch['smoothed_std'] = avg_and_std_per_epoch['std'].ewm(alpha=ema_alpha).mean()

    # Create the plot
    plt.figure(figsize=(10, 6))  # Adjust the figure size as needed

    # seaborn ignores `palette` when no `hue` is given, so the palette colour is
    # passed explicitly and shared by the line and its error band.
    line_color = model_color_palette[0]

    # Plot the smoothed average validation loss with error band (standard deviation)
    sns.lineplot(x='epoch', y='smoothed_mean', data=avg_and_std_per_epoch, linewidth=1.5, color=line_color)
    plt.fill_between(avg_and_std_per_epoch['epoch'],
                     avg_and_std_per_epoch['smoothed_mean'] - avg_and_std_per_epoch['smoothed_std'],
                     avg_and_std_per_epoch['smoothed_mean'] + avg_and_std_per_epoch['smoothed_std'],
                     color=line_color, alpha=0.3)

    plt.xlim(left=0, right=max_epoch)
    # plt.ylim(bottom = 0, top = 0.06)
    plt.xlabel('Epoch')
    plt.ylabel('Validation Reconstruction Loss')
    plt.grid(True)
    plt.show()


In [None]:
# One results folder per dataset, all located under <cwd>/AE_Optimization
(
    folder_path_BanknoteAuthentication,
    folder_path_BreastCancer,
    folder_path_MNIST,
    folder_path_Audio,
) = (
    os.path.join(os.getcwd(), "AE_Optimization", dataset_dir)
    for dataset_dir in ("Banknote", "Breast_Cancer", "MNIST", "Audio_MNIST")
)

# Best (learning rate, batch size) per dataset; the printed summary is rounded
(best_learning_rate_BanknoteAuthentication,
 best_batch_size_BanknoteAuthentication) = best_combination(folder_path_BanknoteAuthentication, round_numbers=True)
(best_learning_rate_BreastCancer,
 best_batch_size_BreastCancer) = best_combination(folder_path_BreastCancer, round_numbers=True)
(best_learning_rate_MNIST,
 best_batch_size_MNIST) = best_combination(folder_path_MNIST, round_numbers=True)
(best_learning_rate_Audio,
 best_batch_size_Audio) = best_combination(folder_path_Audio, round_numbers=True)

print("Plot for best combination")
plot_specific_combination_smooth_mean_std(
    folder_path=folder_path_BanknoteAuthentication,
    specific_lr=best_learning_rate_BanknoteAuthentication,
    specific_bs=best_batch_size_BanknoteAuthentication,
)
plot_specific_combination_smooth_mean_std(
    folder_path=folder_path_BreastCancer,
    specific_lr=best_learning_rate_BreastCancer,
    specific_bs=best_batch_size_BreastCancer,
)
plot_specific_combination_smooth_mean_std(
    folder_path=folder_path_MNIST,
    specific_lr=best_learning_rate_MNIST,
    specific_bs=best_batch_size_MNIST,
)
plot_specific_combination_smooth_mean_std(
    folder_path=folder_path_Audio,
    specific_lr=best_learning_rate_Audio,
    specific_bs=best_batch_size_Audio,
)



"""
print("Plot for all combinations")
learning_rates = [0.1, 0.01, 0.001, 0.0001, 0.0005]
batch_sizes = [32, 64, 128, 256]

for lr in learning_rates:
    for bs in batch_sizes:
        plot_specific_combination(folder_path_Audio, lr, bs)
"""
