In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Providing the full code, which the user can run by just providing the two file paths:

def _reshape_scores(combined_scores, models, weights):
    """Turn the wide score table into one long-format row per (sample, model, weight).

    For every name in ``models`` the column carrying that name, together with the
    columns that directly follow it (``len(weights)`` columns in total), is read as that
    model's scores and relabelled with ``weights``.

    Args:
        combined_scores: Wide DataFrame holding all experiments, including an
            'Experiment' column.
        models: Column names marking the first score column of each model.
        weights: Labels assigned to each model's consecutive score columns.

    Returns:
        DataFrame with the columns 'Model', 'Experiment', 'Weight' and 'CLIPScore'.

    Raises:
        KeyError: If any name in ``models`` is not a column of ``combined_scores``.
    """
    missing_models = [model for model in models if model not in combined_scores.columns]
    if missing_models:
        raise KeyError(f"Model column(s) not found in the input data: {missing_models}")

    # Attached positionally so the result never depends on index alignment.
    experiment_labels = combined_scores['Experiment'].to_numpy()

    per_model_frames = []
    for model in models:
        start_col = combined_scores.columns.get_loc(model)
        # .copy() detaches the slice from the combined frame, so the column edits below
        # cannot touch the source data or raise SettingWithCopyWarning.
        model_scores = combined_scores.iloc[:, start_col:start_col + len(weights)].copy()
        model_scores.columns = weights
        model_scores['Model'] = model
        model_scores['Experiment'] = experiment_labels
        per_model_frames.append(model_scores)

    return pd.concat(per_model_frames, ignore_index=True).melt(
        id_vars=['Model', 'Experiment'], value_vars=weights, var_name='Weight', value_name='CLIPScore')


def _draw_violin(ax, scores, param, experiments, palette, label_size, legend_size):
    """Draw one split violin plot of CLIPScore grouped by ``param`` on ``ax``."""
    sns.violinplot(x=param, y='CLIPScore', hue='Experiment', hue_order=experiments, data=scores,
                   split=True, inner="quartile", palette=palette, ax=ax)
    ax.set_title(f"Distribution of CLIPScores by {param}", fontsize=label_size)
    ax.set_ylabel("CLIPScore", fontsize=label_size)
    ax.set_xlabel(param, fontsize=label_size)
    ax.legend(title='Experiment', fontsize=legend_size, title_fontsize=legend_size)
    ax.tick_params(labelsize=label_size)


def generate_full_plots(file_path_1, file_path_2, individual_plots=False):
    """Plot CLIPScore distributions of two experiments, grouped by training parameter.

    Both Excel files are loaded, labelled as experiment '1.0' and '2.0', reshaped to long
    format and annotated with the training parameters of each model. A 2x2 grid of split
    violin plots (one panel per training parameter, one half per experiment) is then shown.

    Args:
        file_path_1: Path of the Excel file for experiment '1.0'.
        file_path_2: Path of the Excel file for experiment '2.0'.
        individual_plots: When True, additionally show one larger stand-alone figure per
            training parameter after the combined grid. Defaults to False.

    Raises:
        KeyError: If one of the expected model columns is missing from the loaded data.
    """
    weights = ['Weight 1', 'Weight 2', 'Weight 3', 'Weight 4', 'Weight 5']
    parameters = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']
    param_mapping = {
        'ArcadeFacadeV2.1': {'Batch size': 3, 'Learning rate': 0.0001, 'Optimizer type': 'AdamW8bit', 'Repeat': 6},
        'ArcadeFacadeV2.2': {'Batch size': 3, 'Learning rate': 0.0001, 'Optimizer type': 'AdamW8bit', 'Repeat': 8},
        'ArcadeFacadeV2.3': {'Batch size': 3, 'Learning rate': 0.0001, 'Optimizer type': 'Lion', 'Repeat': 6},
        'ArcadeFacadeV2.4': {'Batch size': 2, 'Learning rate': 0.0002, 'Optimizer type': 'Lion', 'Repeat': 6},
        'ArcadeFacadeV2.5': {'Batch size': 2, 'Learning rate': 0.0002, 'Optimizer type': 'Lion', 'Repeat': 10},
        'ArcadeFacadeV2.6': {'Batch size': 2, 'Learning rate': 0.0002, 'Optimizer type': 'Lion', 'Repeat': 8}
    }
    # The mapping is the single source of truth for which models are plotted.
    models = list(param_mapping)

    # Load both experiments and stack them with a fresh, unique row index.
    experiment_files = {'1.0': file_path_1, '2.0': file_path_2}
    experiments = list(experiment_files)
    combined_scores = pd.concat(
        [pd.read_excel(path).assign(Experiment=label) for label, path in experiment_files.items()],
        ignore_index=True,
    )

    scores_long = _reshape_scores(combined_scores, models, weights)

    # Attach each model's training parameters as extra columns.
    for param in parameters:
        scores_long[param] = scores_long['Model'].map(
            {model: settings[param] for model, settings in param_mapping.items()})

    # One pastel colour per experiment; a longer palette makes newer seaborn warn.
    palette_comfortable = sns.color_palette("pastel", n_colors=len(experiments))

    # Combined figure: the four training parameters fill the 2x2 grid.
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(22, 20), facecolor='white')
    for ax, param in zip(axes.ravel(), parameters):
        _draw_violin(ax, scores_long, param, experiments, palette_comfortable,
                     label_size=28, legend_size=18)

        # White background and a black frame around each panel.
        ax.set_facecolor('white')
        for spine in ax.spines.values():
            spine.set_visible(True)
            spine.set_color('black')

    plt.tight_layout()
    plt.show()

    # Optional stand-alone figures, one per training parameter.
    if individual_plots:
        for param in parameters:
            _, single_ax = plt.subplots(figsize=(15, 12))
            _draw_violin(single_ax, scores_long, param, experiments, palette_comfortable,
                         label_size=30, legend_size=20)
            plt.tight_layout()
            plt.show()

In [None]:
# Render the plots for the two experiment workbooks.
# Replace the two paths below with the locations of your own experiment files.
EXPERIMENT_1_PATH = "CLIPScores_ALL_1.0.xlsx"
EXPERIMENT_2_PATH = "CLIPScores_ALL_2.0.xlsx"
generate_full_plots(EXPERIMENT_1_PATH, EXPERIMENT_2_PATH)