In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
def preprocess_data(file1, file2):
    """Build one long-format table of FID scores joined with LoRA training settings.

    Each workbook is read with ``pd.read_excel``. Its first data row is promoted
    to the column header and then removed, and the remaining rows are melted so
    that every (Weight, Model) pair becomes one row holding its ``FID Score``.

    Parameters
    ----------
    file1 : anything accepted by ``pd.read_excel``
        Workbook whose rows are labelled ``'Exp 1.0'``.
    file2 : anything accepted by ``pd.read_excel``
        Workbook whose rows are labelled ``'Exp 2.0'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Weight``, ``Model``, ``FID Score`` (cast to float) and
        ``Experiment``, followed by ``Batch size``, ``Epoch``, ``Repeat``,
        ``Learning rate`` and ``Optimizer type``. The join is a left join, so
        rows whose ``Model`` has no entry in the settings table are kept with
        missing setting values.
    """

    def _to_long(workbook_path, experiment_label):
        # Read one workbook and reshape it to one row per (Weight, Model).
        sheet = pd.read_excel(workbook_path)
        # The usable header sits in the first data row: promote it, then drop that row.
        sheet.columns = sheet.iloc[0]
        long_scores = sheet.drop(0).melt(id_vars=['Weight'], value_name='FID Score', var_name='Model')
        long_scores['Experiment'] = experiment_label
        return long_scores

    # Stack both experiments and make the score column numeric.
    stacked_scores = pd.concat([_to_long(file1, 'Exp 1.0'), _to_long(file2, 'Exp 2.0')])
    stacked_scores = stacked_scores.astype({'FID Score': float})

    # Hand-maintained table of the settings each LoRA model was trained with.
    lora_settings = pd.DataFrame({
        'LoRA model': [
            'ArcadeFacadeV2.1', 'ArcadeFacadeV2.2', 'ArcadeFacadeV2.3',
            'ArcadeFacadeV2.4', 'ArcadeFacadeV2.5', 'ArcadeFacadeV2.6',
        ],
        'Batch size': [3, 3, 3, 2, 2, 2],
        'Epoch': [20, 20, 20, 20, 20, 20],
        'Repeat': [6, 8, 6, 6, 10, 8],
        'Learning rate': [0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002],
        'Optimizer type': ['AdamW8bit', 'AdamW8bit', 'Lion', 'Lion', 'Lion', 'Lion'],
    })
    # NOTE(review): the rename presumably mirrors the spelling used in the
    # workbook column headers; confirm against the actual files.
    lora_settings['LoRA model'] = lora_settings['LoRA model'].str.replace("ArcadeFacade", "AcadeFacde")

    joined = stacked_scores.merge(lora_settings, left_on='Model', right_on='LoRA model', how='left')
    return joined.drop(columns='LoRA model')

In [3]:
def plot_individual_violin_graphs(file1, file2):
    """Draw one split violin plot of FID scores per training parameter.

    The two workbooks are loaded and merged by ``preprocess_data``. A separate
    10x8 figure is then created for each of ``Batch size``, ``Repeat``,
    ``Learning rate`` and ``Optimizer type``, with the parameter on the x-axis,
    ``FID Score`` on the y-axis, and the two experiments drawn as the two
    halves of each violin (quartile lines inside).

    Parameters
    ----------
    file1 : anything accepted by ``preprocess_data``
        Workbook for the first experiment.
    file2 : anything accepted by ``preprocess_data``
        Workbook for the second experiment.

    Returns
    -------
    None
        The figures are displayed via ``plt.show()``.
    """
    combined_data_with_params = preprocess_data(file1, file2)

    # Setting style and palette (the style must be set before axes are created)
    sns.set_style("whitegrid")
    palette_pastel = sns.color_palette("pastel")

    # Plot separate violin graphs for each training parameter
    parameters = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']
    for param in parameters:
        # Explicit figure/axes instead of implicit pyplot state, so every call
        # below targets this figure regardless of what else is open.
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.violinplot(data=combined_data_with_params, x=param, y='FID Score', hue='Experiment', split=True,
                       inner="quartile", palette=palette_pastel, ax=ax)
        ax.set_title(f"Distribution of FID Scores by {param}", fontsize=26)
        ax.set_ylabel("FID Score", fontsize=26)
        ax.set_xlabel(param, fontsize=26)
        # Keep the large title and axis labels inside the figure bounds.
        fig.tight_layout()

    # Render all figures explicitly, as the combined plot function does;
    # without this nothing is displayed outside an inline notebook backend.
    plt.show()

In [4]:
def plot_combined_violin_graphs(file1, file2):
    """Show a 2x2 grid of split violin plots, one panel per training parameter.

    The data comes from ``preprocess_data(file1, file2)``. Each panel plots
    ``FID Score`` against one of ``Batch size``, ``Repeat``, ``Learning rate``
    or ``Optimizer type``, with the two experiments as the two halves of every
    violin. Panels get a light grey background and a black frame, and the
    finished figure is displayed with ``plt.show()``.

    Parameters
    ----------
    file1 : anything accepted by ``preprocess_data``
        Workbook for the first experiment.
    file2 : anything accepted by ``preprocess_data``
        Workbook for the second experiment.

    Returns
    -------
    None
    """
    scores = preprocess_data(file1, file2)

    # Global seaborn look; applied before the axes are created.
    sns.set_style("whitegrid")
    pastel_colors = sns.color_palette("pastel")

    # One white 20x18 figure holding a 2x2 grid of panels.
    figure, panel_grid = plt.subplots(nrows=2, ncols=2, figsize=(20, 18), facecolor='white')
    panel_params = ['Batch size', 'Repeat', 'Learning rate', 'Optimizer type']

    # Flatten the grid so panels and parameters can be walked in lockstep.
    for panel, param in zip(panel_grid.ravel(), panel_params):
        sns.violinplot(
            data=scores,
            x=param,
            y='FID Score',
            hue='Experiment',
            split=True,
            inner="quartile",
            palette=pastel_colors,
            ax=panel,
        )
        panel.set_title(f"Distribution of FID Scores by {param}", fontsize=26)
        panel.set_ylabel("FID Score", fontsize=26)
        panel.set_xlabel(param, fontsize=26)
        panel.legend(title='Experiment', fontsize=18, title_fontsize=18)
        panel.tick_params(labelsize=26)

        # Light grey panel background with a visible black border on all sides.
        panel.set_facecolor('#f5f5f5')
        for border in panel.spines.values():
            border.set_visible(True)
            border.set_color('black')

    plt.tight_layout()
    plt.show()

In [None]:
# Render the combined 2x2 figure. Pass the paths to the two FID score workbooks:
# the first argument is labelled 'Exp 1.0' in the plots and the second 'Exp 2.0'.
plot_combined_violin_graphs("FID scores1.0.xlsx", "FID scores2.0.xlsx") # Replace with the paths of your two experiment files

In [None]:
# Render one standalone violin figure per training parameter from the same two workbooks.
plot_individual_violin_graphs("FID scores1.0.xlsx", "FID scores2.0.xlsx")