In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
from  matplotlib.ticker import FuncFormatter
import numpy as np

In [None]:
def _label_position_accuracy(df, n_meta_columns=7):
    """Return per-label-position accuracy and its normalised cumulative sum.

    The columns named '0', '1', ..., 'n-1' are read, where
    ``n = len(df.columns) - n_meta_columns``.

    NOTE(review): the default of 7 non-position columns is carried over from
    the original hard-coded ``len(df.columns) - 7``; confirm it against the
    code that writes the result CSVs.

    Parameters
    ----------
    df : pandas.DataFrame
        Result table with one column per label position.
    n_meta_columns : int
        Number of columns that are not label-position columns.

    Returns
    -------
    accuracy : numpy.ndarray
        Column sum divided by the number of rows, one value per position.
    cumulative : numpy.ndarray
        ``cumsum(accuracy) / sum(accuracy)``; all zeros when the sum is not
        positive, so that no NaN curve is produced.
    """
    n_positions = max(len(df.columns) - n_meta_columns, 0)
    position_columns = [str(i) for i in range(n_positions)]
    accuracy = df[position_columns].sum().to_numpy(dtype=float) / len(df)

    total = accuracy.sum()
    if total > 0:
        cumulative = np.cumsum(accuracy) / total
    else:
        cumulative = np.zeros_like(accuracy)
    return accuracy, cumulative


def _draw_accuracy_with_cumulative(ax, accuracy, cumulative, title, fontsize,
                                   xtick_step, show_ylabel=True,
                                   show_cumulative_scale=True):
    """Draw accuracy bars with the cumulative curve on a twin y-axis.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes receiving the bar plot.
    accuracy, cumulative : array-like
        Values as returned by ``_label_position_accuracy``.
    title : str
        Panel title.
    fontsize : int
        Base font size; the title uses ``fontsize + 2``.
    xtick_step : int
        Distance between labelled x ticks.
    show_ylabel : bool
        Whether to label the left y-axis with 'Accuracy'.
    show_cumulative_scale : bool
        Whether the twin axis shows its label and tick labels. When False the
        tick labels are hidden, but the axis is still fixed to [0, 1] so every
        panel shares the one labelled scale.

    Returns
    -------
    matplotlib.axes.Axes
        The twin axes holding the cumulative curve.
    """
    positions = list(range(len(accuracy)))

    sns.barplot(x=positions, y=list(accuracy), ax=ax)
    ax.set_title(title, fontsize=fontsize + 2)
    ax.set_xlabel('Label position', fontsize=fontsize)
    if show_ylabel:
        ax.set_ylabel('Accuracy', fontsize=fontsize)

    twin = ax.twinx()
    twin.plot(positions, cumulative, color='red', marker='', linestyle='-', linewidth=2)
    # Same limits on every twin axis: a hidden scale must match the visible one.
    twin.set_ylim(0, 1)
    twin.tick_params(axis='y', labelcolor='red')
    if show_cumulative_scale:
        twin.set_ylabel('Cumulative distribution', color='red', fontsize=fontsize)
    else:
        twin.set_yticklabels([])

    # The bars sit at 0..n-1; the "+ 1" lets a tick land on n itself
    # (e.g. 150 for 150 positions), as in the original tick range.
    xticks = range(0, len(accuracy) + 1, xtick_step)
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticks, fontsize=fontsize)

    yticks = np.round(np.arange(0, 1.1, 0.2), 2)
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticks, fontsize=fontsize)
    return twin


def plot(dataset, model1, model2):
    """Compare a pretrained and a fine-tuned model on one dataset.

    Reads ``../results/pretrained_finetuned/<dataset>/<model>.csv`` for both
    models and writes four PDFs into
    ``../plots/pretrained_finetuned/<dataset>/<model1>/``:

    * ``statistics.pdf`` - KDE of the 'mean' and 'median' columns,
    * ``cumulative_distribution.pdf`` - per-position accuracy bars with the
      cumulative curve, one panel per model,
    * ``cumulative_distribution_dataset.pdf`` - the same panel for ``model2``
      only, titled with the dataset name,
    * ``distribution.pdf`` - both accuracy curves as lines in one axes.

    Parameters
    ----------
    dataset : str
        Dataset directory name, e.g. 'BANKING77'.
    model1 : str
        File stem of the pretrained model's results.
    model2 : str
        File stem of the fine-tuned model's results.
    """
    results_dir = os.path.join('..', 'results', 'pretrained_finetuned', dataset)
    output_dir = os.path.join('..', 'plots', 'pretrained_finetuned', dataset, model1)

    df_pretrained = pd.read_csv(os.path.join(results_dir, model1 + '.csv'))
    df_finetuned = pd.read_csv(os.path.join(results_dir, model2 + '.csv'))
    os.makedirs(output_dir, exist_ok=True)

    # Each frame is measured with its own column count, so CSVs of different
    # width neither raise KeyError nor get silently truncated.
    accuracy_pretrained, cumulative_pretrained = _label_position_accuracy(df_pretrained)
    accuracy_finetuned, cumulative_finetuned = _label_position_accuracy(df_finetuned)

    title_pretrained = model1.replace('meta-llama_', '').replace('mistralai_', '')
    title_finetuned = model2.replace('meta-llama_', '').replace('mistralai_', '')
    # Dataset title without the '_subset' suffix and the class-count digits.
    title_dataset = (dataset.replace('_subset', '').replace('/', '')
                     .replace('pretrained_finetuned', '')
                     .replace('77', '').replace('64', '').replace('150', ''))

    # --- Figure 1: KDE of the per-row 'mean' and 'median' columns -----------
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5), sharey=True)
    for axis, statistic in zip(axes, ('mean', 'median')):
        sns.kdeplot(df_pretrained[statistic], ax=axis, label=model1)
        sns.kdeplot(df_finetuned[statistic], ax=axis, label=model2)
        axis.set_title(statistic.capitalize())
        # NOTE(review): the median panel reuses the mean panel's x label;
        # confirm both columns are token positions.
        axis.set_xlabel('Token position')
        axis.set_ylabel('Density')
        axis.legend()
    plt.savefig(os.path.join(output_dir, 'statistics.pdf'), dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)

    # --- Figure 2: accuracy bars + cumulative curve, one panel per model ----
    fontsize = 12
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(13, 5), sharey=True)
    _draw_accuracy_with_cumulative(
        axes[0], accuracy_pretrained, cumulative_pretrained,
        title=title_pretrained, fontsize=fontsize, xtick_step=15,
        show_ylabel=True, show_cumulative_scale=False)
    _draw_accuracy_with_cumulative(
        axes[1], accuracy_finetuned, cumulative_finetuned,
        title=title_finetuned, fontsize=fontsize, xtick_step=15,
        show_ylabel=False, show_cumulative_scale=True)
    plt.subplots_adjust(wspace=0.1)
    plt.savefig(os.path.join(output_dir, 'cumulative_distribution.pdf'), dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)

    # --- Figure 3: fine-tuned model only, titled with the dataset name ------
    fig, ax = plt.subplots(figsize=(7, 5))
    _draw_accuracy_with_cumulative(
        ax, accuracy_finetuned, cumulative_finetuned,
        title=title_dataset, fontsize=fontsize, xtick_step=15,
        show_ylabel=True, show_cumulative_scale=True)
    plt.savefig(os.path.join(output_dir, 'cumulative_distribution_dataset.pdf'), dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)

    # --- Figure 4: both accuracy curves as lines in a single axes -----------
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
    # A Series with its default 0..n-1 index is plotted as index vs. value.
    sns.lineplot(data=pd.Series(accuracy_pretrained), ax=ax, label=model1)
    sns.lineplot(data=pd.Series(accuracy_finetuned), ax=ax, label=model2)
    ax.set_xlabel('Label position')
    ax.set_ylabel('Accuracy')
    ax.set_xticks(range(0, max(len(accuracy_pretrained), len(accuracy_finetuned)), 10))
    ax.set_yticks(np.arange(0, 1.1, 0.2))
    ax.legend()
    plt.savefig(os.path.join(output_dir, 'distribution.pdf'), dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)


In [None]:
# Fine-tuned (chat / instruct) checkpoints; the matching pretrained result
# file has the same name with '-chat' / '-Instruct' removed.
models = [
    'meta-llama_Llama-2-7b-chat-hf',
    'meta-llama_Llama-2-13b-chat-hf',
    'meta-llama_Meta-Llama-3-8B-Instruct',
    'mistralai_Mistral-7B-Instruct-v0.3',
]
datasets = ['CLINC150_subset', 'BANKING77', 'HWU64']

for model in models:
    pretrained_name = model.replace('-chat', '').replace('-Instruct', '')
    for dataset in datasets:
        # plot(dataset, pretrained, fine-tuned)
        plot(dataset, pretrained_name, model)

In [None]:
def plot_dataset(dataset1, dataset2, dataset3, model):
    """Plot one model's per-position accuracy on three datasets side by side.

    For every dataset the file
    ``../results/pretrained_finetuned/<dataset>/<model>.csv`` is read and a
    panel is drawn with accuracy bars plus the normalised cumulative curve on
    a twin y-axis. The figure is saved to
    ``../plots/pretrained_finetuned/datasets.pdf``; the file name does not
    contain the model, so each call overwrites the previous output.

    Parameters
    ----------
    dataset1, dataset2, dataset3 : str
        Dataset directory names, drawn left to right.
    model : str
        File stem of the model's result CSV.
    """
    fontsize = 14
    path = '../plots/pretrained_finetuned/datasets.pdf'
    dataset_names = (dataset1, dataset2, dataset3)

    # Read everything first so a missing file fails before a figure exists.
    frames = [
        pd.read_csv('../results/pretrained_finetuned/' + name + '/' + model + '.csv')
        for name in dataset_names
    ]
    # Panel titles: dataset name without '_subset' and the class-count digits.
    titles = [
        name.replace('_subset', '').replace('77', '').replace('64', '').replace('150', '')
        for name in dataset_names
    ]

    fig, axes = plt.subplots(nrows=1, ncols=len(frames), figsize=(7.5, 3), sharey=True)
    yticks = np.round(np.arange(0, 1.1, 0.2), 2)
    last_panel = len(frames) - 1

    for idx, (axis, df, title) in enumerate(zip(axes, frames, titles)):
        # NOTE(review): 7 non-position columns is taken from the original
        # hard-coded value; confirm against the CSV writer.
        n_positions = max(len(df.columns) - 7, 0)
        positions = list(range(n_positions))
        accuracy = [df[str(i)].sum() / len(df) for i in positions]

        # Normalised cumulative sum; zeros instead of NaN when nothing is correct.
        total = np.sum(accuracy)
        if total > 0:
            cumulative = np.cumsum(accuracy) / total
        else:
            cumulative = np.zeros(len(accuracy))

        sns.barplot(x=positions, y=accuracy, ax=axis)
        axis.set_title(title, fontsize=fontsize + 2)
        axis.set_xlabel('Label position', fontsize=fontsize)
        # The y-axis is shared: label only the outer-left panel so the text
        # does not collide with the neighbouring panel.
        if idx == 0:
            axis.set_ylabel('Accuracy', fontsize=fontsize)

        twin = axis.twinx()
        twin.plot(positions, cumulative, color='red', marker='', linestyle='-', linewidth=2)
        # Every cumulative curve uses [0, 1]; only the last panel labels it.
        twin.set_ylim(0, 1)
        twin.tick_params(axis='y', labelcolor='red')
        if idx == last_panel:
            twin.set_ylabel('Cumulative distribution', color='red', fontsize=fontsize)
        else:
            twin.set_yticklabels([])

        # "+ 1" lets a tick land on n_positions itself (e.g. 150).
        xticks = range(0, n_positions + 1, 50)
        axis.set_xticks(xticks)
        axis.set_xticklabels(xticks, fontsize=fontsize)
        axis.set_yticks(yticks)
        axis.set_yticklabels(yticks, fontsize=fontsize)

    # adjust the space between subplots
    plt.subplots_adjust(wspace=0.1)

    # The target directory is otherwise only created as a side effect of plot().
    os.makedirs(os.path.dirname(path), exist_ok=True)
    plt.savefig(path, dpi=300, bbox_inches='tight')

    plt.show()
    plt.close(fig)

In [None]:
# One model across the three datasets; list order is the left-to-right panel order.
datasets = ['HWU64', 'BANKING77', 'CLINC150_subset']
model = 'meta-llama_Meta-Llama-3-8B-Instruct'

plot_dataset(*datasets, model)

In [None]:
def plot_all(dataset, model1, model2, model3, model4, finetuned_model1, finetuned_model2, finetuned_model3, finetuned_model4):
    """Draw a 4 x 2 grid comparing four pretrained / fine-tuned model pairs.

    Row ``k`` shows ``model<k>`` in the left column and
    ``finetuned_model<k>`` in the right column. Each panel holds per-position
    accuracy bars and the normalised cumulative curve on a twin y-axis.
    Results are read from ``../results/pretrained_finetuned/<dataset>/`` and
    the figure is saved as
    ``../plots/pretrained_finetuned/<dataset>/cumulative_distribution.pdf``.

    Parameters
    ----------
    dataset : str
        Dataset directory name.
    model1 .. model4 : str
        File stems of the pretrained models' result CSVs.
    finetuned_model1 .. finetuned_model4 : str
        File stems of the corresponding fine-tuned models' result CSVs.
    """
    dataset = 'pretrained_finetuned/' + dataset + '/'
    directory = '../plots/' + dataset
    results_dir = '../results/' + dataset

    pretrained_models = (model1, model2, model3, model4)
    finetuned_models = (finetuned_model1, finetuned_model2, finetuned_model3, finetuned_model4)
    pretrained_frames = [pd.read_csv(results_dir + name + '.csv') for name in pretrained_models]
    finetuned_frames = [pd.read_csv(results_dir + name + '.csv') for name in finetuned_models]

    fontsize = 9
    n_rows = len(pretrained_models)
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(4, 8), sharey=True)
    yticks = np.round(np.arange(0, 1.1, 0.25), 2)

    rows = zip(pretrained_frames, pretrained_models, finetuned_frames, finetuned_models)
    for row, (pre_df, pre_name, fin_df, fin_name) in enumerate(rows):
        is_bottom_row = row == n_rows - 1

        for col, (df, name) in enumerate(((pre_df, pre_name), (fin_df, fin_name))):
            axis = axes[row][col]
            is_finetuned_column = col == 1

            # Each frame is measured with its own column count, so the
            # fine-tuned panel no longer depends on the pretrained CSV's width.
            # NOTE(review): 7 non-position columns is taken from the original
            # hard-coded value; confirm against the CSV writer.
            n_positions = max(len(df.columns) - 7, 0)
            positions = list(range(n_positions))
            accuracy = [df[str(i)].sum() / len(df) for i in positions]

            # Normalised cumulative sum; zeros instead of NaN for an all-zero row.
            total = np.sum(accuracy)
            if total > 0:
                cumulative = np.cumsum(accuracy) / total
            else:
                cumulative = np.zeros(len(accuracy))

            sns.barplot(x=positions, y=accuracy, ax=axis)
            short_name = (name.replace('meta-llama_', '').replace('mistralai_', '')
                          .replace('-hf', '').replace('-v0.3', ''))
            axis.set_title(short_name, fontsize=fontsize)
            if is_bottom_row:
                axis.set_xlabel('Label position', fontsize=fontsize)
            else:
                axis.set_xlabel('')
            if not is_finetuned_column:
                axis.set_ylabel('Accuracy', fontsize=fontsize)

            twin = axis.twinx()
            twin.plot(positions, cumulative, color='red', marker='', linestyle='-', linewidth=2)
            # Identical limits and ticks on both columns; only the right
            # column shows the tick labels and the axis label.
            twin.set_ylim(0, 1)
            twin.set_yticks(yticks)
            twin.tick_params(axis='y', labelcolor='red')
            if is_finetuned_column:
                twin.set_ylabel('Cumulative distribution', color='red', fontsize=fontsize - 1)
                twin.set_yticklabels(yticks, fontsize=fontsize)
            else:
                twin.set_yticklabels([])

            # "+ 1" lets a tick land on n_positions itself; x tick labels
            # are shown on the bottom row only.
            xticks = range(0, n_positions + 1, 30)
            axis.set_xticks(xticks)
            if is_bottom_row:
                axis.set_xticklabels(xticks, fontsize=fontsize)
            else:
                axis.set_xticklabels([])
            axis.set_yticks(yticks)
            axis.set_yticklabels(yticks, fontsize=fontsize)

    # adjust the space between subplots
    plt.subplots_adjust(wspace=0.1, hspace=0.3)

    # Create the target directory here rather than relying on plot() having run.
    os.makedirs(directory, exist_ok=True)
    plt.savefig(os.path.join(directory, 'cumulative_distribution.pdf'), dpi=300, bbox_inches='tight')

    plt.show()
    plt.close(fig)

# Fine-tuned checkpoints, in the row order of the grid.
models = [
    'meta-llama_Llama-2-7b-chat-hf',
    'meta-llama_Llama-2-13b-chat-hf',
    'meta-llama_Meta-Llama-3-8B-Instruct',
    'mistralai_Mistral-7B-Instruct-v0.3',
]
datasets = ['CLINC150_subset', 'BANKING77', 'HWU64']

# Pretrained result files share the name with '-chat' / '-Instruct' removed.
pretrained_models = [name.replace('-chat', '').replace('-Instruct', '') for name in models]

for dataset in datasets:
    # Argument order: dataset, the four pretrained names, then the four fine-tuned names.
    plot_all(dataset, *pretrained_models, *models)