In [3]:
from IPython.display import display, HTML

import pandas as pd
from os import listdir
from os.path import isfile, join
from pprint import pprint
import json

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib.font_manager import FontProperties
import numpy as np


# Global plot styling: seaborn "ticks" style, then force a white axes background.
sns.set(style="ticks")
plt.rcParams.update({'axes.facecolor': 'white'})
# Probing tasks, in the order they are reported.
task_order = [
    'Length',
    'WordContent',
    'Depth',
    'TopConstituents',
    'BigramShift',
    'Tense',
    'SubjNumber',
    'ObjNumber',
    'OddManOut',
    'CoordinationInversion',
]

# Model names, in the order they are reported.
model_order = [
    'bert-base-uncased',
    'bert-large-uncased',
    'openai-gpt',
    'gpt2',
    'transfo-xl-wt103',
]

# Integer id -> task name, numbered in the same order as task_order.
dict_task = dict(enumerate(task_order))

def _load_result_frame(dir_path, keep_columns):
    """Collect the result records of every ``*.json`` file in ``dir_path``.

    Each file must contain a JSON object; its values are taken as result
    records (its keys are ignored) and all records are stacked into one
    DataFrame restricted to ``keep_columns``.

    Args:
        dir_path: Directory to scan (not recursive).
        keep_columns: List of record fields to keep, in output column order.

    Returns:
        A DataFrame with exactly ``keep_columns`` as columns and one row per
        record. If no records are found the frame is empty but still has
        those columns.

    Raises:
        KeyError: If records exist but none of them provides one of
            ``keep_columns``.
    """
    # listdir() returns names in arbitrary order; sort so the row order is
    # reproducible. Match on the suffix so names that merely contain ".json"
    # (e.g. "x.json.bak") are not parsed.
    filenames = sorted(
        name for name in listdir(dir_path)
        if isfile(join(dir_path, name)) and name.endswith('.json')
    )

    records = []
    for filename in filenames:
        with open(join(dir_path, filename), 'r') as infile:
            records.extend(json.load(infile).values())

    if not records:
        # Keep the schema instead of failing on the column selection below.
        return pd.DataFrame(columns=keep_columns)
    return pd.DataFrame(records)[keep_columns]


def get_results(dir_path='./results/mlp_results'):
    """Load per-layer / per-head probing results from ``dir_path``.

    Args:
        dir_path: Directory holding the ``*.json`` result files.

    Returns:
        DataFrame with columns ``acc``, ``head``, ``layer``, ``task`` and
        ``model_name``; every other field of the records is discarded.
    """
    return _load_result_frame(dir_path, ['acc', 'head', 'layer', 'task', 'model_name'])


def get_multi_head_results(dir_path='./top_head_wise_results'):
    """Load multi-head probing results from ``dir_path``.

    Args:
        dir_path: Directory holding the ``*.json`` result files.

    Returns:
        DataFrame with columns ``acc``, ``num_head``, ``task`` and
        ``model_name``; every other field of the records is discarded.
    """
    return _load_result_frame(dir_path, ['acc', 'num_head', 'task', 'model_name'])

In [4]:
# Build one summary table indexed by (task, model_name) with three columns:
#   last_linear_layer - linear-probe accuracy at one fixed layer per model
#   best_linear_layer - maximum linear-probe accuracy over all layers
#   top_n_head        - top-n-head accuracy, followed in parentheses by its
#                       relative change (%) with respect to best_linear_layer
result_dir_path = '../../results'

# Find last layer performance
df = get_results(dir_path=join(result_dir_path, 'linear_results'))

# Only rows with head == -1 are used for the linear-probe columns.
# NOTE(review): presumably -1 marks whole-layer (not per-head) runs - confirm.
df = df.loc[df['head'] == -1]
# Hard-coded layer index per model; presumably the 0-based last layer - confirm.
df_base = df.loc[(df['layer'] == 11) & (df['model_name'] == 'bert-base-uncased')]
df_large = df.loc[(df['layer'] == 23) & (df['model_name'] == 'bert-large-uncased')]
df_gpt = df.loc[(df['layer'] == 11) & (df['model_name'] == 'openai-gpt')]
df_gpt2 = df.loc[(df['layer'] == 11) & (df['model_name'] == 'gpt2')]
df_xl = df.loc[(df['layer'] == 17) & (df['model_name'] == 'transfo-xl-wt103')]

df_last_linear = pd.concat([df_base, df_large, df_gpt, df_gpt2, df_xl])
df_last_linear = df_last_linear.set_index(['task', 'model_name'])
df_last_linear = df_last_linear.sort_index()
df_last_linear['last_linear_layer'] = df_last_linear['acc']
df_last_linear = df_last_linear.drop(columns=['acc'])

# Find best layer performance
# `df` still holds the head == -1 rows loaded above, so it is reused here
# rather than reading and filtering the same files a second time.
df = pd.DataFrame(df.groupby(['task', 'model_name'])['acc'].max())
df['best_linear_layer'] = df['acc']
df_best_linear = df.drop(columns=['acc'])

# Find top n head performance
df = get_multi_head_results(dir_path=join(result_dir_path, './top_head_wise_results'))
# Hard-coded num_head per model.
# NOTE(review): presumably the number of attention heads per layer - confirm.
df_base = df.loc[(df['num_head'] == 12) & (df['model_name'] == 'bert-base-uncased')]
df_large = df.loc[(df['num_head'] == 16) & (df['model_name'] == 'bert-large-uncased')]
df_gpt = df.loc[(df['num_head'] == 12) & (df['model_name'] == 'openai-gpt')]
df_gpt2 = df.loc[(df['num_head'] == 12) & (df['model_name'] == 'gpt2')]
df_xl = df.loc[(df['num_head'] == 16) & (df['model_name'] == 'transfo-xl-wt103')]
df = pd.concat([df_base, df_large, df_gpt, df_gpt2, df_xl])

df = df.set_index(['task', 'model_name'])
df = df.sort_index()
df['top_n_head'] = df['acc']
df_top_n_head = df.drop(columns=['acc'])

# Align the three tables on their shared (task, model_name) index.
result = pd.concat([df_last_linear, df_best_linear, df_top_n_head], axis=1)
result = result.drop(columns=['head', 'layer', 'num_head'])

# Relative change (%) of top_n_head with respect to best_linear_layer.
enhancement = (
    (result['top_n_head'] - result['best_linear_layer'])
    / result['best_linear_layer'] * 100
).round(2)

# Render "accuracy (change)". Values are taken from the columns by label:
# integer keys on a string-labelled Series are a deprecated positional
# fallback in recent pandas.
result['top_n_head'] = [
    '{:1.1f} ({:1.1f})'.format(acc, gain)
    for acc, gain in zip(result['top_n_head'], enhancement)
]

# result = result.dropna()

# Order rows by task first, then by model within each task.
result = result.reindex(task_order, level=0)
result = result.reindex(model_order, level=1)

# The displayed table is rounded to one decimal; the CSV keeps the unrounded values.
display(result.round(1))
result.to_csv('embedding_reconstruction.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,last_linear_layer,best_linear_layer,top_n_head
task,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Length,bert-base-uncased,58.0,87.8,95.0 (8.2)
Length,bert-large-uncased,54.8,94.4,95.2 (0.8)
Length,openai-gpt,52.2,96.4,96.2 (-0.2)
Length,gpt2,57.8,88.9,92.8 (4.3)
Length,transfo-xl-wt103,61.3,64.0,80.9 (26.4)
WordContent,bert-base-uncased,25.2,25.2,73.1 (190.3)
WordContent,bert-large-uncased,12.2,32.2,79.8 (147.5)
WordContent,openai-gpt,35.3,35.3,71.3 (102.0)
WordContent,gpt2,37.5,37.5,71.0 (89.5)
WordContent,transfo-xl-wt103,26.0,26.0,40.8 (56.5)
