# Visualizing Results with `plot_utils`

In [8]:
import numpy as np
import pandas as pd
import os
from utils.plot_utils import *

## Set up default paths and data-parsing utilities

In [9]:
def gen_plot_df(fil, 
                trials = 150, 
                epochs = 8, 
                fpath_ = "C:\\BERTVision\\code\\torch\\pfreezing_trials\\bert-base-uncased\\RTE"):
    """Load pickled trial records and return one DataFrame row per trial.

    Each record is indexed as ``rec['result']['freeze_p'][0]``,
    ``rec['result']['metric']`` (one value per epoch) and
    ``rec['misc']['vals']['freeze'][0]``.

    Parameters
    ----------
    fil : str
        File name of the pickle, joined onto ``fpath_``.
    trials : int
        Kept for backward compatibility only. The number of rows is now taken
        from the loaded records, so files with more or fewer than ``trials``
        records load correctly instead of raising.
    epochs : int
        Number of metric values per record; produces columns '1'..str(epochs).
    fpath_ : str
        Directory that contains ``fil``.

    Returns
    -------
    pandas.DataFrame
        Columns '1'..str(epochs), then 'Frozen Weights Pct' (1 - freeze_p),
        'trial' (1-based position in the file) and 'state'.
    """
    import pickle

    # NOTE(security): pickle.load can execute arbitrary code -- only open trusted files.
    # The context manager closes the handle even if unpickling fails.
    with open(os.path.join(fpath_, fil), 'rb') as handle:
        data = np.array(pickle.load(handle))

    n_records = len(data)
    # Size the matrix from the data rather than from `trials`, so the rows
    # always line up with the per-record lists collected below.
    results = np.zeros(shape=(n_records, epochs))
    freeze_p = []
    state = []

    for i in range(n_records):
        record = data[i]
        freeze_p.append(record['result']['freeze_p'][0])
        results[i, :] = record['result']['metric']
        state.append(record['misc']['vals']['freeze'][0])

    plot_data = pd.DataFrame(results)
    plot_data.columns = [str(epoch) for epoch in range(1, epochs + 1)]
    plot_data['Frozen Weights Pct'] = 1 - np.array(freeze_p)
    plot_data['trial'] = list(range(1, n_records + 1))
    plot_data['state'] = state
    return plot_data
    


In [10]:
# Utility function for preparing data for performance line plots (frozen weights)
def prepare_data_performance_line(data):
    """Reshape long-format results into one row per trial for the 4-epoch line plot.

    `data` is pivoted on ('trial', 'freeze_p') with one 'dev_metric' column per
    distinct 'epoch' value. The header is then flattened to
    ['trial', 'Frozen Weights Pct', '1', '2', '3', '4'].

    The epoch labels are assigned by position, so `data` must contain exactly
    four distinct epoch values; otherwise the column assignment raises.
    """
    wide = data.pivot(index=['trial', 'freeze_p'], columns=['epoch'], values=['dev_metric'])
    wide = wide.reset_index()
    epoch_labels = [str(number) for number in range(1, 5)]
    wide.columns = ['trial', 'Frozen Weights Pct'] + epoch_labels
    return wide

# Utility function for preparing data for performance line plots (frozen weights) with 8 lines
def prepare_data_performance_line8(data):
    """Reshape long-format results into one row per trial for the 8-epoch line plot.

    Same pivot as the 4-epoch variant: index ('trial', 'freeze_p'), one
    'dev_metric' column per distinct 'epoch' value. The header is flattened to
    ['trial', 'Frozen Weights Pct', '1', ..., '8'].

    The epoch labels are assigned by position, so `data` must contain exactly
    eight distinct epoch values; otherwise the column assignment raises.
    """
    wide = data.pivot(index=['trial', 'freeze_p'], columns=['epoch'], values=['dev_metric'])
    wide = wide.reset_index()
    epoch_labels = [str(number) for number in range(1, 9)]
    wide.columns = ['trial', 'Frozen Weights Pct'] + epoch_labels
    return wide

# Utility function for preparing data for performance ridge plots (frozen weights)
def prepare_data_ridgeplot(data):
    """Select, rename and filter the columns used by the ridge plot.

    Copies 'trial', 'freeze_p', 'epoch' and 'dev_metric' out of `data` (the
    input frame is not modified) and renames them to 'trial',
    'Frozen Weights Pct', 'Epoch' and 'Dev Metric'. The frozen-weight value is
    rounded to one decimal place, and only rows whose rounded value lies
    strictly between 0.0 and 1.0 are returned.
    """
    renamed = {
        'trial': 'trial',
        'freeze_p': 'Frozen Weights Pct',
        'epoch': 'Epoch',
        'dev_metric': 'Dev Metric',
    }
    ridge = data[list(renamed)].copy()
    ridge.columns = list(renamed.values())
    ridge['Frozen Weights Pct'] = ridge['Frozen Weights Pct'].apply(lambda pct: round(pct, 1))

    # Both bounds are exclusive and are applied to the rounded value.
    rounded = ridge['Frozen Weights Pct']
    strictly_inside = (rounded > 0.0) & (rounded < 1.0)
    return ridge[strictly_inside]

# Utility function for preparing data for parasite comparison bar plots (parasite)
def prepare_data_parasite_barplot(data):
    """Melt a wide score table into long form and relabel the parasite models.

    `data` must have an 'index' column; every other column is treated as a
    model. The result has columns ['task', 'model', 'score'], with four raw
    model names replaced by display labels. Any other model name is left as is.
    `data` itself is not modified.
    """
    # Raw column name -> label shown in the plot. Keys are matched exactly.
    # NOTE(review): only the first key spells "Parasite" with a capital P --
    # confirm this matches the input file's header.
    display_names = {
        'BERT Parasite 64': 'Parasite 64 (1.2% BERT-base params)',
        'BERT parasite 16': 'Parasite 16 (0.29% BERT-base params)',
        'BERT parasite 64 w/ linear': 'Parasite 64 (with FF)',
        'BERT parasite 16 w/ linear': 'Parasite 16 (with FF)',
    }

    long_df = data.copy().melt(id_vars=['index'])
    long_df.columns = ['task', 'model', 'score']
    for raw_name, shown_name in display_names.items():
        long_df.loc[long_df['model'] == raw_name, 'model'] = shown_name

    return long_df


In [11]:
# Load the pickled trial results with gen_plot_df's defaults (150 trials, 8 epochs,
# the hard-coded RTE directory); `plot_data` is reused by the RTE plotting cells below.
plot_data = gen_plot_df("2021-03-12_11-57-39.pkl")

In [12]:
# Preview the first rows: epoch columns '1'..'8', then 'Frozen Weights Pct', 'trial', 'state'.
plot_data.head()

Unnamed: 0,1,2,3,4,5,6,7,8,Frozen Weights Pct,trial,state
0,0.584838,0.584838,0.617329,0.638989,0.620939,0.620939,0.642599,0.628159,0.943202,1,0
1,0.584838,0.624549,0.642599,0.646209,0.638989,0.635379,0.649819,0.67509,0.100535,2,1
2,0.602888,0.617329,0.65343,0.638989,0.689531,0.66787,0.66787,0.68231,0.685844,3,1
3,0.65343,0.642599,0.638989,0.635379,0.68231,0.65343,0.66426,0.67148,0.298726,4,0
4,0.548736,0.472924,0.559567,0.65343,0.67148,0.6787,0.67148,0.67148,0.242962,5,2


In [15]:
# Plot all eight epochs for RTE against the BERT-base comparison values.
# NOTE(review): `df` is not assigned anywhere above this cell in file order, so on a
# fresh kernel (Restart & Run All) this raises NameError. It relies on a later cell
# having been run first -- confirm which frame was intended here.
viz = altair_frozen_weights_performance_plot8(data = df, xaxis_title = "Frozen Weights Pct", yaxis_title = "Dev Metric",
    width = 1200, height = 600, title_subtitle = "GLUE Task: RTE", 
    comparison_bert_range = [0.47292418, 0.501805054, 0.606498195, 0.65703971,0.649819495,0.631768953,0.613718412,0.61161496],
    AdapterBERT_performance=0.661)
viz

# PLOTS

## (<font color='Red'>Frozen Weights</font>) `RTE` and `MSR` Performance Line plots
### using polynomial regression (O=10; the RTE cells below pass `poly_order=3`), default color scheme, BERT-base comparison

In [6]:
# select the RTE trials with state == 0 from plot_data and drop the helper 'state' column
# (legacy CSV-based loading kept below for reference)
#f_ = "2021-03-12_02-20-03.csv"
#df = prepare_data_performance_line(pd.read_csv(os.path.join(fpath_, f_)))
df = plot_data[plot_data.state == 0].drop('state', axis = 1)
# generate and display plot (cubic fit via poly_order=3)
# NOTE(review): `df` still carries epoch columns '1'..'8', while comparison_bert_range has
# four values. The saved output of this cell is a TypeError from `isnan` -- cause not
# visible here; confirm the frame layout the plotting helper expects.
viz = altair_frozen_weights_performance_plot(data = df, xaxis_title = "Frozen Weights Pct", yaxis_title = "Dev Metric",
    width = 1200, height = 600, poly_order=3, title_subtitle = "GLUE Task: RTE\nUnfreeze Layer 0-3", comparison_bert_range = [0.47292418, 0.501805054, 0.606498195, 0.65703971])
viz

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [160]:
# select the RTE trials with state == 1 from plot_data and drop the helper 'state' column
# (legacy CSV-based loading kept below for reference)
#f_ = "2021-03-12_02-20-03.csv"
#df = prepare_data_performance_line(pd.read_csv(os.path.join(fpath_, f_)))
df = plot_data[plot_data.state == 1].drop('state', axis = 1)
# generate and display plot (cubic fit via poly_order=3)
# NOTE(review): `df` still carries epoch columns '1'..'8', while comparison_bert_range
# has four values -- confirm the frame layout the plotting helper expects.
viz = altair_frozen_weights_performance_plot(data = df, xaxis_title = "Frozen Weights Pct", yaxis_title = "Dev Metric",
    width = 1200, height = 600, poly_order = 3, title_subtitle = "GLUE Task: RTE\nUnfreeze Layer 6-9", comparison_bert_range = [0.47292418, 0.501805054, 0.606498195, 0.65703971])
viz

In [161]:
# select the RTE trials with state == 2 from plot_data and drop the helper 'state' column
# (legacy CSV-based loading kept below for reference)
#f_ = "2021-03-12_02-20-03.csv"
#df = prepare_data_performance_line(pd.read_csv(os.path.join(fpath_, f_)))
df = plot_data[plot_data.state == 2].drop('state', axis = 1)
# generate and display plot (cubic fit via poly_order=3)
# NOTE(review): `df` still carries epoch columns '1'..'8', while comparison_bert_range
# has four values -- confirm the frame layout the plotting helper expects.
viz = altair_frozen_weights_performance_plot(data = df, xaxis_title = "Frozen Weights Pct", yaxis_title = "Dev Metric",
    width = 1200, height = 600, poly_order=3,title_subtitle = "GLUE Task: RTE\nUnfreeze Layer 8-11", comparison_bert_range = [0.47292418, 0.501805054, 0.606498195, 0.65703971])
viz

In [11]:
# load MSR performance data from CSV (already in wide format, so no pivot is applied here)
# NOTE(review): `fpath_` has no module-level assignment above this cell in file order
# (it is only a gen_plot_df parameter and is first assigned in the parasite cell at the
# end), so this fails on a fresh kernel -- define the data directory before this cell.
f_ = "test.csv"
df = pd.read_csv(os.path.join(fpath_, f_))
# if the CSV is in the original long format (trial / freeze_p / epoch / dev_metric), pivot it with the line below instead
#df = prepare_data_performance_line8(pd.read_csv(os.path.join(fpath_, f_)))

# generate and display plot
# NOTE(review): comparison_bert_range holds 10 values (the same 5 repeated twice), while the
# RTE call to this helper passes 8 -- confirm the expected length.
viz = altair_frozen_weights_performance_plot8(data = df, xaxis_title = "Frozen Weights Pct", yaxis_title = "Dev Metric",
    width = 1200, height = 600, title_subtitle = "GLUE Task: MSR", comparison_bert_range = [0.842, 0.830, 0.828, 0.827, 0.843,0.842, 0.830, 0.828, 0.827, 0.843],
    AdapterBERT_performance=0.861)
viz

## (<font color='Red'>Frozen Weights</font>) `MSR` Performance Ridgeplot
### using all default parameters

In [5]:
# load MSR performance data from CSV and select/rename/filter the ridge-plot columns
# NOTE(review): relies on `fpath_` already being defined in the kernel; no cell above
# this one assigns it in file order.
f_ = "msr_final_exp.csv"
df = prepare_data_ridgeplot(pd.read_csv(os.path.join(fpath_, f_)))

# plot the combined epoch ridge plot (set return_all = False for single aggregate plot)
viz = altair_frozen_weights_performance_ridge_plot(df, task_name = "MSR")
viz

## (<font color='Red'>Frozen Weights</font>) `CoLA` Performance Line plot
### using polynomial regression (O=10), default color scheme, BERT-base comparison

In [6]:
# load CoLA performance data from CSV, pivot to one row per trial with epoch columns '1'..'4'
# NOTE(review): relies on `fpath_` already being defined in the kernel; no cell above
# this one assigns it in file order.
f_ = "cola_final_exp.csv"
df = prepare_data_performance_line(pd.read_csv(os.path.join(fpath_, f_)))

# generate and display plot (no poly_order passed, so the helper's default is used)
# NOTE(review): comparison_bert_range has 5 values but the frame has 4 epoch columns -- confirm.
viz = altair_frozen_weights_performance_plot(data = df, xaxis_title = "Frozen Weights Pct", yaxis_title = "Dev Metric",
    width = 1200, height = 600, title_subtitle = "GLUE Task: CoLA", comparison_bert_range = [0.601, 0.556, 0.581, 0.564, 0.570])
viz

## (<font color='Red'>Frozen Weights</font>) `CoLA` Performance Ridgeplot
### using all default parameters

In [7]:
# load CoLA performance data from CSV and select/rename/filter the ridge-plot columns
# NOTE(review): relies on `fpath_` already being defined in the kernel; no cell above
# this one assigns it in file order.
f_ = "cola_final_exp.csv"
df = prepare_data_ridgeplot(pd.read_csv(os.path.join(fpath_, f_)))

# plot the combined epoch ridge plot (set return_all = False for single aggregate plot)
viz = altair_frozen_weights_performance_ridge_plot(df, task_name = "CoLA")
viz

## (<font color='Red'>Frozen Weights</font>) `STS-B` Performance Line plot
### using polynomial regression (O=10), default color scheme, BERT-base comparison

In [8]:
# load STS-B performance data from CSV, pivot to one row per trial with epoch columns '1'..'4'
# NOTE(review): relies on `fpath_` already being defined in the kernel; no cell above
# this one assigns it in file order.
f_ = "stsb_final_exp.csv"
df = prepare_data_performance_line(pd.read_csv(os.path.join(fpath_, f_)))

# generate and display plot (no poly_order passed, so the helper's default is used)
# NOTE(review): comparison_bert_range has 5 values but the frame has 4 epoch columns -- confirm.
viz = altair_frozen_weights_performance_plot(data = df, xaxis_title = "Frozen Weights Pct", yaxis_title = "Dev Metric",
    width = 1200, height = 600, title_subtitle = "GLUE Task: STS-B", comparison_bert_range = [0.889, 0.893, 0.881, 0.885, 0.889])
viz

## (<font color='Red'>Frozen Weights</font>) `STS-B` Performance Ridgeplot
### using all default parameters

In [9]:
# load STS-B performance data from CSV and select/rename/filter the ridge-plot columns
# NOTE(review): relies on `fpath_` already being defined in the kernel; no cell above
# this one assigns it in file order.
f_ = "stsb_final_exp.csv"
df = prepare_data_ridgeplot(pd.read_csv(os.path.join(fpath_, f_)))

# plot the combined epoch ridge plot (set return_all = False for single aggregate plot)
viz = altair_frozen_weights_performance_ridge_plot(df, task_name = "STS-B")
viz

## `Parasite` Comparison Barplot to BERT-base

In [22]:

# load the parasite adapter results (comma-separated text file) and reshape them for the bar plot
# Every backslash in the Windows path is escaped explicitly: "\w" and "\s" are not valid
# string escapes and trigger a DeprecationWarning/SyntaxWarning. The runtime value is unchanged.
fpath_ = "C:\\w266\\stone\\"
f_ = "parasite_adapter_first_round.txt"
data = pd.read_csv(os.path.join(fpath_, f_), sep = ",", index_col = 0).reset_index()
# reorder columns by position: column 0 first, then columns 3-5, then columns 1-2
data = data.iloc[:, [0,3,4,5,1,2]]
# melt to long form (task / model / score) and relabel the parasite models
df = prepare_data_parasite_barplot(data)

# plot the comparison of parasite to bert-base
viz = altair_parasite_comparison_faceted_bar(df, yaxis_title = "Performance", title_main = "Parasite Model Performance", 
    subtitle = "Compared to BERT-base", height=600, width=250,)
viz