# Visualizing Results with `plot_utils`

In [1]:
import numpy as np
import pandas as pd
import os
from utils.plot_utils import *

## Set up default paths and data-parsing utilities

In [5]:
# Base directory holding the frozen-weights results CSV files. The plotting
# cells below join it with a file name via os.path.join; the parasite cell
# near the end of the notebook reassigns it to a different directory.
fpath_ = "C:/media/resultsviz/"

In [19]:
# Utility function for preparing data for performance line plots (frozen weights)
def prepare_data_performance_line(data):
    """Pivot long-format results into one row per (trial, frozen-weights pct).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'trial', 'freeze_p', 'epoch' and
        'dev_metric', with at most one row per (trial, freeze_p, epoch).

    Returns
    -------
    pandas.DataFrame
        Columns 'trial', 'Frozen Weights Pct', followed by one column per
        distinct epoch holding that epoch's 'dev_metric'. Epoch columns are
        labelled positionally as the strings '1', '2', ..., 'N' in the order
        produced by the pivot.
    """
    df = data.pivot(index=['trial', 'freeze_p'], columns=['epoch'], values=['dev_metric']).reset_index()
    # The first two columns come from the pivot index; everything after is an
    # epoch column. Deriving the labels from the actual count (instead of a
    # fixed '1'..'4') keeps four-epoch inputs unchanged while no longer
    # failing with a length mismatch for any other number of epochs.
    n_epochs = len(df.columns) - 2
    df.columns = ['trial', 'Frozen Weights Pct'] + [str(i) for i in range(1, n_epochs + 1)]
    return df

# Utility function for preparing data for performance ridge plots (frozen weights)
def prepare_data_ridgeplot(data):
    """Build the long-format frame used by the frozen-weights ridge plot.

    Takes a copy of the 'trial', 'freeze_p', 'epoch' and 'dev_metric' columns
    of ``data``, renames them to 'trial', 'Frozen Weights Pct', 'Epoch' and
    'Dev Metric', rounds the percentage to one decimal place, and keeps only
    rows whose rounded percentage lies strictly between 0.0 and 1.0.
    The input frame is not modified.
    """
    source_cols = ['trial', 'freeze_p', 'epoch', 'dev_metric']
    display_cols = ['trial', 'Frozen Weights Pct', 'Epoch', 'Dev Metric']

    def _one_decimal(value):
        return round(value, 1)

    subset = data[source_cols].copy()
    subset.columns = display_cols

    rounded_pct = subset['Frozen Weights Pct'].apply(_one_decimal)
    subset['Frozen Weights Pct'] = rounded_pct

    # Both endpoints are excluded after rounding.
    strictly_inside = (rounded_pct > 0.0) & (rounded_pct < 1.0)
    return subset[strictly_inside]

# Utility function for preparing data for parasite comparison bar plots (parasite)
def prepare_data_parasite_barplot(data):
    """Melt a wide task-by-model score table into long form for the bar plot.

    ``data`` must have an 'index' column (used as the identifier) and one
    column per model. The result has the columns 'task', 'model' and 'score';
    four known model names are replaced with display labels, all other names
    pass through unchanged. The input frame is not modified.
    """
    # Raw model name -> display label. Matching is exact and case-sensitive.
    # NOTE(review): 'BERT Parasite 64' is capitalised differently from the
    # other three keys - confirm this matches the input column headers.
    display_labels = {
        'BERT Parasite 64': 'Parasite 64 (1.2% BERT-base params)',
        'BERT parasite 16': 'Parasite 16 (0.29% BERT-base params)',
        'BERT parasite 64 w/ linear': 'Parasite 64 (with FF)',
        'BERT parasite 16 w/ linear': 'Parasite 16 (with FF)',
    }

    long_df = data.copy().melt(id_vars=['index'])
    long_df.columns = ['task', 'model', 'score']

    for raw_name, label in display_labels.items():
        long_df.loc[long_df['model'] == raw_name, 'model'] = label

    return long_df


# PLOTS

## (<font color='Red'>Frozen Weights</font>) `MSR` Performance Line plot
### using polynomial regression (O=10), default color scheme, BERT-base comparison

In [4]:
# Read the MSR results CSV, then pivot it to one row per (trial, frozen pct).
f_ = "msr_final_exp.csv"
df = pd.read_csv(os.path.join(fpath_, f_))
df = prepare_data_performance_line(df)

# Build the line plot; the bare `viz` on the last line displays it in the notebook.
viz = altair_frozen_weights_performance_plot(
    data=df,
    xaxis_title="Frozen Weights Pct",
    yaxis_title="Dev Metric",
    width=1200,
    height=600,
    title_subtitle="GLUE Task: MSR",
    comparison_bert_range=[0.842, 0.830, 0.828, 0.827, 0.843],
)
viz

## (<font color='Red'>Frozen Weights</font>) `MSR` Performance Ridgeplot
### using all default parameters

In [5]:
# Read the MSR results CSV, then select/rename/filter columns for the ridge plot.
f_ = "msr_final_exp.csv"
df = pd.read_csv(os.path.join(fpath_, f_))
df = prepare_data_ridgeplot(df)

# Plot the combined epoch ridge plot (set return_all = False for single aggregate plot);
# the bare `viz` on the last line displays it in the notebook.
viz = altair_frozen_weights_performance_ridge_plot(df, task_name="MSR")
viz

## (<font color='Red'>Frozen Weights</font>) `CoLA` Performance Line plot
### using polynomial regression (O=10), default color scheme, BERT-base comparison

In [6]:
# Read the CoLA results CSV, then pivot it to one row per (trial, frozen pct).
f_ = "cola_final_exp.csv"
df = pd.read_csv(os.path.join(fpath_, f_))
df = prepare_data_performance_line(df)

# Build the line plot; the bare `viz` on the last line displays it in the notebook.
viz = altair_frozen_weights_performance_plot(
    data=df,
    xaxis_title="Frozen Weights Pct",
    yaxis_title="Dev Metric",
    width=1200,
    height=600,
    title_subtitle="GLUE Task: CoLA",
    comparison_bert_range=[0.601, 0.556, 0.581, 0.564, 0.570],
)
viz

## (<font color='Red'>Frozen Weights</font>) `CoLA` Performance Ridgeplot
### using all default parameters

In [7]:
# Read the CoLA results CSV, then select/rename/filter columns for the ridge plot.
f_ = "cola_final_exp.csv"
df = pd.read_csv(os.path.join(fpath_, f_))
df = prepare_data_ridgeplot(df)

# Plot the combined epoch ridge plot (set return_all = False for single aggregate plot);
# the bare `viz` on the last line displays it in the notebook.
viz = altair_frozen_weights_performance_ridge_plot(df, task_name="CoLA")
viz

## (<font color='Red'>Frozen Weights</font>) `STS-B` Performance Line plot
### using polynomial regression (O=10), default color scheme, BERT-base comparison

In [8]:
# Read the STS-B results CSV, then pivot it to one row per (trial, frozen pct).
f_ = "stsb_final_exp.csv"
df = pd.read_csv(os.path.join(fpath_, f_))
df = prepare_data_performance_line(df)

# Build the line plot; the bare `viz` on the last line displays it in the notebook.
viz = altair_frozen_weights_performance_plot(
    data=df,
    xaxis_title="Frozen Weights Pct",
    yaxis_title="Dev Metric",
    width=1200,
    height=600,
    title_subtitle="GLUE Task: STS-B",
    comparison_bert_range=[0.889, 0.893, 0.881, 0.885, 0.889],
)
viz

## (<font color='Red'>Frozen Weights</font>) `STS-B` Performance Ridgeplot
### using all default parameters

In [9]:
# Read the STS-B results CSV, then select/rename/filter columns for the ridge plot.
f_ = "stsb_final_exp.csv"
df = pd.read_csv(os.path.join(fpath_, f_))
df = prepare_data_ridgeplot(df)

# Plot the combined epoch ridge plot (set return_all = False for single aggregate plot);
# the bare `viz` on the last line displays it in the notebook.
viz = altair_frozen_weights_performance_ridge_plot(df, task_name="STS-B")
viz

## `Parasite` Comparison Barplot to BERT-base

In [22]:

# Load the parasite adapter results from a comma-separated text file.
# Backslashes are escaped explicitly: the previous literal relied on the
# invalid escape sequences "\w" and "\s", which Python warns about. The
# resulting path string is unchanged.
fpath_ = "C:\\w266\\stone\\"
f_ = "parasite_adapter_first_round.txt"
# The first CSV column is read as the index and then turned back into a
# regular column by reset_index(); prepare_data_parasite_barplot expects that
# column to be named 'index'.
data = pd.read_csv(os.path.join(fpath_, f_), sep=",", index_col=0).reset_index()
# Reorder the columns by position before melting.
data = data.iloc[:, [0, 3, 4, 5, 1, 2]]
df = prepare_data_parasite_barplot(data)

# Plot the comparison of parasite to BERT-base; the bare `viz` on the last
# line displays the chart in the notebook.
viz = altair_parasite_comparison_faceted_bar(
    df,
    yaxis_title="Performance",
    title_main="Parasite Model Performance",
    subtitle="Compared to BERT-base",
    height=600,
    width=250,
)
viz