In [1]:
import numpy as np
import pandas as pd
import json, glob, re

import matplotlib
import matplotlib.pyplot as plt
from pylab import rcParams
%matplotlib inline


In [None]:
# Relative directory paths, resolved against the notebook's working directory.
# NOTE(review): presumably the figure-output and results locations; neither name
# is referenced by the cells visible here -- confirm against later cells.
IMG_DIR = './img'
RESULTS_DIR = '../results'

In [2]:
# Set pandas' global display options so wide/long frames are shown without
# truncation: up to 500 rows, up to 500 columns, and a 1000-character line width.
# These are process-wide settings and affect every later DataFrame display.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [3]:
# Matplotlib color per model group for task 1. Keys are model-group labels of
# the form "<n>xx" (the format built by model_group / mid_color); mid_color
# reads this table when task == 'task1_v4'.
# NOTE(review): the 'all' entry is presumably for plots that pool every group -- confirm.
TASK1_COLORS = {'0xx':'green',
                '1xx':'darkorange',
                '2xx':'dodgerblue',
                '5xx':'blue',
                '6xx':'violet',
                '7xx':'purple' ,
                'all': 'red'}

# Matplotlib color per model group; mid_color reads this table for every task
# other than 'task1_v4', so a group missing here raises KeyError there.
TASK2_COLORS = {'2xx':'royalblue', 
                '6xx':'cadetblue', 
                '9xx':'darkorange', 
                '10xx':'darkmagenta',
                'all': 'red'}

# Matplotlib marker code per model group for task 1 (read by mid_shape when
# task == 'task1_v4'). Same keys as TASK1_COLORS.
TASK1_SHAPES = {'0xx':'o',
                '1xx':'s',
                '2xx':'v',
                '5xx':'^',
                '6xx':'<',
                '7xx':'>' ,
                'all': 'd'}

# Matplotlib marker code per model group; read by mid_shape for every task
# other than 'task1_v4'. Same keys as TASK2_COLORS.
TASK2_SHAPES= {'2xx':'o', 
                '6xx':'s', 
                '9xx':'v', 
                '10xx':'^',
                'all': 'd'}


# NOTE(review): these differ from the 'all': 'red' entries in the tables above;
# presumably used for pooled/average curves -- not referenced in the visible cells.
ALL_COLOR = 'dimgray'
AVG_COLOR = ALL_COLOR


# Reset matplotlib to its built-in default style sheet.
plt.style.use('default')

# NOTE(review): presumably font sizes (points) and marker size for figures;
# none of these are referenced in the visible cells -- confirm usage below.
TITLE_SIZE = 40
AXIS_SIZE = 36
LEGEND_SIZE = 24
TICK_SIZE = 20
MARKER_SIZE = 12

In [4]:
# --- Math-text labels, one per metric (returned by metric_eqn) ---------------
AVG_LOG_SPECTRALNORM_EQN = r"$\langle\log_{10}\Vert\mathbf{W}\Vert^{2}_{\infty}\rangle$"
AVG_ALPHA_EQN = r"$\langle\alpha\rangle$"
# ALPHA_HAT used to be bound here as well, but it is re-bound to the plain-text
# name "AlphaHat" further down before anything reads it, so only the equation
# constant is defined at this point.
AVG_ALPHA_WEIGHTED_EQN = r"$\hat\alpha$"

W_DISTANCE_EQN = r"$\log_{10}\langle\Vert\mathbf{W}-\mathbf{W}_{init}\Vert^{2}_{F}\rangle$"
QUALITY_FIT_EQN = r"$\langle D_{KS}\rangle$"

# NOTE(review): the name says LOG but the expression contains no log -- confirm
# which one is intended; the string is left unchanged.
AVG_LOG_NORM_EQN = r"$\langle\Vert\mathbf{W}\Vert^{2}_{F}\rangle$"

SHARPNESS_EQN = r"Sharpness"
# The percent sign must be escaped inside $...$: a bare % is not an accepted
# mathtext symbol, and it starts a comment when LaTeX rendering (usetex) is on.
SVD_10_EQN = r"SVD $10\%$"
SVD_20_EQN = r"SVD $20\%$"



# --- Plain-text metric names (returned by metric_name) -----------------------
LOG_SPECTRALNORM= "LogSpectralNorm"
ALPHA = "Alpha"
ALPHA_HAT = AVG_ALPHA_WEIGHTED = "AlphaHat"
NORM = "LogFrobeniusNorm"
# NOTE(review): identical to NORM, which looks like a copy-paste slip -- the "D"
# metric is labelled <D_KS> in QUALITY_FIT_EQN. Value kept as-is because the
# intended display name is not visible here; confirm and rename.
D_ALPHA_FIT = "LogFrobeniusNorm"
ALPHA_PNORM = "LogAlphaShattenNorm"

# Names of the correlation / fit-quality statistics.
KENDAL_TAU = "Kendal-tau"
R_SQUARED = "R-Squared"

    


def taskname(task):
    """Return `task` with every "_v1" and then every "_v4" substring removed.

    For example "task1_v4" becomes "task1" and "task2_v1" becomes "task2".
    """
    short = task
    # Order matters only for contrived inputs; "_v1" is stripped first.
    for version_tag in ("_v1", "_v4"):
        short = short.replace(version_tag, '')
    return short

def metric_name(metric):
    """Translate a metric key into its display name.

    Recognised keys are "logspectralnorm", "alpha", "alpha_weighted",
    "lognorm", "D" and "logpnorm"; each maps to the corresponding module-level
    name constant. Any other value is returned unchanged.
    """
    display_names = (
        ("logspectralnorm", LOG_SPECTRALNORM),
        ('alpha', ALPHA),
        ('alpha_weighted', ALPHA_HAT),
        ('lognorm', NORM),
        ("D", D_ALPHA_FIT),
        ("logpnorm", ALPHA_PNORM),
    )
    for key, label in display_names:
        if metric == key:
            return label
    # Unknown metric: fall back to the key itself.
    return metric

def metric_eqn(metric):
    """Return the math-text label for a metric key.

    Recognised keys are "logspectralnorm", "alpha", "alpha_weighted", "D",
    "lognorm", "sharpness", "svd10", "svd20" and "W_distance". An empty string
    is returned for anything else.
    """
    equations = (
        ('logspectralnorm', AVG_LOG_SPECTRALNORM_EQN),
        ('alpha', AVG_ALPHA_EQN),
        ('alpha_weighted', AVG_ALPHA_WEIGHTED_EQN),
        ("D", QUALITY_FIT_EQN),
        ('lognorm', AVG_LOG_NORM_EQN),
        ("sharpness", SHARPNESS_EQN),
        ("svd10", SVD_10_EQN),
        ("svd20", SVD_20_EQN),
        ("W_distance", W_DISTANCE_EQN),
    )
    for key, label in equations:
        if metric == key:
            return label
    # No equation is defined for this metric.
    return ""

In [5]:
def model_num(k):
    """Return the hundreds bucket of a model id (e.g. 512 -> 5, "1023" -> 10).

    Parameters
    ----------
    k : int or str
        Model id; anything accepted by ``int()``.

    Returns
    -------
    int
        ``int(k)`` divided by 100, truncated toward zero.

    Raises
    ------
    ValueError, TypeError
        If ``k`` cannot be converted with ``int()``.
    """
    mid = int(k)
    # Pure integer arithmetic: going through float division (int(mid / 100))
    # silently loses precision once the quotient exceeds 2**53.
    hundreds = abs(mid) // 100
    # Keep truncation toward zero for negative ids, as float->int conversion did.
    return hundreds if mid >= 0 else -hundreds

def model_group(k):
    """Return the group label "<n>xx" for model id `k`, where n = model_num(k)."""
    group_number = model_num(k)
    return "{}xx".format(group_number)

def mid_color(task,mid):
    """Look up the plot color for model id `mid` within `task`.

    The model's group label ("<n>xx") is looked up in TASK1_COLORS when
    `task` is 'task1_v4', and in TASK2_COLORS for any other task. A group
    missing from the selected table raises KeyError.
    """
    group_label = "{}xx".format(model_num(mid))
    palette = TASK1_COLORS if task == 'task1_v4' else TASK2_COLORS
    return palette[group_label]



def mid_shape(task,mid):
    """Look up the plot marker for model id `mid` within `task`.

    The model's group label ("<n>xx") is looked up in TASK1_SHAPES when
    `task` is 'task1_v4', and in TASK2_SHAPES for any other task. A group
    missing from the selected table raises KeyError.
    """
    group_label = "{}xx".format(model_num(mid))
    markers = TASK1_SHAPES if task == 'task1_v4' else TASK2_SHAPES
    return markers[group_label]

In [6]:
def read_accuracies():
    """Load per-model test and train accuracies from the reference model configs.

    For each of "task1_v4" and "task2_v1" the file
    ../public_data/reference_data/<task>/model_configs.json is parsed.

    Returns
    -------
    (test_accuracies, train_accuracies) : tuple of dict
        Each maps task name -> {int model id: accuracy}, taken from the
        'test_acc' and 'train_acc' entries of every model's 'metrics'.
    """
    test_accuracies, train_accuracies = {}, {}

    for task in ("task1_v4", "task2_v1"):
        config_path = "../public_data/reference_data/{}/model_configs.json".format(task)
        with open(config_path) as handle:
            configs = json.load(handle)

        per_model_test = test_accuracies[task] = {}
        per_model_train = train_accuracies[task] = {}
        for model_id, config in configs.items():
            mid = int(model_id)  # JSON keys are strings; index by integer id
            scores = config['metrics']
            per_model_test[mid] = scores['test_acc']
            per_model_train[mid] = scores['train_acc']

    return test_accuracies, train_accuracies

In [7]:
def read_details(this_task):
    """Read the WeightWatcher details dataframe for all models in this task.

    Every path matching ../results/<this_task>/model_* is loaded with
    ``pd.read_csv``.

    Parameters
    ----------
    this_task : str
        Task directory name under ../results.

    Returns
    -------
    dict
        Maps the model id string (file name with the "model_" prefix and any
        ".csv" removed, e.g. "model_12.csv" -> "12") to its DataFrame.

    Raises
    ------
    AssertionError
        If no matching file was found.
    """
    import os  # local import: only needed here to split off the file name

    pattern = "../results/{}/model_*".format(this_task)
    prefix = "model_"

    all_details = {}
    for fullname in glob.glob(pattern):
        # Derive the id from the file name itself. The glob pattern must not be
        # reused as a regular expression to strip the directory: that breaks for
        # task names containing regex metacharacters and whenever the OS path
        # separator in the glob result differs from the one in the pattern.
        basename = os.path.basename(fullname)
        mid = basename[len(prefix):].replace('.csv', '')
        all_details[mid] = pd.read_csv(fullname)

    num = len(all_details)
    print("Read {} details dataframes for {}".format(num, this_task))
    assert num > 0, "no files matched {}".format(pattern)

    return all_details

In [8]:
def read_task_metrics(task, out_dir):
    """Load the per-model metric values stored in <out_dir>/<task>.predict.

    The file is parsed as JSON. Its keys have any "model_" substring removed
    and are converted to int; the values are passed through untouched.

    Returns
    -------
    dict
        {int model id: value as stored in the file}
    """
    predict_path = "{}/{}.predict".format(out_dir,task)
    with open(predict_path) as handle:
        raw = json.load(handle)

    return {int(name.replace("model_","")): value for name, value in raw.items()}

In [11]:
def read_modelgroup_accuracies():
    """Collect [model id, accuracy] pairs per model group for each task.

    For each of "task1_v4" and "task2_v1" the file
    ../public_data/reference_data/<task>/model_configs.json is parsed and its
    models are bucketed by model_num(id).

    Returns
    -------
    (test_group_accuracies, train_group_accuracies) : tuple of dict
        Each maps task name -> {group number: [[int model id, accuracy], ...]},
        using the 'test_acc' and 'train_acc' metrics respectively.
    """
    test_group_accuracies, train_group_accuracies = {}, {}

    for task in ("task1_v4", "task2_v1"):
        config_path = "../public_data/reference_data/{}/model_configs.json".format(task)
        with open(config_path) as handle:
            configs = json.load(handle)

        # First pass: one empty bucket per group (separate lists for test/train).
        test_buckets = {model_num(model_id): [] for model_id in configs}
        train_buckets = {model_num(model_id): [] for model_id in configs}

        # Second pass: file every model into its group's bucket.
        for model_id, config in configs.items():
            group = model_num(model_id)
            mid = int(model_id)
            scores = config['metrics']
            test_buckets[group].append([mid, scores['test_acc']])
            train_buckets[group].append([mid, scores['train_acc']])

        test_group_accuracies[task] = test_buckets
        train_group_accuracies[task] = train_buckets

    return test_group_accuracies, train_group_accuracies