In [1]:
import pickle
import numpy as np
import pandas as pd

import bokeh
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.models import ColumnDataSource, Whisker
from bokeh.sampledata.autompg import autompg as df

# Visualisation Tests

## Full sequences 

In [2]:
gs = '../experiments/pw training/2021_05_12_2/results/unsup_nested_xval_mpwsg.pkl'
with open(gs, 'rb') as fp:
    gs = pickle.load(fp)

In [3]:
gs[0]

{'train_index': array([  1,   2,   4,   7,   9,  12,  14,  16,  18,  19,  20,  24,  25,
         29,  30,  31,  32,  33,  34,  37,  38,  40,  41,  42,  44,  48,
         50,  53,  54,  56,  59,  60,  63,  64,  66,  68,  70,  71,  73,
         75,  78,  80,  82,  86,  87,  89,  90,  92,  97,  99, 100, 103,
        106, 108, 109, 118, 121, 123, 124, 127, 133, 134, 135, 139, 140,
        142, 144, 145, 149, 150, 151, 152, 153, 155, 157, 158, 160, 162,
        163, 165, 166, 167, 168, 169, 171, 172, 175, 177, 180, 181, 182,
        185, 186, 188, 192, 193, 194, 196, 197, 198, 201]),
 'test_index': array([  0,   3,   5,   6,   8,  10,  11,  13,  15,  17,  21,  22,  23,
         26,  27,  28,  35,  36,  39,  43,  45,  46,  47,  49,  51,  52,
         55,  57,  58,  61,  62,  65,  67,  69,  72,  74,  76,  77,  79,
         81,  83,  84,  85,  88,  91,  93,  94,  95,  96,  98, 101, 102,
        104, 105, 107, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120,
        122, 125, 126, 128, 129, 13

- dataframes:
    - dots
        - data
        - x
        - parameters = ''
        - all parameters
    - quartiles
        - q1 = q1
        - median = median 
        - q3 = q3 
        - x's (where the different boxplots are based)
        - width
        -


In [20]:
def create_dataframes(gs):
    # dots dataframe
    dots = {}
    params = ['parameters']
    params_columns = ['parameters']
    for fold in gs:
        if fold != 'x' and fold != 'y':
            dots[fold] = {}
            dots[fold]['data'] = gs[fold]['best_params']['mean_score']
            for parameter in gs[fold]['best_params']:
                param = parameter.replace('_', ' ')
                if 'score' not in param:
                    dots[fold][param] = str(gs[fold]['best_params'][parameter])
                    params.append(param.replace('_', ' '))
            dots[fold]['fold'] = fold
    dots_df = pd.DataFrame(dots).transpose()
        
    
    # statistics
    q1 = float(dots_df['data'].quantile(q=0.25))
    q2 = float(dots_df['data'].quantile(q=0.5))
    q3 = float(dots_df['data'].quantile(q=0.75))
    mean = float(dots_df['data'].mean())
    iqr = q3 - q1
    upper = q3 + 1.5*iqr
    lower = q1 - 1.5*iqr
    
    # boxplot dataframe
    boxplot = pd.DataFrame()
    boxplot['q1'] = [q1]
    boxplot['median'] = [q2]
    boxplot['mean'] = [mean]
    boxplot['q3'] = [q3]
    boxplot['upper'] = [upper]
    boxplot['lower'] = [lower]
    
    
    return dots_df, set(list(params)), boxplot
        
    

In [21]:
dots, param, boxplot = create_dataframes(gs)

In [26]:
# TODO: flatten the df
def plot(dots:pd.DataFrame, param:pd.DataFrame, boxplot:pd.DataFrame, glyphs:dict, x:float, p):
    dots['x'] = x
    boxplot['x'] = x
    
    glyphs['datapoints'][x] = p.circle(x='x', y='data', radius=0.007, source=dots, alpha=0.5, color='dodgerblue')
    tooltips = [
        ('fold', "@fold"),
        ('score', "@data"),
    ]
    
    for parameter in param:
        tooltips.append((parameter, "@{" + parameter + "}"))
    p.add_tools(HoverTool(renderers=[glyphs['datapoints'][x]], tooltips=tooltips, mode='mouse'))
    
    bx = pd.concat([boxplot, boxplot])
    bx['x'] = [x, x]
    bx['y0'] = [boxplot.iloc[0]['upper'], boxplot.iloc[0]['lower']]
    bx['y1'] = [boxplot.iloc[0]['q3'], boxplot.iloc[0]['q1']]
    bx['bar_length'] = 0.5
    
    p.segment('x', 'y0', 'x', 'y1', line_color="black", source=bx)
    
    boxplot['bar_length'] = 0.5
    glyphs['upper_moustache'][x] = p.vbar('x', 'bar_length', 'median', 'q3', fill_color="dodgerblue", alpha=0.3, line_color="black", source=boxplot)
    glyphs['lower_moustache'][x] = p.vbar('x', 'bar_length', 'q1', 'median', fill_color="dodgerblue", alpha=0.3, line_color="black", source=boxplot)
    glyphs['lower_rect'][x] = p.rect('x', 'lower', 0.2, 0.0001, fill_color="black", alpha=0.3, line_color="black", source=boxplot)
    glyphs['upper_rect'][x] = p.rect('x', 'upper', 0.2, 0.0001, fill_color="black", alpha=0.3, line_color="black", source=boxplot)
    
    p.add_tools(HoverTool(renderers=[
        glyphs['upper_moustache'][x],
        glyphs['lower_moustache'][x],
        glyphs['lower_rect'][x],
        glyphs['upper_rect'][x]
    ], tooltips=[
        ('lower whisker',"@lower"),
        ('1st quartile', "@q1"),
        ('median', "@median"),
        ('3rd quartile', "@q3"),
        ('upper whisker', "@upper")
    ], mode='mouse'))

    return glyphs, p

        

In [27]:
def plots(dots:list, param:list, boxplot:list):
    glyphs = {}
    glyphs['datapoints'] = {}
    glyphs['upper_moustache'] = {}
    glyphs['lower_moustache'] = {}
    glyphs['upper_rect'] = {}
    glyphs['lower_rect'] = {}
    p = figure(title='helloworld')
    
    for i in range(len(dots)):
        x = 0.51 * i
        glyphs, p = plot(dots[i], param[i], boxplot[i], glyphs, x, p)
    show(p)
        
    

In [28]:
ds = [dots.copy(), dots.copy()]
ps = [param, param]
bxs = [boxplot.copy(), boxplot.copy()]

In [29]:
plots(ds, ps, bxs)

In [13]:
dots

Unnamed: 0,data,n states,embeddings,window size,optimiser,early stopping,batch size,shuffle,epochs,verbose,fold
0,1.5148,17,5,2,adam,False,16,True,5,0,0
1,1.53235,17,5,2,adam,False,16,True,5,0,1
