# Visualization - Performance App

The most up-to-date visualizations of experiment performance, served as an interactive Dash app.

## Preliminaries

### Imports

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
import json
import sys
import pickle as pkl
import warnings

from os.path import dirname

# Dash
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
from dash.dependencies import Input, Output, State

In [2]:
# Custom

# The project root is taken to be the grandparent of the current working
# directory, so the kernel must be started two levels below that root.
root_dir = dirname(dirname(os.getcwd()))
src_dir = os.path.join(root_dir, 'src')

# Make the project's `src` folder importable. The membership check keeps
# re-runs of this cell from piling duplicate entries onto sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

import exp
from exp.utils.extra import mem_usage
from exp.runner.RunExp import RunExp
from exp.runner.RunMercs import RunMercs
from exp.eval.preprocess import (preprocess_aggr_df,
                                 build_df_plt,
                                 build_df_lineplot)
from exp.visual.menus import (generate_dropdown_menu,
                              generate_dropdown_menus_from_df,
                              generate_slider_menu)
from exp.visual.plots import (generate_graph)
from exp.visual.callback import (extract_menu_inputs_menu_names_from_layout,
                                filter_dataframe)

In [3]:
# Display the resolved project root as a quick sanity check.
root_dir

'/cw/dtailocal/Dropbox/Files/KUL/research/codebases/homework'

### Methods

Helper functions used in this notebook to load the aggregated outputs of experiments and merge them.

In [4]:
def merge_aggregated_outputs_multiple_exps(exp_idxs, **kwargs):
    """
    Merge aggregated outputs from multiple experiments.

    Parameters
    ----------
    exp_idxs : iterable
        Indices of the experiments to load, in the order in which their
        rows should appear in the merged frames.
    **kwargs
        Forwarded unchanged to `collect_aggregated_outputs_from_exp`.

    Returns
    -------
    dict
        Maps every output kind reported by at least one experiment to the
        row-wise concatenation (`pd.concat(..., sort=False)`) of that
        kind's DataFrames. Empty when `exp_idxs` is empty.
    """

    # Gather the frames per output kind first and concatenate once at the
    # end. Rebuilding the result dict from each experiment's keys alone
    # would silently drop any kind that a later experiment does not provide.
    frames_per_kind = {}
    for exp_idx in exp_idxs:
        outputs = collect_aggregated_outputs_from_exp(exp_idx, **kwargs)
        for output_kind, df in outputs.items():
            frames_per_kind.setdefault(output_kind, []).append(df)

    return {output_kind: pd.concat(frames, sort=False)
            for output_kind, frames in frames_per_kind.items()}

def collect_aggregated_outputs_from_exp(exp_idx, **kwargs):
    """
    Load every aggregated output of one experiment.

    The experiment is loaded through `RunExp.load(idx=exp_idx, **kwargs)`;
    each entry of its `aggr_outputs` is then fetched with `load_output`.
    Returns a dict keyed by output kind.
    """

    experiment = RunExp.load(idx=exp_idx, **kwargs)

    return {output_kind: experiment.load_output(kind=output_kind)
            for output_kind in experiment.aggr_outputs}

## Global Parameters

This is the single most important setting: the indices of the experiments whose results should be collected.

In [5]:
# Indices of the experiments whose aggregated outputs are collected below.
exp_idxs = [1,2,3,100]

## Collect Data

Now, the actual work starts.

In [6]:
# Load the aggregated outputs of all selected experiments and merge them
# into one DataFrame per output kind.
dfs = merge_aggregated_outputs_multiple_exps(exp_idxs, root_dir=root_dir)

In [7]:
# Preprocess each merged output with the `kind` flag that belongs to it.
df_res, df_qry, df_cfg = (
    preprocess_aggr_df(dfs[output], kind=kind)
    for output, kind in (('results', 'res'),
                         ('qry_codes', 'qry'),
                         ('mod_config', 'cfg'))
)

In [8]:
# Combine results, queries and configurations into the plotting frame and
# expose the `macro_f1` column under the generic name `score`.
df_plt = (build_df_plt(df_res, df_qry, df_cfg)
          .rename(columns={'macro_f1': 'score'}))
mem_usage(df_plt)
df_plt.head()


    119.17 kiloB
    


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,score,t_idx,perc_miss,base_perf,dataset
idx,name,q_idx,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
101,pred.param=nan|sel.its=nan|pred.type=nan|ind.max_depth=nan|ind.type=nan|sel.param=nan|pred.its=nan,0,0.805308,4,0.0,80.5308,nltcs
101,pred.param=nan|sel.its=nan|pred.type=nan|ind.max_depth=nan|ind.type=nan|sel.param=nan|pred.its=nan,1,0.792394,4,6.25,80.5308,nltcs
101,pred.param=nan|sel.its=nan|pred.type=nan|ind.max_depth=nan|ind.type=nan|sel.param=nan|pred.its=nan,2,0.0,4,18.75,80.5308,nltcs
101,pred.param=nan|sel.its=nan|pred.type=nan|ind.max_depth=nan|ind.type=nan|sel.param=nan|pred.its=nan,3,0.0,4,25.0,80.5308,nltcs
101,pred.param=nan|sel.its=nan|pred.type=nan|ind.max_depth=nan|ind.type=nan|sel.param=nan|pred.its=nan,4,0.759175,4,37.5,80.5308,nltcs


In [9]:
# Derive the frame consumed by the line plot from the plotting frame.
df_lpt = build_df_lineplot(df_plt)
df_lpt.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,perc_miss,global_dataset_aligned_rank,rank,global_aligned_rank,score,aligned_rank
range_index,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,pred.param=0.1|sel.its=4.0|pred.type=IT|ind.max_depth=16.0|ind.type=DT|sel.param=2.0|pred.its=8.0,0.0,25.992857,3.05,177.807143,0.700156,18.15
1,pred.param=0.1|sel.its=4.0|pred.type=IT|ind.max_depth=16.0|ind.type=DT|sel.param=2.0|pred.its=8.0,10.0,27.985714,3.185714,190.142857,0.68591,19.4
2,pred.param=0.1|sel.its=4.0|pred.type=IT|ind.max_depth=16.0|ind.type=DT|sel.param=2.0|pred.its=8.0,20.0,31.042857,2.957143,218.85,0.663228,17.514286
3,pred.param=0.1|sel.its=4.0|pred.type=IT|ind.max_depth=16.0|ind.type=DT|sel.param=2.0|pred.its=8.0,30.0,24.05,2.95,117.19,0.639165,16.33
4,pred.param=0.1|sel.its=4.0|pred.type=IT|ind.max_depth=16.0|ind.type=DT|sel.param=2.0|pred.its=8.0,40.0,27.057143,2.65,178.992857,0.645064,15.914286


In [10]:
# Report the memory footprint of the line-plot frame.
mem_usage(df_lpt)


    4.60 kiloB
    


In [11]:
# Inspect the column dtypes of the configuration frame, from which the
# dropdown menus are generated.
df_cfg.dtypes

dataset          category
pred.param        float64
sel.its           float64
pred.type        category
ind.max_depth     float64
ind.type         category
sel.param         float64
pred.its          float64
name             category
dtype: object

## Plot Config

### Layout

In [12]:
# Inline CSS for the app's containers. Dash documents the keys of a `style`
# dict as camelCased (`zIndex`, `marginBottom`), so every key below uses
# that spelling instead of the hyphenated CSS form.
graph_style = {'width':             '85%',
               'float':             'right',
               'zIndex':            1,
               'position':          'relative',
               'marginBottom':      '2cm'}

dropdown_menu_style = {'width':            '14%',
                       'backgroundColor':  'rgb(250, 250, 250)',
                       'float':            'left',
                       'zIndex':           0,
                       'position':         'relative',
                       'border':           '1px solid gray'}

slider_menu_style = {'width':            '95%',
                     'backgroundColor':  'rgb(250, 250, 250)',
                     'float':            'left',
                     'marginLeft':       '1cm',
                     'marginBottom':     '2cm'}

extra_style = {'border':            '1px solid black'}

# Empty div with `clear: both`; in the layout it follows the floated menu
# and graph containers so that the next element starts below them.
sep = html.Div(style={'clear': 'both'})

# Inline CSS for a text box. Keys are camelCased, as Dash documents for
# `style` dicts, instead of using the hyphenated CSS spelling.
txt_box_style = {'width':             '25%',
                 'float':             'left',
                 'zIndex':            1,
                 'position':          'relative',
                 'marginLeft':        '2cm',
                 'border':            '1px solid gray'}

### Menus

#### Dropdown

In [13]:
# Dropdown menus generated from the configuration frame; the `name` column
# is excluded.
perf_dd_menus = generate_dropdown_menus_from_df(df_cfg, ignore_columns=['name'])



            Could not sort this column. Typically because there is a mix
            of int/float and strings.
            



In [14]:
# Extra dropdown menus: the `t_idx` filter, the `show_data` toggle and the
# `y_field` selector.
targ_idx_dd = generate_dropdown_menus_from_df(df_qry, relevant_columns=['t_idx'])
show_data_dd = generate_dropdown_menu("show_data", [False, True], labels=['No', 'Yes'], default=0, multi=False)

# (value, label) pairs offered by the `y_field` dropdown.
y_field_choices = [('rank',                        'Avg. Rank'),
                   ('score',                       'Avg. F1-Score'),
                   ('aligned_rank',                'Aligned Rank'),
                   ('global_aligned_rank',         'Global Aligned Rank'),
                   ('global_dataset_aligned_rank', 'GDS Aligned Rank')]
rank_score_dd = generate_dropdown_menu("y_field",
                                       [value for value, _ in y_field_choices],
                                       labels=[label for _, label in y_field_choices],
                                       default='score',
                                       multi=False)

# Append the extra menus to the configuration menus, in this order, then
# wrap the whole list in the dropdown container.
for extra_menus in (targ_idx_dd, show_data_dd, rank_score_dd):
    perf_dd_menus.extend(extra_menus)

perf_dd_menus = html.Div(perf_dd_menus,
                         style=dropdown_menu_style)

#### Sliders

In [15]:
# Slider menus for `perc_miss` and `base_perf`; the callback applies both
# as range filters on the plotting frame.
perf_sl_menus = generate_slider_menu('perc_miss')
perf_sl_menus.extend(generate_slider_menu('base_perf'))

# Wrap the sliders in their own container.
perf_sl_menus = html.Div(perf_sl_menus,
                         style=slider_menu_style)

## Static App

In [16]:
# Initial figure: a line plot of the full line-plot frame with
# `show_data=False`. The id `lineplot` is the output target of the callback.
perf_graph = dcc.Graph(id='lineplot', 
                       figure=generate_graph(df_lpt, kind='line', show_data=False))

# Container that holds the graph.
stat_perf_contents = html.Div([perf_graph],
                              style=graph_style)

## Dynamic App

Re-uses the menus and the graph container defined in the sections above.

### Initialization

In [17]:
# Page contents in layout order: dropdown menus, graph, float-clearing
# separator, then the sliders.
dyn_perf_contents = [perf_dd_menus,
                     stat_perf_contents,
                     sep,
                     perf_sl_menus]

In [18]:
# Init App.
# Create the Dash application and use the assembled contents as its layout.
app = dash.Dash()
app.layout = html.Div(dyn_perf_contents)

In [19]:
# Collect the callback inputs and the names of the menus from the layout.
# The callback pairs `menu_names` with its arguments by position, so the two
# are presumably returned in matching order.
menu_inputs, menu_names = extract_menu_inputs_menu_names_from_layout(app.layout)

### Main Callback

A single callback handles all menu inputs at once and redraws the line plot.

In [None]:
kind='line' # Hardcoded here.

@app.callback(
    Output('lineplot', 'figure'),
    menu_inputs)
def update_lineplot(*args):
    """
    Redraw the line plot from the current value of every menu.

    Dash calls this with one positional value per entry of `menu_inputs`;
    the values are paired with `menu_names` by position and handled by name:

    - `t_idx` filters the plotting frame on the selected values.
    - `perc_miss` and `base_perf` filter the plotting frame with
      `kind='range'`.
    - `show_data` and `y_field` are forwarded to `generate_graph`.
    - `baseline` is recognised but currently has no effect.
    - every other menu filters the configuration frame.

    The plotting frame is then restricted to the index values left in the
    configuration frame and rebuilt into a line-plot frame.

    Returns
    -------
    The value of `generate_graph`, used as the `figure` of the `lineplot`
    graph.
    """

    range_menus = {'perc_miss', 'base_perf'}

    # Initializations
    filt_df = df_plt
    filt_df_params = df_cfg
    # Default matches the static figure; without it `show_data` would be
    # unbound whenever no `show_data` menu is delivered.
    show_data = False
    y_field = 'score'

    for name, values in zip(menu_names, args):

        if name == 't_idx':
            filt_df = filter_dataframe(filt_df, name, values)
        elif name in range_menus:
            filt_df = filter_dataframe(filt_df, name, values, kind='range')
        elif name == 'show_data':
            show_data = values
        elif name == 'baseline':
            # Recognised so that it is not mistaken for a configuration
            # column below; the selected value is not used yet.
            continue
        elif name == 'y_field':
            y_field = values
        else:
            filt_df_params = filter_dataframe(filt_df_params, name, values)

    y_title = 'Average '+ str(y_field)

    try:
        # Only keep the entries with indices present in the filtered
        # configuration frame.
        filt_df = filt_df.loc[filt_df_params.index.values]
        plot_df = build_df_lineplot(filt_df)

    except ValueError as e:
        # Deliberate best-effort: report the problem and plot the frame as
        # it stood when the error was raised.
        msg = "Caught ValueError, this -sometimes- happens whenever no data is present in the plot: {}".format(e)
        print(msg)
        plot_df = filt_df

    figure_parameters = generate_graph(plot_df,
                                       kind=kind,
                                       show_data=show_data,
                                       y_title=y_title,
                                       x_title='Missing Attributes (%)',
                                       y_field=y_field)

    return figure_parameters

### Run App

Run the actual browser applet.

In [None]:
# Start the Dash development server on port 8885. The call keeps running
# (and the cell stays busy) until the server is interrupted.
app.run_server(port=8885)

 * Running on http://127.0.0.1:8885/ (Press CTRL+C to quit)
127.0.0.1 - - [28/Jan/2019 16:10:10] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:10:11] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:10:11] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:10:11] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:14] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:15] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:16] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:18] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:19] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:20] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:22] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:23] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:24] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:25] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:10:26] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:27] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:10:28] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:29] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:10:30] "POST /_dash-update-component HTTP/1.1" 200 -


Caught ValueError, this -sometimes- happens whenever no data is present in the plot: Cannot set a frame with no defined index and a value that cannot be converted to a Series


127.0.0.1 - - [28/Jan/2019 16:10:32] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:10:52] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:11:06] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:11:09] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [28/Jan/2019 16:11:30] "POST /_dash-update-component HTTP/1.1" 200 -
