# Visualization - Relative Timings App

This notebook visualizes the inference time of different methods relative to a baseline method.

A plot inspired by one in the COBRAS paper.

## Preliminaries

### Imports

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
import json
import sys
import pickle as pkl
import warnings

from os.path import dirname

# Dash
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
from dash.dependencies import Input, Output, State

In [2]:
# Custom

# Resolve the project root (two levels above the current working directory)
# and make the project's `src` directory importable.
root_dir = dirname(dirname(os.getcwd()))
src_dir = os.path.join(root_dir, 'src')
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

import exp
from exp.utils.extra import mem_usage
from exp.runner.RunExp import RunExp
from exp.runner.RunMercs import RunMercs
#from exp.eval.preprocess import (preprocess_aggr_df,
                                 #build_df_plt,
                                #build_df_lineplot)
        
        
        
from exp.visual.menus import (generate_dropdown_menu,
                              generate_dropdown_menus_from_df,
                              generate_slider_menu)
from exp.visual.plots import (generate_graph)
from exp.visual.callback import (extract_menu_inputs_menu_names_from_layout,
                                filter_dataframe)

In [3]:
# Import the preprocessing helpers by name instead of `import *`, so it stays
# visible where `preprocess_aggr_df` and `build_df_tmg_default` come from.
from exp.eval.preprocess import (preprocess_aggr_df,
                                 build_df_tmg_default)

In [4]:
# Show the resolved project root as a sanity check.
root_dir

'/cw/dtailocal/Dropbox/Files/KUL/research/codebases/homework'

### Methods

Helper functions for loading and merging the aggregated outputs of several experiments.

In [5]:
def merge_aggregated_outputs_multiple_exps(exp_idxs, **kwargs):
    """
    Merge aggregated outputs from multiple experiments.

    Parameters
    ----------
    exp_idxs : iterable
        Indices of the experiments to load. Within each merged output the
        rows appear in this order.
    **kwargs
        Forwarded unchanged to `collect_aggregated_outputs_from_exp`.

    Returns
    -------
    dict
        Maps every output kind provided by at least one experiment to the
        `pd.concat(..., sort=False)` of that kind's outputs. Empty when
        `exp_idxs` is empty.
    """

    # Gather the outputs per kind first and concatenate once at the end.
    # Rebuilding the result from only the current experiment's keys would
    # silently drop any kind that a later experiment does not provide, and
    # re-concatenating on every iteration copies the data over and over.
    outputs_per_kind = {}
    for exp_idx in exp_idxs:
        exp_outputs = collect_aggregated_outputs_from_exp(exp_idx, **kwargs)
        for kind, output in exp_outputs.items():
            outputs_per_kind.setdefault(kind, []).append(output)

    return {kind: pd.concat(outputs, sort=False)
            for kind, outputs in outputs_per_kind.items()}

def collect_aggregated_outputs_from_exp(exp_idx, **kwargs):
    """
    Load every aggregated output of one experiment.

    The experiment is restored through `RunExp.load(idx=exp_idx, **kwargs)`.
    The returned dict maps each entry of its `aggr_outputs` to whatever
    `load_output(kind=...)` yields for that entry.
    """

    experiment = RunExp.load(idx=exp_idx, **kwargs)

    # One load per aggregated output kind, keyed by that kind.
    return {kind: experiment.load_output(kind=kind)
            for kind in experiment.aggr_outputs}

## Global Parameters

This is the most important setting in the notebook: the indices of the experiments whose results you want to collect.

In [6]:
# Indices of the experiments whose aggregated outputs are loaded and merged below.
exp_idxs = [1,2,3]

## Collect Data

Now, the actual work starts.

In [7]:
# Load the aggregated outputs of every selected experiment and merge them
# into one dict keyed by output kind.
dfs = merge_aggregated_outputs_multiple_exps(exp_idxs, root_dir=root_dir)

In [28]:
# Preprocess each merged output with the `kind` tag that belongs to it.
df_res, df_qry, df_cfg, df_tmg = (
    preprocess_aggr_df(dfs[output], kind=kind)
    for output, kind in (('results', 'res'),
                         ('qry_codes', 'qry'),
                         ('mod_config', 'cfg'),
                         ('timings', 'tmg'))
)

In [34]:
# Preview the first rows of the preprocessed results.
df_res.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,macro_f1
idx,f_idx,q_idx,Unnamed: 3_level_1
359,0,0,0.804711
359,0,1,0.774152
359,0,2,0.779767
359,0,3,0.530433
359,0,4,0.679154


In [35]:
# Preview the first rows of the preprocessed timings.
df_tmg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ind_time,inf_time
idx,f_idx,q_idx,Unnamed: 3_level_1,Unnamed: 4_level_1
359,0,0,2.645484,0.021886
359,0,1,2.645484,0.019425
359,0,2,2.645484,0.023968
359,0,3,2.645484,0.019199
359,0,4,2.645484,0.017643


In [38]:
# Preview the first rows of the preprocessed query codes.
df_qry.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,targ,t_idx,perc_miss
idx,q_idx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
296,0,"(4,)",4,0.0
296,1,"(4,)",4,6.25
296,2,"(4,)",4,18.75
296,3,"(4,)",4,25.0
296,4,"(4,)",4,37.5


In [39]:
# Preview the first rows of the preprocessed model configurations.
df_cfg.head()

Unnamed: 0_level_0,dataset,predict.algo,predict.its,predict.param,fit.sel.param,fit.ind.max_depth,fit.ind.type,fit.sel.its,mod.keyword,mod.type,fit.ind.flatten,name
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
296,nltcs,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,predict.algo=RW|predict.its=16.0|predict.param...
297,msnbc,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,predict.algo=RW|predict.its=16.0|predict.param...
298,jester,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,predict.algo=RW|predict.its=16.0|predict.param...
299,kdd,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,predict.algo=RW|predict.its=16.0|predict.param...
300,plants,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,predict.algo=RW|predict.its=16.0|predict.param...


In [15]:
# Build the timing frame used for plotting: timings joined with the selected
# configuration columns, with rows where predict.algo == 'MI' as the baseline.
lpt_include_columns = ('mod.keyword', 'predict.algo', 'predict.its')
lpt_baseline = ('predict.algo', 'MI')

df_lpt = build_df_tmg_default(df_tmg,
                              df_cfg,
                              include_columns=lpt_include_columns,
                              baseline=lpt_baseline)
df_lpt.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ind_time,inf_time,dataset,predict.algo,predict.its,predict.param,fit.sel.param,fit.ind.max_depth,fit.ind.type,fit.sel.its,mod.keyword,mod.type,fit.ind.flatten,inf_time_base,inf_time_rel
idx,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
296,predict.algo=RW|predict.its=16.0,2.645484,0.039692,nltcs,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.070153,0.565794
297,predict.algo=RW|predict.its=16.0,29.564063,0.183264,msnbc,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.353498,0.51843
298,predict.algo=RW|predict.its=16.0,64.346555,0.185694,jester,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.292883,0.634022
299,predict.algo=RW|predict.its=16.0,136.599056,0.383417,kdd,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.456529,0.839852
300,predict.algo=RW|predict.its=16.0,19.19173,0.105925,plants,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.19059,0.555774


In [16]:
# List the distinct values of the 'name' index level.
df_lpt.index.get_level_values(level='name').unique().values

array(['predict.algo=RW|predict.its=16.0',
       'predict.algo=RW|predict.its=32.0',
       'predict.algo=IT|predict.its=2.0',
       'predict.algo=IT|predict.its=8.0',
       'predict.algo=IT|predict.its=16.0',
       'predict.algo=MI|predict.its=0.1',
       'predict.algo=MAFI|predict.its=0.1'], dtype=object)

## Plot Config

### Layout

In [17]:
# Style dictionaries for the page components.
# Dash style keys are camelCased (zIndex, marginBottom, ...), consistent with
# the `backgroundColor` key already used here.

# Graph area: takes the right-hand 85% of the page.
graph_style = {'width':             '85%',
               'float':             'right',
               'zIndex':            1,
               'position':          'relative',
               'marginBottom':      '2cm'}

# Dropdown column: takes the left-hand 14% of the page.
dropdown_menu_style = {'width':            '14%',
                       'backgroundColor':  'rgb(250, 250, 250)',
                       'float':            'left',
                       'zIndex':           0,
                       'position':         'relative',
                       'border':           '1px solid gray'}

# Slider container.
slider_menu_style = {'width':            '95%',
                     'backgroundColor':  'rgb(250, 250, 250)',
                     'float':            'left',
                     'marginLeft':       '1cm',
                     'marginBottom':     '2cm'}

extra_style = {'border':            '1px solid black'}

# Empty div with `clear: both`, placed in the layout after the floated menu and graph.
sep = html.Div(style={'clear': 'both'})

# Style for a text box. Keys are camelCased, as Dash documents for `style`
# dictionaries (zIndex / marginLeft instead of the hyphenated CSS names).
txt_box_style = {'width':             '25%',
                 'float':             'left',
                 'zIndex':            1,
                 'position':          'relative',
                 'marginLeft':        '2cm',
                 'border':            '1px solid gray'}

### Menus

#### Dropdown

In [18]:
# Dropdown menus generated from the model-configuration frame, ignoring its
# 'name' column.
# NOTE(review): presumably one menu per remaining column - confirm in exp.visual.menus.
perf_dd_menus = generate_dropdown_menus_from_df(df_cfg, ignore_columns=['name'])

In [19]:
# Additional dropdowns: target index, a show-data toggle and the y-axis field.
targ_idx_dd = generate_dropdown_menus_from_df(df_qry, relevant_columns=['t_idx'])

show_data_dd = generate_dropdown_menu("show_data",
                                      [False, True],
                                      labels=['No', 'Yes'],
                                      default=0,
                                      multi=False)

y_field_values = ['rank',
                  'score',
                  'aligned_rank',
                  'global_aligned_rank',
                  'global_dataset_aligned_rank']
y_field_labels = ['Avg. Rank',
                  'Avg. F1-Score',
                  'Aligned Rank',
                  'Global Aligned Rank',
                  'GDS Aligned Rank']
rank_score_dd = generate_dropdown_menu("y_field",
                                       y_field_values,
                                       labels=y_field_labels,
                                       default='score',
                                       multi=False)

# Append the extra menus, in place, after the configuration menus.
for extra_dd in (targ_idx_dd, show_data_dd, rank_score_dd):
    perf_dd_menus.extend(extra_dd)

# Wrap the complete menu list in a single styled container.
perf_dd_menus = html.Div(children=perf_dd_menus,
                         style=dropdown_menu_style)

#### Sliders

In [20]:
# Slider menus for 'perc_miss' and 'base_perf', in that order.
perf_sl_menus = generate_slider_menu('perc_miss')
base_perf_sl = generate_slider_menu('base_perf')
perf_sl_menus.extend(base_perf_sl)

# Wrap both sliders in a single styled container.
perf_sl_menus = html.Div(children=perf_sl_menus,
                         style=slider_menu_style)

## Static App

In [21]:
# Preview the frame that feeds the plot below.
df_lpt.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ind_time,inf_time,dataset,predict.algo,predict.its,predict.param,fit.sel.param,fit.ind.max_depth,fit.ind.type,fit.sel.its,mod.keyword,mod.type,fit.ind.flatten,inf_time_base,inf_time_rel
idx,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
296,predict.algo=RW|predict.its=16.0,2.645484,0.039692,nltcs,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.070153,0.565794
297,predict.algo=RW|predict.its=16.0,29.564063,0.183264,msnbc,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.353498,0.51843
298,predict.algo=RW|predict.its=16.0,64.346555,0.185694,jester,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.292883,0.634022
299,predict.algo=RW|predict.its=16.0,136.599056,0.383417,kdd,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.456529,0.839852
300,predict.algo=RW|predict.its=16.0,19.19173,0.105925,plants,RW,16.0,1.0,2,16,DT,4,md16,Mercs,,0.19059,0.555774


In [22]:
# Static figure: 'inf_time_rel' plotted against 'dataset' as a scatter plot.
scatterplot_spec = dict(kind='scatter',
                        x_field='dataset',
                        y_field='inf_time_rel')
scatterplot_fig = generate_graph(df_lpt, **scatterplot_spec)

# The id 'scatterplot' is what the callback output refers to.
perf_graph = dcc.Graph(id='scatterplot', figure=scatterplot_fig)

# Container that applies the graph styling.
stat_perf_contents = html.Div(children=[perf_graph], style=graph_style)

## Dynamic App

Re-uses the menus and the graph container defined in the sections above.

### Initialization

In [23]:
# Page contents in layout order: dropdown menus, graph container, the
# float-clearing separator, then the slider menus.
dyn_perf_contents = [perf_dd_menus,
                     stat_perf_contents,
                     sep,
                     perf_sl_menus]

In [24]:
# Create the Dash application and use the assembled components as its layout.
app = dash.Dash()
app.layout = html.Div(dyn_perf_contents)

In [25]:
# Derive the callback inputs and their menu names from the layout;
# `menu_inputs` is passed to `app.callback` below.
menu_inputs, menu_names = extract_menu_inputs_menu_names_from_layout(app.layout)
#menu_names

### Main Callback

This callback method needs to handle everything at once.

In [26]:
@app.callback(
    Output('scatterplot', 'figure'),
    menu_inputs)
def update_lineplot(*args):
    """
    Rebuild the figure of the 'scatterplot' graph whenever a menu changes.

    Parameters
    ----------
    *args
        Current values of the components listed in `menu_inputs`, as passed
        in by Dash.

    Returns
    -------
    The figure produced by `generate_graph` for `df_lpt`
    ('inf_time_rel' against 'dataset', as a scatter plot).

    NOTE(review): the menu values are not used, so every trigger redraws the
    same unfiltered plot. `filter_dataframe` is imported in this notebook but
    never called; presumably it was meant to be applied here - confirm.
    """

    return generate_graph(df_lpt,
                          kind='scatter',
                          x_field='dataset',
                          y_field='inf_time_rel')

### Run App

Run the actual browser applet.

In [27]:
# Start the Dash server on local port 8886; the cell keeps running until interrupted.
app.run_server(port=8886)

 * Running on http://127.0.0.1:8886/ (Press CTRL+C to quit)
