# Visualization - Relative Performance App

Dietterich-inspired visuals. Performance relative to the 0% one.

## Preliminaries

### Imports

In [17]:
# Imports
import os
import numpy as np
import pandas as pd
import json
import sys
import pickle as pkl
import warnings

from os.path import dirname

# Dash
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
from dash.dependencies import Input, Output, State

In [18]:
# Custom

root_dir = dirname(dirname(os.getcwd()))
src_dir = os.path.join(root_dir, 'src')
sys.path.append(src_dir)

import exp
from exp.utils.extra import mem_usage
from exp.runner.RunExp import RunExp
from exp.runner.RunMercs import RunMercs
from exp.eval.preprocess import *
from exp.visual.menus import (generate_dropdown_menu,
                              generate_dropdown_menus_from_df,
                              generate_slider_menu)
from exp.visual.plots import (generate_graph)
from exp.visual.callback import (extract_menu_inputs_menu_names_from_layout,
                                filter_dataframe)

In [19]:
root_dir

'/cw/dtailocal/Dropbox/Files/KUL/research/codebases/homework'

### Methods

Some custom methods I need in this notebook.

In [20]:
def merge_aggregated_outputs_multiple_exps(exp_idxs, **kwargs):
    """
    Merge aggregated outputs from multiple experiments.
    """
    
    f = collect_aggregated_outputs_from_exp
    
    gen = (f(exp_idx, **kwargs) for exp_idx in exp_idxs)
    
    result = {}
    for g in gen:
        result = {k: pd.concat([result.get(k, None),v], sort=False)
                  for k,v in g.items()}    
        
    return result

def collect_aggregated_outputs_from_exp(exp_idx, **kwargs):
    """
    Load the aggregated outputs by a single experiment.
    """
    
    # Preliminaries
    dfs = {}
    
    # Actions
    re = RunExp.load(idx=exp_idx, **kwargs)
    for output in re.aggr_outputs:
        dfs[output] = re.load_output(kind=output)
    return dfs

## Sandbox

In [21]:
import plotly
from plotly.offline import iplot
plotly.offline.init_notebook_mode(connected=True)

x0 = np.random.randn(500)
x1 = np.random.randn(500)+1

trace1 = go.Histogram(
    x=x0,
    opacity=0.75
)
trace2 = go.Histogram(
    x=x1,
    opacity=0.75
)

data = [trace1, trace2]
layout = go.Layout(barmode='overlay')
fig = go.Figure(data=data, layout=layout)

#iplot(fig, filename='overlaid histogram')

## Global Parameters

This is the single most important thing you need to specify, i.e., from which experiments do you want to collect the results?

In [22]:
exp_idxs = [1,2,3,100]

## Collect Data

Now, the actual work starts.

In [23]:
dfs = merge_aggregated_outputs_multiple_exps(exp_idxs, root_dir=root_dir)

In [24]:
df_res = preprocess_aggr_df(dfs['results'], kind='res')
df_qry = preprocess_aggr_df(dfs['qry_codes'], kind='qry')
df_cfg = preprocess_aggr_df(dfs['mod_config'], kind='cfg')
df_tmg = preprocess_aggr_df(dfs['timings'], kind='tmg')

In [25]:
df_tmg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ind_time,inf_time
idx,q_idx,Unnamed: 2_level_1,Unnamed: 3_level_1
108,0,0.737374,0.012517
108,1,0.737374,0.013914
108,2,0.737374,0.028757
108,3,0.737374,0.034269
108,4,0.737374,0.033578


In [26]:
df_hst = build_tmg_histogram(df_tmg, df_cfg)
df_hst.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,q_idx,ind_time,inf_time,dataset
idx,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
108,RW,0,0.737374,0.012517,nltcs
108,RW,1,0.737374,0.013914,nltcs
108,RW,2,0.737374,0.028757,nltcs
108,RW,3,0.737374,0.034269,nltcs
108,RW,4,0.737374,0.033578,nltcs


In [40]:
x0 = df_hst.xs('RW', level='name')['inf_time']
x1 = df_hst.xs('PGM', level='name')['inf_time']

trace1 = go.Histogram(
    x=x0,
    opacity=0.75,
    cumulative=True
)
trace2 = go.Histogram(
    x=x1,
    opacity=0.75,
    histnorm='probability density'
)

data = [trace1, trace2]
layout = go.Layout(barmode='overlay')
fig = go.Figure(data=data, layout=layout)

iplot(fig, filename='overlaid histogram')

PlotlyDictValueError: 'cumulative' has invalid value inside 'histogram'

Path To Error: ['cumulative']

Current path: []
Current parent object_names: []

With the current parents, 'cumulative' can be used as follows:

Under ('figure', 'data', 'histogram'):

    editType: calc
    role: object




In [11]:
df_lpt = build_df_tmg_default(df_tmg, df_cfg)
df_lpt.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ind_time,inf_time,dataset,inf_time_base,inf_time_rel
idx,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
108,RW,0.737374,0.036755,nltcs,0.010513,3.496265
109,RW,24.488404,0.156875,msnbc,0.059858,2.620773
110,RW,40.633083,0.159274,jester,0.036793,4.328947
111,RW,142.025769,0.333592,kdd,0.09152,3.645035
112,RW,17.931461,0.117207,plants,0.02613,4.485555


## Plot Config

### Layout

In [None]:
graph_style = {'width':             '85%',
               'float':             'right',
               'z-index':            1,
               'position':           'relative',
               'margin-bottom':      '2cm'}

dropdown_menu_style = {'width':            '14%',
                       'backgroundColor':  'rgb(250, 250, 250)',
                       'float':            'left',
                       'z-index':            0,
                       'position':          'relative',
                       'border':            '1px solid gray'}

slider_menu_style = {'width':             '95%',
                     'backgroundColor':  'rgb(250, 250, 250)',
                     'float':            'left',
                     'margin-left':       '1cm',
                     'margin-bottom':     '2cm'}

extra_style = {'border':            '1px solid black'}

sep = html.Div(style={'clear': 'both'})

txt_box_style = {'width':             '25%',
                 'float':             'left',
                 'z-index':            1,
                 'position':           'relative',
                 'margin-left':       '2cm',
                 'border':            '1px solid gray'}

### Menus

#### Dropdown

In [None]:
# Dropdown Menus
perf_dd_menus = generate_dropdown_menus_from_df(df_cfg, ignore_columns=['name'])

In [None]:
# Extra DropDown menus
targ_idx_dd = generate_dropdown_menus_from_df(df_qry, relevant_columns=['t_idx'])
show_data_dd = generate_dropdown_menu("show_data", [False, True], labels=['No', 'Yes'], default=0, multi=False)
rank_score_dd = generate_dropdown_menu("y_field",
                                       ['rank',
                                        'score',
                                        'score_rel',
                                        'aligned_rank',
                                        'global_aligned_rank',
                                        'global_dataset_aligned_rank'],
                                       labels=['Avg. Rank', 'Avg. F1-Score', 'Avg. Rel. F1-Score','Aligned Rank','Global Aligned Rank', 'GDS Aligned Rank'],
                                       default='score',
                                       multi=False)

perf_dd_menus.extend(targ_idx_dd)
perf_dd_menus.extend(show_data_dd)
perf_dd_menus.extend(rank_score_dd)

perf_dd_menus = html.Div(perf_dd_menus,
                         style=dropdown_menu_style)

#### Sliders

In [None]:
perf_sl_menus = generate_slider_menu('perc_miss')
perf_sl_menus.extend(generate_slider_menu('score_base'))

perf_sl_menus = html.Div(perf_sl_menus,
                         style=slider_menu_style)

## Static App

In [None]:
histogram_fig = generate_graph(df_hst,
                               kind='hist',
                               x_field='inf_time')

perf_graph = dcc.Graph(id='histogram', 
                       figure=histogram_fig)

stat_perf_contents = html.Div([perf_graph],
                              style=graph_style)

In [None]:
perf_graph = dcc.Graph(id='lineplot', 
                       figure=generate_graph(df_lpt, kind='line', show_data=False))

stat_perf_contents = html.Div([perf_graph],
                              style=graph_style)

## Dynamic App

Re-uses some stuff from the static configuration.

### Initialization

In [None]:
dyn_perf_contents = [perf_dd_menus,
                     stat_perf_contents,
                     sep,
                     perf_sl_menus]

In [None]:
# Init App.
app = dash.Dash()
app.layout = html.Div(dyn_perf_contents)

In [None]:
menu_inputs, menu_names = extract_menu_inputs_menu_names_from_layout(app.layout)
#menu_names

### Main Callback

This callback method needs to handle everything at once.

In [None]:
kind='line' # Hardcoded here.

@app.callback(
    Output('lineplot', 'figure'),
    menu_inputs)
def update_lineplot(*args):
    
    menus = zip(menu_names, args)
    
    # Initializations
    filt_df = df_plt
    filt_df_params = df_cfg
    y_field = 'score'
    y_title = 'Average '+ str(y_field)
    
    for name, values in menus:

        if name in {'t_idx'}:
            filt_df = filter_dataframe(filt_df, name, values)
        elif name in {'perc_miss'}:
            filt_df = filter_dataframe(filt_df, name, values, kind='range')
        elif name in {'score_base'}:
            filt_df = filter_dataframe(filt_df, name, values, kind='range')
        elif name in {'show_data'}:
            show_data = values
            #print("show_data value: {}".format(values))
        elif name in {'baseline'}:
            baseline_name = values
        elif name in {'y_field'}:
            y_field = values
            y_title = 'Average '+ str(y_field)
        else:
            filt_df_params = filter_dataframe(filt_df_params, name, values)
    
    try:
        #print(filt_df.head())
        filt_df = filt_df.loc[filt_df_params.index.values]  # Only keep the entries with indices present in df_params
        plot_df = build_df_lineplot(filt_df)
        
    except ValueError as e: 
        msg = "Caught ValueError, this -sometimes- happens whenever no data is present in the plot: {}".format(e)
        print(msg)
        plot_df = filt_df
        pass

    figure_parameters = generate_graph(plot_df,
                                       kind=kind,
                                       show_data=show_data,
                                       y_title=y_title,
                                       x_title='Missing Attributes (%)',
                                       y_field=y_field)
    
    return figure_parameters

### Run App

Run the actual browser applet.

In [None]:
app.run_server(port=8885)