In [13]:
import os

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cufflinks as cf
import plotly.offline

from IPython.display import display, HTML
from ipywidgets import interact, interactive_output, fixed, interact_manual, HBox, VBox, Layout, Dropdown, Checkbox, ToggleButtons
from ipywidgets import SelectionRangeSlider, SelectionSlider
import ipywidgets as widgets

# Notebook-wide setup: show up to 100 DataFrame columns and switch cufflinks
# to offline mode.
pd.set_option('display.max_columns', 100)
cf.go_offline()

# Directory holding results.csv; the per-experiment output directory
# '../../output/csv/deletion/' was the previous location.
data_dir = '../../'
results_path = os.path.join(data_dir, 'results.csv')

# Load every experiment result once; the plotting callback filters this frame.
df = pd.read_csv(results_path)
df.head()

Unnamed: 0,dataset,model_type,criterion,adversary,rs,n_estimators,max_depth,lmbda,topd,min_support,epsilon,method,train_time,amortized,speedup_vs_naive,auc,acc,auc_diff_avg,auc_diff_std,acc_diff_avg,acc_diff_std,num_retrains,avg_retrain_depth
0,surgical,forest,gini,random,1,10,1,-1.0,-1,-1,0.0,naive,0.125366,0.121687,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0
1,surgical,forest,gini,random,1,10,1,-1.0,-1,-1,0.0,exact,0.060107,0.000338,360.510385,0.751857,0.749915,0.0,0.0,0.0,0.0,0,-1.0
2,surgical,forest,gini,random,1,10,1,0.0,-1,-1,0.0,random,0.059152,0.000429,283.493354,0.594726,0.749915,0.157219,5.5e-05,0.0,0.0,0,-1.0
3,surgical,forest,gini,random,1,10,1,1e-06,1,2500,0.01,cedar,0.095555,0.00061,199.601712,0.594726,0.749915,0.157219,5.5e-05,0.0,0.0,0,-1.0
4,surgical,forest,gini,random,1,10,1,1e-05,1,2500,0.01,cedar,0.059328,0.000433,280.802223,0.594726,0.749915,0.157219,5.5e-05,0.0,0.0,0,-1.0


In [15]:
# ---- plot options: which columns go on the axes and how lines are grouped ----
x = Dropdown(
    options=['n_estimators', 'max_depth', 'epsilon', 'lmbda'],
    description='x',
)
x_scale = Checkbox(value=False, description='x log scale')
metric = ToggleButtons(options=['auc', 'acc'], description='Metric')
categories = Dropdown(
    options=['lmbda', 'epsilon', 'n_estimators', 'max_depth'],
    description='Categories',
)

# ---- experiment options: which experiment runs are shown ----
dataset = Dropdown(
    options=[
        'surgical', 'adult', 'bank_marketing', 'flight_delays', 'diabetes',
        'skin', 'census', 'twitter', 'gas_sensor', 'higgs', 'all',
    ],
    description='Dataset',
)
criterion = ToggleButtons(options=['gini', 'entropy'], description='Criterion')
adversary = ToggleButtons(options=['random', 'root'], description='Adversary')
rs = SelectionSlider(options=[1], description='rs')

# ---- hyperparameter options ('all' disables the corresponding filter) ----
trees = SelectionSlider(
    options=[10, 25, 50, 100, 250, 'all'],
    value='all',
    description='No. trees',
)
depth = SelectionSlider(
    options=[1, 3, 5, 10, 20, 'all'],
    value=1,
    description='Max depth',
)
topd = SelectionSlider(options=[1, 2, 3], description='Top d')
min_support = SelectionSlider(
    options=[2500, 5000, 10000],
    value=2500,
    description='Min Support',
)
# Range slider: the callback receives a (low, high) pair of lambda values.
lmbda = SelectionRangeSlider(
    options=[1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
    index=(0, 5),
    description='Lambda',
)
epsilon = SelectionSlider(
    options=[0.01, 0.1, 0.5, 1.0, 'all'],
    description='Epsilon',
)

# ---- assemble the control panel: three columns laid out side by side ----
box_1 = VBox([x, categories, x_scale])
box_2 = VBox([dataset, criterion, adversary, metric, rs])
box_3 = VBox([trees, depth, topd, min_support, lmbda, epsilon])
ui = HBox([box_2, box_3, box_1])

# plot graphs
def f(x, x_scale, metric, categories,
      dataset, criterion, adversary, rs,
      trees, depth, topd, min_support, lmbda, epsilon):
    """Filter the global results frame ``df`` and draw a four-panel comparison.

    The panels show amortized time, the selected metric, speedup over naive
    and the metric's average difference. Each is plotted against column ``x``
    with one line per distinct value of column ``categories``.

    Parameters
    ----------
    x : str
        Column of ``df`` used for the x axis of every panel.
    x_scale : bool
        When true, all four x axes use a log scale.
    metric : str
        Metric column to plot; ``'<metric>_diff_avg'`` is plotted as well.
    categories : str
        Column whose distinct values become separate lines.
    dataset, criterion, adversary, rs, trees, depth, topd, min_support
        Equality filters on the 'dataset', 'criterion', 'adversary', 'rs',
        'n_estimators', 'max_depth', 'topd' and 'min_support' columns.
        The string ``'all'`` disables the corresponding filter.
    lmbda : sequence
        ``(low, high)`` inclusive bounds applied to the 'lmbda' column.
    epsilon
        Keeps rows whose 'epsilon' equals this value or 0.0; ``'all'``
        disables the filter.

    Rows whose method is 'exact' or 'random' are captured after the
    experiment-level filters and appended back after the remaining filters,
    so those baselines are always plotted.
    """

    # Experiment-level settings apply to every method, baselines included.
    # 'rs' belongs here so the baselines honour the selected random state.
    temp = df.copy()
    shared_filters = [('dataset', dataset), ('criterion', criterion),
                      ('adversary', adversary), ('rs', rs),
                      ('n_estimators', trees), ('max_depth', depth)]
    for column, value in shared_filters:
        if value != 'all':
            temp = temp[temp[column] == value]

    # Set the baselines aside before the remaining filters would drop them.
    exact = temp[temp['method'] == 'exact']
    random = temp[temp['method'] == 'random']

    if topd != 'all':
        temp = temp[temp['topd'] == topd]
    if min_support != 'all':
        temp = temp[temp['min_support'] == min_support]
    temp = temp[(temp['lmbda'] >= lmbda[0]) & (temp['lmbda'] <= lmbda[1])]
    if epsilon != 'all':
        temp = temp[(temp['epsilon'] == epsilon) | (temp['epsilon'] == 0.00)]

    temp = pd.concat([temp, exact, random])

    # Nothing to draw: say so instead of emitting an empty figure.
    if len(temp) == 0:
        print('No results match the current selection.')
        return

    y0 = 'amortized'
    y1 = metric
    y2 = 'speedup_vs_naive'
    y3 = '{}_diff_avg'.format(metric)

    # Two full-height panels on the left, two stacked panels on the right.
    fig = plt.figure(figsize=(15, 4.5), constrained_layout=True)
    gs = gridspec.GridSpec(nrows=2, ncols=3, figure=fig)
    ax0 = fig.add_subplot(gs[:, 0])
    ax1 = fig.add_subplot(gs[:, 1])
    ax2 = fig.add_subplot(gs[0, 2])
    ax3 = fig.add_subplot(gs[1, 2])

    # (axis, y column, y label, title) for each panel.
    panels = [
        (ax0, y0, '{} (s)'.format(y0), 'Absolute efficiency (lower -> better)'),
        (ax1, y1, y1, 'Absolute utility (higher -> better)'),
        (ax2, y2, y2, 'Relative efficiency (higher -> better)'),
        (ax3, y3, y3, 'Relative utility (lower -> better)'),
    ]

    for ax, _, ylabel, title in panels:
        ax.set_xlabel(x)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        if x_scale:
            ax.set_xscale('log')

    colors = ['black', 'red', 'orange', 'magenta', 'purple', 'green', 'cyan', 'blue']
    alpha = 0.5

    for i, (label, gf) in enumerate(temp.groupby(categories)):
        # Order points along x so each line is drawn left to right.
        gf = gf.sort_values(x)
        # Wrap around the palette so more than len(colors) groups cannot fail.
        color = colors[i % len(colors)]
        for ax, y, _, _ in panels:
            ax.plot(gf[x], gf[y], label=label, color=color, alpha=alpha, marker='o')

    ax0.legend(title=categories)

# Map every parameter name of f to the widget that supplies its value.
controls = dict(
    x=x, x_scale=x_scale, metric=metric, categories=categories,
    dataset=dataset, criterion=criterion, adversary=adversary, rs=rs,
    trees=trees, depth=depth, topd=topd, min_support=min_support,
    lmbda=lmbda, epsilon=epsilon,
)

# Re-run f on every widget change and show the controls above the plots.
out = interactive_output(f, controls)
display(ui, out)

HBox(children=(VBox(children=(Dropdown(description='Dataset', options=('surgical', 'adult', 'bank_marketing', …

Output()