### EGGS Performance Results

In [12]:
import os
from itertools import product

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cufflinks as cf
import plotly.offline

from IPython.display import display, HTML
from ipywidgets import interactive_output, HBox, VBox, Layout 
from ipywidgets import Dropdown, Checkbox, ToggleButtons, SelectionRangeSlider, SelectionSlider
from ipywidgets import SelectMultiple
import ipywidgets as widgets

# Notebook-wide display configuration: let pandas show up to 100 columns
# and switch cufflinks to offline plotting.
pd.set_option('display.max_columns', 100)
cf.go_offline()

# Load the experiment results (results.csv lives two directories up)
# and preview the first rows as the cell output.
data_dir = '../../'
results_path = os.path.join(data_dir, 'results.csv')
df = pd.read_csv(results_path)
df.head()

Unnamed: 0,dataset,rs,base_estimator,feature_type,test_type,sgl_method,sgl_stacks,pgm,auc,ap
0,youtube,1,lr,limited,limited,,0,,0.760079,0.412271
1,youtube,1,lr,limited,limited,,0,mrf,0.772618,0.431724
2,youtube,1,lr,limited,limited,holdout,1,,0.776352,0.445177
3,youtube,1,lr,limited,limited,holdout,1,psl,0.754987,0.414969
4,youtube,1,lr,limited,limited,holdout,1,mrf,0.776333,0.463178


In [13]:
# plot options
# NOTE(review): `x` and `categories` are shown in the UI and passed to `f`,
# but `f` never reads them, so changing them has no effect on the plot.
x = Dropdown(description='x', options=['topd', 'max_depth'])
metric = ToggleButtons(description='Metric', options=['auc', 'ap'], value='auc')
categories = Dropdown(description='Categories', options=['lmbda', 'topd', 'max_depth'])
error_rate = Checkbox(description='Error rate', value=False)
scatter_plot = Checkbox(description='Scatter plot', value=False)

# experiment options (each one filters the results on the same-named column)
dataset = Dropdown(description='Dataset', options=['youtube', 'twitter', 'soundcloud'], value='youtube')
feature_type = ToggleButtons(description='Feature set', options=['full', 'limited'], value='limited')
# NOTE(review): the rows shown by df.head() have test_type == 'limited', which
# cannot be selected here -- confirm the option list against results.csv.
test_type = ToggleButtons(description='Test', options=['full', 'inductive'], value='full')
# This toggle selects the base estimator (lr / lgb); it was mislabelled 'Criterion'.
base_estimator = ToggleButtons(description='Base estimator', options=['lr', 'lgb'], value='lr')
rs = SelectionSlider(description='rs', options=[1], value=1)

# hyperparameter options ('None' stands for "not used")
sgl_method = SelectMultiple(description='SGL method', options=['None', 'holdout', 'cv'], value=('None', 'holdout', 'cv'))
sgl_stacks = SelectionRangeSlider(description='SGL stacks', options=[0, 1, 2], index=(0, 2))
pgm = SelectMultiple(description='PGM', options=['None', 'psl', 'mrf'], value=('None', 'psl', 'mrf'))

# create ui: experiment filters on the left, hyperparameters in the middle,
# plot options on the right
box_1 = VBox([x, categories, metric, rs, error_rate, scatter_plot])
box_2 = VBox([dataset, feature_type, test_type, base_estimator])
box_3 = VBox([sgl_method, sgl_stacks, pgm])
ui = HBox([box_2, box_3, box_1])

# plot graphs
def f(x, metric, categories, error_rate, scatter_plot,
      dataset, feature_type, test_type, base_estimator, rs,
      sgl_method, sgl_stacks, pgm):
    """Plot `metric` for every selected (SGL method, SGL stacks, PGM) setting.

    The module-level `df` is narrowed to one experiment (dataset, feature
    type, test type, base estimator, random seed) and then to the chosen
    hyperparameter values.  Each remaining configuration becomes one bar
    (or scatter point) coloured by its PGM; the configuration without SGL
    and without PGM is drawn as a dashed horizontal baseline instead.

    Parameters
    ----------
    x, categories
        Accepted because the widgets pass them in; not used by the plot.
    metric : str
        Name of the score column to plot ('auc' or 'ap').
    error_rate : bool
        If True, plot ``1 - metric`` instead of the metric itself.
    scatter_plot : bool
        If True, draw points instead of bars.
    dataset, feature_type, test_type, base_estimator, rs
        Values the same-named columns of `df` must equal.
    sgl_method, pgm : iterable of str
        Allowed values for the same-named columns ('None' means not used).
    sgl_stacks : tuple of (int, int)
        Inclusive (low, high) range for the `sgl_stacks` column.
    """
    sgl_method_list = ['None', 'holdout', 'cv']
    sgl_stacks_list = [0, 1, 2]
    pgm_list = ['None', 'psl', 'mrf']
    pgm_color_dict = {'None': 'blue', 'psl': 'orange', 'mrf': 'purple'}

    # The CSV leaves sgl_method / pgm blank when they are not used (see the
    # df.head() output) and pandas loads blanks as NaN, whereas the widgets
    # and the lookups below use the string 'None'.  Normalise so they match;
    # this is a no-op if the columns already contain 'None' strings.
    results = df.copy()
    for col in ('sgl_method', 'pgm'):
        results[col] = results[col].fillna('None')

    # rows belonging to the chosen experiment
    exp_df = results[(results['dataset'] == dataset)
                     & (results['feature_type'] == feature_type)
                     & (results['test_type'] == test_type)
                     & (results['base_estimator'] == base_estimator)
                     & (results['rs'] == rs)]

    # rows that also match the chosen hyperparameter values
    selected = exp_df[exp_df['sgl_method'].isin(list(sgl_method))
                      & (exp_df['sgl_stacks'] >= sgl_stacks[0])
                      & (exp_df['sgl_stacks'] <= sgl_stacks[1])
                      & exp_df['pgm'].isin(list(pgm))]

    def score(row):
        # Read the metric by column name rather than by position so the
        # result does not depend on the column order of the CSV.
        return 1 - row[metric] if error_rate else row[metric]

    def config_rows(frame, method_i, stacks_i, pgm_i):
        return frame[(frame['sgl_method'] == method_i)
                     & (frame['sgl_stacks'] == stacks_i)
                     & (frame['pgm'] == pgm_i)]

    # get baseline: taken from the whole experiment so it is available even
    # when the baseline configuration is deselected in the widgets
    baseline = None
    baseline_rows = config_rows(exp_df, 'None', 0, 'None')
    if len(baseline_rows) > 0:
        baseline = score(baseline_rows.iloc[0])

    res = []
    colors = []
    for pgm_i, method_i, stacks_i in product(pgm_list, sgl_method_list, sgl_stacks_list):
        # the baseline configuration is shown as a line, not as a bar
        if method_i == 'None' and stacks_i == 0 and pgm_i == 'None':
            continue

        rows = config_rows(selected, method_i, stacks_i, pgm_i)
        if len(rows) == 0:
            continue

        colors.append(pgm_color_dict[pgm_i])
        res.append({'key': '{}\n{}\n{}'.format(method_i, stacks_i, pgm_i),
                    metric: score(rows.iloc[0])})

    # plot results
    if len(selected) == 0:
        plt.clf()
        return

    fig, ax0 = plt.subplots(figsize=(15, 5))

    # `res` is empty when only the baseline configuration is selected; skip
    # the bars in that case instead of indexing a column-less DataFrame.
    if res:
        plot_df = pd.DataFrame(res)
        draw = ax0.scatter if scatter_plot else ax0.bar
        draw(plot_df['key'], plot_df[metric], color=colors)

    if error_rate:
        ax0.set_ylabel('1 - {}'.format(metric))
        ax0.set_title('Lower is better')
    else:
        ax0.set_ylabel(metric)
        ax0.set_title('Higher is better')

    # `is not None` so that a legitimate baseline of 0.0 is still drawn
    if baseline is not None:
        ax0.axhline(baseline, linestyle='--', color='black', label='baseline')
        ax0.legend()

# Map every widget to the same-named parameter of `f`; the plot is re-rendered
# into `out` whenever one of these widgets changes.
controls = {
    'x': x,
    'metric': metric,
    'categories': categories,
    'error_rate': error_rate,
    'scatter_plot': scatter_plot,
    'dataset': dataset,
    'feature_type': feature_type,
    'test_type': test_type,
    'base_estimator': base_estimator,
    'rs': rs,
    'sgl_method': sgl_method,
    'sgl_stacks': sgl_stacks,
    'pgm': pgm,
}
out = interactive_output(f, controls)

# Show the control panel together with the plot output.
display(ui, out)

HBox(children=(VBox(children=(Dropdown(description='Dataset', options=('youtube', 'twitter', 'soundcloud'), va…

Output()