# Comparing Strategies

In [1]:
import os
import json
import altair as alt
import pandas as pd
from os.path import join, exists
from performance_visualizer import load_search_performances

In [2]:
# Root folder of the ablation-study runs. The collectors below join it with a
# mode name and a dataset name to locate each run's search_results.json.
FOLDER_RUNS = '../../evaluations/ablation_studies'
   
def collect_all_results(mode):
    """Gather the search results of every dataset directory found under *mode*.

    Each sub-directory of ``FOLDER_RUNS/<mode>`` is treated as a dataset whose
    results live in ``output/temp/search_results.json``. Files whose JSON
    content is empty are skipped.

    Args:
        mode: Name of the sub-folder of ``FOLDER_RUNS`` to scan.

    Returns:
        A dict mapping each dataset name to the entry stored under that same
        name in the dataset's results file.
    """
    mode_dir = join(FOLDER_RUNS, mode)
    dataset_names = [entry for entry in os.listdir(mode_dir) if os.path.isdir(join(mode_dir, entry))]
    dataset_names.sort()

    results_by_dataset = {}
    for name in dataset_names:
        results_file = join(mode_dir, name, 'output/temp/search_results.json')
        with open(results_file) as handle:
            loaded = json.load(handle)

        # Nothing was recorded for this dataset: leave it out of the summary.
        if len(loaded) == 0:
            continue

        # The file is keyed by dataset name; keep only this dataset's entry.
        results_by_dataset[name] = loaded[name]

    return results_by_dataset

def collect_best_scores(dataset):
    """Print the best score recorded for *dataset* under every ablation mode.

    For each mode, reads ``FOLDER_RUNS/<mode>/<dataset>/output/temp/search_results.json``
    and prints the mode name followed by the ``best_score`` of the dataset's
    entry rounded to four decimals (0 when the entry has no ``best_score``).

    Args:
        dataset: Name of the dataset directory, which is also the key of its
            entry inside the results file.
    """
    ablation_modes = (
        'ablation_full',
        'ablation_no_autogrammar',
        'ablation_no_prioritization',
        'ablation_no_tuning',
    )

    for mode in ablation_modes:
        results_file = join(FOLDER_RUNS, mode, dataset, 'output/temp/search_results.json')
        with open(results_file) as handle:
            loaded = json.load(handle)

        best = loaded[dataset].get('best_score', 0)
        score = round(best, 4)
        print(mode, score)
            
def save_all_results():
    """Write the collected results of each ablation mode to ``resource/<mode>.json``.

    Calls ``collect_all_results`` once per mode and dumps the returned dict as
    JSON indented by four spaces.
    """
    output_template = 'resource/%s.json'
    ablation_modes = (
        'ablation_full',
        'ablation_no_autogrammar',
        'ablation_no_prioritization',
        'ablation_no_tuning',
    )

    for mode in ablation_modes:
        mode_results = collect_all_results(mode)
        with open(output_template % mode, 'w') as sink:
            json.dump(mode_results, sink, indent=4)

def plot_comparison_performances(performances):
    """Build a chart of the maximum score per method, one facet column per dataset.

    Every facet holds one filled point per method placed at the maximum of
    ``score``, plus a text mark rotated 45 degrees that shows the same
    ``max(score)`` value. The x axis is stripped of title, labels, ticks and
    grid, so methods are told apart by the colour legend.

    Args:
        performances: Data handed to the layered chart; the encodings use its
            ``method``, ``score`` and ``dataset`` fields.

    Returns:
        The configured faceted Altair chart, titled ``'Datasets'``.
    """
    hidden_method_axis = alt.Axis(grid=False, title=None, labels=False, ticks=False)
    method_legend = alt.Legend(
        title='',
        orient='none',
        legendX=50,
        legendY=-20,
        direction='horizontal',
        titleAnchor='middle',
    )

    # Base layer: one point per method at its best (maximum) score.
    points = alt.Chart().mark_point(filled=True, size=40).encode(
        x=alt.X('method', scale=alt.Scale(zero=True), axis=hidden_method_axis),
        y=alt.Y('score', axis=alt.Axis(grid=False), aggregate='max', title='Scores'),
        color=alt.Color('method', legend=method_legend),
    )

    # Second layer: reuse the point encodings and print the max score as text.
    score_labels = points.mark_text(
        align='center', baseline='bottom', dx=0, dy=-5, angle=45
    ).encode(text='max(score):Q')

    # The data is attached at the layer level so both layers share it.
    layered = alt.layer(points, score_labels, data=performances)

    dataset_column = alt.Column('dataset:N', header=alt.Header(title=None, labelOrient='bottom'))
    chart = layered.facet(column=dataset_column)
    chart = chart.configure_view(strokeWidth=0.0, continuousWidth=10, continuousHeight=180)
    chart = chart.configure_title(fontSize=11, anchor='middle', color='black', orient='bottom')

    return chart.properties(title='Datasets')

def plot_number_pipelines(performances, dataset):
    """Build a bar chart of how many rows each method has for one dataset.

    Rows of ``performances`` are counted per ``(dataset, method)`` pair and the
    counts belonging to *dataset* are drawn as bars with the count printed on
    each bar.

    Args:
        performances: DataFrame with at least ``dataset`` and ``method`` columns.
        dataset: Value of the ``dataset`` column to plot.

    Returns:
        A layered Altair chart (bars plus text), 200 high, titled
        ``'Number of Pipelines'``.
    """
    counts = (
        performances
        .groupby(['dataset', 'method'])
        .size()
        .reset_index(name='pipelines')
    )
    dataset_counts = counts[counts['dataset'] == dataset]

    bars = alt.Chart(dataset_counts).mark_bar().encode(x='method:N', y='pipelines:Q')
    count_labels = bars.mark_text(align='center', baseline='middle').encode(text='pipelines:Q')

    layered = bars + count_labels
    return layered.properties(height=200, title='Number of Pipelines')

In [3]:
# Load the saved results of each ablation variant. The second argument is the
# label handed to load_search_performances for that variant.
file_path = 'resource/ablation_full.json'
full_performances = load_search_performances(file_path, 'Full')

file_path = 'resource/ablation_no_tuning.json'
notuning_performances = load_search_performances(file_path, 'No Tuning')

file_path = 'resource/ablation_no_prioritization.json'
nopriorization_performances = load_search_performances(file_path, 'No Prioritization')

file_path = 'resource/ablation_no_autogrammar.json'
noautomatic_performances = load_search_performances(file_path, 'No Auto Grammar')

# NOTE(review): notuning_performances is loaded above but is not part of this
# concatenation -- confirm the omission is intentional.
all_performances = pd.concat(
    [full_performances, nopriorization_performances, noautomatic_performances],
    ignore_index=True,
)

# Get only the pipelines produced in the first 30 minutes: the hour component
# of 'time' must be 0 and its minute component below the cutoff.
max_minutes = 30
all_performances = all_performances.loc[
    (all_performances['time'].dt.hour == 0)
    & (all_performances['time'].dt.minute < max_minutes)
]

In [4]:
# Datasets kept for the comparison.
selected_datasets = [
    '185_baseball_MIN_METADATA',
    '299_libras_move_MIN_METADATA',
    'LL1_ACLED_TOR_online_behavior_MIN_METADATA',
    'LL1_GS_process_classification_tabular_MIN_METADATA',
    '1567_poker_hand_MIN_METADATA',
]
all_performances = all_performances[all_performances['dataset'].isin(selected_datasets)]

# Swap the long dataset identifiers for short display names. replace() is
# applied to the whole frame, one identifier at a time.
for long_name, short_name in [
    ('LL1_ACLED_TOR_online_behavior_MIN_METADATA', 'ACLED'),
    ('185_baseball_MIN_METADATA', 'BASEBALL'),
    ('LL1_GS_process_classification_tabular_MIN_METADATA', 'GS'),
    ('299_libras_move_MIN_METADATA', 'LIBRAS'),
    ('1567_poker_hand_MIN_METADATA', 'POKER HAND'),
]:
    all_performances = all_performances.replace(long_name, short_name)

# Two decimals are enough for the score labels and tables.
all_performances['score'] = all_performances['score'].round(2)

In [5]:
# Render the per-dataset comparison of the maximum score reached by each method.
plot_comparison_performances(all_performances)

In [6]:
# Best (maximum) score of each method on the chosen dataset; sort=False keeps
# the methods in their order of appearance.
dataset = 'ACLED'
(
    all_performances.loc[all_performances['dataset'] == dataset]
    .groupby(['method', 'dataset'], sort=False)['score']
    .max()
    .reset_index()
)

Unnamed: 0,method,dataset,score
0,Full,ACLED,0.58
1,No Prioritization,ACLED,0.54


In [7]:
# Bar chart of how many pipelines each method has for the dataset chosen above.
plot_number_pipelines(all_performances, dataset)