# Run scenarios

## Imports

In [None]:
import papermill as pm
import os
import numpy as np
import pandas as pd
from IPython.core.display import display
import matplotlib.pyplot as plt

## Getting scenarios from folder structure

In [None]:
# Folder that is scanned below for scenario sub-folders (relative path).
scenarios_dir = './../datasets/polvo/'

In [None]:
from os.path import join

# Collect every entry of `scenarios_dir` whose name ends in 'scenario'.
# When none is found, fall back to a single empty path so the rest of the
# notebook still has exactly one entry to iterate over.
SCENARIOS_DIR = [
    join(scenarios_dir, entry_name)
    for entry_name in os.listdir(scenarios_dir)
    if entry_name.endswith('scenario')
] or ['']

In [None]:
from itertools import product
from os.path import join

# Parameter grid explored by the experiments.
variances = [0.25]
# Preferences swept from -100 up to (but excluding) 50 in steps of 2.5.
preferences = np.arange(-100, 50, 2.5).tolist()
methods = ['affinity_propagation']

# Materialised as a list on purpose: `product` returns a one-shot iterator,
# so re-running the dispatch cell would otherwise submit zero jobs without
# any error once the iterator had been consumed.
arguments = list(product(variances, SCENARIOS_DIR, preferences, methods))

# One output folder per (scenario, method) pair.
output_dirs = [
    join(scenario_path, method)
    for scenario_path in SCENARIOS_DIR
    for method in methods
]

## Running experiments

### Clearing

In [None]:
from shutil import rmtree
from os import makedirs

# Start every run from an empty output folder: drop whatever a previous run
# left behind (deletion errors are ignored), then create the folder again.
for stale_dir in output_dirs:
    rmtree(stale_dir, ignore_errors=True)
    makedirs(stale_dir)

### Running

In [None]:
import ipyparallel as ipp

# Client for the ipyparallel cluster, plus a load-balanced view that is used
# further down to distribute the notebook executions.
rc = ipp.Client()
lview = rc.load_balanced_view()

In [None]:
def execute_notebook(arguments):
    """Execute the notebook of one method for one parameter combination.

    The notebook ``<method>.ipynb`` is run through papermill with the
    parameters ``PREFERENCE``, ``OUTPUT_DIR``, ``VARIANCE_FILTER``,
    ``SCENARIO`` and ``DATASET_PATH`` injected.

    All imports are local to the function; presumably this keeps it
    self-contained when it is shipped to an ipyparallel engine.

    Parameters
    ----------
    arguments : sequence
        ``(variance, scenario_dir, preference, method)``, in that order.

    Returns
    -------
    None
    """
    from os.path import basename, join, normpath
    import papermill as pm

    variance, scenario_dir, preference, method = arguments[:4]
    output_dir = join(scenario_dir, method)

    # The executed notebook gets a name that is unique per job. A name built
    # from the variance alone is shared by every preference/scenario/method,
    # so jobs running in parallel would overwrite each other's output file.
    scenario_tag = basename(normpath(scenario_dir)) if scenario_dir else 'default'
    executed_notebook = '/tmp/output_variance{}_{}_{}_preference{}.ipynb'.format(
        variance, scenario_tag, method, preference)

    pm.execute_notebook(
        method + '.ipynb',
        executed_notebook,
        parameters=dict(PREFERENCE=preference,
                        OUTPUT_DIR=output_dir,
                        VARIANCE_FILTER=variance,
                        SCENARIO=scenario_dir,
                        DATASET_PATH=join(scenario_dir, 'dataset.csv'))
    )

In [None]:
# Hand every parameter combination to the load-balanced view so that the
# notebook executions are spread over the engines. The trailing semicolon
# keeps the notebook from echoing the returned value.
lview.map_sync(execute_notebook, list(arguments));

## Recovering results

In [None]:
import glob

# Gather, per output folder, every 'output_variance*.csv' file into a single
# DataFrame stored under results[output_dir].
results = {}
for output_dir in output_dirs:
    # Sorted so that the row order is reproducible; plain glob order is
    # arbitrary.
    # NOTE(review): the order is lexicographic by file name. Later cells label
    # the rows with `preferences` by position, so confirm that the file naming
    # makes this order match the order of `preferences`.
    result_csvs = sorted(glob.glob(join(output_dir, 'output_variance*.csv')))
    frames = [pd.read_csv(result_csv, index_col=0) for result_csv in result_csvs]
    # A single concat replaces the repeated DataFrame.append, which no longer
    # exists in pandas >= 2.0. pd.concat rejects an empty list, hence the
    # explicit empty-frame fallback when a folder holds no result file.
    results[output_dir] = pd.concat(frames) if frames else pd.DataFrame()

## Saving results to CSV

In [None]:
# Write one summary CSV per output folder. The rows are relabelled with the
# preference values by position, which requires as many rows as preferences.
for output_dir in output_dirs:
    summary_df = results[output_dir]
    summary_df.index = preferences
    summary_path = os.path.join(output_dir, 'output_resume.csv')
    summary_df.to_csv(summary_path)

## Plotting results

In [None]:
# One figure per output folder: all metrics on the left axis and the number
# of processed features on a second (right) axis, both against the preference.
for output_dir in output_dirs:
    scenario_results = results[output_dir]
    fig, ax1 = plt.subplots(figsize=(20, 10))

    # Each metric line carries its column name as label; unlabelled lines are
    # not returned by get_legend_handles_labels() and would be missing from
    # the combined legend built below.
    for metric_name in scenario_results.columns.values:
        if metric_name != 'processed_features':
            ax1.plot(preferences, scenario_results[metric_name], label=metric_name)

    # The x values are the preferences, so the axis is labelled accordingly.
    ax1.set_xlabel('preference')
    ax1.set_ylabel('metric value')
    ax1.grid()

    # Second y axis sharing the same x axis; the figure size was already set
    # when the figure was created.
    ax2 = ax1.twinx()
    ax2.plot(preferences, scenario_results['processed_features'], 'r--',
             label='processed_features')
    ax2.set_ylabel('number of analyzed features')

    # Single legend that covers the lines of both axes.
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines1 + lines2, labels1 + labels2, loc=0)

    ax1.set_title(output_dir)
    fig.savefig(os.path.join(output_dir, 'output_resume_plot.pdf'), format='pdf')
    plt.show()
    # Release the figure so that looping over many folders does not pile up
    # open figures.
    plt.close(fig)