# C3O Paper: Performance Model Evaluation

In [1]:
import sys
sys.path.insert(0, '..')
import collections

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error as MAPE
from IPython.display import display, HTML
from ipywidgets import IntProgress

from RuntimePrediction.Predict import Predictor as C3OPredictor
from RuntimePrediction.DefaultModels import (GradientBoosting as GB,
                                             ErnestModel as EM)
from RuntimePrediction.CustomModels import (BasicOptimisticModel as BOM,
                                            OptimisticGradientBoosting as OGB)

## Define Models

In [2]:
# One entry per evaluated model: a display name, the predictor class, and the
# keyword arguments passed to that class when it is instantiated.
Model = collections.namedtuple('Model', ['name', 'predictor', 'kwargs'])

models = [
    # Imported from RuntimePrediction.DefaultModels
    Model(name='Ernest', predictor=EM, kwargs={}),
    Model(name='GBM', predictor=GB, kwargs={}),
    # Imported from RuntimePrediction.CustomModels
    Model(name='BOM', predictor=BOM, kwargs={}),
    Model(name='OGB', predictor=OGB, kwargs={}),
    # The C3O predictor itself
    Model(name='C3O', predictor=C3OPredictor, kwargs={}),
]

## Extract Runtime Data

In [3]:
# Load the tab-separated runtime measurements, one frame per job type
sort_df, grep_df, sgd_df, kmeans_df, pagerank_df = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in (
        '../data/sort.tsv',
        '../data/grep.tsv',
        '../data/sgd.tsv',
        '../data/kmeans.tsv',
        '../data/pagerank.tsv',
    )
)

In [4]:
# A job's training data: display name, feature frame X and runtime target y
Job = collections.namedtuple('Job', ['name', 'X', 'y'])

def get_training_data(df, features, filters):
    """Reduce raw runtime measurements to one median row per configuration.

    Args:
        df: DataFrame of measurements. It must contain the columns
            'instance_count', 'machine_type', 'gross_runtime', every column
            named in `features`, and every column referenced by `filters`.
        features: list of job-specific feature column names.
        filters: iterable of (column, operator, value) triples applied in
            order after aggregation; operator is '==' or '>'.

    Returns:
        Tuple (X, y): X holds 'instance_count' plus `features`; y is the
        matching 'gross_runtime' column, always as a Series (even when only
        a single row survives the filters).

    Raises:
        ValueError: if a filter uses an operator other than '==' or '>'.
    """
    # Get medians
    g = df.groupby(by=['instance_count', 'machine_type'] + features)
    # numeric_only=True: only numeric columns have a median; without it recent
    # pandas versions raise on non-numeric columns instead of dropping them.
    df = pd.DataFrame(g.median(numeric_only=True).to_records())
    # Apply filters
    # e.g. only for one machine type each, the full c3o-experiments were conducted
    # No full cartesian product!
    for k, s, v in filters:
        if s == '==':
            df = df[df[k] == v]
        elif s == '>':
            df = df[df[k] > v]
        else:
            # Previously an unknown operator was silently ignored, which left
            # the data unfiltered without any hint to the caller.
            raise ValueError(f"Unsupported filter operator: {s!r}")
    X = df[['instance_count'] + features]
    # Plain column selection instead of [[...]].squeeze(): squeeze() collapses
    # a single remaining row to a scalar rather than a one-element Series.
    y = df['gross_runtime']
    return X, y

td = get_training_data

# One Job per benchmark. Each spec row is
# (job name, raw measurements, job-specific feature columns, row filters);
# the filters restrict every job to one machine type (and, for some jobs,
# to further conditions) before the data is used for training.
jobs = [
    Job(job_name, *td(measurements, feature_columns, row_filters))
    for job_name, measurements, feature_columns, row_filters in (
        ('Sort', sort_df,
         ['data_size_MB'],
         [('machine_type', '==', 'c4.2xlarge'),
          ('line_length', '==', 100)]),
        ('Grep', grep_df,
         ['data_size_MB', 'p_occurrence'],
         [('machine_type', '==', 'm4.2xlarge')]),
        ('SGDLR', sgd_df,
         ['observations', 'features', 'iterations'],
         [('machine_type', '==', 'r4.2xlarge'),
          ('instance_count', '>', 2)]),
        ('K-Means', kmeans_df,
         ['observations', 'features', 'k'],
         [('machine_type', '==', 'r4.2xlarge'),
          ('instance_count', '>', 2)]),
        ('Page Rank', pagerank_df,
         ['links', 'pages', 'convergence_criterion'],
         [('machine_type', '==', 'r4.2xlarge')]),
    )
]

## Train-Test Split Creation

In [5]:
# Per job name: the feature columns whose shared values define one "local"
# group of observations (see get_groups). 'Sort' has no entry, so no local
# splits are created for it.
common_features = {
    'Grep': ['p_occurrence'],
    'SGDLR': ['iterations'],
    'K-Means': ['k'],
    'Page Rank': ['pages', 'convergence_criterion'],
}

def get_groups(X, y, common_features):
    """Split the data into groups that share the same values for some features.

    Args:
        X: feature DataFrame.
        y: target Series named 'gross_runtime', aligned with X by index.
        common_features: list of columns of X to group by. Note that this
            parameter shadows the module-level dict of the same name.

    Returns:
        Tuple (Xs, ys) of equally long lists: Xs[i] is the feature frame of
        group i (all columns of X) and ys[i] the matching 'gross_runtime'
        Series. Groups appear in the order produced by pandas' groupby.
    """
    target = 'gross_runtime'
    Xy = X.merge(y, right_index=True, left_index=True)
    feature_columns = [column for column in Xy.columns if column != target]
    Xs, ys = [], []
    # Single pass over the groups instead of iterating the groupby twice
    for _, group in Xy.groupby(by=common_features):
        Xs.append(group[feature_columns])
        # Select the column directly: [[target]].squeeze() would turn the
        # target of a single-row group into a scalar instead of a Series.
        ys.append(group[target])
    return Xs, ys

In [6]:
def create_local_training_data(job, splits, test_size=.1):
    """Yield `splits` train/test partitions, each drawn from one local group.

    The job's data is grouped by the columns listed for it in the module-level
    `common_features` dict; the groups are visited round-robin (starting over
    after the last one) and each visit yields a fresh `train_test_split` of
    that group.

    Args:
        job: object with `name`, `X` and `y` attributes (see `Job`).
        splits: number of partitions to yield.
        test_size: forwarded to `train_test_split`.

    Yields:
        The result of `train_test_split(X_group, y_group, test_size=...)`.

    Raises:
        KeyError: if `common_features` has no entry for `job.name`.
        ValueError: if the job's data contains no groups at all (the previous
            implementation looped forever in that case).
    """
    # The grouping does not change between cycles, so compute it only once
    groups = list(zip(*get_groups(job.X, job.y, common_features[job.name])))
    if not groups:
        raise ValueError(f"No local groups available for job {job.name!r}")
    for split_index in range(splits):
        # Round-robin over the groups, wrapping around when splits > len(groups)
        X, y = groups[split_index % len(groups)]
        yield train_test_split(X, y, test_size=test_size)
                
def create_global_training_data(job, splits, test_size=.1):
    """Yield `splits` train/test partitions of the job's complete dataset.

    Every yielded item is the result of one independent call to
    `train_test_split(job.X, job.y, test_size=test_size)`.
    """
    yield from (
        train_test_split(job.X, job.y, test_size=test_size)
        for _ in range(splits)
    )

## Evaluate Models on the Training Datasets

In [7]:
# Display tables side by side and get rid of horizontal scroll bar under tables.
# The CSS turns the `.output` container into a wrapping flex row and adjusts the
# left/right padding of `div.output_subarea`. The trailing backslashes are line
# continuations inside the string literal, so the markup ends up on one line.
HTML('''<style> \
     .output {flex-direction: row; flex-wrap: wrap} \
     div.output_subarea {padding-left: 0mm; padding-right:3mm} \
     </style>''')

In [8]:
def display_evaluation_counts(job_name):
    """Display how many evaluation results are recorded for one job.

    Reads 'performance_results.csv' (headerless rows of
    job, model, dataset, MAPE) and renders, under an <h3> heading with the job
    name, a table with one row per model and one column per dataset
    ('global' / 'local') containing the number of recorded results.
    "None" is shown instead of a table when the results file is missing or
    empty, when it holds no rows for `job_name`, or when the table cannot be
    rendered.

    Args:
        job_name: value of the job column to report on.
    """
    try:
        results = pd.read_csv('performance_results.csv', header=None)
    except (OSError, pd.errors.EmptyDataError):
        # No results file yet, or nothing has been written to it so far
        display(HTML(f"<h3>{job_name}</h3> None"))
        return

    results.columns = ['Job', 'Model', 'Dataset', 'MAPE']
    groups = results.groupby(by=['Job', 'Model', 'Dataset'], as_index=False)
    amounts = [(*group_key, len(instances)) for group_key, instances in groups]
    df = pd.DataFrame(amounts, columns=('Job', 'Model', 'Dataset', 'Amount'))

    # Filter info for just the job we are interested in
    job_rows = df[df['Job'] == job_name]

    table_html = "None"
    if not job_rows.empty:
        jobdf = job_rows.set_index(['Model', 'Dataset'])[['Amount']].unstack()
        # Remove redundant info: keep only the dataset level of the column
        # labels (unstack puts global and local in order)
        columns = [dataset for _, dataset in jobdf.columns]
        displaydf = pd.DataFrame(jobdf.values, index=jobdf.index, columns=columns)
        try:
            table_html = displaydf.style._repr_html_()
        except Exception:
            # Rendering is best effort; fall back to the placeholder text
            table_html = "None"

    display(HTML(f"<h3>{job_name}</h3>" + table_html))

### Amount of Conducted Evaluation Experiments Thus Far

In [9]:
# Show, for every job, how many evaluation results have been recorded so far
for job in jobs:
    display_evaluation_counts(job.name)

Unnamed: 0_level_0,global
Model,Unnamed: 1_level_1
BOM,300
C3O,300
Ernest,300
GBM,300
OGB,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300


### Calculating the Model Prediction Errors

In [10]:
def evaluate(model, X_train, X_test, y_train, y_test, error_metric):
    """Train a fresh instance of `model` and score it on the test data.

    Args:
        model: object with a `predictor` class and a `kwargs` dict used to
            instantiate it.
        X_train, y_train: data passed to the instance's `fit`.
        X_test: data passed to the instance's `predict`.
        y_test: true values compared against the predictions.
        error_metric: callable invoked as `error_metric(y_test, y_pred)`.

    Returns:
        Whatever `error_metric` returns for the test predictions.
    """
    estimator = model.predictor(**model.kwargs)
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)
    return error_metric(y_test, predictions)

In [11]:
def _append_evaluations(selected_jobs, make_splits, dataset_label, amount):
    """Evaluate every model on `amount` splits per job and append the errors.

    For each job in `selected_jobs`, `make_splits(job, splits=amount)` supplies
    the train/test splits. Every model in `models` is evaluated with MAPE on
    each split and one line "job,model,dataset_label,error" is appended to
    'performance_results.csv'. A progress bar advances once per split.
    """
    progress_bar = IntProgress(min=0, max=len(selected_jobs) * amount)
    display(progress_bar)
    with open('performance_results.csv', 'at') as f:
        for job in selected_jobs:
            for split in make_splits(job, splits=amount):
                for model in models:
                    error = evaluate(model, *split, MAPE)
                    f.write(f"{job.name},{model.name},{dataset_label},{error}\n")
                progress_bar.value += 1


def create_new_global_evaluations(amount):
    """Append `amount` new 'global' evaluations per job to the results file.

    Global splits are drawn from each job's complete dataset.
    """
    _append_evaluations(jobs, create_global_training_data, 'global', amount)


def create_new_local_evaluations(amount):
    """Append `amount` new 'local' evaluations per job to the results file.

    Local splits need grouping columns, so only jobs that have an entry in
    `common_features` take part (with the current definitions that is every
    job except the first one, 'Sort').
    """
    local_jobs = [job for job in jobs if job.name in common_features]
    _append_evaluations(local_jobs, create_local_training_data, 'local', amount)

### Conduct Additional Model Evaluation Experiments

In [12]:
# amount=0 adds no new results; raise it to append that many global splits per job
create_new_global_evaluations(amount=0)

IntProgress(value=0, max=0)

In [13]:
# amount=0 adds no new results; raise it to append that many local splits per job
create_new_local_evaluations(amount=0)

IntProgress(value=0, max=0)

### Display the Results

In [14]:
def display_job_evaluation(job_name):
    """Display the mean prediction error of every model for one job.

    Reads 'performance_results.csv' (headerless rows of
    job, model, dataset, MAPE), rounds each MAPE to four decimals, averages
    per (job, model, dataset) and renders, under an <h3> heading with the job
    name, a table with one row per model and one column per dataset. Values
    are formatted as percentages; the per-row minimum is printed in black,
    the per-column minimum gets an aquamarine background and the overall
    minimum is bold.

    Prints "No evaluation results thus far" and returns when the results file
    is missing or empty, or when it holds no rows for `job_name`.

    Args:
        job_name: value of the job column to report on.
    """
    try:
        results = pd.read_csv('performance_results.csv', header=None)
    except (OSError, pd.errors.EmptyDataError):
        print("No evaluation results thus far")
        return

    rres = [(job, model, ds, round(float(mape), 4))
            for job, model, ds, mape in results.values]
    df = pd.DataFrame(rres, columns=('Job', 'Model', 'Dataset', 'MAPE'))
    df = df.groupby(by=['Job', 'Model', 'Dataset'], as_index=False).mean()

    # Filter info for just the job we are interested in
    job_rows = df[df['Job'] == job_name]
    if job_rows.empty:
        # Nothing recorded for this job; there is no table to build or style
        print("No evaluation results thus far")
        return
    jobdf = job_rows.set_index(['Model', 'Dataset'])[['MAPE']].unstack()
    # Remove redundant info: keep only the dataset level of the column labels
    # (unstack puts global and local in order)
    columns = [dataset for _, dataset in jobdf.columns]
    displaydf = pd.DataFrame(jobdf.values, index=jobdf.index, columns=columns)

    # Style the output to highlight the important information
    def highlight_row_min(row):
        # Black text for the best dataset of a model, gray for the rest.
        # row.min() is computed once and ignores missing values.
        row_min = row.min()
        return ['color: black' if cell == row_min else 'color: dimgray' for cell in row]

    def highlight_min(data, color='aquamarine', bold=False):
        # Highlight the minimum in a Series or DataFrame:
        # `color` is used for 1-D input, `bold` for whole-table input.
        attr1 = f"background-color: {color}"
        attr2 = f"font-weight: {'bold' if bold else 'normal'}"
        if data.ndim == 1:  # Series from .apply(axis=0) or axis=1
            is_min = data == data.min()
            return [attr1 if v else '' for v in is_min]
        else:  # from .apply(axis=None)
            is_min = data == data.min().min()
            return pd.DataFrame(np.where(is_min, attr2, ''),
                                index=data.index, columns=data.columns)

    styled = displaydf.style.apply(highlight_row_min, axis=1)
    styled = styled.apply(highlight_min, axis=0, color='aquamarine')
    styled = styled.apply(highlight_min, axis=None, bold=True)
    table_html = styled.format("{:.2%}")._repr_html_()

    display(HTML(f"<h3>{job_name}</h3>" + table_html))

### Performance of the C3O Predictor and its Constituent Models

In [15]:
# Render the mean-MAPE comparison table of all models for every job
for job in jobs:
    display_job_evaluation(job.name)

Unnamed: 0_level_0,global
Model,Unnamed: 1_level_1
BOM,6.39%
C3O,2.61%
Ernest,5.82%
GBM,4.43%
OGB,2.61%


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,12.95%,6.45%
C3O,2.74%,5.05%
Ernest,39.38%,7.53%
GBM,2.74%,5.54%
OGB,9.35%,4.47%


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,12.66%,6.04%
C3O,2.25%,6.22%
Ernest,21.85%,10.00%
GBM,2.25%,6.89%
OGB,7.79%,6.54%


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,5.74%,5.51%
C3O,2.17%,5.22%
Ernest,15.31%,14.04%
GBM,2.17%,8.60%
OGB,5.50%,5.70%


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,15.08%,3.99%
C3O,2.77%,4.29%
Ernest,34.85%,10.93%
GBM,2.71%,5.25%
OGB,3.17%,4.05%


### Updated Number of Conducted Experiments per Evaluation Category

In [16]:
# Show the per-job result counts again, including any results appended since the first count
for job in jobs:
    display_evaluation_counts(job.name)

Unnamed: 0_level_0,global
Model,Unnamed: 1_level_1
BOM,300
C3O,300
Ernest,300
GBM,300
OGB,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300


Unnamed: 0_level_0,global,local
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
BOM,300,300
C3O,300,300
Ernest,300,300
GBM,300,300
OGB,300,300
