## Imports

In [1]:
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, cross_validate
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from joblib import dump
from collections import Counter
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.metrics import *

from aqosd_experiments.config import *
from aqosd_experiments.data import *
from aqosd_experiments.utils import *
from aqosd_experiments.plot import *
from aqosd_experiments.scorers import *
from osms import OverheadSensitiveMetricSelection

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 
warnings.filterwarnings("ignore", category=FutureWarning) 

## Load Config

In [2]:
# When True, the scenario cells below write the fitted selector (joblib),
# the selection table (CSV) and the figure (PDF) to disk.
save = True

## Load and prepare data

In [3]:
# Load the metric frame and the bottleneck (label) frame through the project helper.
metrics, bottlenecks = import_and_prepare_data(RAW_DATASET_PATH, HOST_LIST)

# Report both shapes on one line.
print(
    'Shape of metrics : ', metrics.shape,
    '\t',
    'Shape of bottlenecks : ', bottlenecks.shape,
)

# Label cardinality: mean of the row-wise label sums.
# Label density: mean of the row-wise label means.
print(
    'Label cardinality = %.5f \t Label density = %.5f'
    % (
        bottlenecks.sum(axis=1).mean(),
        bottlenecks.mean(axis=1).mean(),
    )
)

2020-12-13 18:14:37 2020-12-14 21:16:59
Shape of metrics :  (16321, 104) 	 Shape of bottlenecks :  (16321, 32)
Label cardinality = 2.02984 	 Label density = 0.06343


In [4]:
# Keep the column labels as plain lists; later cells pass them around by name.
metric_names = list(metrics.columns)
bottleneck_names = list(bottlenecks.columns)

print(metric_names)
print('-' * 100)  # visual separator between the two listings
print(bottleneck_names)

['SRV./: Free inodes in %', 'SRV./: Space utilization', 'SRV./: Used space', 'SRV./boot: Free inodes in %', 'SRV./boot: Space utilization', 'SRV./boot: Used space', 'SRV.Available memory', 'SRV.Available memory in %', 'SRV.CPU idle time', 'SRV.CPU iowait time', 'SRV.CPU softirq time', 'SRV.CPU system time', 'SRV.CPU user time', 'SRV.CPU utilization', 'SRV.Context switches per second', 'SRV.Free swap space', 'SRV.Free swap space in %', 'SRV.Interface enp0s8: Bits received', 'SRV.Interface enp0s8: Bits sent', 'SRV.Interrupts per second', 'SRV.Load average (15m avg)', 'SRV.Load average (1m avg)', 'SRV.Load average (5m avg)', 'SRV.Memory utilization', 'SRV.Number of processes', 'SRV.Number of running processes', 'GW1./: Free inodes in %', 'GW1./: Space utilization', 'GW1./: Used space', 'GW1./boot: Free inodes in %', 'GW1./boot: Space utilization', 'GW1./boot: Used space', 'GW1.Available memory', 'GW1.Available memory in %', 'GW1.CPU idle time', 'GW1.CPU iowait time', 'GW1.CPU softirq time

In [5]:
# Rescale the metric columns with a StandardScaler (alternative: MinMaxScaler).
# NOTE(review): scaling happens before the train/test split below; whether the
# scaler is fit on all rows depends on `scale_metrics` — confirm.
metrics = scale_metrics(metrics, StandardScaler())

# Take only the first (train, test) index pair produced by CV_2.
first_split = next(CV_2.split(metrics, bottlenecks))
train_indexes, test_indexes = first_split

In [6]:
# Positional row selection of the train and test partitions.
X_train = metrics.iloc[train_indexes, :]
y_train = bottlenecks.iloc[train_indexes, :]
X_test = metrics.iloc[test_indexes, :]
y_test = bottlenecks.iloc[test_indexes, :]

# Display the four shapes as the cell output.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((13060, 104), (13060, 32), (3261, 104), (3261, 32))

In [7]:
# Replace the DataFrames with their underlying NumPy arrays.
# NOTE: this cell is not re-runnable on its own, since ndarrays have no `.values`.
X_train, X_test = X_train.values, X_test.values
y_train, y_test = y_train.values, y_test.values

# Display the four shapes as the cell output.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((13060, 104), (13060, 32), (3261, 104), (3261, 32))

In [8]:
# Count every order-2 label combination in the train and test label matrices
# and show the counts side by side (one row per split, missing pairs as 0.0).
pd.DataFrame(
    {
        split_name: Counter(
            str(pair)
            for sample_pairs in get_combination_wise_output_matrix(labels, order=2)
            for pair in sample_pairs
        )
        for split_name, labels in (('train', y_train), ('test', y_test))
    }
).T.fillna(0.0)

Unnamed: 0,"(19, 19)","(1, 1)","(1, 19)","(3, 19)","(3, 3)","(30, 30)","(3, 30)","(11, 30)","(11, 11)","(4, 4)",...,"(1, 23)","(16, 28)","(14, 28)","(5, 24)","(0, 23)","(5, 26)","(1, 7)","(13, 30)","(6, 30)","(7, 31)"
train,1020.0,737.0,58.0,47.0,954.0,721.0,79.0,61.0,1168.0,860.0,...,24.0,29.0,8.0,16.0,9.0,11.0,15.0,18.0,5.0,10.0
test,254.0,184.0,15.0,11.0,242.0,180.0,20.0,15.0,292.0,215.0,...,6.0,7.0,2.0,4.0,3.0,2.0,4.0,5.0,1.0,3.0


In [9]:
#compute_measure_per_label(results, y_test, labels)

In [10]:
#fig = perf_viz(results, y_test)

In [11]:
# Key into CLASSIFIERS naming the estimator used by every scenario below.
selection = "ML-kNN"

In [12]:
# Number of metric columns whose name contains each host tag.
# The tags are matched as substrings, exactly one count per tag, in this order.
n_metric_SRV, n_metric_GW1, n_metric_GW11, n_metric_GW111 = (
    sum(host_tag in name for name in metric_names)
    for host_tag in ('SRV.', 'GW1.', 'GW11.', 'GW111.')
)

print(
    "n_metric : { SRV : ", n_metric_SRV,
    ", GW1 : ", n_metric_GW1,
    ", GW11 : ", n_metric_GW11,
    ", GW111 : ", n_metric_GW111,
    "}",
)

n_metric : { SRV :  26 , GW1 :  26 , GW11 :  26 , GW111 :  26 }


## Scenario 4 : Limited budget (1/4 total overhead) + Per-metric overhead increases in steps of 0.5 from SRV --> GW111 (0.5, 1.0, 1.5, 2.0)

In [None]:
# Scenario 4: per-metric overhead rises from 0.5 (SRV) to 2.0 (GW111);
# the budget is the floor of one quarter of the summed overhead.
scn_name = 'scn_4'

# One constant cost vector per host.
# NOTE(review): assumes the metric columns are grouped by host in the order
# SRV, GW1, GW11, GW111 — confirm against `metric_names`.
SRV_costs = np.full(n_metric_SRV, 0.5)
GW1_costs = np.full(n_metric_GW1, 1.0)
GW11_costs = np.full(n_metric_GW11, 1.5)
GW111_costs = np.full(n_metric_GW111, 2.0)
overheads = np.concatenate((SRV_costs, GW1_costs, GW11_costs, GW111_costs))
overhead_budget = overheads.sum() // 4

best_clf = CLASSIFIERS[selection]
osdms = OverheadSensitiveMetricSelection(
    best_clf,
    overheads=overheads,
    overhead_budget=overhead_budget,
    scoring=SCORING,
    verbose=2,
    cv=CV,
    n_jobs=-1,
)

# Fit on the full (scaled) data and report the wall time, rounded up to minutes.
start = time.time()
osdms.fit(metrics.values, bottlenecks.values, user_metric_names=metric_names)
print('-' * 5 + '>', 'time:', (time.time() - start) // 60 + 1, 'minutes')

if save:
    # NOTE(review): no '_' between the scenario name and 'OSDMS', unlike the
    # CSV/PDF names below; kept as-is so existing artefact paths stay valid.
    dump(osdms, MODELS_PATH + scn_name + 'OSDMS.joblib')

print(
    'best combination (Score: %.5f, numb : %d):\n%s'
    % (osdms.k_score_, len(osdms.k_metric_names_), osdms.k_metric_names_)
)

fig, df = plot_osdm(osdms)
if save:
    df.to_csv(MODELS_PATH + scn_name + '_metric_selection.csv', index=True)
    fig.savefig(FIG_PATH + scn_name + '_metric_selection.pdf', bbox_inches='tight')

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


## Scenario 3 : Limited budget (1/2 total overhead) + Per-metric overhead increases in steps of 0.5 from SRV --> GW111 (0.5, 1.0, 1.5, 2.0)

In [None]:
# Scenario 3: per-metric overhead rises from 0.5 (SRV) to 2.0 (GW111);
# the budget is the floor of one half of the summed overhead.
scn_name = 'scn_3'

# One constant cost vector per host.
# NOTE(review): assumes the metric columns are grouped by host in the order
# SRV, GW1, GW11, GW111 — confirm against `metric_names`.
SRV_costs = np.full(n_metric_SRV, 0.5)
GW1_costs = np.full(n_metric_GW1, 1.0)
GW11_costs = np.full(n_metric_GW11, 1.5)
GW111_costs = np.full(n_metric_GW111, 2.0)
overheads = np.concatenate((SRV_costs, GW1_costs, GW11_costs, GW111_costs))
overhead_budget = overheads.sum() // 2

best_clf = CLASSIFIERS[selection]
osdms = OverheadSensitiveMetricSelection(
    best_clf,
    overheads=overheads,
    overhead_budget=overhead_budget,
    scoring=SCORING,
    verbose=2,
    cv=CV,
    n_jobs=-1,
)

# Fit on the full (scaled) data and report the wall time, rounded up to minutes.
start = time.time()
osdms.fit(metrics.values, bottlenecks.values, user_metric_names=metric_names)
print('-' * 5 + '>', 'time:', (time.time() - start) // 60 + 1, 'minutes')

if save:
    # NOTE(review): no '_' between the scenario name and 'OSDMS', unlike the
    # CSV/PDF names below; kept as-is so existing artefact paths stay valid.
    dump(osdms, MODELS_PATH + scn_name + 'OSDMS.joblib')

print(
    'best combination (Score: %.5f, numb : %d):\n%s'
    % (osdms.k_score_, len(osdms.k_metric_names_), osdms.k_metric_names_)
)

fig, df = plot_osdm(osdms)
if save:
    df.to_csv(MODELS_PATH + scn_name + '_metric_selection.csv', index=True)
    fig.savefig(FIG_PATH + scn_name + '_metric_selection.pdf', bbox_inches='tight')

## Scenario 2 : Limited budget (1/4 total overhead) + Same overhead for all metrics

In [None]:
# Scenario 2: every metric has the same overhead (1.0); the budget is the
# floor of one quarter of the summed overhead.
scn_name = 'scn_2'

# One constant cost vector per host, concatenated into a single overhead vector.
SRV_costs = 1.0 * np.ones(n_metric_SRV)
GW1_costs = 1.0 * np.ones(n_metric_GW1)
GW11_costs = 1.0 * np.ones(n_metric_GW11)
GW111_costs = 1.0 * np.ones(n_metric_GW111)
overheads = np.concatenate([SRV_costs, GW1_costs, GW11_costs, GW111_costs])
overhead_budget = np.sum(overheads) // 4

# Fix: this was assigned to the misspelled name `bbest_clf`, so the selector
# below silently used whatever `best_clf` an earlier cell had left in the
# kernel (or raised NameError when this cell ran first).
best_clf = CLASSIFIERS[selection]
osdms = OverheadSensitiveMetricSelection(best_clf, overheads=overheads, overhead_budget=overhead_budget,
                                         scoring=SCORING, verbose=2, cv=CV, n_jobs=-1)

# Fit on the full (scaled) data and report the wall time, rounded up to minutes.
start = time.time()
osdms.fit(metrics.values, bottlenecks.values, user_metric_names=metric_names)
print(5*'-'+'>', 'time:', (time.time()-start)//60+1, 'minutes')

if save:
    # NOTE(review): no '_' between the scenario name and 'OSDMS', unlike the
    # CSV/PDF names below; kept as-is so existing artefact paths stay valid.
    dump(osdms, MODELS_PATH + scn_name + 'OSDMS.joblib')

print('best combination (Score: %.5f, numb : %d):\n%s' % (osdms.k_score_, len(osdms.k_metric_names_),
                                                           osdms.k_metric_names_))

fig, df = plot_osdm(osdms)
if save:
    df.to_csv(MODELS_PATH + scn_name + '_metric_selection.csv', index=True)
    fig.savefig(FIG_PATH + scn_name + '_metric_selection.pdf', bbox_inches='tight')

## Scenario 1 : Limited budget (1/2 total overhead) + Same overhead for all metrics

In [None]:
# Scenario 1: every metric has the same overhead (1.0); the budget is the
# floor of one half of the summed overhead.
scn_name = 'scn_1'

# One constant cost vector per host, concatenated into a single overhead vector.
SRV_costs = np.full(n_metric_SRV, 1.0)
GW1_costs = np.full(n_metric_GW1, 1.0)
GW11_costs = np.full(n_metric_GW11, 1.0)
GW111_costs = np.full(n_metric_GW111, 1.0)
overheads = np.concatenate((SRV_costs, GW1_costs, GW11_costs, GW111_costs))
overhead_budget = overheads.sum() // 2

best_clf = CLASSIFIERS[selection]
osdms = OverheadSensitiveMetricSelection(
    best_clf,
    overheads=overheads,
    overhead_budget=overhead_budget,
    scoring=SCORING,
    verbose=2,
    cv=CV,
    n_jobs=-1,
)

# Fit on the full (scaled) data and report the wall time, rounded up to minutes.
start = time.time()
osdms.fit(metrics.values, bottlenecks.values, user_metric_names=metric_names)
print('-' * 5 + '>', 'time:', (time.time() - start) // 60 + 1, 'minutes')

if save:
    # NOTE(review): no '_' between the scenario name and 'OSDMS', unlike the
    # CSV/PDF names below; kept as-is so existing artefact paths stay valid.
    dump(osdms, MODELS_PATH + scn_name + 'OSDMS.joblib')

print(
    'best combination (Score: %.5f, numb : %d):\n%s'
    % (osdms.k_score_, len(osdms.k_metric_names_), osdms.k_metric_names_)
)

fig, df = plot_osdm(osdms)
if save:
    df.to_csv(MODELS_PATH + scn_name + '_metric_selection.csv', index=True)
    fig.savefig(FIG_PATH + scn_name + '_metric_selection.pdf', bbox_inches='tight')

## Scenario 0 : Unlimited budget + Same overhead for all metrics

In [None]:
# Scenario 0: every metric has the same overhead (1.0) and the budget equals
# the full summed overhead, i.e. the budget does not constrain the selection.
scn_name = 'scn_0'

# One constant cost vector per host, concatenated into a single overhead vector.
SRV_costs = np.full(n_metric_SRV, 1.0)
GW1_costs = np.full(n_metric_GW1, 1.0)
GW11_costs = np.full(n_metric_GW11, 1.0)
GW111_costs = np.full(n_metric_GW111, 1.0)
overheads = np.concatenate((SRV_costs, GW1_costs, GW11_costs, GW111_costs))
overhead_budget = overheads.sum()

best_clf = CLASSIFIERS[selection]
# NOTE(review): unlike the other scenarios this one passes `test_indexes`
# instead of `cv=CV` — presumably a fixed hold-out evaluation; confirm intent.
osdms = OverheadSensitiveMetricSelection(
    best_clf,
    overheads=overheads,
    overhead_budget=overhead_budget,
    scoring=SCORING,
    verbose=2,
    test_indexes=test_indexes,
    n_jobs=-1,
)

# Fit on the full (scaled) data and report the wall time, rounded up to minutes.
start = time.time()
osdms.fit(metrics.values, bottlenecks.values, user_metric_names=metric_names)
print('-' * 5 + '>', 'time:', (time.time() - start) // 60 + 1, 'minutes')

if save:
    # NOTE(review): no '_' between the scenario name and 'OSDMS', unlike the
    # CSV/PDF names below; kept as-is so existing artefact paths stay valid.
    dump(osdms, MODELS_PATH + scn_name + 'OSDMS.joblib')

print(
    'best combination (Score: %.5f, numb : %d):\n%s'
    % (osdms.k_score_, len(osdms.k_metric_names_), osdms.k_metric_names_)
)

fig, df = plot_osdm(osdms)
if save:
    df.to_csv(MODELS_PATH + scn_name + '_metric_selection.csv', index=True)
    fig.savefig(FIG_PATH + scn_name + '_metric_selection.pdf', bbox_inches='tight')