In [1]:
#%load_ext watermark
#%watermark -a 'Ouedraogo Clovis' -u -d -v -m

## Imports

In [2]:
import time

import numpy as np
import pandas as pd
from joblib import dump
from sklearn.model_selection import GridSearchCV, cross_validate, train_test_split
from sklearn.preprocessing import MinMaxScaler

from aqosd_experiments.config import CLASSIFIERS, PARAM_GRIDS, RAW_DATASET_PATH, HOST_LIST, CV, MODELS_PATH, FIG_PATH
from aqosd_experiments.data import import_and_prepare_data, scale_metrics, over_sampling
from aqosd_experiments.plot import plot_number_of_instance, plot_osdm, plt_long_stats, plt_corr_metrics, \
    plot_multicollinear_metrics, plt_corr_bottlenecks, plt_all_data
from aqosd_experiments.scorers import process_score, SCORING,_hamming_loss_wrapper,_coverage_error_wrapper,\
    _label_ranking_loss_wrapper
from osms import OverheadSensitiveMetricSelection
# Kept last on purpose: a star import overrides any same-named symbol imported above it.
from sklearn.metrics import *

## Load Config

In [3]:
# List the str / int / tuple / OrderedDict variables currently defined in the namespace.
%whos str  int  tuple OrderedDict

Variable           Type           Data/Info
-------------------------------------------
CLASSIFIERS        OrderedDict    OrderedDict([('Extra Tree<...>ti Label kNN', MLkNN())])
FIG_PATH           str            C:/Users/couedrao/Pycharm<...>/../data/output/plotting/
HOST_LIST          tuple          n=4
MODELS_PATH        str            C:/Users/couedrao/Pycharm<...>data/output/saved_models/
RAW_DATASET_PATH   str            C:/Users/couedrao/Pycharm<...>ents/../data/raw_dataset/
SCORING            OrderedDict    OrderedDict([('subset acc<...>_scorer(zero_one_loss))])


In [4]:
# Optional lower-case aliases for the configuration constants (currently disabled):
# raw_dataset_path, host_list, models_path, fig_path = RAW_DATASET_PATH, HOST_LIST, MODELS_PATH, FIG_PATH
# classifiers, param_grids = CLASSIFIERS, PARAM_GRIDS
# scoring, cv = SCORING, CV

# NOTE(review): `save` is not read by any cell visible here; presumably it toggles
# writing results to disk -- confirm against the cells that consume it.
save = False

## Load and prepare data

In [5]:
# import_and_prepare_data is a project helper; it returns the two tables used by every later cell.
metrics, bottlenecks = import_and_prepare_data(RAW_DATASET_PATH, HOST_LIST)
print('Shape of metrics : ', metrics.shape, '\t', 'Shape of bottlenecks : ', bottlenecks.shape)

# Cardinality = mean of the per-row label sums; density = mean of the per-row label means.
label_cardinality = bottlenecks.sum(axis=1).mean()
label_density = bottlenecks.mean(axis=1).mean()
print('Label cardinality = %.5f \t Label density = %.5f' % (label_cardinality, label_density))

Shape of metrics :  (97343, 105) 	 Shape of bottlenecks :  (97343, 32)
Label cardinality = 1.96252 	 Label density = 0.06133


In [6]:
# Column labels as plain lists; later cells use them to size the network and to label the report.
metric_names = list(metrics.columns)
bottleneck_names = list(bottlenecks.columns)

print(metric_names)
print('-' * 100)
print(bottleneck_names)

['SRV./: Free inodes in %', 'SRV./: Space utilization', 'SRV./: Used space', 'SRV./boot: Free inodes in %', 'SRV./boot: Space utilization', 'SRV./boot: Used space', 'SRV.Available memory', 'SRV.Available memory in %', 'SRV.CPU idle time', 'SRV.CPU iowait time', 'SRV.CPU softirq time', 'SRV.CPU system time', 'SRV.CPU user time', 'SRV.CPU utilization', 'SRV.Context switches per second', 'SRV.Free swap space', 'SRV.Free swap space in %', 'SRV.Interface enp0s8: Bits received', 'SRV.Interface enp0s8: Bits sent', 'SRV.Interrupts per second', 'SRV.Load average (15m avg)', 'SRV.Load average (1m avg)', 'SRV.Load average (5m avg)', 'SRV.Memory utilization', 'SRV.Number of processes', 'SRV.Number of running processes', 'GW1./: Free inodes in %', 'GW1./: Space utilization', 'GW1./: Used space', 'GW1./boot: Free inodes in %', 'GW1./boot: Space utilization', 'GW1./boot: Used space', 'GW1.Available memory', 'GW1.Available memory in %', 'GW1.CPU idle time', 'GW1.CPU iowait time', 'GW1.CPU softirq time

In [7]:
# Preview the first five rows of the metrics table.
metrics.head(n=5)

Unnamed: 0_level_0,SRV./: Free inodes in %,SRV./: Space utilization,SRV./: Used space,SRV./boot: Free inodes in %,SRV./boot: Space utilization,SRV./boot: Used space,SRV.Available memory,SRV.Available memory in %,SRV.CPU idle time,SRV.CPU iowait time,...,GW111.Free swap space in %,GW111.Interface enp0s8: Bits received,GW111.Interface enp0s8: Bits sent,GW111.Interrupts per second,GW111.Load average (15m avg),GW111.Load average (1m avg),GW111.Load average (5m avg),GW111.Memory utilization,GW111.Number of processes,GW111.Number of running processes
cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-13 18:14:37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-12-13 18:14:38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-12-13 18:14:39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-12-13 18:14:40,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-12-13 18:14:41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Preview the first five rows of the bottleneck label table.
bottlenecks.head(n=5)

Unnamed: 0,GW1.cpu,GW1.diskio,GW1.diskspace,GW1.memory,GW1.network delay,GW1.network packet corrupt,GW1.network packet duplicate,GW1.network packet loss,GW11.cpu,GW11.diskio,...,GW111.network packet duplicate,GW111.network packet loss,SRV.cpu,SRV.diskio,SRV.diskspace,SRV.memory,SRV.network delay,SRV.network packet corrupt,SRV.network packet duplicate,SRV.network packet loss
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
import matplotlib.pyplot as plt
from tensorflow.python.keras.layers import Dense, LSTM, Conv1D
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from seglearn.pipe import Pype
import seglearn as sgl
from seglearn.split import TemporalKFold

# Network input / output sizes: one input channel per metric, one output unit per bottleneck label.
n_vars, n_classes = len(metric_names), len(bottleneck_names)


def crnn_model(width, n_vars=n_vars, n_classes=n_classes, conv_kernel_size=5, conv_filters=2, lstm_units=2):
    """Build and compile a Conv1D -> LSTM -> Dense network for multi-label prediction.

    Parameters
    ----------
    width : int
        Number of time steps in each input segment.
    n_vars : int
        Number of input variables per time step (defaults to the number of metrics).
    n_classes : int
        Number of output labels (defaults to the number of bottleneck labels).
    conv_kernel_size : int
        Requested temporal kernel size; clamped to ``width`` because a 'valid'
        convolution cannot use a kernel longer than its input.
    conv_filters : int
        Number of convolution filters.
    lstm_units : int
        Number of LSTM units.

    Returns
    -------
    A compiled Keras ``Sequential`` model.
    """
    input_shape = (width, n_vars)
    # Without the clamp, short windows (width < conv_kernel_size) give a non-positive output length.
    kernel_size = min(conv_kernel_size, width)

    model = Sequential()
    model.add(Conv1D(filters=conv_filters, kernel_size=kernel_size,
                     padding='valid', activation='relu', input_shape=input_shape))
    model.add(LSTM(units=lstm_units, dropout=0.1, recurrent_dropout=0.1))
    # Several labels can be active at once, so each output unit is an independent
    # sigmoid trained with binary cross-entropy; softmax + categorical cross-entropy
    # would force the outputs to compete as if exactly one label were active.
    model.add(Dense(n_classes, activation="sigmoid"))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
    
def y_func(y):
    """Sum ``y`` along axis 1 and cap every resulting entry at 1.

    The input is not modified; a new array holding the capped sums is returned.
    """
    totals = np.sum(y, axis=1)
    # Element-wise minimum with 1 leaves values <= 1 untouched and replaces larger ones by 1.
    return np.minimum(totals, 1)

# Hold-out split without shuffling: the last 25% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(metrics, bottlenecks, test_size=0.25, shuffle=False)

# NOTE(review): seglearn pipelines are documented as taking a collection of time series;
# confirm the DataFrames are accepted as-is, or wrap them (e.g. [X_train.values], [y_train.values]).

windows = [20]  # alternative widths: [1, 10, 15, 20]
for w in windows:
    start = time.time()  # reference point for the elapsed-time report below
    pipe = Pype([('seg', sgl.Segment(order='C', width=w, overlap=0, y_func=y_func)),
                 ('scaler', StandardScaler()),
                 ('crnn', KerasClassifier(build_fn=crnn_model, width=w, epochs=10, batch_size=256, verbose=0))])

    # The estimator to train is the pipeline built above.
    pipe.fit(X_train, y_train)
    # The 'seg' step transforms the target together with the data, so the raw y_test no
    # longer lines up with the predictions; transform_predict also returns the transformed target.
    y_test_seg, y_test_predict = pipe.transform_predict(X_test, y_test)
    # NOTE(review): check that KerasClassifier.predict yields a label-indicator matrix here;
    # the multi-label metrics below presumably expect one column per bottleneck -- confirm.
    print(classification_report(y_test_seg, y_test_predict, target_names=bottleneck_names))
    accuracy = accuracy_score(y_test_seg, y_test_predict)
    hloss    = _hamming_loss_wrapper(y_test_seg, y_test_predict)
    cerror   = _coverage_error_wrapper(y_test_seg, y_test_predict)
    rloss    = _label_ranking_loss_wrapper(y_test_seg, y_test_predict)
    print('>','time:',(time.time()-start)//60+1,'minutes')
    print('Accuracy:', format(accuracy, '.3f'),'Hamming loss:', format(hloss, '.3f'),
          'Coverage Error', format(cerror, '.3f'),'Ranking Loss', format(rloss, '.3f'))
    

In [2]:
# Window sizes ordered from the largest down to a single sample.
windows = sorted([1, 5, 10, 15, 20, 30, 60, 120], reverse=True)
windows

[120, 60, 30, 20, 15, 10, 5, 1]

## Scenario 4 : Limited budget (1/4 total overhead) + Overhead increases by a factor of 0.5 from SRV --> GW111

## Scenario 3 : Limited budget (1/2 total overhead) + Overhead increases by a factor of 0.5 from SRV --> GW111

## Scenario 2 : Limited budget (1/4 total overhead) + Same overhead for all metrics

## Scenario 1 : Limited budget (1/2 total overhead) + Same overhead for all metrics

## Scenario 0 : Unlimited budget + Same overhead for all metrics