In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_validate
from sklearn.preprocessing import MinMaxScaler

from aqosd_experiments.config import CLASSIFIERS, PARAM_GRIDS, RAW_DATASET_PATH, HOST_LIST, CV, MODELS_PATH
from aqosd_experiments.data import import_and_prepare_data, scale_X, over_sampling
from aqosd_experiments.plot import plot_number_of_instance, plot_osdm
from aqosd_experiments.scorers import process_score, SCORING
from osms import OverheadSensitiveMetricSelection

In [2]:
# Lower-case notebook aliases for the project configuration constants.
raw_dataset_path = RAW_DATASET_PATH  # passed to import_and_prepare_data below
host_list = HOST_LIST                # passed to import_and_prepare_data below
models_path = MODELS_PATH            # prefix used when writing result files

In [3]:
# Model-selection settings taken from the project configuration / scorers modules.
classifiers = CLASSIFIERS   # indexed by classifier name (e.g. "Random Forest") further down
param_grids = PARAM_GRIDS
scoring = SCORING
cv = CV                     # forwarded as `cv=` to the metric-selection step

In [4]:
# Build X and y from the raw dataset for the configured hosts
# (later cells treat X's columns as "metrics" and y's columns as "bottlenecks").
X, y = import_and_prepare_data(raw_dataset_path, host_list)

# Sanity check: report both shapes on a single line.
print('Shape of X : ', X.shape, '\t' * 2, 'Shape of y : ', y.shape)

Shape of X :  (4112, 64) 		 Shape of y :  (4112, 32)


In [5]:
# NOTE(review): this cell rebinds X and y, so running it twice without
# re-running the loading cell applies scaling and over-sampling again.

# Rescale X through the project helper, using a fresh MinMaxScaler.
X = scale_X(X, MinMaxScaler())

# Third argument of over_sampling is 10% of the (scaled) row count, truncated
# to an int; its exact meaning is defined by over_sampling — TODO confirm.
sampling_ratio = 0.1
n_rows = X.shape[0]
X, y = over_sampling(X, y, int(n_rows * sampling_ratio))

# Report the shapes after resampling.
print('Shape of X : ', X.shape, '\t' * 2, 'Shape of y : ', y.shape)

MLSMOTE : Synthetic data have been added to the train set
Resampled
Shape of X :  (6315, 63) 		 Shape of y :  (6315, 32)


In [6]:
# Column labels of X and y as plain Python lists.
metrics = list(X.columns)
bottlenecks = list(y.columns)

# Show both lists, separated by a 100-character dashed rule.
print(metrics, 100 * '-', bottlenecks, sep='\n')

['/: Space utilization_SRV', 'Available memory in %_SRV', 'CPU idle time_SRV', 'CPU iowait time_SRV', 'CPU nice time_SRV', 'CPU softirq time_SRV', 'CPU system time_SRV', 'CPU user time_SRV', 'CPU utilization_SRV', 'Context switches per second_SRV', 'Free swap space in %_SRV', 'Interrupts per second_SRV', 'Load average (15m avg)_SRV', 'Load average (1m avg)_SRV', 'Load average (5m avg)_SRV', 'Memory utilization_SRV', '/: Space utilization_GW1', 'Available memory in %_GW1', 'CPU idle time_GW1', 'CPU iowait time_GW1', 'CPU nice time_GW1', 'CPU softirq time_GW1', 'CPU system time_GW1', 'CPU user time_GW1', 'CPU utilization_GW1', 'Context switches per second_GW1', 'Free swap space in %_GW1', 'Interrupts per second_GW1', 'Load average (15m avg)_GW1', 'Load average (1m avg)_GW1', 'Load average (5m avg)_GW1', 'Memory utilization_GW1', '/: Space utilization_GW11', 'Available memory in %_GW11', 'CPU idle time_GW11', 'CPU iowait time_GW11', 'CPU nice time_GW11', 'CPU softirq time_GW11', 'CPU syst

In [7]:
# Estimator handed to the metric-selection step below.
best_clf = classifiers["Random Forest"]

# Number of columns in X; used as the upper end of the k_metrics range.
maximun = X.shape[1]

In [8]:
# Configure the overhead-sensitive metric selection around the chosen classifier.
# k_metrics=(1, maximun) presumably bounds the size of the selected subset
# between 1 and all columns of X — TODO confirm against the osms package.
# NOTE(review): the output saved with this cell comes from a run that was
# interrupted from the keyboard and ended in `KeyError: None`; re-run it to
# completion before relying on `osdms` in later cells.
osdms = OverheadSensitiveMetricSelection(
    best_clf,
    verbose=2,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
    k_metrics=(1, maximun),
)

# Fit on the raw arrays, passing the column labels as custom metric names.
osdms.fit(X.values, y.values, custom_metric_names=metrics)

# Persisting the fitted selector is currently disabled (`dump` is not imported
# in the visible import cell):
#dump(osdms, models_path + 'OSDMS.joblib')

# Report the score and the indices exposed by the fitted selector.
best_summary = 'best combination (Score: %.3f): %s\n' % (osdms.k_score_, osdms.k_metric_idx_)
print(best_summary)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.

STOPPING EARLY DUE TO KEYBOARD INTERRUPT...

KeyError: None

In [None]:
# Plot the selection results; plot_osdm returns a figure and a table.
fig, df = plot_osdm(osdms)

# The target path is built by plain string concatenation, so models_path is
# expected to already end with a path separator — TODO confirm in the config.
feature_selection_csv = models_path + 'feature_selection.csv'
df.to_csv(feature_selection_csv, index=True)