In [None]:
# Load the `watermark` IPython extension and print its report, tagged with the
# author name given via -a (the -u -d -v -m flags select the reported fields;
# see the watermark documentation for their exact meaning).
%load_ext watermark
%watermark -a 'Ouedraogo Clovis' -u -d -v -m

## Imports

In [None]:
import time
import warnings
from collections import Counter

import numpy as np
import pandas as pd
from joblib import dump
from scipy import stats
from sklearn.model_selection import GridSearchCV, cross_validate
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix
from sklearn.metrics import *

from aqosd_experiments.config import *
from aqosd_experiments.data import *
from aqosd_experiments.utils import *
from aqosd_experiments.plot import *
from aqosd_experiments.scorers import *
from osms import OverheadSensitiveMetricSelection

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 
warnings.filterwarnings("ignore", category=FutureWarning) 

## Load Config

In [None]:
# Persistence switch read by later cells through `if save:` guards.
save = True

## Load and prepare data

In [None]:
# Build the feature table (`metrics`) and the label table (`bottlenecks`) from the
# raw dataset, restricted to the hosts in HOST_LIST.
metrics, bottlenecks = import_and_prepare_data(RAW_DATASET_PATH, HOST_LIST)

print(
    'Shape of metrics : ', metrics.shape,
    '\t',
    'Shape of bottlenecks : ', bottlenecks.shape,
)
print('Label cardinality = %.5f \t Label density = %.5f' % (
    bottlenecks.sum(axis=1).mean(),   # row-wise label sum, averaged over rows
    bottlenecks.mean(axis=1).mean(),  # row-wise label mean, averaged over rows
))

In [None]:
# Keep the column labels as plain lists; later cells look host tags up in them.
metric_names = list(metrics.columns)
bottleneck_names = list(bottlenecks.columns)

print(metric_names)
print('-' * 100)
print(bottleneck_names)

In [None]:
# Rescale the metrics with a StandardScaler (MinMaxScaler is the noted alternative).
# NOTE(review): `metrics` is overwritten in place, so re-running this cell rescales
# already-scaled data.
metrics = scale_metrics(metrics, StandardScaler())

# Only the first split yielded by CV_2 is used as the train/test partition.
train_indexes, test_indexes = next(
    CV_2.split(metrics, bottlenecks)
)

In [None]:
# Slice features and labels positionally with the fold's row indexes.
X_train, X_test = metrics.iloc[train_indexes], metrics.iloc[test_indexes]
y_train, y_test = bottlenecks.iloc[train_indexes], bottlenecks.iloc[test_indexes]

# Displayed as the cell output: shapes in (X_train, y_train, X_test, y_test) order.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

In [None]:
# Replace each DataFrame by its underlying array via `.values`.
# NOTE(review): the names are rebound in place, so this cell cannot be re-run
# without first re-running the cell that builds the DataFrames.
X_train, y_train, X_test, y_test = (
    frame.values for frame in (X_train, y_train, X_test, y_test)
)

# Displayed as the cell output: the shapes after conversion.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

In [None]:
# Tabulate how often each order-2 label combination occurs in the train and test
# partitions (one row per partition; combinations absent from a partition show 0.0).
pd.DataFrame({
    split_name: Counter(
        str(combo)
        for sample in get_combination_wise_output_matrix(labels, order=2)
        for combo in sample
    )
    for split_name, labels in (('train', y_train), ('test', y_test))
}).T.fillna(0.0)

In [None]:
# Key used to look the classifier up in CLASSIFIERS further down.
selection = "LP [Neural Net]"

In [None]:
# Count the metrics of each host by searching for the host tag in the column name.
n_metric_SRV = len([name for name in metric_names if 'SRV.' in name])
n_metric_GW1 = len([name for name in metric_names if 'GW1.' in name])
n_metric_GW11 = len([name for name in metric_names if 'GW11.' in name])
n_metric_GW111 = len([name for name in metric_names if 'GW111.' in name])
print("n_metric : { SRV : ", n_metric_SRV, ", GW1 : ", n_metric_GW1, ", GW11 : ", n_metric_GW11,
      ", GW111 : ", n_metric_GW111, "}")

# Every metric of a host carries the same collection cost: 0.5 for SRV, then
# 1.0 / 1.5 / 2.0 for GW1 / GW11 / GW111.
SRV_costs = np.full(n_metric_SRV, 0.5)
GW1_costs = np.full(n_metric_GW1, 1.0)
GW11_costs = np.full(n_metric_GW11, 1.5)
GW111_costs = np.full(n_metric_GW111, 2.0)

# NOTE(review): the cost vector is built block by block, so it only lines up with
# the metric columns if they are ordered SRV, GW1, GW11, GW111 and each column
# matches exactly one tag — confirm against the data loader.
overheads = np.concatenate((SRV_costs, GW1_costs, GW11_costs, GW111_costs))

In [None]:
# Scenario 1: the overhead budget is set to the total cost of all metrics.
scn_name = 'scn_1'
overhead_budget = np.sum(overheads)

# The classifier must be bound to `best_clf`, the name the selector call below
# reads; binding it to any other name leaves `best_clf` undefined on a fresh kernel.
best_clf = CLASSIFIERS[selection]
osdms = OverheadSensitiveMetricSelection(best_clf, overheads=overheads, overhead_budget=overhead_budget,
                                         scoring=SCORING, verbose=2, test_indexes=test_indexes, n_jobs=-1)

# Fit on the full (scaled) data; the held-out rows are passed via `test_indexes` above.
start = time.time()
osdms.fit(metrics.values, bottlenecks.values, user_metric_names=metric_names)
# Elapsed wall time, reported in whole minutes (floor + 1).
print(5*'-'+'>','time:',(time.time()-start)//60+1,'minutes')

if save:
    # NOTE(review): unlike the CSV/PDF names below, this file name has no '_'
    # between the scenario name and the suffix ("scn_1OSDMS.joblib"). Left as-is
    # because other notebooks may load this exact path — confirm before renaming.
    dump(osdms, MODELS_PATH + scn_name + 'OSDMS.joblib')

print('best combination (Score: %.5f, numb : %d):\n%s' % (osdms.k_score_,len(osdms.k_metric_names_),
                                                           osdms.k_metric_names_))

# Plot the selection result and keep the table returned alongside the figure.
fig, df = plot_osdm(osdms)
if save:
    df.to_csv(MODELS_PATH + scn_name + '_metric_selection.csv', index=True)
    fig.savefig(FIG_PATH + scn_name + '_metric_selection.pdf', bbox_inches='tight')