In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

from ipynb.fs.full.Cert_Aux_Functions2 import *

# Importa a biblioteca pandas 
import pandas as pd

# Importa datetime e timedelta para verificar se há gaps de tempo nos datasets preparados
from datetime import datetime, timedelta

# Importa a biblioteca os
import os
from pathlib import Path

## Bibliotecas sklearn
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder, LabelEncoder
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler, FunctionTransformer
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.impute import SimpleImputer

# Using Kernel PCA
from sklearn.decomposition import PCA, KernelPCA

# Using Gaussian Mixtures
from sklearn.mixture import GaussianMixture

# One Class SVM
# https://scikit-learn.org/stable/auto_examples/svm/plot_oneclass.html
from sklearn import svm
from sklearn import linear_model

#Para as figuras
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

#Importa bibliotecas Numpy
import numpy as np

# Importa as bibliotecas para os cálculos do desvio padrao
from scipy.stats import norm
from scipy.stats import chi2
import statistics

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
# Compare the numeric (major, minor) pair instead of the raw version string:
# a lexicographic string comparison would wrongly reject e.g. "10.0" ("1" < "2").
assert tuple(int(part) for part in tf.__version__.split(".")[:2]) >= (2, 0)

from keras import optimizers, Sequential
from keras.models import Model
#from keras.utils import plot_model
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed
from keras.callbacks import ModelCheckpoint, TensorBoard

from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

In [2]:
# Logon + HTTP + USB + Device dataset, ordered by date with a fresh 0..n-1 index.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df_lhud_1hora = (
    pd.read_pickle("df_lhud_1hora_file.pkl")
    .sort_values('date', ascending=True)
    .reset_index(drop=True)
)

## First four months of data: every row dated up to '2010-05-02'
df_lhud_1hora4m = df_lhud_1hora.loc[df_lhud_1hora['date'] <= '2010-05-02']

# Index label of the last row of the four-month frame, and of the last row
# dated up to '2010-09-02'.
train_index = df_lhud_1hora4m.index[-1]
test_index = df_lhud_1hora.loc[df_lhud_1hora['date'] <= '2010-09-02'].index[-1]


## Test set
# NOTE(review): the slice starts at train_index, which is also the last row of
# df_lhud_1hora4m, and stops before test_index -- confirm this one-row overlap
# and the excluded final row are intended.
df_lhud_1hora_test = df_lhud_1hora.iloc[train_index:test_index]

### Transformations

# Numeric feature columns of this dataset
lhud_numeric_features = [
    'logon', 'logoff',
    'down', 'up', 'vis',
    'conn', 'disc',
    'trm', 'frm',
    'open', 'write', 'copy', 'delete',
]

# Numeric columns are rescaled to the 0..1 range.
# Commented-out alternatives in earlier revisions: StandardScaler()
# (removes the mean and scales to unit variance) and SimpleImputer().
numeric_transformer = MinMaxScaler()

# Fixed category vocabularies: hours 0..23, days of week 0..6, and the users
# present in df_lhud_1hora4m.
hour_categories = np.arange(24)
dow_categories = np.arange(7)
user_categories = df_lhud_1hora4m['user'].unique()

# Categorical feature names, common to all datasets.
# ('user' used to be part of this list; it is now encoded on its own below.)
categorical_features = ['hour', 'dow']
categorical_transformer = OneHotEncoder(categories=[hour_categories, dow_categories])

# Two encodings for the user column: ordinal codes, or one-hot ("_wd").
# NOTE(review): the category list comes from df_lhud_1hora4m only; presumably
# every user in later data also appears there -- verify.
user_feature = ['user']
user_transformer = OrdinalEncoder(categories=[user_categories])
user_transformer_wd = OneHotEncoder(categories=[user_categories])

# Full preprocessor: numeric + hour/dow + user.
# The user column was switched to the one-hot ("_wd") encoder to test it.
lhud_preprocessor = ColumnTransformer(transformers=[
    ('lhud_num', numeric_transformer, lhud_numeric_features),
    ('lhud_cat', categorical_transformer, categorical_features),
    ('lhud_user', user_transformer_wd, user_feature),
])

# "Deep" preprocessor: numeric columns only.
lhud_preprocessor_deep = ColumnTransformer(transformers=[
    ('lhud_num', numeric_transformer, lhud_numeric_features),
])

# "Wide" preprocessor: one-hot hour/dow plus one-hot user.
lhud_preprocessor_wide = ColumnTransformer(transformers=[
    ('lhud_cat', categorical_transformer, categorical_features),
    ('lhud_user_wd', user_transformer_wd, user_feature),
])

# Simple transformations, all feature groups together.
# The preprocessor is fitted on the four-month frame only and then applied
# to the test frame and to the complete frame.
columns = [*lhud_numeric_features, *categorical_features, *user_feature]
trans_lhud_4m = lhud_preprocessor.fit_transform(df_lhud_1hora4m.loc[:, columns])
trans_lhud_test = lhud_preprocessor.transform(df_lhud_1hora_test.loc[:, columns])
trans_lhud = lhud_preprocessor.transform(df_lhud_1hora.loc[:, columns])
# Dense copies of the first 13 columns (the numeric transformer is listed first
# in lhud_preprocessor and has 13 features).
trans_lhud_4m_13 = trans_lhud_4m[:, :13].toarray()
trans_lhud_test_13 = trans_lhud_test[:, :13].toarray()

# "Deep" transformations: numeric features only
columns_deep = lhud_numeric_features
trans_lhud_4m_deep = lhud_preprocessor_deep.fit_transform(df_lhud_1hora4m.loc[:, columns_deep])
trans_lhud_test_deep = lhud_preprocessor_deep.transform(df_lhud_1hora_test.loc[:, columns_deep])

# "Wide" transformations: categorical + user features
columns_wide = [*categorical_features, *user_feature]
trans_lhud_4m_wide = lhud_preprocessor_wide.fit_transform(df_lhud_1hora4m.loc[:, columns_wide])
trans_lhud_test_wide = lhud_preprocessor_wide.transform(df_lhud_1hora_test.loc[:, columns_wide])


In [27]:
# Sanity check: container type of the transformed four-month matrix and the
# shapes of the dense 13-column test / train arrays.
type(trans_lhud_4m), trans_lhud_test_13.shape, trans_lhud_4m_13.shape

(scipy.sparse.csr.csr_matrix, (3278931, 13), (3294562, 13))

In [3]:
### Load the files and data for insider 1

# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
acm2278_test = pd.read_pickle("acm2278_test.pkl")
acm2278_test_full = pd.read_pickle("acm2278_test_full.pkl")

user = 'ACM2278'


def _load_snorkel_labels(path):
    """Read the labels stored under HDF key 'df' in `path` and add an 'anom' column.

    'anom' is -1 where column 0 equals 1 and 1 everywhere else.
    """
    labels = pd.read_hdf(path, 'df')
    labels['anom'] = np.where(labels[0] == 1, -1, 1)
    return labels


def _anom_shapes(labels):
    """Return the shapes of the (anom == -1, anom == 1) subsets of `labels`."""
    return labels[labels['anom'] == -1].shape, labels[labels['anom'] == 1].shape


### Labels predicted by Snorkel *** adjusted
labels_pd = _load_snorkel_labels("labels_pd.hdf")
# Only the last expression of a cell is rendered, so the summary for the
# adjusted labels has to be printed explicitly to be visible at all.
print("labels_pd:", _anom_shapes(labels_pd))

### Labels predicted by Snorkel *** general
labels_g_pd = _load_snorkel_labels("labels_g_pd.hdf")
_anom_shapes(labels_g_pd)

((6229, 2), (3272702, 2))

In [37]:
### Less efficient variant
# For the non-RNN model with 4044 inputs


def _expand_last_column(row):
    """Replace the last element of `row` by its depth-4000 one-hot vector."""
    leading = tf.cast(row[:-1], tf.float32)
    one_hot_tail = tf.one_hot(tf.cast(row[-1], tf.int32), depth=4000)
    return tf.concat([leading, one_hot_tail], -1)


def _as_input_target(features):
    """Pair each feature vector with its first 13 elements as the target."""
    return features, features[:13]


# NOTE(review): one-hot encoding the last column presumably expects it to hold an
# integer user code (the OrdinalEncoder variant), whereas lhud_preprocessor
# currently uses the one-hot user encoder -- confirm before re-running this cell.
ds = tf.data.Dataset.from_tensor_slices(trans_lhud[0:test_index])
ds = ds.map(_expand_last_column, num_parallel_calls=tf.data.AUTOTUNE)
ds = ds.map(_as_input_target, num_parallel_calls=tf.data.AUTOTUNE)

# The first train_index elements are the training stream, the remainder the test stream.
ds_train = ds.take(train_index).batch(1024).cache().prefetch(1)
ds_test = ds.skip(train_index).batch(1024).cache().prefetch(1)

In [7]:
# Inspect the structure (shapes and dtypes) of one batch of the training dataset.
ds_train.element_spec

(TensorSpec(shape=(None, 4044), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 13), dtype=tf.float32, name=None))

In [None]:
######## - start of the SPARSE section

In [4]:
## Using SPARSE tensors - more efficient
# For the non-RNN model with 4044 inputs


def _zip_input_output(sparse_features, dense_targets):
    """Zip features and targets into ({'input': ...}, {'output': ...}) elements.

    Returns the zipped dataset together with the targets-only dataset.
    convert_sparse_matrix_to_sparse_tensor is not defined in this notebook;
    presumably it comes from the Cert_Aux_Functions2 star import.
    """
    inputs = tf.data.Dataset.from_tensor_slices(
        convert_sparse_matrix_to_sparse_tensor(sparse_features))
    targets = tf.data.Dataset.from_tensor_slices(dense_targets)
    zipped = tf.data.Dataset.zip(({"input": inputs}, {'output': targets}))
    return zipped, targets


ds_train, output_train = _zip_input_output(trans_lhud_4m, trans_lhud_4m_13)
ds_test, output_test = _zip_input_output(trans_lhud_test, trans_lhud_test_13)

# Batches of 1024 elements, cached after the first pass, prefetching 4 batches.
ds_train = ds_train.batch(1024).cache().prefetch(4)
ds_test = ds_test.batch(1024).cache().prefetch(4)

ds_train.element_spec, ds_test.element_spec

(({'input': SparseTensorSpec(TensorShape([None, 4044]), tf.float64)},
  {'output': TensorSpec(shape=(None, 13), dtype=tf.float64, name=None)}),
 ({'input': SparseTensorSpec(TensorShape([None, 4044]), tf.float64)},
  {'output': TensorSpec(shape=(None, 13), dtype=tf.float64, name=None)}))

In [None]:
##### - end of the SPARSE section

In [5]:
# Empty results table; objective() appends one row per evaluated configuration.
scores_df = pd.DataFrame(
    columns=[
        "Model", "Params",
        "Accuracy", "Precision", "Recall", "F1-Score", "ROC-AUC",
        "CM",
    ]
)
# Row label that objective() will use for its next scores_df entry.
index = 0
scores_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Model      0 non-null      object
 1   Params     0 non-null      object
 2   Accuracy   0 non-null      object
 3   Precision  0 non-null      object
 4   Recall     0 non-null      object
 5   F1-Score   0 non-null      object
 6   ROC-AUC    0 non-null      object
 7   CM         0 non-null      object
dtypes: object(8)
memory usage: 0.0+ bytes


In [10]:
import warnings
# Silence every warning category from here on.
# NOTE(review): this also hides deprecation and numerical warnings raised by the
# libraries used below; consider narrowing the filter.
warnings.filterwarnings("ignore")

In [41]:
def _dense_block(units, space):
    """Return the Dense -> BatchNormalization -> Activation layers of one hidden stage."""
    return [
        keras.layers.Dense(units, kernel_initializer=space['kernel_init']),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(space['activ']),
    ]


def objective(space):
    """Hyperopt objective: build, train and score one network configuration.

    The network takes the 4044-wide sparse "input" tensor, passes it through a
    first -> second -> middle -> second -> first stack of Dense/BatchNorm/Activation
    stages (with activity regularisation after the first stage) and ends in a
    13-unit "output" layer. It is trained on the global `ds_train` and used to
    predict on the global `ds_test`.

    Parameters
    ----------
    space : dict
        Sampled hyper-parameters. Keys read here: 'first', 'second', 'middle',
        'kernel_init', 'activ', 'l1_reg', 'loss_ob', 'optimizer', 'metrics', 'epochs'.

    Returns
    -------
    dict
        Hyperopt result with 'loss' set to the negated recall (so minimising the
        loss maximises recall), 'status', the sampled 'space', the trained 'model',
        the remaining benchmark values, and 'scores_index': the row label under
        which this evaluation was written to `scores_df`.

    Side effects
    ------------
    Clears the Keras session, appends one row to the global `scores_df`,
    increments the global `index`, and prints the space and the recall.
    """
    global index

    # Drop the Keras state left over from the previous evaluation.
    keras.backend.clear_session()

    input_feat = keras.layers.Input(shape=[4044], name="input", sparse=True, dtype=tf.float64)

    # Layers are created in the same order as the original hand-written stack.
    hidden_layers = _dense_block(space['first'], space)
    hidden_layers.append(keras.layers.ActivityRegularization(space['l1_reg']))
    for units in (space['second'], space['middle'], space['second'], space['first']):
        hidden_layers.extend(_dense_block(units, space))

    sae = keras.models.Sequential(hidden_layers)(input_feat)

    output = keras.layers.Dense(13, name="output")(sae)

    model = keras.models.Model(inputs=[input_feat], outputs=[output])

    model.compile(loss=space['loss_ob'], optimizer=space['optimizer'], metrics=space['metrics'])

    model.fit(ds_train, epochs=space['epochs'], verbose=0)

    y_pred = model.predict(ds_test)

    # dnn_tf_anomScores / benchmark_snorkel are not defined in this notebook;
    # presumably they come from the Cert_Aux_Functions2 star import.
    # NOTE(review): 98.0 looks like a percentile threshold -- confirm in the helper.
    anomalyScores, _ = dnn_tf_anomScores(
        y_pred, trans_lhud_test_13, df_lhud_1hora_test, 98.0)

    a, p, r, f, cm, auc_sc = benchmark_snorkel(labels_g_pd, anomalyScores)

    # `index` keeps counting across fmin runs unless it is reset by hand, so the
    # scores_df row label can differ from the position in trials.results.
    # The label is returned below so the two can always be matched.
    row = index
    scores_df.loc[row, :] = np.array([row, space, a, p, r, f, auc_sc, np.reshape(cm, (4))], dtype=object)
    index = row + 1

    print(space, r)

    return {'loss': -r, 'status': STATUS_OK, 'space': space,
            'model': model, 'f1_score': f, 'auc_sc': auc_sc,
            'precision': p, 'recall': r, 'c_matrix': cm,
            'scores_index': row}


# Candidate values per hyper-parameter. Each entry becomes an hp.choice whose
# label is the dictionary key itself.
_space_options = {
    'first': np.arange(250, 500, 20),    # 250, 270, ..., 490
    'second': np.arange(50, 300, 25),    # 50, 75, ..., 275
    'middle': np.arange(20, 60, 10),     # 20, 30, 40, 50
    'kernel_init': ["he_normal"],
    'activ': ["elu"],
    # a 'dropout' entry ([0.0, 0.05, 0.1, 0.15, 0.2]) is currently disabled
    'loss_ob': ["mae"],
    'optimizer': ["nadam"],
    'metrics': ["mean_squared_error"],
    'epochs': [10],
    'l1_reg': [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01],
}

space = {label: hp.choice(label, options) for label, options in _space_options.items()}

In [42]:
## Using ActivityRegularization - testing candidate values
## Using the format that worked with SPARSE!
# A fresh Trials object collects the result dict returned by each objective() call.
# NOTE(review): scores_df and `index` are not reset here, so rows from earlier
# searches remain in scores_df.
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=30,
            trials=trials)

{'activ': 'elu', 'epochs': 10, 'first': 410, 'kernel_init': 'he_normal', 'l1_reg': 0.005, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 20, 'optimizer': 'nadam', 'second': 100}
0.6436025044148338                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 270, 'kernel_init': 'he_normal', 'l1_reg': 0.001, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 50, 'optimizer': 'nadam', 'second': 175}
0.6341306790817146                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 290, 'kernel_init': 'he_normal', 'l1_reg': 5e-05, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 20, 'optimizer': 'nadam', 'second': 125}
0.4835447102263606                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 410, 'kernel_init': 'he_

In [55]:
# Rank the evaluated configurations by Recall, then ROC-AUC, both descending.
#scores_df.sort_values(['ROC-AUC'], ascending=[False])#.loc[:,('Params','Recall','ROC-AUC')]
scores_df.sort_values(['Recall','ROC-AUC'], ascending=[False,False])#.loc[:,('Params','Recall','ROC-AUC')]

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
19,19,"{'activ': 'elu', 'epochs': 10, 'first': 370, '...",0.980701,0.065007,0.68438,0.118735,0.901565,"[3211387, 61315, 1966, 4263]"
25,25,"{'activ': 'elu', 'epochs': 10, 'first': 410, '...",0.980675,0.064365,0.677637,0.117564,0.840294,"[3211344, 61358, 2008, 4221]"
7,7,"{'activ': 'elu', 'epochs': 10, 'first': 390, '...",0.980674,0.064335,0.677316,0.117508,0.850816,"[3211342, 61360, 2010, 4219]"
33,33,"{'activ': 'elu', 'epochs': 10, 'first': 370, '...",0.980667,0.064167,0.67555,0.117201,0.887542,"[3211331, 61371, 2021, 4208]"
6,6,"{'activ': 'elu', 'epochs': 10, 'first': 470, '...",0.980655,0.063862,0.672339,0.116644,0.898767,"[3211311, 61391, 2041, 4188]"
18,18,"{'activ': 'elu', 'epochs': 10, 'first': 410, '...",0.980638,0.063435,0.667844,0.115865,0.915037,"[3211283, 61419, 2069, 4160]"
8,8,"{'activ': 'elu', 'epochs': 10, 'first': 290, '...",0.980614,0.06284,0.661583,0.114778,0.907809,"[3211244, 61458, 2108, 4121]"
29,29,"{'activ': 'elu', 'epochs': 10, 'first': 290, '...",0.980602,0.062551,0.658533,0.114249,0.864082,"[3211225, 61477, 2127, 4102]"
10,10,"{'activ': 'elu', 'epochs': 10, 'first': 410, '...",0.980545,0.061132,0.643603,0.111659,0.925715,"[3211132, 61570, 2220, 4009]"
39,39,"{'activ': 'elu', 'epochs': 10, 'first': 390, '...",0.980541,0.06101,0.642318,0.111436,0.895504,"[3211124, 61578, 2228, 4001]"


In [52]:
# Configuration being saved (scores_df row 19):
#19	19	{'activ': 'elu', 'epochs': 10, 'first': 370, '...	0.980701	0.065007	0.68438	0.118735	0.901565	[3211387, 61315, 1966, 4263]
#{'activ': 'elu', 'epochs': 10, 'first': 370, 'kernel_init': 'he_normal', 'l1_reg': 0.0005, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 40, 'optimizer': 'nadam', 'second': 275}
#0.6843795151709745
# Per the author's note, the scores_df counter was not reset before this search,
# which is why scores_df row 19 is read from position 9 of trials.results.
model = trials.results[9]['model'] ## THE SCORES_DF COUNT WAS NOT RESET
model.save('best_bench_SAEBN_Snorkel3-068438.h5')

In [50]:
# Configuration being saved (scores_df row 10):
#10	10	{'activ': 'elu', 'epochs': 10, 'first': 410, '...	0.980545	0.061132	0.643603	0.111659	0.925715	[3211132, 61570, 2220, 4009]
#{'activ': 'elu', 'epochs': 10, 'first': 410, 'kernel_init': 'he_normal', 'l1_reg': 0.005, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 20, 'optimizer': 'nadam', 'second': 100}
#0.6436025044148338
## ROC-AUC 92%
# Per the author's note, the scores_df counter was not reset before this search,
# which is why scores_df row 10 is read from position 0 of trials.results.
model = trials.results[0]['model']  ## THE SCORES_DF COUNT WAS NOT RESET
model.save('best_bench_SAEBN_Snorkel4-0643REC.h5')

In [12]:
## Using ActivityRegularization - reached 67%
## Using the format that worked with SPARSE!
# A fresh Trials object collects the result dict returned by each objective() call.
# NOTE(review): the output recorded for this run shows no 'l1_reg' key and an
# optimizer object in the space, so it presumably ran against an earlier
# version of `space` / `objective` than the one defined in this notebook.
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10,
            trials=trials)

{'activ': 'elu', 'epochs': 10, 'first': 410, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 50, 'optimizer': <keras.optimizer_v2.nadam.Nadam object at 0x0000023DFCC57C10>, 'second': 100}
0.6182372772515653                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 410, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 20, 'optimizer': <keras.optimizer_v2.nadam.Nadam object at 0x0000023DFCC57C10>, 'second': 150}
0.4918927596724996                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 330, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 50, 'optimizer': <keras.optimizer_v2.nadam.Nadam object at 0x0000023DFCC57C10>, 'second': 275}
0.5951196018622572                                                       

In [13]:
# Rank the evaluated configurations by Recall, then ROC-AUC, both descending.
scores_df.sort_values(['Recall','ROC-AUC'], ascending=[False,False])#.loc[:,('Params','Recall','ROC-AUC')]

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
7,7,"{'activ': 'elu', 'epochs': 10, 'first': 390, '...",0.980674,0.064335,0.677316,0.117508,0.850816,"[3211342, 61360, 2010, 4219]"
6,6,"{'activ': 'elu', 'epochs': 10, 'first': 470, '...",0.980655,0.063862,0.672339,0.116644,0.898767,"[3211311, 61391, 2041, 4188]"
8,8,"{'activ': 'elu', 'epochs': 10, 'first': 290, '...",0.980614,0.06284,0.661583,0.114778,0.907809,"[3211244, 61458, 2108, 4121]"
3,3,"{'activ': 'elu', 'epochs': 10, 'first': 370, '...",0.980506,0.060156,0.633328,0.109876,0.838358,"[3211068, 61634, 2284, 3945]"
0,0,"{'activ': 'elu', 'epochs': 10, 'first': 410, '...",0.980449,0.058723,0.618237,0.107258,0.902904,"[3210974, 61728, 2378, 3851]"
2,2,"{'activ': 'elu', 'epochs': 10, 'first': 330, '...",0.980361,0.056527,0.59512,0.103248,0.797498,"[3210830, 61872, 2522, 3707]"
5,5,"{'activ': 'elu', 'epochs': 10, 'first': 290, '...",0.980359,0.056466,0.594477,0.103136,0.836853,"[3210826, 61876, 2526, 3703]"
9,9,"{'activ': 'elu', 'epochs': 10, 'first': 410, '...",0.980301,0.055003,0.579066,0.100464,0.82157,"[3210731, 61971, 2622, 3607]"
4,4,"{'activ': 'elu', 'epochs': 10, 'first': 490, '...",0.980194,0.052349,0.551132,0.095616,0.87364,"[3210556, 62146, 2796, 3433]"
1,1,"{'activ': 'elu', 'epochs': 10, 'first': 410, '...",0.979969,0.046723,0.491893,0.08534,0.796113,"[3210188, 62514, 3165, 3064]"


In [None]:
# Configuration being saved (scores_df row 7, recall 0.6773):
#{'activ': 'elu', 'epochs': 10, 'first': 390, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 40, 'optimizer': <keras.optimizer_v2.nadam.Nadam object at 0x0000023DFCC57C10>, 'second': 275}
#0.6773157810242415
#7	7	{'activ': 'elu', 'epochs': 10, 'first': 390, '...	0.980674	0.064335	0.677316	0.117508	0.850816	[3211342, 61360, 2010, 4219]
# Take the trained model stored in the result of trial 7 and write it to an .h5 file.
model = trials.results[7]['model']
model.save('best_bench_SAEBN_Snorkel1-06773.h5')

In [None]:
# Configuration being saved (scores_df row 6, recall 0.6723):
#{'activ': 'elu', 'epochs': 10, 'first': 470, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 30, 'optimizer': <keras.optimizer_v2.nadam.Nadam object at 0x0000023DFCC57C10>, 'second': 175}
#0.6723390592390431
#6	6	{'activ': 'elu', 'epochs': 10, 'first': 470, '...	0.980655	0.063862	0.672339	0.116644	0.898767	[3211311, 61391, 2041, 4188]
# Take the trained model stored in the result of trial 6 and write it to an .h5 file.
model = trials.results[6]['model']
model.save('best_bench_SAEBN_Snorkel2-06723.h5')

In [None]:
# Configuration being saved (scores_df row 8, recall 0.6616):
#8	8	{'activ': 'elu', 'epochs': 10, 'first': 290, '...	0.980614	0.06284	0.661583	0.114778	0.907809	[3211244, 61458, 2108, 4121]
#{'activ': 'elu', 'epochs': 10, 'first': 290, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 50, 'optimizer': <keras.optimizer_v2.nadam.Nadam object at 0x0000023DFCC57C10>, 'second': 150}
#0.6615829186065179
# Take the trained model stored in the result of trial 8 and write it to an .h5 file.
model = trials.results[8]['model']
model.save('best_bench_SAEBN_Snorkel3-06615.h5')

In [39]:
# Retrieve the model kept in the result of trial 8 and print its layer summary.
model = trials.results[8]['model']
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 4044)]            0         
_________________________________________________________________
sequential (Sequential)      (None, 290)               1279410   
_________________________________________________________________
output (Dense)               (None, 13)                3783      
Total params: 1,283,193
Trainable params: 1,281,333
Non-trainable params: 1,860
_________________________________________________________________


In [82]:
## Using the format that worked (one-hot encoding done in TF) - SPARSE is NOT used here!
## Reached 66.8%
# NOTE(review): `objective` as defined in this notebook declares a sparse input,
# so this run presumably used an earlier, dense version of objective / ds_train.
# A fresh Trials object collects the result dict returned by each objective() call.
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

{'activ': 'elu', 'epochs': 10, 'first': 310, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 40, 'optimizer': 'nadam', 'second': 275}
0.44148338417081395                                                                                                    
{'activ': 'elu', 'epochs': 10, 'first': 270, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 20, 'optimizer': 'nadam', 'second': 50}
0.5639749558516616                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 430, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 20, 'optimizer': 'nadam', 'second': 75}
0.5999357842350297                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 470, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_e

{'activ': 'elu', 'epochs': 10, 'first': 470, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 50, 'optimizer': 'nadam', 'second': 275}
0.6521110932733987                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 370, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 20, 'optimizer': 'nadam', 'second': 200}
0.5614063252528496                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 490, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 30, 'optimizer': 'nadam', 'second': 100}
0.5464761598972547                                                                                                     
{'activ': 'elu', 'epochs': 10, 'first': 310, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared

In [84]:
# Rank the evaluated configurations by Recall, then ROC-AUC, both descending.
scores_df.sort_values(['Recall','ROC-AUC'], ascending=[False,False])#.loc[:,('Params','Recall','ROC-AUC')]

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
7,7,"{'activ': 'elu', 'epochs': 10, 'first': 370, '...",0.980641,0.063526,0.668807,0.116032,0.902545,"[3211289, 61413, 2063, 4166]"
31,31,"{'activ': 'elu', 'epochs': 10, 'first': 490, '...",0.980625,0.06313,0.664633,0.115307,0.905458,"[3211263, 61439, 2089, 4140]"
77,77,"{'activ': 'elu', 'epochs': 10, 'first': 450, '...",0.980611,0.062779,0.660941,0.114667,0.846139,"[3211240, 61462, 2112, 4117]"
55,55,"{'activ': 'elu', 'epochs': 10, 'first': 470, '...",0.980578,0.061941,0.652111,0.113135,0.894649,"[3211185, 61517, 2167, 4062]"
83,83,"{'activ': 'elu', 'epochs': 10, 'first': 370, '...",0.980574,0.061834,0.650987,0.11294,0.893987,"[3211178, 61524, 2174, 4055]"
...,...,...,...,...,...,...,...,...
84,84,"{'activ': 'elu', 'epochs': 10, 'first': 450, '...",0.979771,0.041782,0.439878,0.076315,0.888392,"[3209863, 62839, 3489, 2740]"
8,8,"{'activ': 'elu', 'epochs': 10, 'first': 490, '...",0.979583,0.03707,0.390271,0.067708,0.872021,"[3209554, 63148, 3798, 2431]"
44,44,"{'activ': 'elu', 'epochs': 10, 'first': 310, '...",0.97956,0.036506,0.384331,0.066678,0.848122,"[3209517, 63185, 3835, 2394]"
50,50,"{'activ': 'elu', 'epochs': 10, 'first': 350, '...",0.978519,0.010461,0.11013,0.019107,0.787696,"[3207809, 64893, 5543, 686]"


In [91]:
# Show the hyper-parameters stored in the result of trial 7.
trials.results[7]['space']

{'activ': 'elu',
 'epochs': 10,
 'first': 370,
 'kernel_init': 'he_normal',
 'loss_ob': 'mae',
 'metrics': 'mean_squared_error',
 'middle': 30,
 'optimizer': 'nadam',
 'second': 200}

In [94]:
# Show the scores_df entry at position 7, for comparison with trial 7.
scores_df.iloc[7]

Model                                                        7
Params       {'activ': 'elu', 'epochs': 10, 'first': 370, '...
Accuracy                                              0.980641
Precision                                             0.063526
Recall                                                0.668807
F1-Score                                              0.116032
ROC-AUC                                               0.902545
CM                                [3211289, 61413, 2063, 4166]
Name: 7, dtype: object

In [90]:
# Configuration being saved (scores_df row 7, recall 0.6688):
#{'activ': 'elu', 'epochs': 10, 'first': 370, 'kernel_init': 'he_normal', 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'middle': 30, 'optimizer': 'nadam', 'second': 200}
#0.6688071921656766
#7	{'activ': 'elu', 'epochs': 10, 'first': 370, '...	0.980641	0.063526	0.668807	0.116032	0.902545	[3211289, 61413, 2063, 4166]
# Take the trained model stored in the result of trial 7 and write it to an .h5 file.
model = trials.results[7]['model']
model.save('best_bench_SAEBN_Snorkel.h5')