In [8]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

from ipynb.fs.full.Cert_Aux_Functions2 import *

# Importa a biblioteca pandas 
import pandas as pd

# Importa datetime e timedelta para verificar se há gaps de tempo nos datasets preparados
from datetime import datetime, timedelta

# Importa a biblioteca os
import os
from pathlib import Path

## Bibliotecas sklearn
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder, LabelEncoder
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler, FunctionTransformer
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.impute import SimpleImputer

# Using Kernel PCA
from sklearn.decomposition import PCA, KernelPCA

# Using Gaussian Mixtures
from sklearn.mixture import GaussianMixture

# One Class SVM
# https://scikit-learn.org/stable/auto_examples/svm/plot_oneclass.html
from sklearn import svm
from sklearn import linear_model

#Para as figuras
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

#Importa bibliotecas Numpy
import numpy as np

# Importa as bibliotecas para os cálculos do desvio padrao
from scipy.stats import norm
from scipy.stats import chi2
import statistics

# TensorFlow >= 2.0 is required
import tensorflow as tf
from tensorflow import keras
# Compare the version numerically (major, minor), like the sys.version_info check
# above: a plain string comparison is lexicographic, so "10.0" would sort before "2.0".
assert tuple(int(part) for part in tf.__version__.split(".")[:2]) >= (2, 0)

from keras import optimizers, Sequential
from keras.models import Model
#from keras.utils import plot_model
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed
from keras.callbacks import ModelCheckpoint, TensorBoard

from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the hourly dataset (Logon + HTTP + USB + Device), order it by date and
# renumber the rows so that index labels equal row positions.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df_lhud_1hora = pd.read_pickle("df_lhud_1hora_file.pkl")
df_lhud_1hora.sort_values('date', ascending=True, inplace = True)
df_lhud_1hora.reset_index(inplace = True, drop=True)

## Training subset: the first 4 months of data (rows dated up to '2010-05-02')
df_lhud_1hora4m = df_lhud_1hora[(df_lhud_1hora['date'] <= '2010-05-02')]

# Index label of the last training row and of the last row dated up to '2010-09-02'.
train_index = df_lhud_1hora4m.index[-1]
test_index = df_lhud_1hora[(df_lhud_1hora['date'] <= '2010-09-02')].index[-1]


## Test set
# NOTE(review): iloc is positional and end-exclusive. After the reset_index above
# labels equal positions, so this slice starts AT the last training row and stops
# BEFORE the row at test_index; it appears to share one row with the training
# subset and to leave out the last row of the test window. The Snorkel label files
# loaded later in this notebook have the same number of rows as this slice, so
# confirm the alignment before changing the bounds.
df_lhud_1hora_test = df_lhud_1hora.iloc[train_index:test_index]

### Transformations

# Names of the numeric features of this dataset (13 columns)
lhud_numeric_features = ['logon', 'logoff','down','up','vis',
                     'conn','disc','trm','frm','open',
                     'write','copy','delete']

# Names of the categorical features - common to all datasets

# Alternative kept for reference: StandardScaler (removes the mean, scales to unit variance)
#numeric_transformer = StandardScaler()
numeric_transformer = MinMaxScaler() ## scales each feature to the range 0..1
#numeric_transformer = SimpleImputer()

# Fixed category lists so every fit yields the same one-hot layout:
# 24 hours, 7 days of week, and the users seen in the training subset.
hour_categories = np.arange(0, 24)
dow_categories  = np.arange(0, 7)
user_categories = df_lhud_1hora4m.user.unique()

# 'user' is encoded separately below, so only hour and day-of-week are listed here.
#categorical_features = ['user','hour', 'dow']
categorical_features = ['hour', 'dow']
categorical_transformer = OneHotEncoder(
#    categories = [user_categories, hour_categories, dow_categories]
    categories = [hour_categories, dow_categories]
)

user_feature = ['user']
# Two encoders for the user column: ordinal and one-hot ("_wd").
# Only the one-hot variant is used by the preprocessors defined in this cell.
user_transformer = OrdinalEncoder(categories = [user_categories])
user_transformer_wd = OneHotEncoder(categories = [user_categories])

# Full preprocessor: scaled numeric features, then one-hot hour/dow, then one-hot user.
lhud_preprocessor = ColumnTransformer(
    transformers=[
        ('lhud_num', numeric_transformer, lhud_numeric_features),
        ('lhud_cat', categorical_transformer, categorical_features),
        ('lhud_user', user_transformer_wd, user_feature), # changed to test the one-hot encoder ("_wd")
    ])

# "Deep" preprocessor: only the scaled numeric features.
lhud_preprocessor_deep = ColumnTransformer(
    transformers=[
        ('lhud_num', numeric_transformer, lhud_numeric_features),
    ])

# "Wide" preprocessor: only the one-hot encoded hour, day-of-week and user columns.
lhud_preprocessor_wide = ColumnTransformer(
    transformers=[
        ('lhud_cat', categorical_transformer, categorical_features),
        ('lhud_user_wd', user_transformer_wd, user_feature),
    ])

# Simple transformation with all columns together.
# The preprocessor is fitted on the training subset only and then applied to the
# test slice and to the full dataset.
columns = lhud_numeric_features + categorical_features + user_feature
trans_lhud_4m    = lhud_preprocessor.fit_transform(df_lhud_1hora4m[columns])
trans_lhud_test  = lhud_preprocessor.transform(df_lhud_1hora_test[columns])
trans_lhud       = lhud_preprocessor.transform(df_lhud_1hora[columns])
# First 13 output columns (the 'lhud_num' block comes first), converted from sparse to dense.
trans_lhud_4m_13 = trans_lhud_4m[:,:13].toarray()
trans_lhud_test_13 =   trans_lhud_test[:,:13].toarray()

# Deep transformation (numeric features only; categorical and user columns are left out)
columns_deep = lhud_numeric_features  # + categorical_features + user_feature
trans_lhud_4m_deep    = lhud_preprocessor_deep.fit_transform(df_lhud_1hora4m[columns_deep])
trans_lhud_test_deep       = lhud_preprocessor_deep.transform(df_lhud_1hora_test[columns_deep])

# Wide transformation (hour, day-of-week and user, one-hot encoded)
columns_wide = categorical_features + user_feature
trans_lhud_4m_wide    = lhud_preprocessor_wide.fit_transform(df_lhud_1hora4m[columns_wide])
trans_lhud_test_wide       = lhud_preprocessor_wide.transform(df_lhud_1hora_test[columns_wide])


In [3]:
# Sanity check: container type of the full transform and the shapes of the dense
# 13-column test and training matrices.
type(trans_lhud_4m), trans_lhud_test_13.shape, trans_lhud_4m_13.shape

(scipy.sparse.csr.csr_matrix, (3278931, 13), (3294562, 13))

In [4]:
### Load the files with the data for insider 1
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.

### Load from file
acm2278_test = pd.read_pickle("acm2278_test.pkl")

### Load from file
acm2278_test_full = pd.read_pickle("acm2278_test_full.pkl")

user = 'ACM2278'


### Load the labels predicted by Snorkel *** Adjusted
labels_pd = pd.read_hdf("labels_pd.hdf",'df')
# 'anom' is -1 where column 0 equals 1 and +1 everywhere else.
labels_pd['anom'] = np.where((labels_pd[0]== 1),-1,1)
# This value is computed but not shown: a notebook only displays the last expression of a cell.
labels_pd[labels_pd['anom'] == -1].shape, labels_pd[labels_pd['anom'] == 1].shape

### Load the labels predicted by Snorkel *** General
labels_g_pd = pd.read_hdf("labels_g_pd.hdf",'df')
# Same -1 / +1 mapping as above.
labels_g_pd['anom'] = np.where((labels_g_pd[0]== 1),-1,1)
# Displayed as the cell output: shapes of the rows labelled -1 and of the rows labelled +1.
labels_g_pd[labels_g_pd['anom'] == -1].shape, labels_g_pd[labels_g_pd['anom'] == 1].shape

((6229, 2), (3272702, 2))

In [5]:
# Datasets for the non-RNN model with a 13-column "deep" input and a 4031-column "wide" input.

# Training data: dense numeric features plus the sparse one-hot block
# (the recorded element_spec shows the wide input as a SparseTensorSpec).
ds_deep_train = tf.data.Dataset.from_tensor_slices(trans_lhud_4m_deep)
ds_wide_train = tf.data.Dataset.from_tensor_slices(convert_sparse_matrix_to_sparse_tensor(trans_lhud_4m_wide))

# Each element is (inputs, target); the target is the deep input itself.
ds_train = tf.data.Dataset.zip(({"deep_input": ds_deep_train, "wide_input": ds_wide_train}, ds_deep_train))

# Test data, built the same way from the test-slice transforms.
ds_deep_test = tf.data.Dataset.from_tensor_slices(trans_lhud_test_deep)
ds_wide_test = tf.data.Dataset.from_tensor_slices(convert_sparse_matrix_to_sparse_tensor(trans_lhud_test_wide))

ds_test = tf.data.Dataset.zip(({"deep_input": ds_deep_test, "wide_input": ds_wide_test}, ds_deep_test))

# Batches of 1024 rows, cached after the first pass, prefetching up to 4 batches.
# No shuffling is applied, so batches keep the row order of the source arrays.
ds_train = ds_train.batch(1024).cache().prefetch(4)
ds_test = ds_test.batch(1024).cache().prefetch(4)

# Displayed as the cell output: structure, shapes and dtypes of both datasets.
ds_train.element_spec, ds_test.element_spec

(({'deep_input': TensorSpec(shape=(None, 13), dtype=tf.float64, name=None),
   'wide_input': SparseTensorSpec(TensorShape([None, 4031]), tf.float64)},
  TensorSpec(shape=(None, 13), dtype=tf.float64, name=None)),
 ({'deep_input': TensorSpec(shape=(None, 13), dtype=tf.float64, name=None),
   'wide_input': SparseTensorSpec(TensorShape([None, 4031]), tf.float64)},
  TensorSpec(shape=(None, 13), dtype=tf.float64, name=None)))

In [14]:
def _build_widee_model(space):
    """Build and compile the wide & deep network described by ``space``.

    The "deep" branch feeds the 13 numeric features through a symmetric stack of
    Dense layers (``first`` -> ``second`` -> ``second`` -> ``first`` units), each
    followed by BatchNormalization. Its output is concatenated with the
    4031-column "wide" input and passed through a final Dense block whose
    optional BatchNormalization, L1 activity regularisation and Dropout layers
    are switched by the boolean entries of ``space``. The output layer has 13
    linear units.

    Parameters
    ----------
    space : dict
        Sampled hyper-parameters. Keys read here: 'first', 'second', 'last',
        'kernel_init', 'activ', 'last_batch_layer', 'l1reg_layer', 'l1_reg',
        'dropout_layer', 'dropout', 'loss_ob', 'optimizer', 'metrics'.

    Returns
    -------
    keras Model
        Compiled model with inputs named "deep_input" and "wide_input".
    """
    deep_in = keras.layers.Input(shape=[13], name="deep_input")
    wide_in = keras.layers.Input(shape=[4031], name="wide_input")

    # Symmetric stack; every Dense layer is followed by BatchNormalization.
    deep_layers = []
    for units in (space['first'], space['second'], space['second'], space['first']):
        deep_layers.append(keras.layers.Dense(units,
                                              kernel_initializer=space['kernel_init'],
                                              activation=space['activ']))
        deep_layers.append(keras.layers.BatchNormalization())
    deep_out = keras.models.Sequential(deep_layers)(deep_in)

    merged = keras.layers.concatenate([wide_in, deep_out])

    # Final block: Dense -> [BatchNorm] -> ELU -> [L1 activity reg.] -> [Dropout].
    head = keras.models.Sequential()
    head.add(keras.layers.Dense(space['last'],
                                kernel_initializer=space['kernel_init']))
    if space['last_batch_layer']:
        head.add(keras.layers.BatchNormalization())
    head.add(keras.layers.Activation("elu"))
    if space['l1reg_layer']:
        head.add(keras.layers.ActivityRegularization(space['l1_reg']))
    if space['dropout_layer']:
        head.add(keras.layers.Dropout(space['dropout']))

    output = keras.layers.Dense(13, name="output")(head(merged))

    model = keras.models.Model(inputs=[deep_in, wide_in], outputs=[output])
    model.compile(loss=space['loss_ob'], optimizer=space['optimizer'], metrics=space['metrics'])
    return model


def objective(space, score_threshold=98.0):
    """Hyperopt objective: train one model and score it against the Snorkel labels.

    Builds the network for ``space``, fits it on ``ds_train``, predicts on
    ``ds_test``, converts the predictions to anomaly scores and benchmarks them
    against ``labels_g_pd``. The loss returned to hyperopt is the negated
    recall, so minimising the loss maximises recall.

    Side effects: appends one row to the module-level ``scores_df`` at position
    ``index``, increments the module-level ``index`` and prints ``space`` with
    the recall.

    Parameters
    ----------
    space : dict
        Hyper-parameters sampled by hyperopt. Besides the keys used to build the
        model, 'epochs' is read here.
    score_threshold : float, optional
        Forwarded as the fourth argument of ``dnn_tf_anomScores`` (default 98.0,
        the value previously hard-coded). Presumably a percentile cut-off -
        confirm against the helper's definition.

    Returns
    -------
    dict
        'loss' (-recall), 'status', 'space', the trained 'model' and the metrics
        'f1_score', 'auc_sc', 'precision', 'recall', 'c_matrix'.
        NOTE(review): keeping every trained model in the result means all of them
        stay in memory for the lifetime of the Trials object.
    """
    global index

    model = _build_widee_model(space)
    model.fit(ds_train, epochs=space['epochs'], verbose=0)

    y_pred = model.predict(ds_test)

    # The second value returned by the helper is not used here.
    anomalyScores, _den_thres = dnn_tf_anomScores(
        y_pred, trans_lhud_test_deep, df_lhud_1hora_test, score_threshold)

    a, p, r, f, cm, auc_sc = benchmark_snorkel(labels_g_pd, anomalyScores)

    # One row per trial, in the column order of scores_df; the confusion matrix is
    # flattened to 4 values. dtype=object keeps the dict and the array as single cells.
    scores_df.loc[index, :] = np.array(
        [index, space, a, p, r, f, auc_sc, np.reshape(cm, (4))], dtype=object)
    index = index + 1

    print(space, r)

    return {'loss': -r, 'status': STATUS_OK, 'space': space,
            'model': model, 'f1_score': f, 'auc_sc': auc_sc,
            'precision': p, 'recall': r, 'c_matrix': cm}


# Hyperopt search space read by `objective`. Every entry is an hp.choice whose
# label equals its dictionary key. Single-option entries pin a value while
# keeping it in the recorded parameters. A 'middle' layer size
# (np.arange(20, 60, 10)) is currently disabled.
space = {
    label: hp.choice(label, options)
    for label, options in (
        ('first', np.arange(8, 13, 1)),
        ('second', np.arange(3, 8, 1)),
        ('last', np.arange(50, 500, 25)),
        ('kernel_init', ["he_normal"]),
        ('activ', ["elu"]),
        ('loss_ob', ["mae"]),
        ('optimizer', ["nadam"]),
        ('metrics', ["mean_squared_error"]),
        ('epochs', [10]),
        ('l1_reg', [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01]),
        ('dropout', [0.0, 0.05, 0.1, 0.15, 0.2]),
        ('last_batch_layer', [True, False]),
        ('l1reg_layer', [True, False]),
        ('dropout_layer', [True, False]),
    )
}

In [15]:
# Results table filled by `objective`, one row per evaluated trial.
scores_df = pd.DataFrame(columns=["Model","Params","Accuracy","Precision","Recall","F1-Score","ROC-AUC", "CM"])
# Row counter that `objective` reads and increments through `global index`.
index=0
#scores_df.info()

# Search `space` with the TPE algorithm for 100 evaluations. `objective` returns
# -recall as the loss, so minimising it maximises recall. `trials` keeps every
# per-trial result dictionary.
# NOTE(review): no random state is passed to fmin and no seed is set in this
# notebook, so trial numbers and scores differ between runs.
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

{'activ': 'elu', 'dropout': 0.1, 'dropout_layer': True, 'epochs': 10, 'first': 10, 'kernel_init': 'he_normal', 'l1_reg': 0.005, 'l1reg_layer': False, 'last': 425, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 6}
0.6089259913308718                                                                                                                                          
{'activ': 'elu', 'dropout': 0.15, 'dropout_layer': True, 'epochs': 10, 'first': 11, 'kernel_init': 'he_normal', 'l1_reg': 0.01, 'l1reg_layer': True, 'last': 200, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 7}
0.2713116069995184                                                                                                                                          
{'activ': 'elu', 'dropout': 0.2, 'dropout_layer': False, 'epochs': 10, 'first': 12, 'kernel_init': 'he_normal', 'l1_reg': 0.01, 'l1reg_l

0.6379836249799326                                                                                                                                          
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': True, 'epochs': 10, 'first': 10, 'kernel_init': 'he_normal', 'l1_reg': 0.0005, 'l1reg_layer': False, 'last': 250, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 7}
0.24546476159897254                                                                                                                                         
{'activ': 'elu', 'dropout': 0.1, 'dropout_layer': True, 'epochs': 10, 'first': 9, 'kernel_init': 'he_normal', 'l1_reg': 0.001, 'l1reg_layer': True, 'last': 175, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 3}
0.6477765291379034                                                                                                                      

0.6562851179964682                                                                                                                                          
{'activ': 'elu', 'dropout': 0.2, 'dropout_layer': True, 'epochs': 10, 'first': 11, 'kernel_init': 'he_normal', 'l1_reg': 0.01, 'l1reg_layer': True, 'last': 350, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 6}
0.5933536683255739                                                                                                                                          
{'activ': 'elu', 'dropout': 0.15, 'dropout_layer': True, 'epochs': 10, 'first': 11, 'kernel_init': 'he_normal', 'l1_reg': 0.01, 'l1reg_layer': False, 'last': 350, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 6}
0.3615347567827902                                                                                                                       

0.6517900144485471                                                                                                                                          
{'activ': 'elu', 'dropout': 0.0, 'dropout_layer': True, 'epochs': 10, 'first': 8, 'kernel_init': 'he_normal', 'l1_reg': 5e-05, 'l1reg_layer': False, 'last': 325, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 7}
0.4276769947021994                                                                                                                                          
{'activ': 'elu', 'dropout': 0.15, 'dropout_layer': False, 'epochs': 10, 'first': 12, 'kernel_init': 'he_normal', 'l1_reg': 0.0005, 'l1reg_layer': True, 'last': 175, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 7}
0.3692406485792262                                                                                                                    

In [16]:
# Second round of the search.
# Rank the trials by Recall, breaking ties by ROC-AUC (best first).
# Alternative ranking by ROC-AUC only:
#scores_df.sort_values(['ROC-AUC'], ascending=[False])
scores_df.sort_values(['Recall','ROC-AUC'], ascending=[False,False])  # append .loc[:,('Params','Recall','ROC-AUC')] to narrow the columns

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
81,81,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.980661,0.06403,0.674105,0.116951,0.897007,"[3211322, 61380, 2030, 4199]"
78,78,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.980661,0.06403,0.674105,0.116951,0.861453,"[3211322, 61380, 2030, 4199]"
66,66,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.980611,0.062764,0.66078,0.114639,0.899047,"[3211239, 61463, 2113, 4116]"
71,71,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.980595,0.062368,0.656606,0.113915,0.900408,"[3211213, 61489, 2139, 4090]"
67,67,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.980595,0.062368,0.656606,0.113915,0.890895,"[3211213, 61489, 2139, 4090]"
...,...,...,...,...,...,...,...,...
1,1,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.979131,0.02577,0.271312,0.04707,0.906039,"[3208813, 63889, 4539, 1690]"
98,98,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.979083,0.024566,0.258629,0.04487,0.763975,"[3208734, 63968, 4618, 1611]"
89,89,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.979049,0.023712,0.249639,0.04331,0.863443,"[3208678, 64024, 4674, 1555]"
38,38,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.979033,0.023315,0.245465,0.042586,0.840589,"[3208652, 64050, 4700, 1529]"


In [17]:
# Save the model of trial 81. The three lines below are pasted from the ranking
# table and the search log of the recorded run (table row, parameters, recall).
#81	81	{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...	0.980661	0.06403	0.674105	0.116951	0.897007	[3211322, 61380, 2030, 4199]
#{'activ': 'elu', 'dropout': 0.15, 'dropout_layer': True, 'epochs': 10, 'first': 12, 'kernel_init': 'he_normal', 'l1_reg': 0.0001, 'l1reg_layer': True, 'last': 275, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 7}
#0.6741049927757264 
# NOTE(review): the trial number is hard-coded; the search is not seeded, so after
# a re-run index 81 will most likely point to a different model - confirm before saving.
model = trials.results[81]['model']
model.save('best_WiDee_V7.h5')

In [18]:
# Save the model of trial 78. The three lines below are pasted from the ranking
# table and the search log of the recorded run (table row, parameters, recall).
#78	78	{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...	0.980661	0.06403	0.674105	0.116951	0.861453	[3211322, 61380, 2030, 4199]
#{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': True, 'epochs': 10, 'first': 12, 'kernel_init': 'he_normal', 'l1_reg': 0.0001, 'l1reg_layer': True, 'last': 275, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 6}
#0.6741049927757264 
# NOTE(review): the trial number is hard-coded; the search is not seeded, so after
# a re-run index 78 will most likely point to a different model - confirm before saving.
model = trials.results[78]['model']
model.save('best_WiDee_V8.h5')

In [19]:
# Rank the trials by ROC-AUC, best first.
scores_df.sort_values(['ROC-AUC'], ascending=[False])  # append .loc[:,('Params','Recall','ROC-AUC')] to narrow the columns

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
63,63,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980503,0.060065,0.632365,0.109709,0.944141,"[3211062, 61640, 2290, 3939]"
73,73,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.980592,0.062307,0.655964,0.113803,0.929167,"[3211209, 61493, 2143, 4086]"
49,49,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980551,0.06127,0.645047,0.11191,0.927612,"[3211141, 61561, 2211, 4018]"
52,52,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980529,0.060721,0.639268,0.110907,0.927335,"[3211105, 61597, 2247, 3982]"
32,32,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.980415,0.057869,0.609247,0.105699,0.925333,"[3210918, 61784, 2434, 3795]"
...,...,...,...,...,...,...,...,...
3,3,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.979371,0.031763,0.334404,0.058016,0.801118,"[3209206, 63496, 4146, 2083]"
98,98,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.979083,0.024566,0.258629,0.04487,0.763975,"[3208734, 63968, 4618, 1611]"
94,94,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.979503,0.035072,0.369241,0.06406,0.76144,"[3209423, 63279, 3929, 2300]"
15,15,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.978983,0.02208,0.232461,0.04033,0.673012,"[3208571, 64131, 4781, 1448]"


In [21]:
# Save the model with the best ROC-AUC.
# Reference values from the recorded run, where this was trial 63
# (table row, parameters, recall):
#63	63	{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...	0.980503	0.060065	0.632365	0.109709	0.944141	[3211062, 61640, 2290, 3939]
#{'activ': 'elu', 'dropout': 0.2, 'dropout_layer': False, 'epochs': 10, 'first': 11, 'kernel_init': 'he_normal', 'l1_reg': 0.01, 'l1reg_layer': False, 'last': 450, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 4}
#0.6323647455450313
# Pick the trial from scores_df instead of hard-coding its number: the search is
# not seeded, so the best trial has a different index on every run. The row
# labels of scores_df are the call counter of `objective`, which is the same
# correspondence with trials.results that the hard-coded index relied on.
best_auc_trial = int(scores_df['ROC-AUC'].astype(float).idxmax())
model = trials.results[best_auc_trial]['model']
model.save('best_WiDee_V9-ROC.h5')

In [10]:
# First round of the search.
# NOTE(review): this cell has a lower execution count than the cells before it and
# its recorded output differs from the second-round ranking, so it presumably
# shows an earlier fmin run - confirm.
# Rank the trials by Recall, breaking ties by ROC-AUC (best first).
# Alternative ranking by ROC-AUC only:
#scores_df.sort_values(['ROC-AUC'], ascending=[False])
scores_df.sort_values(['Recall','ROC-AUC'], ascending=[False,False])  # append .loc[:,('Params','Recall','ROC-AUC')] to narrow the columns

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
46,46,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.980606,0.062642,0.659496,0.114416,0.915661,"[3211231, 61471, 2121, 4108]"
73,73,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980605,0.062627,0.659335,0.114388,0.912622,"[3211230, 61472, 2122, 4107]"
82,82,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980605,0.062612,0.659175,0.114361,0.898631,"[3211229, 61473, 2123, 4106]"
67,67,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.980599,0.062474,0.65773,0.11411,0.916814,"[3211220, 61482, 2132, 4097]"
57,57,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.980598,0.062444,0.657409,0.114054,0.915884,"[3211218, 61484, 2134, 4095]"
...,...,...,...,...,...,...,...,...
10,10,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.979198,0.027448,0.288971,0.050134,0.840896,"[3208923, 63779, 4429, 1800]"
54,54,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.979155,0.02638,0.277733,0.048184,0.903616,"[3208853, 63849, 4499, 1730]"
8,8,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.979148,0.026197,0.275807,0.04785,0.836434,"[3208841, 63861, 4511, 1718]"
94,94,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.979128,0.025694,0.270509,0.046931,0.875916,"[3208808, 63894, 4544, 1685]"


In [11]:
# Save the model of trial 46. The three lines below are pasted from the ranking
# table and the search log of the recorded run (table row, parameters, recall).
#46	46	{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...	0.980606	0.062642	0.659496	0.114416	0.915661	[3211231, 61471, 2121, 4108]
#{'activ': 'elu', 'dropout': 0.1, 'dropout_layer': True, 'epochs': 10, 'first': 12, 'kernel_init': 'he_normal', 'l1_reg': 0.01, 'l1reg_layer': False, 'last': 325, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 3}
#0.6594959062449831
# NOTE(review): the trial number is hard-coded and this cell's execution count is
# lower than that of the cells before it, so it presumably refers to an earlier
# fmin run; with the current `trials` object index 46 is a different model - confirm.
model = trials.results[46]['model']
model.save('best_WiDee_V4.h5')

In [12]:
# Save the model of trial 73. The three lines below are pasted from the ranking
# table and the search log of the recorded run (table row, parameters, recall).
#73	73	{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...	0.980605	0.062627	0.659335	0.114388	0.912622	[3211230, 61472, 2122, 4107]
#{'activ': 'elu', 'dropout': 0.2, 'dropout_layer': True, 'epochs': 10, 'first': 11, 'kernel_init': 'he_normal', 'l1_reg': 5e-05, 'l1reg_layer': False, 'last': 325, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 3}
#0.6593353668325574
# NOTE(review): the trial number is hard-coded and this cell's execution count is
# lower than that of the cells before it, so it presumably refers to an earlier
# fmin run; with the current `trials` object index 73 is a different model - confirm.
model = trials.results[73]['model']
model.save('best_WiDee_V5.h5')

In [16]:
# Rank the trials by ROC-AUC, best first.
scores_df.sort_values(['ROC-AUC'], ascending=[False])

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
16,16,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.979893,0.044816,0.471825,0.081857,0.941597,"[3210062, 62640, 3290, 2939]"
9,9,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.980417,0.05793,0.609889,0.10581,0.940684,"[3210922, 61780, 2430, 3799]"
86,86,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980453,0.05883,0.619361,0.107453,0.934897,"[3210981, 61721, 2371, 3858]"
79,79,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.979819,0.042971,0.4524,0.078487,0.932427,"[3209941, 62761, 3411, 2818]"
1,1,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.980575,0.061864,0.651308,0.112996,0.929472,"[3211180, 61522, 2172, 4057]"
...,...,...,...,...,...,...,...,...
51,51,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.979254,0.028851,0.303741,0.052696,0.851181,"[3209015, 63687, 4337, 1892]"
10,10,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.979198,0.027448,0.288971,0.050134,0.840896,"[3208923, 63779, 4429, 1800]"
59,59,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.979086,0.024642,0.259432,0.045009,0.838379,"[3208739, 63963, 4613, 1616]"
8,8,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.979148,0.026197,0.275807,0.04785,0.836434,"[3208841, 63861, 4511, 1718]"


In [17]:
# Save the model with the second-best ROC-AUC, because the best one has low recall.
# The three lines below are pasted from the ranking table and the search log of
# the recorded run (table row, parameters, recall).
#9	9	{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...	0.980417	0.05793	0.609889	0.10581	0.940684	[3210922, 61780, 2430, 3799]
#{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': True, 'epochs': 10, 'first': 9, 'kernel_init': 'he_normal', 'l1_reg': 0.01, 'l1reg_layer': False, 'last': 350, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam', 'second': 3}
#0.6098892278054262
# NOTE(review): the trial number is hard-coded and this cell's execution count is
# lower than that of the cells before it, so it presumably refers to an earlier
# fmin run; with the current `trials` object index 9 is a different model - confirm.
model = trials.results[9]['model']
model.save('best_WiDee_V6-ROC.h5')