In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

from ipynb.fs.full.Cert_Aux_Functions2 import *

# Importa a biblioteca pandas 
import pandas as pd

# Importa datetime e timedelta para verificar se há gaps de tempo nos datasets preparados
from datetime import datetime, timedelta

# Importa a biblioteca os
import os
from pathlib import Path

## Bibliotecas sklearn
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder, LabelEncoder
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler, FunctionTransformer
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.impute import SimpleImputer

# Using Kernel PCA
from sklearn.decomposition import PCA, KernelPCA

# Using Gaussian Mixtures
from sklearn.mixture import GaussianMixture

# One Class SVM
# https://scikit-learn.org/stable/auto_examples/svm/plot_oneclass.html
from sklearn import svm
from sklearn import linear_model

#Para as figuras
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

#Importa bibliotecas Numpy
import numpy as np

# Importa as bibliotecas para os cálculos do desvio padrao
from scipy.stats import norm
from scipy.stats import chi2
import statistics

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
# Compare (major, minor) numerically: a plain string comparison is lexicographic,
# so a version such as "10.0" would be rejected because "1" sorts before "2".
assert tuple(int(part) for part in tf.__version__.split(".")[:2]) >= (2, 0)

from keras import optimizers, Sequential
from keras.models import Model
#from keras.utils import plot_model
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed
from keras.callbacks import ModelCheckpoint, TensorBoard

from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Hourly dataset combining Logon + HTTP + USB + Device events, ordered by date
# and re-indexed so that index labels equal row positions.
# NOTE: unpickling executes code stored in the file; only load trusted artifacts.
df_lhud_1hora = (
    pd.read_pickle("df_lhud_1hora_file.pkl")
    .sort_values('date', ascending=True)
    .reset_index(drop=True)
)

## Training period: the first 4 months of data
df_lhud_1hora4m = df_lhud_1hora.loc[df_lhud_1hora['date'] <= '2010-05-02']

# Index of the last row in the training period / in the test period.
test_index = df_lhud_1hora.loc[df_lhud_1hora['date'] <= '2010-09-02'].index[-1]
train_index = df_lhud_1hora4m.index[-1]

## Test set
# NOTE(review): this slice starts AT train_index (the last training row) and stops
# BEFORE test_index (the last row dated <= 2010-09-02). That looks like an
# off-by-one, but previously generated label files may be aligned with it --
# confirm before changing to iloc[train_index + 1:test_index + 1].
df_lhud_1hora_test = df_lhud_1hora.iloc[train_index:test_index]

### Transformations

# Names of the numeric features for this dataset
lhud_numeric_features = [
    'logon', 'logoff', 'down', 'up', 'vis', 'conn', 'disc',
    'trm', 'frm', 'open', 'write', 'copy', 'delete',
]

# Numeric features are rescaled to the 0..1 range.
# (StandardScaler and SimpleImputer were the alternatives left commented out.)
numeric_transformer = MinMaxScaler()

# Fixed category lists: every hour of the day, every day of the week, and the
# users present in the 4-month training frame.
hour_categories = np.arange(24)
dow_categories = np.arange(7)
user_categories = df_lhud_1hora4m['user'].unique()

# Categorical features (common to all datasets); 'user' is handled separately below.
categorical_features = ['hour', 'dow']
categorical_transformer = OneHotEncoder(categories=[hour_categories, dow_categories])

# The user column gets an ordinal code in the combined preprocessor and a
# one-hot encoding in the wide preprocessor.
user_feature = ['user']
user_transformer = OrdinalEncoder(categories=[user_categories])
user_transformer_wd = OneHotEncoder(categories=[user_categories])

numeric_step = ('lhud_num', numeric_transformer, lhud_numeric_features)
categorical_step = ('lhud_cat', categorical_transformer, categorical_features)

# All feature groups together: scaled numerics + one-hot hour/dow + ordinal user.
lhud_preprocessor = ColumnTransformer(
    transformers=[
        numeric_step,
        categorical_step,
        ('lhud_user', user_transformer, user_feature),
    ]
)

# "Deep" branch: scaled numeric features only.
lhud_preprocessor_deep = ColumnTransformer(transformers=[numeric_step])

# "Wide" branch: one-hot hour/dow + one-hot user.
lhud_preprocessor_wide = ColumnTransformer(
    transformers=[
        categorical_step,
        ('lhud_user_wd', user_transformer_wd, user_feature),
    ]
)

# Simple transformation, with all feature groups together.
# Each preprocessor is fitted on the 4-month training frame only and then
# applied unchanged to the other frames.
columns = [*lhud_numeric_features, *categorical_features, *user_feature]
trans_lhud_4m = lhud_preprocessor.fit_transform(df_lhud_1hora4m.loc[:, columns])
trans_lhud_test = lhud_preprocessor.transform(df_lhud_1hora_test.loc[:, columns])
trans_lhud = lhud_preprocessor.transform(df_lhud_1hora.loc[:, columns])

# Deep transformation: numeric features only
columns_deep = lhud_numeric_features
trans_lhud_4m_deep = lhud_preprocessor_deep.fit_transform(df_lhud_1hora4m.loc[:, columns_deep])
trans_lhud_test_deep = lhud_preprocessor_deep.transform(df_lhud_1hora_test.loc[:, columns_deep])

# Wide transformation: hour, day of week and user
columns_wide = [*categorical_features, *user_feature]
trans_lhud_4m_wide = lhud_preprocessor_wide.fit_transform(df_lhud_1hora4m.loc[:, columns_wide])
trans_lhud_test_wide = lhud_preprocessor_wide.transform(df_lhud_1hora_test.loc[:, columns_wide])



In [3]:
### Load the files and data for insider 1
user = 'ACM2278'

# Frames stored for this user (loaded from pickle; only load trusted files).
acm2278_test = pd.read_pickle("acm2278_test.pkl")
acm2278_test_full = pd.read_pickle("acm2278_test_full.pkl")

### Labels predicted by Snorkel *** adjusted
# Column 0 holds the predicted label; 'anom' maps label 1 to -1 and everything else to 1.
labels_pd = pd.read_hdf("labels_pd.hdf", key='df')
labels_pd['anom'] = np.where(labels_pd[0] == 1, -1, 1)
# Not the last expression of the cell, so this pair of shapes is computed but not displayed.
labels_pd.loc[labels_pd['anom'] == -1].shape, labels_pd.loc[labels_pd['anom'] == 1].shape

### Labels predicted by Snorkel *** general
labels_g_pd = pd.read_hdf("labels_g_pd.hdf", key='df')
labels_g_pd['anom'] = np.where(labels_g_pd[0] == 1, -1, 1)
# Displayed as the cell output: (shape of rows with anom == -1, shape of rows with anom == 1).
labels_g_pd.loc[labels_g_pd['anom'] == -1].shape, labels_g_pd.loc[labels_g_pd['anom'] == 1].shape

((6229, 2), (3272702, 2))

In [4]:
# When re-running this cell, the previous ds_* variables can be released first with
# `del ds_deep_test, ds_wide_test, ds_test, ds_deep_train, ds_wide_train, ds_train`.

n_steps = 1          # shift between consecutive windows
wind = n_steps       # number of rows per window
batch_size = 1024


def _sliding_windows(dataset, size, shift):
    """Group consecutive elements of `dataset` into windows of `size` elements.

    Windows start every `shift` elements, incomplete trailing windows are dropped,
    and each window is emitted as one batched element.
    """
    return dataset.window(size, shift=shift, drop_remainder=True).flat_map(
        lambda window: window.batch(size)
    )


def _make_wide_deep_dataset(deep_features, wide_features, size, shift, batch):
    """Build the windowed deep/wide datasets and the batched dataset fed to Keras.

    Parameters
    ----------
    deep_features : array of numeric features (one row per sample).
    wide_features : scipy sparse matrix of one-hot features with the same number of
        rows; converted with `convert_sparse_matrix_to_sparse_tensor`.
    size, shift : window length and shift passed to `_sliding_windows`.
    batch : batch size of the final dataset.

    Returns
    -------
    (ds_deep, ds_wide, ds) where `ds` yields
    ({"deep_input": ..., "wide_input": ...}, deep_target); the target is the deep
    input itself.

    Raises
    ------
    ValueError
        If the two feature matrices have a different number of rows. Without this
        check `Dataset.zip` would silently stop at the shorter one.
    """
    if deep_features.shape[0] != wide_features.shape[0]:
        raise ValueError(
            "deep and wide features must have the same number of rows: "
            f"{deep_features.shape[0]} != {wide_features.shape[0]}"
        )

    ds_deep = _sliding_windows(
        tf.data.Dataset.from_tensor_slices(deep_features), size, shift
    )
    ds_wide = _sliding_windows(
        tf.data.Dataset.from_tensor_slices(
            convert_sparse_matrix_to_sparse_tensor(wide_features)
        ),
        size,
        shift,
    )

    ds = tf.data.Dataset.zip(({"deep_input": ds_deep, "wide_input": ds_wide}, ds_deep))
    return ds_deep, ds_wide, ds.batch(batch).cache().prefetch(1)


ds_deep_train, ds_wide_train, ds_train = _make_wide_deep_dataset(
    trans_lhud_4m_deep, trans_lhud_4m_wide, wind, n_steps, batch_size
)

ds_deep_test, ds_wide_test, ds_test = _make_wide_deep_dataset(
    trans_lhud_test_deep, trans_lhud_test_wide, wind, n_steps, batch_size
)

ds_train.element_spec, ds_test.element_spec

(({'deep_input': TensorSpec(shape=(None, None, 13), dtype=tf.float64, name=None),
   'wide_input': SparseTensorSpec(TensorShape([None, None, 4031]), tf.float64)},
  TensorSpec(shape=(None, None, 13), dtype=tf.float64, name=None)),
 ({'deep_input': TensorSpec(shape=(None, None, 13), dtype=tf.float64, name=None),
   'wide_input': SparseTensorSpec(TensorShape([None, None, 4031]), tf.float64)},
  TensorSpec(shape=(None, None, 13), dtype=tf.float64, name=None)))

In [5]:
##otimizado
def objective(space):
    """Hyperopt objective: build, train and score one wide/deep Conv1D-LSTM model.

    The deep branch (Conv1D followed by an LSTM encoder/decoder) processes the
    numeric features; its output is concatenated with the wide (one-hot) input,
    passed through a dense head and projected back to the numeric feature width.
    The model is trained on the global `ds_train`, predictions on `ds_test` are
    turned into anomaly scores and benchmarked against `labels_g_pd`.

    Parameters
    ----------
    space : dict
        One sampled point of the search space. Keys read here: 'filters',
        'kernel_s', 'first', 'intermediate', 'last', 'kernel_init',
        'last_batch_layer', 'l1reg_layer', 'l1_reg', 'dropout_layer', 'dropout',
        'loss_ob', 'optimizer', 'metrics', 'epochs' and, optionally, 'activ'
        (defaults to "elu" when absent).

    Returns
    -------
    dict
        Hyperopt result with 'loss' set to the negative recall (so that `fmin`
        maximises recall), 'status', the sampled 'space', the trained 'model' and
        the remaining benchmark values.

    Side effects
    ------------
    Appends one row to the global `scores_df` at position `index`, increments the
    global `index`, and prints the sampled space with its recall.

    NOTE(review): every trained model is kept alive inside the returned dict (and
    therefore inside `Trials`), and hyperparameters are selected on the same data
    that is used for the reported scores -- confirm both are intended.
    """
    global index

    # Feature widths come from the prepared matrices instead of hard-coded numbers,
    # so the model keeps matching the data if the feature list or user set changes.
    n_deep = trans_lhud_4m_deep.shape[1]
    n_wide = trans_lhud_4m_wide.shape[1]

    input_feat = keras.layers.Input(shape=[None, n_deep], name="deep_input")
    input_time_user = keras.layers.Input(shape=[None, n_wide], name="wide_input")

    # Deep branch: Conv1D -> LSTM encoder -> RepeatVector -> LSTM decoder.
    conv1d_ae = keras.models.Sequential([
        keras.layers.Conv1D(filters=space['filters'], kernel_size=space['kernel_s'], strides=1, padding="same",
                            activation="selu", input_shape=[None, n_deep]),
        keras.layers.LSTM(space['first'], activation='tanh', return_sequences=True),
        keras.layers.LSTM(space['intermediate'], activation='tanh', return_sequences=False),
        keras.layers.RepeatVector(wind),
        keras.layers.LSTM(space['intermediate'], activation='tanh', return_sequences=True),
        keras.layers.LSTM(space['first'], activation='tanh', return_sequences=True),
    ])
    deep_output = conv1d_ae(input_feat)

    concat = keras.layers.concatenate([input_time_user, deep_output])

    # Dense head; batch normalisation, L1 activity regularisation and dropout are
    # each switched on or off by the sampled space.
    last = keras.models.Sequential()
    last.add(keras.layers.Dense(space['last'],
                                kernel_initializer=space['kernel_init']))
    if space['last_batch_layer']:
        last.add(keras.layers.BatchNormalization())
    # Honour the sampled activation; it was previously hard-coded to "elu" even
    # though 'activ' is part of the search space.
    last.add(keras.layers.Activation(space.get('activ', "elu")))
    if space['l1reg_layer']:
        last.add(keras.layers.ActivityRegularization(l1=space['l1_reg']))
    if space['dropout_layer']:
        last.add(keras.layers.Dropout(space['dropout']))
    head_output = last(concat)

    output = keras.layers.Dense(n_deep, name="output")(head_output)

    model = keras.models.Model(inputs=[input_feat, input_time_user], outputs=[output])

    # `metrics` is documented as a list; wrap the single sampled metric name.
    model.compile(loss=space['loss_ob'], optimizer=space['optimizer'], metrics=[space['metrics']])

    model.fit(ds_train, epochs=space['epochs'], verbose=0)

    y_pred = model.predict(ds_test)

    # NOTE(review): 99.0 is presumably the percentile used as the anomaly
    # threshold inside dnn_tf_anomScores -- confirm against that helper.
    # (anomScores_Snorkel was the alternative scorer left commented out.)
    anomalyScores, den_thres = dnn_tf_anomScores(
        y_pred, trans_lhud_test_deep, df_lhud_1hora_test, 99.0)

    a, p, r, f, cm, auc_sc = benchmark_snorkel(labels_g_pd, anomalyScores)

    # Row layout follows the scores_df columns:
    # Model, Params, Accuracy, Precision, Recall, F1-Score, ROC-AUC, CM (flattened).
    scores_df.loc[index,:]=np.array([index,space,a,p,r,f,auc_sc,np.reshape(cm,(4))],dtype=object)

    index = index + 1

    print(space, r)

    return {'loss': -r, 'status': STATUS_OK, 'space': space,
            'model': model, 'f1_score': f, 'auc_sc': auc_sc,
            'precision': p, 'recall': r, 'c_matrix': cm}

# Candidate values for every hyperparameter read by `objective`.
# Each entry becomes an hp.choice whose label is identical to its key.
search_options = {
    'filters': [10, 11, 12, 13],            # Conv1D filters
    'kernel_s': [1, 2, 3, 4, 5, 6],         # Conv1D kernel size
    'first': np.arange(7, 14, 1),           # units of the outer LSTM layers
    'intermediate': np.arange(4, 11, 1),    # units of the inner LSTM layers
    'last': np.arange(100, 500, 25),        # units of the dense head
    'kernel_init': ["he_normal"],
    'activ': ["elu"],
    'loss_ob': ["mae"],                     # "Huber" was the alternative left commented out
    'optimizer': ["nadam"],
    'metrics': ["mean_absolute_error", "mean_squared_error"],
    'epochs': [10],
    'l1_reg': [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01],
    'dropout': [0.0, 0.05, 0.1, 0.15, 0.2],
    'last_batch_layer': [True, False],      # add BatchNormalization to the head?
    'l1reg_layer': [True, False],           # add ActivityRegularization to the head?
    'dropout_layer': [True, False],         # add Dropout to the head?
}

space = {name: hp.choice(name, options) for name, options in search_options.items()}

In [6]:
# Search round; best recall not recorded ("??").

# Start from an empty results table and row counter.
# Comment these two statements out to keep the previous round's rows in scores_df.
index = 0
scores_df = pd.DataFrame(
    columns=[
        "Model", "Params", "Accuracy", "Precision",
        "Recall", "F1-Score", "ROC-AUC", "CM",
    ]
)

# 100 TPE-guided evaluations of `objective` over `space`.
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials=trials)

{'activ': 'elu', 'dropout': 0.1, 'dropout_layer': False, 'epochs': 10, 'filters': 11, 'first': 12, 'intermediate': 6, 'kernel_init': 'he_normal', 'kernel_s': 4, 'l1_reg': 0.0005, 'l1reg_layer': True, 'last': 200, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.25317065339540856                                                                                                    
{'activ': 'elu', 'dropout': 0.1, 'dropout_layer': True, 'epochs': 10, 'filters': 10, 'first': 12, 'intermediate': 4, 'kernel_init': 'he_normal', 'kernel_s': 4, 'l1_reg': 1e-05, 'l1reg_layer': False, 'last': 275, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.454326537164874                                                                                                      
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': False, 'epochs': 10, 'filters': 11, 'first': 9, 'intermediate': 7, 'kernel_init': 

0.305667041258629                                                                                                      
{'activ': 'elu', 'dropout': 0.15, 'dropout_layer': False, 'epochs': 10, 'filters': 10, 'first': 11, 'intermediate': 10, 'kernel_init': 'he_normal', 'kernel_s': 5, 'l1_reg': 0.01, 'l1reg_layer': False, 'last': 225, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.24321720982501205                                                                                                    
{'activ': 'elu', 'dropout': 0.0, 'dropout_layer': False, 'epochs': 10, 'filters': 12, 'first': 10, 'intermediate': 6, 'kernel_init': 'he_normal', 'kernel_s': 3, 'l1_reg': 0.0005, 'l1reg_layer': False, 'last': 350, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.6166318831273078                                                                                                     
{'activ': 

0.2552576657569433                                                                                                     
{'activ': 'elu', 'dropout': 0.0, 'dropout_layer': False, 'epochs': 10, 'filters': 11, 'first': 10, 'intermediate': 6, 'kernel_init': 'he_normal', 'kernel_s': 2, 'l1_reg': 5e-05, 'l1reg_layer': False, 'last': 475, 'last_batch_layer': True, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.2876866270669449                                                                                                     
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': False, 'epochs': 10, 'filters': 12, 'first': 12, 'intermediate': 4, 'kernel_init': 'he_normal', 'kernel_s': 1, 'l1_reg': 0.0005, 'l1reg_layer': False, 'last': 100, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.30277733183496547                                                                                                    
{'activ': '

0.620966447262803                                                                                                      
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': False, 'epochs': 10, 'filters': 11, 'first': 13, 'intermediate': 9, 'kernel_init': 'he_normal', 'kernel_s': 5, 'l1_reg': 0.0001, 'l1reg_layer': True, 'last': 225, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.39894043987799005                                                                                                    
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': True, 'epochs': 10, 'filters': 13, 'first': 7, 'intermediate': 9, 'kernel_init': 'he_normal', 'kernel_s': 4, 'l1_reg': 0.0001, 'l1reg_layer': True, 'last': 225, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
0.2576657569433296                                                                                                     
{'activ': 'el

0.22491571680847647                                                                                                    
{'activ': 'elu', 'dropout': 0.2, 'dropout_layer': False, 'epochs': 10, 'filters': 12, 'first': 10, 'intermediate': 9, 'kernel_init': 'he_normal', 'kernel_s': 3, 'l1_reg': 0.0001, 'l1reg_layer': False, 'last': 275, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.5872531706533954                                                                                                     
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': True, 'epochs': 10, 'filters': 12, 'first': 13, 'intermediate': 10, 'kernel_init': 'he_normal', 'kernel_s': 5, 'l1_reg': 0.001, 'l1reg_layer': True, 'last': 375, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
0.5716808476480976                                                                                                     
{'activ': 'e

In [22]:
# Rank the trials by recall, then ROC-AUC, both descending.
# (Alternatives left out: sort by ROC-AUC only; keep only Params/Recall/ROC-AUC.)
scores_df.sort_values(by=['Recall', 'ROC-AUC'], ascending=False)

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
94,94,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.990467,0.118329,0.622893,0.198877,0.900966,"[3243792, 28910, 2349, 3880]"
69,69,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.990456,0.11781,0.620164,0.198006,0.910598,"[3243775, 28927, 2366, 3863]"
7,7,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.990434,0.116682,0.614224,0.19611,0.930548,"[3243738, 28964, 2403, 3826]"
87,87,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.990411,0.115554,0.608284,0.194213,0.9292,"[3243701, 29001, 2440, 3789]"
84,84,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.990394,0.114669,0.603628,0.192727,0.926184,"[3243672, 29030, 2469, 3760]"
...,...,...,...,...,...,...,...,...
22,22,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.988962,0.043092,0.226842,0.072426,0.810787,"[3241325, 31377, 4816, 1413]"
31,31,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.988932,0.041598,0.218976,0.069915,0.869472,"[3241276, 31426, 4865, 1364]"
74,74,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.988931,0.041537,0.218655,0.069812,0.806844,"[3241274, 31428, 4867, 1362]"
36,36,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.988916,0.040775,0.214641,0.068531,0.863259,"[3241249, 31453, 4892, 1337]"


In [7]:
# Search round that reached a best recall of 68.64%.

# Start from an empty results table and row counter.
# Comment these two statements out to keep the previous round's rows in scores_df.
index = 0
scores_df = pd.DataFrame(
    columns=[
        "Model", "Params", "Accuracy", "Precision",
        "Recall", "F1-Score", "ROC-AUC", "CM",
    ]
)

# 100 TPE-guided evaluations of `objective` over `space`.
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials=trials)

{'activ': 'elu', 'epochs': 10, 'filters': 13, 'first': 9, 'intermediate': 10, 'kernel_init': 'he_normal', 'kernel_s': 2, 'last': 325, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'adam'}
0.5827580671054744                                                                                                     
{'activ': 'elu', 'epochs': 10, 'filters': 10, 'first': 12, 'intermediate': 8, 'kernel_init': 'he_normal', 'kernel_s': 1, 'last': 250, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
0.3613742173703644                                                                                                     
{'activ': 'elu', 'epochs': 10, 'filters': 12, 'first': 13, 'intermediate': 5, 'kernel_init': 'he_normal', 'kernel_s': 1, 'last': 225, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'adam'}
0.5861293947664151                                                                                                     
{'activ': 'elu', 'e

{'activ': 'elu', 'epochs': 10, 'filters': 13, 'first': 12, 'intermediate': 7, 'kernel_init': 'he_normal', 'kernel_s': 3, 'last': 400, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.38914753572001926                                                                                                    
{'activ': 'elu', 'epochs': 10, 'filters': 10, 'first': 10, 'intermediate': 10, 'kernel_init': 'he_normal', 'kernel_s': 5, 'last': 175, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'adam'}
0.6195215925509713                                                                                                     
{'activ': 'elu', 'epochs': 10, 'filters': 11, 'first': 11, 'intermediate': 6, 'kernel_init': 'he_normal', 'kernel_s': 2, 'last': 325, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.3718092791780382                                                                                                     
{'activ': 'elu', 

100%|██████████████████████████████████████████| 100/100 [8:32:21<00:00, 307.41s/trial, best loss: -0.6864665275325093]


In [9]:
# Rank the trials by recall, then ROC-AUC, both descending.
# (Alternatives left out: sort by ROC-AUC only; keep only Params/Recall/ROC-AUC.)
scores_df.sort_values(by=['Recall', 'ROC-AUC'], ascending=False)

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
73,73,"{'activ': 'elu', 'epochs': 10, 'filters': 10, ...",0.980708,0.065204,0.686467,0.119095,0.939151,"[3211399, 61303, 1953, 4276]"
23,23,"{'activ': 'elu', 'epochs': 10, 'filters': 10, ...",0.980705,0.065112,0.685503,0.118928,0.92963,"[3211393, 61309, 1959, 4270]"
20,20,"{'activ': 'elu', 'epochs': 10, 'filters': 10, ...",0.980699,0.064975,0.684058,0.118678,0.937634,"[3211384, 61318, 1968, 4261]"
43,43,"{'activ': 'elu', 'epochs': 10, 'filters': 11, ...",0.980694,0.064838,0.682614,0.118427,0.935471,"[3211375, 61327, 1977, 4252]"
38,38,"{'activ': 'elu', 'epochs': 10, 'filters': 10, ...",0.980692,0.064807,0.682293,0.118371,0.937759,"[3211373, 61329, 1979, 4250]"
...,...,...,...,...,...,...,...,...
97,97,"{'activ': 'elu', 'epochs': 10, 'filters': 13, ...",0.979416,0.032892,0.346284,0.060077,0.89085,"[3209280, 63422, 4072, 2157]"
21,21,"{'activ': 'elu', 'epochs': 10, 'filters': 10, ...",0.979411,0.03277,0.344999,0.059854,0.883029,"[3209272, 63430, 4080, 2149]"
8,8,"{'activ': 'elu', 'epochs': 10, 'filters': 13, ...",0.979387,0.03216,0.338578,0.05874,0.892773,"[3209232, 63470, 4120, 2109]"
26,26,"{'activ': 'elu', 'epochs': 10, 'filters': 12, ...",0.979384,0.032083,0.337775,0.058601,0.873759,"[3209227, 63475, 4125, 2104]"


In [22]:
# Search round that reached a best recall of 68.61%.

# The results table and row counter are deliberately NOT reset here, so the rows
# of this round are appended after the previous round's rows in scores_df.
# To start from scratch instead, run:
#   scores_df = pd.DataFrame(columns=["Model","Params","Accuracy","Precision","Recall","F1-Score","ROC-AUC", "CM"])
#   index = 0

# 100 TPE-guided evaluations of `objective` over `space`.
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials=trials)

{'activ': 'elu', 'epochs': 10, 'filters': 9, 'first': 12, 'intermediate': 4, 'kernel_init': 'he_normal', 'kernel_s': 1, 'last': 350, 'loss_ob': 'mse', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.34997591908813613                                                                                                                                         
{'activ': 'elu', 'epochs': 10, 'filters': 12, 'first': 13, 'intermediate': 6, 'kernel_init': 'he_normal', 'kernel_s': 5, 'last': 250, 'loss_ob': 'mse', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
0.301814095360411                                                                                                                                           
{'activ': 'elu', 'epochs': 10, 'filters': 8, 'first': 8, 'intermediate': 10, 'kernel_init': 'he_normal', 'kernel_s': 6, 'last': 325, 'loss_ob': 'mse', 'metrics': 'mean_absolute_error', 'optimizer': 'adam'}
0.38577620805907853                                               

0.34997591908813613                                                                                                                                         
{'activ': 'elu', 'epochs': 10, 'filters': 12, 'first': 8, 'intermediate': 7, 'kernel_init': 'he_normal', 'kernel_s': 3, 'last': 275, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.3618558356076417                                                                                                                                          
{'activ': 'elu', 'epochs': 10, 'filters': 8, 'first': 8, 'intermediate': 5, 'kernel_init': 'he_normal', 'kernel_s': 6, 'last': 200, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.35928720500882966                                                                                                                                         
{'activ': 'elu', 'epochs': 10, 'filters': 12, 'first': 9, 'intermediate': 6, 'kernel_init': 'he_normal', 'kernel_s':

0.48563172258789533                                                                                                                                         
{'activ': 'elu', 'epochs': 10, 'filters': 12, 'first': 9, 'intermediate': 7, 'kernel_init': 'he_normal', 'kernel_s': 6, 'last': 175, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.3539894043987799                                                                                                                                          
{'activ': 'elu', 'epochs': 10, 'filters': 12, 'first': 9, 'intermediate': 7, 'kernel_init': 'he_normal', 'kernel_s': 6, 'last': 475, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.6832557392839942                                                                                                                                          
{'activ': 'elu', 'epochs': 10, 'filters': 12, 'first': 9, 'intermediate': 7, 'kernel_init': 'he_normal', 'kernel_s'

In [24]:
# Skip the first 100 rows (the previous round) and rank the remaining trials by
# recall, then ROC-AUC, both descending.
# (Alternatives left out: sort by ROC-AUC only; keep only Params/Recall/ROC-AUC.)
scores_df.iloc[100:].sort_values(by=['Recall', 'ROC-AUC'], ascending=False)

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
176,176,"{'activ': 'elu', 'epochs': 10, 'filters': 12, ...",0.980707,0.065173,0.686145,0.11904,0.926301,"[3211397, 61305, 1955, 4274]"
108,108,"{'activ': 'elu', 'epochs': 10, 'filters': 8, '...",0.980697,0.064914,0.683416,0.118566,0.934261,"[3211380, 61322, 1972, 4257]"
169,169,"{'activ': 'elu', 'epochs': 10, 'filters': 12, ...",0.980696,0.064899,0.683256,0.118538,0.928241,"[3211379, 61323, 1973, 4256]"
136,136,"{'activ': 'elu', 'epochs': 10, 'filters': 8, '...",0.980696,0.064884,0.683095,0.11851,0.924981,"[3211378, 61324, 1974, 4255]"
177,177,"{'activ': 'elu', 'epochs': 10, 'filters': 12, ...",0.980694,0.064853,0.682774,0.118455,0.93863,"[3211376, 61326, 1976, 4253]"
...,...,...,...,...,...,...,...,...
154,154,"{'activ': 'elu', 'epochs': 10, 'filters': 13, ...",0.979104,0.025099,0.264248,0.045844,0.776766,"[3208769, 63933, 4583, 1646]"
144,144,"{'activ': 'elu', 'epochs': 10, 'filters': 13, ...",0.979085,0.024627,0.259271,0.044981,0.817335,"[3208738, 63964, 4614, 1615]"
105,105,"{'activ': 'elu', 'epochs': 10, 'filters': 8, '...",0.979068,0.0242,0.254776,0.044201,0.767131,"[3208710, 63992, 4642, 1587]"
143,143,"{'activ': 'elu', 'epochs': 10, 'filters': 8, '...",0.978287,0.004666,0.049125,0.008523,0.678384,"[3207429, 65273, 5923, 306]"


In [12]:
# Search round that reached a best recall of 68.87%.

# Start from an empty results table and row counter, so that row numbers in
# scores_df match positions in trials.results for this round.
index = 0
scores_df = pd.DataFrame(
    columns=[
        "Model", "Params", "Accuracy", "Precision",
        "Recall", "F1-Score", "ROC-AUC", "CM",
    ]
)

# 100 TPE-guided evaluations of `objective` over `space`.
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials=trials)

{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': True, 'epochs': 10, 'filters': 12, 'first': 11, 'intermediate': 8, 'kernel_init': 'he_normal', 'kernel_s': 2, 'l1_reg': 0.0005, 'l1reg_layer': False, 'last': 250, 'last_batch_layer': False, 'loss_ob': 'Huber', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.3116069995183818                                                                                                                                          
{'activ': 'elu', 'dropout': 0.15, 'dropout_layer': True, 'epochs': 10, 'filters': 9, 'first': 10, 'intermediate': 8, 'kernel_init': 'he_normal', 'kernel_s': 6, 'l1_reg': 5e-05, 'l1reg_layer': True, 'last': 300, 'last_batch_layer': True, 'loss_ob': 'Huber', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.27339861936105314                                                                                                                                         
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': True,

0.4360250441483384                                                                                                                                          
{'activ': 'elu', 'dropout': 0.0, 'dropout_layer': True, 'epochs': 10, 'filters': 13, 'first': 13, 'intermediate': 4, 'kernel_init': 'he_normal', 'kernel_s': 6, 'l1_reg': 0.01, 'l1reg_layer': False, 'last': 350, 'last_batch_layer': False, 'loss_ob': 'Huber', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.37919409214962274                                                                                                                                         
{'activ': 'elu', 'dropout': 0.1, 'dropout_layer': True, 'epochs': 10, 'filters': 12, 'first': 9, 'intermediate': 10, 'kernel_init': 'he_normal', 'kernel_s': 3, 'l1_reg': 0.0005, 'l1reg_layer': False, 'last': 125, 'last_batch_layer': True, 'loss_ob': 'Huber', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.2512441804462996                                       

0.4753572001926473                                                                                                                                          
{'activ': 'elu', 'dropout': 0.0, 'dropout_layer': False, 'epochs': 10, 'filters': 11, 'first': 11, 'intermediate': 9, 'kernel_init': 'he_normal', 'kernel_s': 2, 'l1_reg': 0.01, 'l1reg_layer': True, 'last': 200, 'last_batch_layer': False, 'loss_ob': 'Huber', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
0.3467651308396211                                                                                                                                          
{'activ': 'elu', 'dropout': 0.1, 'dropout_layer': False, 'epochs': 10, 'filters': 13, 'first': 8, 'intermediate': 4, 'kernel_init': 'he_normal', 'kernel_s': 6, 'l1_reg': 0.001, 'l1reg_layer': False, 'last': 400, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
0.6845400545834002                                           

0.27307754053620165                                                                                                                                         
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': False, 'epochs': 10, 'filters': 10, 'first': 10, 'intermediate': 10, 'kernel_init': 'he_normal', 'kernel_s': 1, 'l1_reg': 1e-05, 'l1reg_layer': False, 'last': 125, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
0.3750200674265532                                                                                                                                          
{'activ': 'elu', 'dropout': 0.05, 'dropout_layer': False, 'epochs': 10, 'filters': 10, 'first': 11, 'intermediate': 6, 'kernel_init': 'he_normal', 'kernel_s': 1, 'l1_reg': 0.01, 'l1reg_layer': False, 'last': 450, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
0.42864023117675387                                      

In [13]:
# Rank the trials by recall, then ROC-AUC, both descending.
# (Alternatives left out: sort by ROC-AUC only; keep only Params/Recall/ROC-AUC.)
scores_df.sort_values(by=['Recall', 'ROC-AUC'], ascending=False)

Unnamed: 0,Model,Params,Accuracy,Precision,Recall,F1-Score,ROC-AUC,CM
83,83,"{'activ': 'elu', 'dropout': 0.05, 'dropout_lay...",0.980717,0.065417,0.688714,0.119485,0.940494,"[3211413, 61289, 1939, 4290]"
54,54,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.980701,0.065021,0.68454,0.118761,0.934148,"[3211387, 61315, 1965, 4264]"
74,74,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980691,0.064762,0.681811,0.118288,0.935358,"[3211370, 61332, 1982, 4247]"
95,95,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.980689,0.064716,0.681329,0.118204,0.926758,"[3211367, 61335, 1985, 4244]"
66,66,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.980688,0.064685,0.681008,0.118148,0.929436,"[3211365, 61337, 1987, 4242]"
...,...,...,...,...,...,...,...,...
11,11,"{'activ': 'elu', 'dropout': 0.0, 'dropout_laye...",0.979118,0.025435,0.26778,0.046457,0.735527,"[3208791, 63911, 4561, 1668]"
78,78,"{'activ': 'elu', 'dropout': 0.15, 'dropout_lay...",0.979111,0.025267,0.266014,0.046151,0.859202,"[3208780, 63922, 4572, 1657]"
50,50,"{'activ': 'elu', 'dropout': 0.2, 'dropout_laye...",0.979068,0.0242,0.254776,0.044201,0.623463,"[3208710, 63992, 4642, 1587]"
19,19,"{'activ': 'elu', 'dropout': 0.1, 'dropout_laye...",0.979055,0.023864,0.251244,0.043588,0.798334,"[3208688, 64014, 4664, 1565]"


In [14]:
# Trial 83 of the last round:
#   accuracy 0.980717, precision 0.065417, recall 0.688714 (0.6887140793064698),
#   F1 0.119485, ROC-AUC 0.940494, confusion matrix [3211413, 61289, 1939, 4290]
# Recorded parameters:
#   {'activ': 'elu', 'dropout': 0.05, 'dropout_layer': False, 'epochs': 10, 'filters': 7, 'first': 11, 'intermediate': 7, 'kernel_init': 'he_normal', 'kernel_s': 1, 'l1_reg': 1e-05, 'l1reg_layer': False, 'last': 125, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
# The trial number is copied by hand from the ranking table above.
trial_result = trials.results[83]
model = trial_result['model']
model.save('best_WiDee_Conv_LSTM_v4.h5')

In [15]:
# Trial 54 of the last round:
#   accuracy 0.980701, precision 0.065021, recall 0.68454 (0.6845400545834002),
#   F1 0.118761, ROC-AUC 0.934148, confusion matrix [3211387, 61315, 1965, 4264]
# Recorded parameters:
#   {'activ': 'elu', 'dropout': 0.1, 'dropout_layer': False, 'epochs': 10, 'filters': 13, 'first': 8, 'intermediate': 4, 'kernel_init': 'he_normal', 'kernel_s': 6, 'l1_reg': 0.001, 'l1reg_layer': False, 'last': 400, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_squared_error', 'optimizer': 'nadam'}
# The trial number is copied by hand from the ranking table above.
trial_result = trials.results[54]
model = trial_result['model']
model.save('best_WiDee_Conv_LSTM_v5.h5')

In [16]:
# Trial 74 of the last round:
#   accuracy 0.980691, precision 0.064762, recall 0.681811 (0.6818108845721624),
#   F1 0.118288, ROC-AUC 0.935358, confusion matrix [3211370, 61332, 1982, 4247]
# Recorded parameters:
#   {'activ': 'elu', 'dropout': 0.2, 'dropout_layer': False, 'epochs': 10, 'filters': 10, 'first': 11, 'intermediate': 4, 'kernel_init': 'he_normal', 'kernel_s': 6, 'l1_reg': 1e-05, 'l1reg_layer': False, 'last': 400, 'last_batch_layer': False, 'loss_ob': 'mae', 'metrics': 'mean_absolute_error', 'optimizer': 'nadam'}
# The trial number is copied by hand from the ranking table above.
trial_result = trials.results[74]
model = trial_result['model']
model.save('best_WiDee_Conv_LSTM_v6.h5')