# **Bibliotecas e dependências**

In [None]:
# Importação de bibliotecas utilizadas.
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras

from sklearn.linear_model import Perceptron
from sklearn.model_selection import cross_val_score 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.neural_network import MLPClassifier

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# **Manipulação do conjunto de dados**

In [None]:
# Especificação dos tipos de dados para menor uso de memória.
dtypes = {
        'MachineIdentifier':                                    'category',
        'EngineVersion':                                        'category',
        'AppVersion':                                           'category',
        'AvSigVersion':                                         'category',
        'RtpStateBitfield':                                     'float16',
        'IsSxsPassiveMode':                                     'int8',
        'AVProductStatesIdentifier':                            'float32',
        'AVProductsInstalled':                                  'float16',
        'AVProductsEnabled':                                    'float16',
        'HasTpm':                                               'int8',
        'CountryIdentifier':                                    'int16',
        'CityIdentifier':                                       'float32',
        'OrganizationIdentifier':                               'float16',
        'GeoNameIdentifier':                                    'float16',
        'LocaleEnglishNameIdentifier':                          'int8',
        'Platform':                                             'category',
        'Processor':                                            'category',
        'OsVer':                                                'category',
        'OsBuild':                                              'int16',
        'OsSuite':                                              'int16',
        'OsPlatformSubRelease':                                 'category',
        'OsBuildLab':                                           'category',
        'SkuEdition':                                           'category',
        'IsProtected':                                          'float16',
        'SMode':                                                'float16',
        'IeVerIdentifier':                                      'float16',
        'SmartScreen':                                          'category',
        'Firewall':                                             'float16',
        'Census_MDC2FormFactor':                                'category',
        'Census_DeviceFamily':                                  'category',
        'Census_OEMNameIdentifier':                             'float16',
        'Census_OEMModelIdentifier':                            'float32',
        'Census_ProcessorCoreCount':                            'float16',
        'Census_ProcessorManufacturerIdentifier':               'float16',
        'Census_ProcessorModelIdentifier':                      'float16',
        'Census_PrimaryDiskTotalCapacity':                      'float32',
        'Census_PrimaryDiskTypeName':                           'category',
        'Census_SystemVolumeTotalCapacity':                     'float32',
        'Census_HasOpticalDiskDrive':                           'int8',
        'Census_TotalPhysicalRAM':                              'float32',
        'Census_ChassisTypeName':                               'category',
        'Census_InternalPrimaryDiagonalDisplaySizeInInches':    'float16',
        'Census_InternalPrimaryDisplayResolutionHorizontal':    'float16',
        'Census_InternalPrimaryDisplayResolutionVertical':      'float16',
        'Census_PowerPlatformRoleName':                         'category',
        'Census_OSVersion':                                     'category',
        'Census_OSArchitecture':                                'category',
        'Census_OSBranch':                                      'category',
        'Census_OSBuildNumber':                                 'int16',
        'Census_OSBuildRevision':                               'int32',
        'Census_OSEdition':                                     'category',
        'Census_OSSkuName':                                     'category',
        'Census_OSInstallTypeName':                             'category',
        'Census_OSInstallLanguageIdentifier':                   'float16',
        'Census_OSUILocaleIdentifier':                          'int16',
        'Census_OSWUAutoUpdateOptionsName':                     'category',
        'Census_IsPortableOperatingSystem':                     'int8',
        'Census_GenuineStateName':                              'category',
        'Census_ActivationChannel':                             'category',
        'Census_FlightRing':                                    'category',
        'Census_FirmwareManufacturerIdentifier':                'float16',
        'Census_FirmwareVersionIdentifier':                     'float32',
        'Census_IsSecureBootEnabled':                           'int8',
        'Census_IsVirtualDevice':                               'float16',
        'Census_IsTouchEnabled':                                'int8',
        'Census_IsPenCapable':                                  'int8',
        'Census_IsAlwaysOnAlwaysConnectedCapable':              'float16',
        'Wdft_IsGamer':                                         'float16',
        'Wdft_RegionIdentifier':                                'float16',
        'HasDetections':                                        'int8',
        'Census_MDC2FormFactor_new':                            'int64'
        }

In [None]:
# Carregamento do conjunto de dados
df = pd.read_csv('MMP_Cleaned_Encoded.csv', sep = ',', 
                 index_col= 'MachineIdentifier', dtype = dtypes)

In [None]:
# Seleção dos registros utilizados.
df_size = 200000
df = df.sample(n = df_size)

In [None]:
# Exportação do conjunto de dados utilizado (sample).
df.to_csv('MMP_Sample.csv', index=False)

In [None]:
# Indicação e atribuição das variáveis dependentes e independentes.
X = df.drop(['HasDetections'], axis = 1)
y = df['HasDetections']

In [None]:
# Tratamento de variáveis categóricas para utilização dos modelos.
Xdummies_df = pd.get_dummies(X)
X = Xdummies_df.values

In [None]:
# Divisão do conjunto de dados em subconjuntos de treino e teste.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size = 0.25, 
                                                      train_size = 0.75, 
                                                      random_state = 0)

# **Definição dos modelos**

## **Perceptron**

In [None]:
# Implementação do modelo Perceptron, presente na biblioteca Scikit-Learn.
perceptron = Perceptron(penalty='l1', alpha=0.0001, fit_intercept=True, 
                        max_iter=1000, tol=0.001, shuffle=True, verbose=0,
                        eta0=1.0, n_jobs=None, random_state=0, 
                        early_stopping=False, validation_fraction=0.1, 
                        n_iter_no_change=5, class_weight=None, 
                        warm_start=False)

## **MLPClassifier**

In [None]:
# Implementação do modelo MLPClassifier, presente na biblioteca Scikit-Learn.
mlpc = MLPClassifier(hidden_layer_sizes=(100, 5), activation='tanh', 
                                solver='lbfgs', alpha=0.0001, batch_size='auto', 
                                learning_rate='constant', 
                                learning_rate_init=0.001, power_t=0.5, 
                                max_iter=200, shuffle=True, random_state=None, 
                                tol=0.0001, verbose=False, warm_start=False,
                                momentum=0.9, nesterovs_momentum=True, 
                                early_stopping=False, validation_fraction=0.1,
                                beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                                n_iter_no_change=10, max_fun=15000)

## **Sequential**

In [None]:
# Implementação do modelo Sequential, presente na API Keras de TensorFlow.
sequential = keras.Sequential()
sequential.add(Dense(100, input_dim=X.shape[1], activation='tanh', 
                     kernel_initializer='random_normal'))
sequential.add(Dense(50, activation='tanh', 
                     kernel_initializer='random_normal'))
sequential.add(Dense(100, activation='tanh', 
                     kernel_initializer='random_normal'))
sequential.add(Dense(50, activation='tanh', 
                     kernel_initializer='random_normal'))
sequential.add(Dense(100, activation='tanh', 
                     kernel_initializer='random_normal'))
sequential.add(Dense(1, activation='tanh', 
                     kernel_initializer='random_normal'))
sequential.compile(loss='binary_crossentropy', 
                   optimizer=tensorflow.keras.optimizers.Adam(), 
                   metrics =['accuracy'])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


# **Treinamento e previsões**

In [None]:
# Treinamento e realização das predições para o modelo Perceptron.
perceptron.fit(X_train, y_train)

perceptron_predictions = perceptron.predict(X_valid)
perceptron_rocauc = roc_auc_score(y_valid, perceptron_predictions)

In [None]:
# Treinamento e realização das predições para o modelo MLPClassifier.
mlpc.fit(X_train, y_train)

mlpc_predictions = mlpc.predict(X_valid)
mlpc_roc = roc_auc_score(y_valid, mlpc_predictions)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [None]:
# Treinamento e realização das predições para o modelo Sequential.
sequential.fit(X_train, y_train, validation_data=(X_valid, y_valid), 
               verbose=0, epochs=200)

sequential_predictions = sequential.predict(X_valid)
sequential_roc = roc_auc_score(y_valid, sequential_predictions)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


# **Avaliação e pontuações**

In [None]:
print('\n----- Perceptron (Scikit-Learn) -----')
print('\nPontuação ROC AUC:', perceptron_rocauc)
print('\nMatriz de confusão: \n', 
      confusion_matrix(y_valid, perceptron_predictions))
print('\nRelatório de classificação: \n', 
      classification_report(y_valid, perceptron_predictions))


----- Perceptron (Scikit-Learn) -----

Pontuação ROC AUC: 0.5166280401088992

Matriz de confusão: 
 [[12395 12510]
 [11655 13440]]

Relatório de classificação: 
               precision    recall  f1-score   support

           0       0.52      0.50      0.51     24905
           1       0.52      0.54      0.53     25095

    accuracy                           0.52     50000
   macro avg       0.52      0.52      0.52     50000
weighted avg       0.52      0.52      0.52     50000



In [None]:
print('\n----- MLPClassifier (Scikit-Learn) -----')
print('\nPontuação ROC AUC:', mlpc_roc)
print('\nMatriz de confusão: \n', 
      confusion_matrix(y_valid, mlpc_predictions))
print('\nRelatório de classificação: \n', 
      classification_report(y_valid, mlpc_predictions))


----- MLPClassifier (Scikit-Learn) -----

Pontuação ROC AUC: 0.5074425514704433

Matriz de confusão: 
 [[19995  4910]
 [19774  5321]]

Relatório de classificação: 
               precision    recall  f1-score   support

           0       0.50      0.80      0.62     24905
           1       0.52      0.21      0.30     25095

    accuracy                           0.51     50000
   macro avg       0.51      0.51      0.46     50000
weighted avg       0.51      0.51      0.46     50000



In [None]:
print('\n----- Sequential (TensorFlow) -----')
print('\nPontuação ROC AUC:', sequential_roc)
print('\nMatriz de confusão: \n', 
      confusion_matrix(y_valid, sequential_predictions.round()))
print('\nRelatório de classificação: \n', 
      classification_report(y_valid, sequential_predictions.round()))


----- Sequential (TensorFlow) -----

Pontuação ROC AUC: 0.49801113928085117

Matriz de confusão: 
 [[    0 24905]
 [    0 25095]]

Relatório de classificação: 
               precision    recall  f1-score   support

           0       0.00      0.00      0.00     24905
           1       0.50      1.00      0.67     25095

    accuracy                           0.50     50000
   macro avg       0.25      0.50      0.33     50000
weighted avg       0.25      0.50      0.34     50000



  _warn_prf(average, modifier, msg_start, len(result))
