In [2]:
## Manipulação de dados
import pandas as pd
import numpy as np

## Bibliotecas utilitárias para preparar pipelines de dados
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import keras.utils

# Pra normalizar os dados
from sklearn.preprocessing import StandardScaler

## Bibliotecas de aprendizado de máquina
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Bibliotecas e métricas
from sklearn.metrics import confusion_matrix,roc_curve, auc, accuracy_score, recall_score, precision_score

## Bibliotecas de apresentação
import matplotlib.pyplot as plt
import seaborn as sns ## Trocar por matplotlib
import plotly.express as px

## Bibliotecas úteis
import itertools
from itertools import cycle
from scipy import interp


## Definindo algumas variáveis

In [4]:
DATA_PATH = "results/dados_ideal_size40.csv"
CLASS_NAMES = [0,1]
UNECESSARY = ['Unnamed: 0']
TARGET = 'class'

## Funções necessárias

In [6]:
def load_file(path):
    try:
        temp_df = pd.read_csv(path)
        print("Arquivo localizado")
        return temp_df
    except:
        raise SystemError("Não foi possível localizar o arquivo")
    
def make_model(output_bias=None, input_shape = 1, output_layers = 1):
    METRICS = [
      keras.metrics.SensitivityAtSpecificity(name='Sen', specificity= 0.5),
      keras.metrics.SpecificityAtSensitivity(name='Spe', sensitivity = 0.5),
      keras.metrics.BinaryAccuracy(name='Acc'),
      keras.metrics.AUC(name='AUC')
    ]

    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
    
    model = keras.Sequential([
          keras.layers.Dense( input_shape, activation='relu', input_shape=(input_shape,)),
          keras.layers.Dense( 32, activation='relu'),
          keras.layers.Dropout(0.2),
          keras.layers.Dense( 32, activation='relu'),
          keras.layers.Dropout(0.2),
          #keras.layers.Dense( 16, activation='relu'),
          #keras.layers.Dropout(0.2),
          #keras.layers.Dense( 16, activation='relu', input_shape=(32,)), keras.layers.Dropout(0.1),
          #keras.layers.Dense( 4, activation='relu', input_shape=(16,)), keras.layers.Dropout(0.1),
          keras.layers.Dense( 1 , activation='sigmoid', bias_initializer=output_bias),
    ])

    model.compile(
        optimizer = keras.optimizers.Adam(learning_rate=1e-4), #15e-4
        loss = keras.losses.BinaryCrossentropy(),
        metrics = METRICS
    )

    return model

def SaveResult(path,resultado,classe):
    f = open('resultados.txt','a')
    f.write('\n')
    f.write(str(classe))
    f.write('\n')
    f.write(path)
    f.write('\n')
    f.write(str(resultado))
    f.write("\n \n################################# \n")
    f.close()

def gencsv(resultados):
    f = open('resultados.csv','a')
    for i in range(5):
        f.write(str(resultados[i])+',')
    f.write("\n")
    f.close

In [7]:
raw_df = load_file(path)
raw_df = raw_df.drop([UNECESSARY],axis = 1)
clean_df = raw_df.loc[clean_df['signalClass'].isin(CLASS_NAMES)]
split_by_class = lambda df, rows_target: df.loc[df[TARGET] == rows_target].copy()
norm_df = split_by_class(clean_df, rows_target = 0)
susp_df  = split_by_class(clean_df, rows_target = 1)
print("NORM --------- : ", norm_df.shape)
print("SUSP ---------- : ", susp_df.shape)
neg, pos = np.bincount(clean_df['signalClass'])
total = neg + pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(total, pos, 100 * pos / total))

NameError: name 'path' is not defined

In [None]:
x_train, y_train, x_test, y_test, x_val, y_val = train_test_split(clean_df, split_size = 0.15)

In [None]:
#Early Stopping:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    verbose=1,
    patience=2500,
    mode='min',
    restore_best_weights=True)

In [None]:
# Executando o Modelo:
BATCH_SIZE = None
num_epochs = 10000
print("Execução do modelo iniciada, aguarde...")
model = make_model(output_bias=None, input_shape = x_train.shape[1], output_layers = 1)
history = model.fit(
    x_train, y_train, 
    batch_size = BATCH_SIZE,
    epochs=num_epochs,
    callbacks=[early_stopping],
    validation_data=(x_val, y_val),
    validation_batch_size = BATCH_SIZE,
    class_weight=class_weight,
    verbose = 0
)

In [None]:
# Valores (validação) para as métricas após os pesos serem restaurados
model.evaluate(x = x_val, y = y_val, batch_size=None, return_dict=False)

# Valores após o Teste
print("Resultado: Teste")
baseline_results = model.evaluate(x_test, y_test,batch_size = BATCH_SIZE,  verbose=1)
SaveResult(DATA_PATH,baseline_results,CLASS_NAMES)