# Breast Cancer Wisconsin (Diagnostic) Data Set

Features are computed from a digitized image of a fine needle aspirate (FNA) of a breast mass. They describe characteristics of the cell nuclei present in the image. A few of the images can be found at [Web Link]

Separating plane described above was obtained using Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree Construction Via Linear Programming." Proceedings of the 4th Midwest Artificial Intelligence and Cognitive Science Society, pp. 97-101, 1992], a classification method which uses linear programming to construct a decision tree. Relevant features were selected using an exhaustive search in the space of 1-4 features and 1-3 separating planes.

The actual linear program used to obtain the separating plane in the 3-dimensional space is that described in: [K. P. Bennett and O. L. Mangasarian: "Robust Linear Programming Discrimination of Two Linearly Inseparable Sets", Optimization Methods and Software 1, 1992, 23-34].

This database is also available through the UW CS ftp server:
ftp ftp.cs.wisc.edu
cd math-prog/cpo-dataset/machine-learn/WDBC/


Fonte: https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(diagnostic)

Problema de classificação binário e com o dataframe já pronto!

##Iniciando bibliotecas

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
#import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import GridSearchCV
import time
#Dense:rede neural fully conected, todos os neurôneos conectados

## Carregando base de dados

In [2]:
previsores = pd.read_csv('entradas_breast.csv')
classe = pd.read_csv('saidas_breast.csv')

In [3]:
previsores.head()

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave_points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave_points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,186.0,275.0,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,243.0,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,173.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,198.0,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,205.0,0.4,0.1625,0.2364,0.07678


In [4]:
previsores_treinamento, previsores_teste, classe_treinamento,classe_teste = train_test_split(previsores,classe,test_size=0.25)

In [5]:
previsores_treinamento.shape, previsores_teste.shape

((426, 30), (143, 30))

In [6]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [7]:
previsores = scaler.fit_transform(previsores)

In [8]:
previsores

array([[-0.28367013, -2.07333501,  1.26993369, ..., -0.21506493,
        -0.32985162, -0.12769577],
       [-0.28260757, -0.35363241,  1.68595471, ...,  4.49527168,
         2.6981086 , -0.12976335],
       [-0.28296999,  0.45618695,  1.56650313, ...,  5.94082456,
        -0.33094131, -0.129863  ],
       ...,
       [-0.28424259,  2.0455738 ,  0.67267578, ..., -0.21819949,
        -0.33247988, -0.13051206],
       [-0.28259521,  2.33645719,  1.98252415, ...,  6.49875725,
        -0.33041852,  8.44441856],
       [-0.28788329,  1.22179204, -1.81438851, ..., -0.22179562,
        -0.33175967, -0.13105248]])

## Primeiro teste da rede neural

In [None]:
classificador = Sequential()

#camada oculta
classificador.add(Dense(units = 16, activation ='relu',
                        kernel_initializer='random_uniform', input_dim = 30)) #camada oculta

#primeiro teste para a quantidade de neurônios 
#(numero de parâmetros + numero de neuronios na camada de saida)/2
#neste caso é: (30 + 1)/2 = 15.5

#20% dos neurônios terão o seu valor zerado
#a cada uptade, os inputs serão zerados aleatóriamente
#isso é feito para prevenir que aconteça overfit
classificador.add(Dropout(0.2))

#mais uma camada de saída
classificador.add(Dense(units = 16, activation ='relu',
                        kernel_initializer='random_uniform')) #camada oculta

classificador.add(Dropout(0.2))

#camada de saída
#é uma probabilidade de ter cancer ou não, logo a função de ativação
#sigmoid que é entre 0 e 1 faz mais sentido

classificador.add(Dense(units=1,activation = 'sigmoid'))

In [None]:
otimizador = keras.optimizers.Adam(lr = 0.001, decay = 0.0001, clipvalue = 0.5)

#lr: learning rate
#decay: 
classificador.compile(optimizer = otimizador, loss = 'binary_crossentropy',
                      metrics = ['binary_accuracy'])

#adam: uma otimização da descida do gradiente estocástico
start=time.time()
classificador.fit(previsores, classe, batch_size = 10, epochs=100, verbose=1)
end=time.time()
print(end-start)

In [None]:
#visualizando os pesos
pesos0 = classificador.layers[0].get_weights()
pesos1 = classificador.layers[1].get_weights()
pesos2 = classificador.layers[2].get_weights()

In [None]:
previsoes = classificador.predict(previsores_teste)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

previsoes = (previsoes > 0.5)
precisao = accuracy_score(classe_teste, previsoes)

matriz = confusion_matrix(classe_teste, previsoes)

In [None]:
matriz, precisao

(array([[49,  7],
        [ 3, 84]]), 0.9300699300699301)

In [None]:
resultado = classificador.evaluate(previsores_teste, classe_teste)



## Tuning of parameters

In [13]:
def criarRede(optimizer, loos, kernel_initializer, activation, neurons_1, neurons_2, neurons_3, drop):
    classificador = Sequential()
    classificador.add(Dense(units = neurons_1, activation = activation, 
                        kernel_initializer = kernel_initializer, input_dim = 30))
    classificador.add(Dropout(drop))
    classificador.add(Dense(units = neurons_2, activation = activation, 
                        kernel_initializer = kernel_initializer))
    classificador.add(Dropout(drop))
    classificador.add(Dense(units = neurons_3, activation = activation, 
                        kernel_initializer = kernel_initializer))
    classificador.add(Dropout(drop))
    classificador.add(Dense(units = 1, activation = 'sigmoid'))
    classificador.compile(optimizer = optimizer, loss = loos,
                      metrics = ['binary_accuracy'])
    return classificador

In [None]:
classificador = KerasClassifier(build_fn = criarRede)

parametros = {'batch_size': [1, 5, 10, 30],
              'epochs': [10, 20, 100],
              'optimizer': ['adam'],
              'loos': ['binary_crossentropy', 'hinge'],
              'kernel_initializer': ['random_uniform', 'normal'],
              'activation': ['relu', 'tanh'],
              'neurons_1': [16, 8, 2],
              'neurons_2': [16, 8, 2],
              'neurons_3': [16, 8, 2],
              'drop':[0.1,0.3]}
grid_search = GridSearchCV(estimator = classificador,
                           param_grid = parametros,
                           scoring = 'accuracy',
                           cv = 5)

In [None]:
grid_search = grid_search.fit(previsores, classe, verbose=0)

In [None]:
melhores_parametros = grid_search.best_params_
melhor_precisao = grid_search.best_score_
melhores_parametros

## Validação Cruzada

In [10]:
def criarRede():
    classificador = Sequential()

    #camada oculta
    classificador.add(Dense(units = 16, activation ='relu',
                            kernel_initializer='normal', input_dim = 30)) #camada oculta

    classificador.add(Dropout(0.2))

    #mais uma camada de saída
    classificador.add(Dense(units = 16, activation ='relu',
                            kernel_initializer='normal')) #camada oculta

    classificador.add(Dropout(0.2))

    #camada de saída
    classificador.add(Dense(units=1,activation = 'sigmoid'))


    otimizador = keras.optimizers.Adam(learning_rate = 0.001, decay = 0.0001, clipvalue = 0.5)
    classificador.compile(optimizer = otimizador, loss = 'binary_crossentropy',
                      metrics = ['binary_accuracy'])


    return classificador

In [None]:
classificador = KerasClassifier(build_fn = criarRede, epochs = 100, batch_size = 10)


start=time.time()
resultados = cross_val_score(estimator=classificador, X=previsores, y=classe, cv =10, scoring = 'accuracy', verbose=0)
end=time.time()
print(end-start)

In [12]:
resultados.mean(), resultados.std()

(0.9542919799498748, 0.027429890512670675)

## Classificar somente um registro

In [None]:
classificador = Sequential()
classificador.add(Dense(units = 8, activation = 'relu', 
                        kernel_initializer = 'normal', input_dim = 30))
classificador.add(Dropout(0.2))
classificador.add(Dense(units = 8, activation = 'relu', 
                        kernel_initializer = 'normal'))
classificador.add(Dropout(0.2))
classificador.add(Dense(units = 1, activation = 'sigmoid'))
classificador.compile(optimizer = 'adam', loss = 'binary_crossentropy',
                      metrics = ['binary_accuracy'])
classificador.fit(previsores, classe, batch_size = 10, epochs = 100)

In [None]:
#chutando valores para o vetor de previsores
novo = np.array([[15.80, 8.34, 118, 900, 0.10, 0.26, 0.08, 0.134, 0.178,
                  0.20, 0.05, 1098, 0.87, 4500, 145.2, 0.005, 0.04, 0.05, 0.015,
                  0.03, 0.007, 23.15, 16.64, 178.5, 2018, 0.14, 0.185,
                  0.84, 158, 0.363]])
previsao = classificador.predict(novo)
previsao = (previsao > 0.5)#o valor 0.5 foi determinado por mim
previsao

array([[ True]])

## Salvar a rede neural

In [None]:
classificador_json = classificador.to_json()
with open('classificador_breast.json', 'w') as json_file:
    json_file.write(classificador_json)
classificador.save_weights('classificador_breast.h5')