# Projeto 2 - Classificação binária breast cancer - Validação cruzada e dropout

## 1. Importando bibliotecas

In [1]:
import pandas as pd
import numpy as np
import torch.nn as nn
from skorch import NeuralNetBinaryClassifier
import torch
from sklearn.model_selection import cross_val_score

In [2]:
torch.set_default_device("mps")
device = torch.device("mps")

# torch.set_default_device("cpu")
# device = torch.device("cpu")

In [3]:
print(torch.backends.mps.is_available())  # Deve retornar True
print(torch.backends.mps.is_built())  # Deve retornar True

True
True


## 2. Importando Dados

In [4]:
np.random.seed(123)
torch.manual_seed(123)

<torch._C.Generator at 0x1132cbf10>

In [5]:
previsores = pd.read_csv("data/entradas_breast.csv")
classe = pd.read_csv("data/saidas_breast.csv")

In [6]:
previsores = np.array(previsores, dtype='float32')
classe = np.array(classe, dtype='float32').squeeze(1)

## 3. Classe para a rede neural

In [7]:
class classificador_torch(nn.Module):
    def __init__(self):
        super().__init__()

        # 30 -> 16 -> 16 -> 1
        self.dense0 = nn.Linear(30, 16)
        torch.nn.init.uniform_(self.dense0.weight)
        self.activation0 = nn.ReLU()

        self.dense1 = nn.Linear(16, 16)
        torch.nn.init.uniform_(self.dense1.weight)
        self.activation1 = nn.ReLU()

        self.dense2 = nn.Linear(16, 1)
        torch.nn.init.uniform_(self.dense2.weight)

        self.output = nn.Sigmoid()

    def forward(self, X):
        X = self.dense0(X)
        X = self.activation0(X)

        X = self.dense1(X)
        X = self.activation1(X)

        X = self.dense2(X)

        X = self.output(X)
        return X

## 4. Skorch

In [8]:
classificador_sklearn = NeuralNetBinaryClassifier(module=classificador_torch,
                                                  criterion=torch.nn.BCELoss,
                                                  optimizer=torch.optim.Adam,
                                                  lr=0.001,
                                                  optimizer__weight_decay=0.0001,
                                                  max_epochs=100,
                                                  batch_size=10,
                                                  train_split=False)

## 5. Validação Cruzada

In [9]:
resultados = cross_val_score(classificador_sklearn, previsores, classe, cv = 10, scoring='accuracy')
resultados

  epoch    train_loss     dur
-------  ------------  ------
      1       [36m37.1094[0m  0.1809
      2       37.1094  0.1450
      3       37.1094  0.1447
      4       37.1094  0.1430
      5       37.1094  0.1424
      6       37.1094  0.1456
      7       37.1094  0.1456
      8       37.1094  0.1455
      9       37.1094  0.1560
     10       37.1094  0.1437
     11       37.1094  0.1435
     12       37.1094  0.1450
     13       37.1094  0.1432
     14       37.1094  0.1504
     15       37.1094  0.1464
     16       37.1094  0.1411
     17       37.1094  0.1434
     18       37.1094  0.1445
     19       37.1094  0.1427
     20       37.1094  0.1435
     21       37.1094  0.1447
     22       37.1094  0.1428
     23       37.1094  0.1441
     24       37.1094  0.1444
     25       [36m11.9586[0m  0.1447
     26        [36m0.5009[0m  0.1453
     27        [36m0.4813[0m  0.1457
     28        [36m0.4487[0m  0.1414
     29        [36m0.4293[0m  0.1449
     30        

array([0.85964912, 0.84210526, 0.89473684, 0.63157895, 0.87719298,
       0.85964912, 0.8245614 , 0.92982456, 0.9122807 , 0.85714286])

In [10]:
resultados.shape

(10,)

In [11]:
media = resultados.mean()
media

np.float64(0.8488721804511279)

In [12]:
desvio = resultados.std()
desvio

np.float64(0.07850694671513726)

## 6. Dropout

Em redes neurais, dropout é uma técnica de regularização usada para reduzir o overfitting (sobreajuste). Durante o treinamento, essa técnica desativa aleatoriamente uma porcentagem dos neurônios em cada camada em cada iteração. Isso força a rede a aprender representações mais robustas e não depender excessivamente de neurônios específicos.

In [13]:
class classificador_torch_dropout(nn.Module):
    def __init__(self):
        super().__init__()

        # 30 -> 16 -> 16 -> 1
        self.dense0 = nn.Linear(30, 16)
        torch.nn.init.uniform_(self.dense0.weight)
        self.activation0 = nn.ReLU()

        self.dropout0 = nn.Dropout(0.2)

        self.dense1 = nn.Linear(16, 16)
        torch.nn.init.uniform_(self.dense1.weight)
        self.activation1 = nn.ReLU()

        self.dropout1 = nn.Dropout(0.2)

        self.dense2 = nn.Linear(16, 1)
        torch.nn.init.uniform_(self.dense2.weight)

        self.output = nn.Sigmoid()

    def forward(self, X):
        X = self.dense0(X)
        X = self.activation0(X)

        X = self.droupout0(X)
        
        X = self.dense1(X)
        X = self.activation1(X)

        X = self.droupout1(X)
        
        X = self.dense2(X)

        X = self.output(X)
        return X

In [14]:
classificador_sklearn_dropout = NeuralNetBinaryClassifier(module=classificador_torch_dropout,
                                                  criterion=torch.nn.BCELoss,
                                                  optimizer=torch.optim.Adam,
                                                  lr=0.001,
                                                  optimizer__weight_decay=0.0001,
                                                  max_epochs=100,
                                                  batch_size=10,
                                                  train_split=False)

In [15]:
resultados_dropout = cross_val_score(classificador_sklearn, previsores, classe, cv = 10, scoring='accuracy')
resultados_dropout

  epoch    train_loss     dur
-------  ------------  ------
      1       [36m37.1094[0m  0.1685
      2       37.1094  0.1486
      3       37.1094  0.1474
      4       37.1094  0.1477
      5       37.1094  0.1504
      6       37.1094  0.1454
      7       37.1094  0.1470
      8       37.1094  0.1471
      9       37.1094  0.1454
     10       37.1094  0.1464
     11       37.1094  0.1406
     12       37.1094  0.1465
     13       37.1094  0.1443
     14       37.1094  0.1463
     15       37.1094  0.1464
     16       37.1094  0.1465
     17       37.1094  0.1457
     18       37.1094  0.1377
     19       37.1094  0.1262
     20       37.1094  0.1247
     21       37.1094  0.1256
     22       37.1094  0.1247
     23       [36m12.6967[0m  0.1250
     24        [36m0.5449[0m  0.1251
     25        [36m0.5050[0m  0.1250
     26        [36m0.4778[0m  0.1248
     27        [36m0.4711[0m  0.1223
     28        [36m0.4484[0m  0.1245
     29        [36m0.4336[0m  0.163

array([0.87719298, 0.87719298, 0.85964912, 0.96491228, 0.89473684,
       0.9122807 , 0.85964912, 0.63157895, 0.63157895, 0.85714286])

In [16]:
resultados_dropout.shape

(10,)

In [17]:
media_dropout = resultados_dropout.mean()
media_dropout

np.float64(0.8365914786967418)

In [18]:
desvio_dropout = resultados_dropout.std()
desvio_dropout

np.float64(0.10696676085513505)

## Comparando com e sem Dropout

In [19]:
media, media_dropout

(np.float64(0.8488721804511279), np.float64(0.8365914786967418))

In [20]:
desvio, desvio_dropout

(np.float64(0.07850694671513726), np.float64(0.10696676085513505))