### Pytorch Breast Cancer Dataset

In [2]:
from sklearn.datasets import load_breast_cancer

In [3]:
dataset = load_breast_cancer()

In [5]:
import pandas as pd

In [198]:
X = pd.DataFrame(dataset.data, columns = dataset.feature_names)

In [199]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_transformed = scaler.fit_transform(X)

In [196]:
y = pd.Series(dataset.target == 0).astype('int')

In [197]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42, stratify = y)

In [203]:
import torch 
import numpy as np

predictors_train = torch.tensor(np.array(X_transformed), dtype=torch.float)

labels_train = torch.tensor(np.array(y_train), dtype = torch.float).view(-1, 1)


In [204]:
predictors_train

tensor([[ 1.0971, -2.0733,  1.2699,  ...,  2.2961,  2.7506,  1.9370],
        [ 1.8298, -0.3536,  1.6860,  ...,  1.0871, -0.2439,  0.2812],
        [ 1.5799,  0.4562,  1.5665,  ...,  1.9550,  1.1523,  0.2014],
        ...,
        [ 0.7023,  2.0456,  0.6727,  ...,  0.4141, -1.1045, -0.3184],
        [ 1.8383,  2.3365,  1.9825,  ...,  2.2900,  1.9191,  2.2196],
        [-1.8084,  1.2218, -1.8144,  ..., -1.7451, -0.0481, -0.7512]])

In [174]:
predictors_test = torch.tensor(np.array(X_test), dtype=torch.float)

In [175]:
labels_test = torch.tensor(np.array(y_test), dtype = torch.float).view(-1, 1)

In [176]:
train_loader = torch.utils.data.DataLoader(df_tf, batch_size=15, shuffle=True)

### Build Neural Network

In [177]:
import torch.nn as nn
classifier = nn.Sequential(
    nn.Linear(in_features=30, out_features=15),
    nn.ReLU(),
    nn.Linear(15, 15),
    nn.ReLU(),
    nn.Linear(15, 1),
    nn.Sigmoid()
)

In [178]:
criterion = nn.BCELoss()

In [179]:
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001, weight_decay=0.0001)

In [186]:
for epoch in range(20):
  #To store the error:
  run_loss = 0.

  for data in train_loader:
    inputs, labels = data
    optimizer.zero_grad()

    outputs = classifier(inputs) 
    error = criterion(outputs, labels)
    error.backward()
    optimizer.step() 

    run_loss += error.item()
  print('Epoch %3d: loss %.5f' % (epoch+1, run_loss/len(train_loader)))

Epoch   1: loss 0.00160
Epoch   2: loss 0.00153
Epoch   3: loss 0.00144
Epoch   4: loss 0.00143
Epoch   5: loss 0.00141
Epoch   6: loss 0.00135
Epoch   7: loss 0.00134
Epoch   8: loss 0.00137
Epoch   9: loss 0.00133
Epoch  10: loss 0.00122
Epoch  11: loss 0.00124
Epoch  12: loss 0.00120
Epoch  13: loss 0.00115
Epoch  14: loss 0.00108
Epoch  15: loss 0.00112
Epoch  16: loss 0.00116
Epoch  17: loss 0.00096
Epoch  18: loss 0.00101
Epoch  19: loss 0.00103
Epoch  20: loss 0.00091


In [193]:
preds = classifier(predictors_train)

In [189]:
preds.view(-1).long()

tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
        0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
        1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1])

In [190]:
labels_test.view(-1).long()

tensor([0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
        1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,
        0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
        1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1])

In [195]:
(preds.view(-1).long() == labels_train.long()).float().mean()

tensor(0.5553)

### Resources

[Breast Cancer Pytorch](https://medium.com/analytics-vidhya/practical-ml-part-3-predicting-breast-cancer-with-pytorch-efc469242bfe)

[Github toy dataset](https://gist.github.com/santi-pdp/d0e9002afe74db04aa5bbff6d076e8fe)