<a href="https://colab.research.google.com/github/jjjzhu-5847/Breast-Cancer-Classification-with-Neural-Network/blob/main/Breast_Cancer_Prediction_Using_Neural_Networks_in_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import the libraries**

In [118]:
import numpy as np
import torch
import torch.nn as nn # nn stands for neural network
import torch.optim as optim # optim for optimizer

from sklearn.datasets import load_breast_cancer # load dataset library
from sklearn.preprocessing import StandardScaler # standardize data
from sklearn.model_selection import train_test_split # split data set

# **Device Configuration**

In [119]:
# Pick the compute device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)  # tensors and the model are moved to this device in later cells
print(f'the architecture we are using is: {device}')

the architecture we are using is: cuda


## **Data preprocessing**

In [120]:
# Load the scikit-learn breast cancer dataset and separate the features from the labels.
data = load_breast_cancer()
x, y = data.data, data.target  # x: feature matrix, y: 0/1 class labels

In [121]:
# Preview the raw feature matrix (NumPy abbreviates large arrays with "...").
print(x)

[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]


In [122]:
# Show the class labels: one 0/1 value per sample.
print(y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0
 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1
 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 0 1 0 1 1 0 

In [123]:
# Hold out 20% of the samples as a test set; the remaining 80% is used for training.
# A fixed random_state makes the split identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
  x,
  y,
  test_size=0.2,
  random_state=2,
)

# Compare the shapes of the full, training and test feature matrices.
for features in (x, x_train, x_test):
  print(features.shape)

(569, 30)
(455, 30)
(114, 30)


In [124]:
# Standardize every feature using the mean and standard deviation of the training data.
#
# The scaler is fitted on the training split only. Fitting it on the test split as well
# would leak test-set statistics into preprocessing (data leakage), so the test split is
# transformed with the statistics learned from the training split.
scaler = StandardScaler().fit(x_train)

x_train_std = scaler.transform(x_train)
x_test_std = scaler.transform(x_test)

In [125]:
def _as_device_tensor(array):
  """Return `array` as a float32 tensor placed on `device`."""
  return torch.tensor(array, dtype=torch.float32).to(device)


# Convert the NumPy arrays to PyTorch tensors on the selected device.
# The labels are only converted, not scaled; the `_std` suffix is kept because later cells use these names.
x_train_std = _as_device_tensor(x_train_std)
x_test_std = _as_device_tensor(x_test_std)
y_train_std = _as_device_tensor(y_train)
y_test_std = _as_device_tensor(y_test)

# **Building Neural Networks**

In [126]:
# define neural network
class NeuralNetwork(nn.Module):
  """Feed-forward classifier with a single hidden layer.

  Architecture: Linear -> ReLU -> Linear -> Sigmoid, so every output value lies
  between 0 and 1.

  Args:
    input_size: number of features in each input sample.
    hidden_size: number of units in the hidden layer.
    output_size: number of output units.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    # Layers are created in the order in which forward() applies them.
    self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Run the forward pass on `x` and return the sigmoid activations."""
    hidden = self.relu(self.fc1(x))
    return self.sigmoid(self.fc2(hidden))

In [127]:
# Network dimensions
input_size = x_train.shape[-1]  # one input unit per feature column
hidden_size = 64                # units in the hidden layer
output_size = 1                 # a single output unit for the binary label

# Optimisation settings
learning_rate = 1e-3
epochs = 100

In [128]:
# Seed PyTorch's random number generator so the randomly initialised weights are
# the same after every "Restart & Run All" instead of changing from run to run.
SEED = 2  # same value as the random_state used for the train/test split
torch.manual_seed(SEED)

# Build the network and move its parameters to the selected device.
model = NeuralNetwork(input_size, hidden_size, output_size).to(device)

In [129]:
# Binary cross-entropy computed on probabilities; the model's final sigmoid already outputs values between 0 and 1.
loss_function = nn.BCELoss()

# Adam updates the model's parameters using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# **Training the Neural Network**

In [130]:
# Full-batch training: each epoch performs one optimisation step on the whole training set.
train_targets = y_train_std.view(-1, 1)  # reshape the labels to (n_samples, 1) to match the model output
model.train()  # training mode; nothing inside the loop switches it off

for epoch in range(epochs):
  optimizer.zero_grad()  # clear the gradients left over from the previous step
  outputs = model(x_train_std)  # forward pass (calling the model invokes forward())
  loss = loss_function(outputs, train_targets)
  loss.backward()  # back-propagation computes the gradients
  optimizer.step()  # update the parameters

  # Report the training accuracy for this epoch; the metric needs no gradients.
  with torch.no_grad():
    predicted = outputs.round()  # probabilities -> 0/1 predictions (threshold 0.5)
    is_correct = predicted == train_targets
    accuracy = is_correct.float().mean()  # fraction of correct predictions
  print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Accuracy: {accuracy.item():.4f}')

Epoch 1/100, Loss: 0.6673, Accuracy: 0.6374
Epoch 2/100, Loss: 0.6484, Accuracy: 0.6967
Epoch 3/100, Loss: 0.6301, Accuracy: 0.7516
Epoch 4/100, Loss: 0.6123, Accuracy: 0.7846
Epoch 5/100, Loss: 0.5950, Accuracy: 0.8088
Epoch 6/100, Loss: 0.5784, Accuracy: 0.8286
Epoch 7/100, Loss: 0.5622, Accuracy: 0.8549
Epoch 8/100, Loss: 0.5465, Accuracy: 0.8659
Epoch 9/100, Loss: 0.5313, Accuracy: 0.8725
Epoch 10/100, Loss: 0.5167, Accuracy: 0.8725
Epoch 11/100, Loss: 0.5025, Accuracy: 0.8813
Epoch 12/100, Loss: 0.4888, Accuracy: 0.8835
Epoch 13/100, Loss: 0.4756, Accuracy: 0.8879
Epoch 14/100, Loss: 0.4628, Accuracy: 0.8879
Epoch 15/100, Loss: 0.4505, Accuracy: 0.8945
Epoch 16/100, Loss: 0.4386, Accuracy: 0.8945
Epoch 17/100, Loss: 0.4271, Accuracy: 0.8945
Epoch 18/100, Loss: 0.4160, Accuracy: 0.8989
Epoch 19/100, Loss: 0.4053, Accuracy: 0.8989
Epoch 20/100, Loss: 0.3950, Accuracy: 0.9011
Epoch 21/100, Loss: 0.3850, Accuracy: 0.9033
Epoch 22/100, Loss: 0.3754, Accuracy: 0.9033
Epoch 23/100, Loss:

# **Evaluate Neural Network**

In [142]:
# Measure the accuracy on the held-out test set.
model.eval()  # switch the model to evaluation mode
test_targets = y_test_std.view(-1, 1)  # reshape the labels to (n_samples, 1) to match the model output

with torch.no_grad():  # inference only, so gradient tracking is skipped
  output = model(x_test_std)
  predicted = output.round()  # probabilities -> 0/1 predictions (threshold 0.5)
  accuracy = predicted.eq(test_targets).float().mean()

print(f'Test Accuracy: {accuracy.item():.4f}')

Test Accuracy: 0.9737


In [143]:
# Classify one new case given as raw (unscaled) feature values in the dataset's column order.
input_data = (20.57,17.77,132.9,1326,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956,0.1238,0.1866,0.2416,0.186,0.275,0.08902)
# Alternative sample to try (uncomment to replace the case above):
#input_data = (11.76,21.6,74.72,427.9,0.08637,0.04966,0.01657,0.01115,0.1495,0.05888,0.4062,1.21,2.635,28.47,0.005857,0.009758,0.01168,0.007445,0.02406,0.001769,12.98,25.72,82.98,516.5,0.1085,0.08615,0.05523,0.03715,0.2433,0.06563)

# scaler.transform() expects a 2-D array of shape (n_samples, n_features), so reshape to a single row.
# NumPy is imported in the import cell at the top of the notebook.
input_np = np.asarray(input_data).reshape(1, -1)
input_std = scaler.transform(input_np)  # apply the scaling that was fitted on the training data

# input_std is already 2-D, so no further reshaping is needed before moving it to the device.
input_tensor = torch.tensor(input_std, dtype=torch.float32).to(device)

model.eval()  # make sure the model is in evaluation mode even when this cell is run on its own
with torch.no_grad():
  output = model(input_tensor)
  predicted = output.round()  # probability -> 0/1 prediction (threshold 0.5)

# In the scikit-learn breast cancer dataset, label 0 is malignant and label 1 is benign.
if predicted.item() == 0:
  print('The output is: Malignant')
else:
  print('The output is: Benign')

The output is: Malignant
