# **MLPs with PyTorch**


## **PyTorch packages:**

*   torch.nn: Define essential components/classes for building neural networks, including linear layers, activation functions, loss functions, etc.
*   torch.nn.Module: Define a neural network by subclassing nn.Module
*   torch.optim: Gradient-based optimization algorithms
*   torch.nn.functional: Contains common operations for building neural networks
*   torch.utils.data: Utility classes like datasets and data loaders that make data preprocessing easier
*   torch.autograd: Automatic differentiation for Tensor operations

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

## **Autograd in PyTorch**

In [None]:
# Four scalar leaf tensors; requires_grad=True asks autograd to track them
x1, x2, x3, x4 = (
    torch.tensor(value, requires_grad=True, dtype=torch.float32)
    for value in (1, 3, 2, 4)
)

# Forward pass: o = x1 * x2 + x3 * x4
z1, z2 = x1 * x2, x3 * x4
o = z1 + z2

Compute gradients

In [None]:
# Gradients of o with respect to each of the four leaf tensors
leaves = (x1, x2, x3, x4)
do_dx = torch.autograd.grad(o, leaves)
print(do_dx)

Use `backward` for simplicity

In [None]:
# Rebuild the same graph from fresh leaf tensors
x1, x2, x3, x4 = [
    torch.tensor(v, dtype=torch.float32, requires_grad=True)
    for v in (1, 3, 2, 4)
]

z1 = x1 * x2
z2 = x3 * x4
o = z1 + z2

# backward() stores the gradient of o in the .grad attribute of each leaf
o.backward()

In [None]:
# Show the gradient accumulated on every leaf tensor
print(*(leaf.grad for leaf in (x1, x2, x3, x4)))

## **Define an artificial neural network**

### nn.Module: Define an arbitrary network

`nn.Module` allows you to define arbitrary network architectures

To use `nn.Module`, there are three steps to follow:

1. Subclass `nn.Module`: name a network class such as `SimpleNetwork`

2. In the constructor `__init__()`, define your network (all the layers) as class attributes

3. In the `forward()` method, 
  *   Define the connectivity of the network
  *   Use the attributes defined in `__init__` as function calls
  *   **DO NOT** create new layers with learnable parameters

After defining the network class, instantiate it as an object and use it


In [None]:
# create a child class that inherits the functionality from nn.Module
class SimpleNetwork(nn.Module):
  """Two-layer MLP: Linear -> ReLU -> Linear.

  forward() returns raw class scores (logits). nn.CrossEntropyLoss applies
  log-softmax internally, so the network must not apply softmax itself;
  call F.softmax(logits, dim=1) explicitly when probabilities are needed.

  Args:
    input_dim: number of input features.
    hidden_dim: number of hidden units.
    output_dim: number of output classes.
  """

  # the constructor runs once, when an instance of the class is created
  def __init__(self, input_dim, hidden_dim, output_dim):
    # initialise nn.Module first so the layers assigned below are registered
    super().__init__()

    # define all the layers
    self.layer_1 = nn.Linear(input_dim, hidden_dim)
    self.layer_2 = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    """Return the logits of shape (batch, output_dim) for a batch `x`."""
    x = self.layer_1(x)
    x = F.relu(x)
    # no softmax here: the loss function expects unnormalized scores
    return self.layer_2(x)

Instantiate a simple neural network

In [None]:
# 30 input features -> 50 hidden units -> 2 output classes
my_model = SimpleNetwork(input_dim=30, hidden_dim=50, output_dim=2)
print(my_model)

### nn.Sequential: Define a simple feed-forward network

For a simple network with just a stack of feed forward layers, use `nn.Sequential`


In [None]:
input_dim = 30
hidden_dim = 50
output_dim = 2

# Define a simple network: Linear -> ReLU -> Linear.
# The last layer outputs raw scores (logits). No Softmax layer is added
# because nn.CrossEntropyLoss applies log-softmax internally; apply
# torch.softmax(logits, dim=1) explicitly when probabilities are needed.
my_model = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.ReLU(),
    nn.Linear(hidden_dim, output_dim),
)

print(my_model)

In [None]:
class SimpleNetwork2(nn.Module):
  """Same MLP as a single nn.Sequential stack: Linear -> ReLU -> Linear.

  forward() returns raw class scores (logits). nn.CrossEntropyLoss applies
  log-softmax internally, so no Softmax layer is included; apply
  torch.softmax(logits, dim=1) explicitly when probabilities are needed.

  Args:
    input_dim: number of input features.
    hidden_dim: number of hidden units.
    output_dim: number of output classes.
  """

  def __init__(self, input_dim, hidden_dim, output_dim):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(input_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, output_dim),
    )

  def forward(self, x):
    """Return the logits of shape (batch, output_dim) for a batch `x`."""
    return self.layers(x)

Instantiate a simple neural network

In [None]:
# 30 input features -> 50 hidden units -> 2 output classes
my_model = SimpleNetwork2(input_dim=30, hidden_dim=50, output_dim=2)
print(my_model)

## **Breast Cancer Wisconsin (Diagnostic) Dataset**
*   569 instances (212 Malignant, 357 Benign)
*   30 numerical features (computed from a digitized image of a breast mass)
*   2 classes (Malignant, Benign)


In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
# Load the dataset and hold out 30% for testing (fixed seed for repeatability)
wisconsin = datasets.load_breast_cancer()
TrainData, TestData, TrainLabel, TestLabel = train_test_split(
    wisconsin.data,
    wisconsin.target,
    test_size=0.3,
    random_state=0,
)

# Min-max scaling: the per-feature statistics come from the training split
# only and are reused unchanged for the test split
TrainData_min, TrainData_max = TrainData.min(axis=0), TrainData.max(axis=0)
TrainData_range = TrainData_max - TrainData_min

TrainData = (TrainData - TrainData_min) / TrainData_range
TestData = (TestData - TrainData_min) / TrainData_range

In [None]:
# Features -> float32, matching the dtype of the nn.Linear weights.
# Labels -> int64: nn.CrossEntropyLoss requires class-index targets of type
# long. The explicit .long() matters on platforms where NumPy's default
# integer is 32-bit; it is a no-op when the labels are already int64.
tr_data = torch.from_numpy(TrainData).float()
tr_label = torch.from_numpy(TrainLabel).long()
ts_data = torch.from_numpy(TestData).float()
ts_label = torch.from_numpy(TestLabel).long()

Set hyperparameters, loss function, and optimizer

In [None]:
# step size used by the optimizer
learning_rate = 0.1
# cross-entropy loss for classification
loss_fn = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of the current model
optimizer = optim.SGD(
    params=my_model.parameters(),
    lr=learning_rate,
    momentum=0.9,
)

Train the network

In [None]:
num_epochs = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# move the model and the full train/test tensors to the selected device
# (GPU when available, CPU otherwise)
my_model = my_model.to(device)
tr_data = tr_data.to(device)
tr_label = tr_label.to(device)
ts_data = ts_data.to(device)
ts_label = ts_label.to(device)

# switch the model to training mode
my_model.train()

# full-batch training: one parameter update, and one recorded loss, per epoch
errors = []
for epoch in range(num_epochs):
    # gradients accumulate across backward() calls, so reset them first
    optimizer.zero_grad()

    # forward pass on the whole training set
    output = my_model(tr_data)
    loss = loss_fn(output, tr_label)

    # backpropagate the loss, then update the parameters
    loss.backward()
    optimizer.step()

    errors.append(loss.item())

Visualize training loss

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Loss recorded during training, in the order it was appended to `errors`
plt.figure(figsize=(10, 5))
plt.plot(errors)
plt.title('Training Loss')
plt.show()

Evaluate the model performance

In [None]:
from sklearn import metrics

In [None]:
my_model.eval()       # Set the model to evaluation mode

# Inference only: no_grad() stops autograd from recording the forward passes,
# which saves memory and time
with torch.no_grad():
    tr_output = my_model(tr_data)
    ts_output = my_model(ts_data)

# predicted class = index of the largest output along the class dimension
_, tr_pred = torch.max(tr_output, 1)
_, ts_pred = torch.max(ts_output, 1)

# move tensors to the CPU before handing them to scikit-learn
acc_tr = metrics.accuracy_score(tr_label.cpu(), tr_pred.cpu())
acc_ts = metrics.accuracy_score(ts_label.cpu(), ts_pred.cpu())
print("Training Accuracy =", acc_tr)
print("Test Accuracy =", acc_ts)

**Use PyTorch Dataset and DataLoader**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
# Reload the dataset and repeat the 70/30 split with the same seed
wisconsin = datasets.load_breast_cancer()
TrainData, TestData, TrainLabel, TestLabel = train_test_split(
    wisconsin.data, wisconsin.target, test_size=0.3, random_state=0
)

# Per-feature minimum and maximum of the training split
TrainData_min = TrainData.min(axis=0)
TrainData_max = TrainData.max(axis=0)
TrainData_range = TrainData_max - TrainData_min

# Scale both splits with the training statistics
TrainData = (TrainData - TrainData_min) / TrainData_range
TestData = (TestData - TrainData_min) / TrainData_range

Prepare PyTorch Dataset

In [None]:
# Pair float32 features with int64 labels. nn.CrossEntropyLoss requires
# class-index targets of type long; the explicit .long() matters on platforms
# where NumPy's default integer is 32-bit and is a no-op otherwise.
tr_dataset = TensorDataset(
    torch.from_numpy(TrainData).float(),
    torch.from_numpy(TrainLabel).long(),
)
ts_dataset = TensorDataset(
    torch.from_numpy(TestData).float(),
    torch.from_numpy(TestLabel).long(),
)

In [None]:
# Indexing the dataset returns one (features, label) pair
first_sample = tr_dataset[0]
print(first_sample)

Prepare PyTorch DataLoader

In [None]:
# Both loaders serve mini-batches of the same size
batch_size = 16
train_loader = DataLoader(tr_dataset, batch_size=batch_size)
test_loader = DataLoader(ts_dataset, batch_size=batch_size)

In [None]:
# Pull a single mini-batch from the training loader for inspection
batch_iterator = iter(train_loader)
x, y = next(batch_iterator)

In [None]:
# Show the features and the labels of the fetched mini-batch
for caption, batch_part in (('Data: ', x), ('Label: ', y)):
    print(caption, batch_part)

Instantiate a simple neural network

In [None]:
# Fresh, untrained network: 30 features -> 50 hidden units -> 2 classes
my_model = SimpleNetwork(input_dim=30, hidden_dim=50, output_dim=2)
print(my_model)

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

Set hyperparameters, loss function, and optimizer

In [None]:
# step size, loss, and SGD-with-momentum optimizer for the new model
learning_rate = 0.1
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=my_model.parameters(),
    lr=learning_rate,
    momentum=0.9,
)

Train the network

In [None]:
num_epochs = 5

# put the model on the selected device and switch to training mode
my_model.to(device)
my_model.train()

# one loss value is recorded per mini-batch, not per epoch
errors = []
for epoch in range(num_epochs):
  for x, y in train_loader:
    # the batch must live on the same device as the model
    x, y = x.to(device), y.to(device)

    # gradients accumulate across backward() calls, so reset them first
    optimizer.zero_grad()

    # forward pass and loss for this mini-batch
    output = my_model(x)
    loss = loss_fn(output, y)

    # backpropagate the loss, then update the parameters
    loss.backward()
    optimizer.step()

    errors.append(loss.item())


Visualize training loss

In [None]:
import matplotlib.pyplot as plt

In [None]:
# One point per recorded loss value in `errors`
plt.figure(figsize=(10, 5))
plt.plot(errors)
plt.title('Training Loss')
plt.show()

Define a function for model training

In [None]:
def train_model(model, data_loader, optimizer, loss_fn, num_epochs, device):
  """Train `model` for `num_epochs` passes over `data_loader`.

  After every epoch, prints the mean per-batch loss and the training accuracy
  accumulated over that epoch's batches.

  Args:
    model: network to train. Only the batches are moved to `device` here, so
      the model is expected to be on `device` already.
    data_loader: iterable yielding (inputs, labels) mini-batches.
    optimizer: optimizer built over `model`'s parameters.
    loss_fn: callable computing the loss from (output, labels).
    num_epochs: number of passes over `data_loader`.
    device: device each batch is moved to before the forward pass.

  Returns:
    The `model` that was passed in; its parameters are updated in place.
  """
  model.train()

  for epoch in range(num_epochs):
    train_loss = 0.0
    train_corr, train_n = 0, 0
    n_batches = 0

    for x, y in data_loader:
      x = x.to(device)
      y = y.to(device)

      optimizer.zero_grad()     # zero the gradients (by default they add up)

      output = model(x)
      loss = loss_fn(output, y)

      loss.backward()           # backpropagate the loss
      optimizer.step()          # update parameters

      # predicted class = index of the largest output along dimension 1
      _, pred = torch.max(output, 1)
      train_corr += torch.sum(pred == y).item()
      train_n += y.size(0)

      train_loss += loss.item()
      n_batches += 1

    # monitor loss and accuracy per epoch; the loss is averaged over the
    # number of batches, and max(..., 1) avoids dividing by zero when the
    # loader yields nothing
    print('Epoch: %d | Loss: %.4f | Train Acc: %.2f%%'
          % (epoch,
             train_loss / max(n_batches, 1),
             train_corr / max(train_n, 1) * 100))

  # return the model that was trained, not a global of a similar name
  return model

Set hyperparameters, loss function, and optimizer

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# fresh, untrained network placed on the selected device
my_model = SimpleNetwork(30, 50, 2).to(device)

# training configuration
num_epochs = 5
learning_rate = 0.1
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=my_model.parameters(),
    lr=learning_rate,
    momentum=0.9,
)

Train the network

In [None]:
# Run the training loop defined above and keep the returned model
my_model = train_model(
    model=my_model,
    data_loader=train_loader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    num_epochs=num_epochs,
    device=device,
)

Evaluate the model performance

In [None]:
def test_model(model, data_loader):
  model.eval()         # Set the model to evaluation mode

  correct = 0
  n = 0
  with torch.no_grad():   # disable gradient calculation, reduce memory usage 
    for x, y in data_loader:

      x = x.to(device)
      y = y.to(device)

      # zero the gradients
      optimizer.zero_grad()

      output = model(x)
      _, pred = torch.max(output, 1)

      correct += torch.sum(pred == y).item()
      n += y.size(0)

  acc = correct / n

  return acc

In [None]:
# Accuracy on the training split first, then on the held-out test split
acc_tr, acc_ts = (
    test_model(my_model, loader) for loader in (train_loader, test_loader)
)

print("Training Accuracy =", acc_tr)
print("Test Accuracy =", acc_ts)