<a href="https://colab.research.google.com/github/jonkrohn/pytorch/blob/master/notebooks/shallow_net_in_pytorch_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Shallow Neural Network in PyTorch (DEMO)

_Remember to change your Runtime type to GPU or TPU_

#### Load dependencies

In [0]:
import torch
import torch.nn as nn

from torchvision.datasets import MNIST
from torchvision import transforms

from torchsummary import summary

import matplotlib.pyplot as plt

#### Load data

In [0]:
# ToTensor() scales pixels from [0, 255] to [0, 1]; only the training split requests a download
train = MNIST(root='data', train=True, transform=transforms.ToTensor(), download=True)
test = MNIST(root='data', train=False, transform=transforms.ToTensor())

In [0]:
# dimensions of the raw image tensor stored on the training set
train.data.shape

In [0]:
train.data[0] # first stored image; not scaled -- .data holds the raw pixels, without the ToTensor() transform applied

In [0]:
# render the first raw training image with an inverted grayscale colormap;
# the trailing semicolon keeps the AxesImage repr out of the cell output
plt.imshow(train.data[0].numpy().squeeze(), cmap='gray_r');

In [0]:
# labels of the first hundred training examples
train.targets[:100]

In [0]:
# dimensions of the training label tensor
train.targets.shape

In [0]:
# dimensions of the raw image tensor stored on the test set
test.data.shape

In [0]:
# dimensions of the test label tensor
test.targets.shape

#### Batch data

In [0]:
# Wrap each dataset in a DataLoader that serves mini-batches of 128;
# only the training batches are reshuffled.
# ...DataLoader() can also take a custom sampler and load batches in parallel worker processes (num_workers)
train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=128)

In [0]:
# Pull a single (images, labels) batch for inspection.
# Use the builtin next(): DataLoader iterators do not provide a Python-2 style .next() method.
X_sample, y_sample = next(iter(train_loader))

In [0]:
# dimensions of one batch of images as produced by train_loader
X_sample.shape

In [0]:
# dimensions of the matching batch of labels
y_sample.shape

In [0]:
# the labels in the sampled batch
y_sample

In [0]:
# first image of the sampled batch; it came through the dataset's ToTensor() transform,
# so values should be in [0, 1] rather than raw pixel intensities
X_sample[0]

In [0]:
# view() reshapes the Tensor (confusingly named): keep the batch dimension, collapse everything else into one
X_flat_sample = X_sample.view(X_sample.size(0), -1)

In [0]:
# batch dimension preserved, remaining dimensions merged into a single axis
X_flat_sample.shape

In [0]:
# first image of the batch as a flat vector
X_flat_sample[0]

#### Design neural network architecture

In [0]:
n_input = 28 * 28 # 784 values per flattened image (presumably 28x28 pixels -- confirm against the data shape)
n_dense = 64      # width of the hidden layer
n_out = 10        # size of the output layer

In [0]:
# one sigmoid hidden layer followed by a linear output layer that emits raw scores
model = nn.Sequential(
    nn.Linear(in_features=n_input, out_features=n_dense),  # hidden layer
    nn.Sigmoid(),                                          # activation function
    nn.Linear(in_features=n_dense, out_features=n_out),    # output layer
)

In [0]:
# Print a layer-by-layer summary of the model.
# torchsummary defaults to device="cuda" and builds a CUDA dummy input whenever a GPU is
# visible; pass the device the weights actually live on so the forward pass does not hit
# a CPU/GPU mismatch.
summary(model, (1, n_input), device=next(model.parameters()).device.type)

#### Configure training hyperparameters

In [0]:
cost_fxn = nn.CrossEntropyLoss() # includes the softmax step, so it is fed the model's raw output scores (the model has no softmax layer)

In [0]:
# plain stochastic gradient descent over every model parameter
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

#### Train

In [0]:
def accuracy_pct(pred_y, true_y):
  """Return the percentage of rows in pred_y whose highest-scoring column equals the label in true_y.

  pred_y: 2-D tensor of scores, reduced along dimension 1.
  true_y: tensor of labels, compared element-wise with the predicted column indices.
  """
  predicted_class = pred_y.max(dim=1).indices # position of the largest score in each row
  n_correct = (predicted_class == true_y).sum().item()
  fraction_correct = n_correct / true_y.size(0)
  return fraction_correct * 100.0

In [0]:
# number of mini-batches train_loader yields per epoch
n_batches = len(train_loader)
n_batches

In [0]:
n_epochs = 20

print('Training for {} epochs. \n'.format(n_epochs))

# Put the model in training mode explicitly, so re-running this cell after the
# test cell (which calls model.eval()) still trains with training-mode layers.
model.train()

for epoch in range(n_epochs):

  # Per-epoch running sums, kept as plain Python numbers. Accumulating the loss
  # *tensor* instead would keep every batch's autograd graph alive until the
  # epoch ends.
  sum_cost = 0.0
  sum_accuracy = 0.0
  n_seen = 0

  for i, (X, y) in enumerate(train_loader): # enumerate() provides count of iterations

    # forward propagation:
    X_flat = X.view(X.shape[0], -1)
    y_hat = model(X_flat)
    cost = cost_fxn(y_hat, y)

    # backprop and optimization via gradient descent:
    optimizer.zero_grad() # set gradients to zero; .backward() accumulates them in buffers
    cost.backward()
    optimizer.step()

    # accumulate metrics weighted by batch size, so a smaller final batch
    # does not count as much as a full one (cost is the mean over the batch):
    n_in_batch = y.shape[0]
    sum_cost += cost.item() * n_in_batch
    sum_accuracy += accuracy_pct(y_hat, y) * n_in_batch
    n_seen += n_in_batch

    if (i + 1) % 100 == 0:
      print('Step {}'.format(i + 1))

  # per-sample averages for the epoch; max() guards against an empty loader
  avg_cost = sum_cost / max(n_seen, 1)
  avg_accuracy = sum_accuracy / max(n_seen, 1)

  print('Epoch {}/{} complete: Cost: {:.3f}, Accuracy: {:.1f}% \n'
        .format(epoch + 1, n_epochs, avg_cost, avg_accuracy))

print('Training complete.')

#### Test model

In [0]:
# number of mini-batches test_loader yields
n_test_batches = len(test_loader)
n_test_batches

In [0]:
model.eval() # switch to inference mode: disables dropout (and batch norm updates)

with torch.no_grad(): # disables autograd, reducing memory consumption

  # Running sums as plain Python numbers, weighted by batch size so that a
  # smaller final batch does not count as much as a full one.
  sum_test_cost = 0.0
  sum_test_acc = 0.0
  n_test_seen = 0

  for X, y in test_loader:

    # make predictions:
    X_flat = X.view(X.shape[0], -1)
    y_hat = model(X_flat)
    n_in_batch = y.shape[0]

    # calculate cost (cost_fxn returns the mean over the batch):
    cost = cost_fxn(y_hat, y)
    sum_test_cost += cost.item() * n_in_batch

    # calculate accuracy:
    test_accuracy = accuracy_pct(y_hat, y)
    sum_test_acc += test_accuracy * n_in_batch

    n_test_seen += n_in_batch

# per-sample averages over the whole test set; max() guards against an empty loader
avg_test_cost = sum_test_cost / max(n_test_seen, 1)
avg_test_acc = sum_test_acc / max(n_test_seen, 1)

print('Test cost: {:.3f}, Test accuracy: {:.1f}%'.format(avg_test_cost, avg_test_acc))

# model.train() # 'undoes' model.eval()