In [None]:
%matplotlib inline

import torch
import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

class Object:
    """Empty attribute bag; instances serve as ad-hoc namespaces (``t``, ``loop``)."""

In [None]:
t = Object()

1. Data to train on

2. Model

3. Training algorithm to train the model

4. Validate model on new data

In [None]:
t.raw = sklearn.datasets.fetch_openml("mnist_784")

In [None]:
t.raw.keys()

In [None]:
t.raw.data.shape

In [None]:
28 * 28

In [None]:
# fetch_openml may hand back a pandas DataFrame (newer scikit-learn) or a plain
# ndarray (older releases); np.asarray accepts both, whereas DataFrame has no
# .reshape. -1 lets NumPy infer the sample count instead of hard-coding 70000.
t.X = np.asarray(t.raw.data).reshape(-1, 28, 28)

In [None]:
t.raw.target

In [None]:
t.X = t.X.astype(np.float32)
# np.long was deprecated in NumPy 1.20 and removed in 1.24; spell the dtype
# explicitly. int64 is also the integer type torch's NLLLoss requires for class
# indices. np.asarray makes this work whether the target is an ndarray or a
# pandas Series of digit strings.
t.y = np.asarray(t.raw.target).astype(np.int64)

In [None]:
t.y

In [None]:
def print_image(x, y, n=None):
    if n is None:
        n = np.random.randint(x.shape[0])
    print(y[n])
    _ = plt.imshow(x[n], cmap="gray")

In [None]:
print_image(t.X, t.y, 2)

### Split dataset

In [None]:
t.X_train, t.X_test, t.y_train, t.y_test = \
    train_test_split(t.X, t.y, test_size=0.2)

In [None]:
t.X_train.shape, t.X_test.shape, t.y_train.shape, t.y_test.shape

### Convert to Pytorch

In [None]:
a = torch.Tensor([1, 2, 3])
a

In [None]:
a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
a

In [None]:
# is_available has to be *called*: the bare function object is always truthy,
# so the original expression selected 'cuda' even on machines without a GPU.
t.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
t.device

In [None]:
a.to(t.device) + 1

### Convert data

In [None]:
t.X_train.shape

In [None]:
np.expand_dims(t.X_train, axis=1).shape

In [None]:
# Wrap the NumPy splits as torch tensors on the selected device.
# The images get an extra axis at position 1 (N, H, W) -> (N, 1, H, W): the
# single channel dimension that nn.Conv2d expects. Labels are passed through as-is.
t.X_train_t = torch.from_numpy(np.expand_dims(t.X_train, axis=1)).to(t.device)
t.X_test_t = torch.from_numpy(np.expand_dims(t.X_test, axis=1)).to(t.device)
t.y_train_t = torch.from_numpy(t.y_train).to(t.device)
t.y_test_t = torch.from_numpy(t.y_test).to(t.device)

In [None]:
t.y_train_t

### LeNet

Gradient-Based Learning Applied to Document Recognition<br>
Yann Lecun, Leon Bottou, Yoshua Bengio, Patrick Haffner<br>
Proceedings of the IEEE 86(11):2278–2324, December 1998<br>
https://www.researchgate.net/publication/2985446_Gradient-Based_Learning_Applied_to_Document_Recognition


```
"Layer C1 is a convolutional layer with six feature maps.
Each unit in each feature map is connected to a 5×5 neighborhood
in the input. The size of the feature maps is 28×28 which prevents
connection from the input from falling off the boundary. C1 contains
156 trainable parameters and 122,304 connections.

Layer S2 is a subsampling layer with six feature maps of
size 14×14. Each unit in each feature map is connected to a
2×2 neighborhood in the corresponding feature map in C1.
The four inputs to a unit in S2 are added, then multiplied by
a trainable coefficient, and then added to a trainable bias.
The result is passed through a sigmoidal function.
The 2×2 receptive fields are nonoverlapping, therefore feature maps
in S2 have half the number of rows and column as feature
maps in C1. Layer S2 has 12 trainable parameters and 5880
connections.

Layer C3 is a convolutional layer with 16 feature maps.
Each unit in each feature map is connected to several
5×5 neighborhoods at identical locations in a subset of
S2’s feature maps.

[...]

Layer S4 is a subsampling layer with 16 feature maps of
size 5×5. Each unit in each feature map is connected to a
2×2 neighborhood in the corresponding feature map in C3,
in a similar way as C1 and S2. Layer S4 has 32 trainable
parameters and 2000 connections.

Layer C5 is a convolutional layer with 120 feature maps.
Each unit is connected to a 5×5 neighborhood on all 16
of S4’s feature maps. Here, because the size of S4 is also
5×5, the size of C5’s feature maps is 1×1; this amounts
to a full connection between S4 and C5. C5 is labeled as
a convolutional layer, instead of a fully connected layer,
because if LeNet-5 input were made bigger with everything
else kept constant, the feature map dimension would be
larger than 1×1.

Layer F6 contains 84 units (the reason for this number
comes from the design of the output layer, explained
below) and is fully connected to C5. It has 10,164 trainable
parameters.
"

```

![](lenet-5.png)

In [None]:
from torch import nn

In [None]:
def create_lenet_classic():
    """Assemble a LeNet-5 style digit classifier as a flat ``nn.Sequential``.

    Layout: three unpadded 5x5 convolutions (6, 16 and 120 feature maps) with
    2x2 max-pooling after the first two, then fully connected layers of 84 and
    10 units. ReLU follows every convolution and the first linear layer. The
    final ``LogSoftmax`` emits log-probabilities over 10 classes, the input
    form that ``nn.NLLLoss`` takes.

    The shape comments below assume a single-channel 32 x 32 input. With that
    size C5 collapses to 120 x 1 x 1, which is what ``nn.Linear(120, 84)``
    requires after flattening.

    Returns:
        nn.Sequential: a freshly initialised, untrained network.
    """
    conv_5x5 = dict(kernel_size=5, stride=1)
    pool_2x2 = dict(kernel_size=2, stride=2)

    convolutional_part = [
        # C1: 1 x 32 x 32 -> 6 x 28 x 28
        nn.Conv2d(1, 6, **conv_5x5),
        nn.ReLU(),
        # S2: 6 x 28 x 28 -> 6 x 14 x 14
        nn.MaxPool2d(**pool_2x2),
        # C3: 6 x 14 x 14 -> 16 x 10 x 10
        nn.Conv2d(6, 16, **conv_5x5),
        nn.ReLU(),
        # S4: 16 x 10 x 10 -> 16 x 5 x 5
        nn.MaxPool2d(**pool_2x2),
        # C5: 16 x 5 x 5 -> 120 x 1 x 1
        nn.Conv2d(16, 120, **conv_5x5),
        nn.ReLU(),
    ]

    dense_part = [
        # 120 x 1 x 1 -> 120
        nn.Flatten(),
        # F6
        nn.Linear(120, 84),
        nn.ReLU(),
        # Output layer: one value per digit class.
        nn.Linear(84, 10),
        nn.LogSoftmax(dim=-1),
    ]

    return nn.Sequential(*convolutional_part, *dense_part)

### Training Steps

In [None]:
loop = Object()

In [None]:
loop.model = create_lenet_classic().to(t.device)

In [None]:
loop.loss = nn.NLLLoss()

In [None]:
# Demonstration: the network stacks three unpadded 5x5 convolutions and two 2x2
# poolings, so a 28x28 image is already down to 4x4 when it reaches C5 and the
# 5x5 kernel no longer fits. Catch and show the error instead of aborting, so
# "Restart & Run All" can continue to the padding step further down.
try:
    loop.output = loop.model(t.X_train_t)
except RuntimeError as err:
    print("Forward pass failed:", err)

In [None]:
t.X.shape

In [None]:
# Grow the images to 32x32 by adding a 2-pixel border of zeros on every side
# (np.pad's 'constant' mode fills with 0 by default). The size guard makes the
# cell safe to re-run: already-padded data is left alone.
needs_padding = t.X.shape[1] < 32
if needs_padding:
    # One (before, after) pair per axis: samples, rows, columns.
    border = [(0, 0), (2, 2), (2, 2)]
    t.X = np.pad(t.X, border, mode='constant')

In [None]:
t.X.shape

In [None]:
print_image(t.X, t.y)

In [None]:
t.X_train, t.X_test, t.y_train, t.y_test = \
    train_test_split(t.X, t.y, test_size=0.2)

In [None]:
t.X_train.shape

In [None]:
# Rebuild the device tensors from the re-split (padded) arrays, replacing the
# ones created earlier. Images again get a channel axis inserted at position 1.
t.X_train_t = torch.from_numpy(np.expand_dims(t.X_train, axis=1)).to(t.device)
t.X_test_t = torch.from_numpy(np.expand_dims(t.X_test, axis=1)).to(t.device)
t.y_train_t = torch.from_numpy(t.y_train).to(t.device)
t.y_test_t = torch.from_numpy(t.y_test).to(t.device)

In [None]:
t.X_train_t.shape

In [None]:
loop.model = create_lenet_classic().to(t.device)

In [None]:
loop.loss = nn.NLLLoss()

In [None]:
loop.opt = torch.optim.SGD(
    loop.model.parameters(),
    lr=1e-3,
    momentum=0.9, 
    nesterov=True)

In [None]:
loop.outputs = loop.model(t.X_train_t)

In [None]:
loop.loss_value = loop.loss(loop.outputs, t.y_train_t)
loop.loss_value

In [None]:
loop.model.zero_grad()

In [None]:
loop.loss_value.backward()

In [None]:
loop.opt.step()

In [None]:
loop = None

In [None]:
t.loss = torch.nn.NLLLoss()

In [None]:
def accuracy(y_pred_one_hot, y_true):
    """Fraction of samples whose top-scoring class matches the label.

    Args:
        y_pred_one_hot: tensor of per-class scores with classes along dim 1.
            Despite the name it need not be one-hot; only the position of the
            maximum in each row is used.
        y_true: tensor of class indices, one per row of ``y_pred_one_hot``.

    Returns:
        float: correct predictions divided by ``y_true.shape[0]``.
    """
    _, predicted_labels = y_pred_one_hot.max(dim=1)
    n_correct = (predicted_labels == y_true).sum().item()
    n_total = y_true.shape[0]
    return n_correct / n_total

In [None]:
def train_batch(net, data, epochs=3, batch_size=16, lr=1e-3):
    """Train ``net`` in place with mini-batch SGD (momentum 0.9, Nesterov).

    Batches are taken in order from the training tensors, without shuffling.
    About ten times per epoch the current batch loss and the accuracy over the
    whole training set are printed.

    Args:
        net: the module to optimise.
        data: object exposing ``X_train_t`` (inputs), ``y_train_t`` (labels)
            and ``loss`` (callable ``loss(prediction, target)``).
        epochs: number of full passes over the training tensors.
        batch_size: samples per optimisation step.
        lr: SGD learning rate.
    """
    optim = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9, nesterov=True)

    n_samples = data.X_train_t.shape[0]
    # Ceiling division: the trailing partial batch is trained on too, and a
    # dataset smaller than one batch still yields one step instead of none.
    batches = (n_samples + batch_size - 1) // batch_size
    # Integer reporting interval, at least 1. A float interval can make the
    # modulo test below silently never match.
    print_every = max(1, batches // 10)

    for i in range(epochs):
        for b in range(batches):
            start = b * batch_size
            end = start + batch_size  # slicing clamps at the end of the tensor
            x_batch = data.X_train_t[start:end]
            y_batch = data.y_train_t[start:end]

            # Forward pass: compute predicted y by passing x to the model.
            y_pred = net(x_batch)

            # Compute the loss for this batch.
            loss = data.loss(y_pred, y_batch)

            # Reset gradients left over from the previous step.
            net.zero_grad()

            # Backward pass: compute gradient of the loss with respect to model
            # parameters.
            loss.backward()

            # Apply the parameter update.
            optim.step()

            if (b + 1) % print_every == 0:
                # Evaluation only: skip autograd bookkeeping so the pass over
                # the full training set does not retain a computation graph.
                with torch.no_grad():
                    train_accuracy = accuracy(net(data.X_train_t),
                                              data.y_train_t)
                print(i, b,
                      "Loss:", loss.item(),
                      "accuracy: ", train_accuracy)

In [None]:
%%time

t.nn = create_lenet_classic().to(t.device)
train_batch(t.nn, t, epochs=3)

In [None]:
t.pred_y = t.nn(t.X_test_t)

In [None]:
torch.exp(t.pred_y[0])

In [None]:
print_image(t.X_test, t.y_test, 0)

In [None]:
accuracy(t.nn(t.X_test_t), t.y_test_t)

### Predict for a single image

In [None]:
t.test_n = np.random.randint(t.X_test.shape[0])
t.test_n

In [None]:
t.X_test[t.test_n].shape

In [None]:
plt.imshow(t.X_test[t.test_n], cmap="gray")

In [None]:
t.y_test[t.test_n]

In [None]:
t.test_image = np.expand_dims(np.expand_dims(t.X_test[t.test_n], axis=0), axis=0)
t.test_image.shape

In [None]:
t.test_image_t = torch.from_numpy(t.test_image).to(t.device)

In [None]:
logits = t.nn(t.test_image_t)

In [None]:
logits

In [None]:
torch.exp(logits)

In [None]:
logits.shape

In [None]:
torch.argmax(logits[0]).item()

In [None]:
def predict_image(net, x_t, x, y, n=None):
    """Classify one sample, print ``index : label  :  prediction`` and draw it.

    Args:
        net: callable returning per-class scores for a batch; ``exp`` is
            applied to the first output row before taking the arg-max.
        x_t: tensor of model inputs, indexed along its first axis.
        x: array of images aligned with ``x_t``, used only for display.
        y: labels aligned with ``x``.
        n: index of the sample to use; a random one is drawn when ``None``.
    """
    index = np.random.randint(x.shape[0]) if n is None else n

    # The model works on batches, so wrap the single sample in a batch of one.
    batch_of_one = x_t[index].unsqueeze(0)
    scores = net(batch_of_one)[0]
    predicted_class = torch.argmax(torch.exp(scores)).item()

    print(index, ':', y[index], ' : ', predicted_class)
    plt.imshow(x[index], cmap="gray")

In [None]:
predict_image(t.nn, t.X_test_t, t.X_test, t.y_test)

In [None]:
def find_wrong_image(net, x, y):
    """Find a randomly chosen sample that ``net`` misclassifies and display it.

    Samples are visited in a random order, each at most once, so the search
    always terminates, including when every sample is classified correctly or
    ``x`` is empty.

    Args:
        net: model (or any callable) mapping a ``(1, 1, H, W)`` float32 tensor
            to per-class scores.
        x: array of images indexed along its first axis.
        y: labels aligned with ``x``.

    Returns:
        int index of the first misclassified sample encountered (it is also
        printed and plotted), or ``None`` if there is none.
    """
    # Feed inputs on whatever device the model's weights live on; fall back to
    # the CPU for parameter-less callables.
    first_param = next(net.parameters(), None) if hasattr(net, "parameters") else None
    device = first_param.device if first_param is not None else torch.device("cpu")

    for n in np.random.permutation(x.shape[0]):
        n = int(n)
        # (H, W) -> (1, 1, H, W): add batch and channel axes.
        image = np.expand_dims(np.expand_dims(x[n], axis=0), axis=0).astype(np.float32)
        image_t = torch.from_numpy(image).to(device)
        # Inference only, so no gradient tracking is needed.
        with torch.no_grad():
            pred_t = net(image_t)
        pred = torch.argmax(torch.exp(pred_t[0])).item()
        if pred != y[n]:
            print(n, ':', y[n], ' : ',  pred)
            plt.imshow(x[n], cmap="gray")
            return n
    return None

In [None]:
t.wrong_n = find_wrong_image(t.nn.cpu(), t.X_test, t.y_test)

## Homework

1. Wire LeNet-5 up to RedisAI and Twitter: write a digit on your phone, send the image to your Twitter account, have the model recognize it, and post the prediction back.
<br>
<br>
1. Traffic Signs. 
  1. Re-train the network using traffic signs, http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset. Note: convert images to grayscale for this assignment.
  1. Upload to RedisAI. 
  1. Take a picture of a real traffic sign (when safe to do so), post it to your Twitter account, and check the prediction.
<br>
<br>
1. Adapt LeNet-5 to work with RGB color images, and train on full-color images of traffic signs.