In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# CNNs, CUDA and Hooks
> 

In [2]:
#export

from exp.nb_06 import *

In [3]:
# import torch.nn.functional as F
# import torch.nn as nn
# import torch.optim as optim

## Get Data

In [4]:
x_train,y_train,x_valid,y_valid = get_data()

In [5]:
def normalize_to(train, valid):
    m,s = train.mean(), train.std()
    return normalize(train, m, s), normalize(valid, m,s)

In [6]:
x_train, x_valid = normalize_to(x_train, x_valid)

In [7]:
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)
nh,bs = 50,512
c = y_train.max().item()+1
loss_func = F.cross_entropy

In [8]:
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [9]:
data.train_ds.x.mean(), data.train_ds.x.std()

(tensor(-6.2598e-06), tensor(1.))

## Basic CNN

We're going to implement a basic CNN using a some 2d conv layers.

### Lambda Class

If we want to make a func and put it into `nn.Sequential` it needs to be a `nn.Module`.

To do this we'll use a `Lambda` class that takes a function, inherits and initializes from `nn.Module` and then calls the function on the forward pass:

In [10]:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func
    def forward(self, x):
        return self.func(x)

### Get CNN Model

First step is the use the `Lambda` class above to reshape our batches into shape: 

BATCH  x CHANNEL x HEIGHT x WIDTH

In [11]:
def flatten(x):
    return x.view(x.shape[0], -1)

In [12]:
def mnist_resize(x):
    return x.view(-1, 1, 28, 28)

In [13]:
xb, yb = next(iter(data.train_dl))
xb.shape

torch.Size([512, 784])

In [14]:
nb = mnist_resize(xb)
nb.shape

torch.Size([512, 1, 28, 28])

In [15]:
def get_cnn_model(data):
    return nn.Sequential(
        Lambda(mnist_resize),
        nn.Conv2d( 1,  8, 5, padding=2, stride=2), nn.ReLU(), # stride 2 reduces image 14x14
        nn.Conv2d( 8, 16, 3, padding=1, stride=2), nn.ReLU(), # stride 2 reduces image 7x7
        nn.Conv2d(16, 32, 3, padding=1, stride=2), nn.ReLU(), # stride 2 reduces image 4x4
        nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(), # stride 2 reduces image 2x2
        nn.AdaptiveAvgPool2d(1),
        Lambda(flatten),
        nn.Linear(32, data.c)
    )

In [16]:
model = get_cnn_model(data)

In [30]:
opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
run = Runner(cbs=Recorder(), cb_funcs = AvgStatsCallback(accuracy))

In [31]:
%time run.fit(1, learn)

TypeError: 'AvgStatsCallback' object is not callable

Great. It appears to be working but it took > 11 seconds to run.

We'll need to throw it on the GPU to optimize the matrix multiplication.

## CUDA

In [20]:
device = torch.device('cuda', 0)

In [21]:
torch.cuda.set_device(device)

In [22]:
class CudaCallback(Callback):
    def begin_fit(self):
        self.model = self.model.cuda()
    def begin_batch(self):
        self.run.xb, self.run.yb = self.xb.cuda(), self.yb.cuda()

In [23]:
model = get_cnn_model(data)

In [24]:
cbfs = [Recorder(), CudaCallback()]

In [25]:
opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
run = Runner(cbs=AvgStatsCallback(accuracy), cb_funcs = cbfs)

In [26]:
%time run.fit(3, learn)

TypeError: 'AvgStatsCallback' object is not callable

Much better. Now we can do 3x as many epochs in less than a quarter of the time.

## Refactor CNN

We'll want to eventually make deeper models. 