In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import sys
sys.path.append("../src/")

In [3]:
#export
from nb_04 import *
# Cap PyTorch at 2 threads (per the PyTorch docs this is the CPU intra-op thread count).
torch.set_num_threads(2)

## ConvNet

In [4]:
# Load the training and validation inputs/targets.
# get_data is not defined in this notebook; presumably it comes from the nb_04 star import.
x_train,y_train,x_valid,y_valid = get_data()

Downloading http://deeplearning.net/data/mnist/mnist.pkl


In [5]:
#export
def normalize_to(train, valid):
    """Normalize `train` and `valid` with the mean and std of `train`.

    The statistics are computed on `train` only and the same pair is passed
    to `normalize` for both splits.

    Returns the tuple `(normalized_train, normalized_valid)`.
    """
    mean, std = train.mean(), train.std()
    normed_train = normalize(train, mean, std)
    normed_valid = normalize(valid, mean, std)
    return normed_train, normed_valid

In [6]:
# Normalize both splits with the training set's mean/std, then wrap each split in a Dataset.
# NOTE(review): this rebinds x_train/x_valid, so re-running the cell normalizes the
# already-normalized tensors again instead of the raw data.
x_train,x_valid = normalize_to(x_train,x_valid)
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)

In [7]:
# Sanity check: after normalization the training mean should be ~0 and the std ~1.
x_train.mean(),x_train.std()

(tensor(3.0614e-05), tensor(1.))

In [8]:
# bs is forwarded to get_dls below (presumably the batch size).
# nh is not used by any cell visible in this section.
nh,bs = 50,512
# Number of classes: largest label value + 1 (assumes labels are 0-based integers).
c = y_train.max().item()+1
loss_func = F.cross_entropy

# Bundle whatever get_dls returns for the two datasets together with the class count.
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

To refactor layers, it's useful to have a Lambda layer that can take a basic function and convert it to a layer you can put in nn.Sequential.

NB: if you use a Lambda layer with a lambda function, your model won't pickle so you won't be able to save it with PyTorch. So it's best to give a name to the function you're using inside your Lambda (like flatten below).

In [9]:
#export
class Lambda(nn.Module):
    """Wrap a plain callable as an `nn.Module` so it can sit inside `nn.Sequential`.

    `func` is stored on the instance and called with the input on every
    forward pass; the module has no parameters of its own.
    """

    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        """Return `self.func(x)`."""
        fn = self.func
        return fn(x)

def flatten(x):
    """Collapse every dimension after the first: `(n, ...)` -> `(n, prod(...))`.

    Uses `reshape` rather than `view` so that non-contiguous inputs (for
    example the result of `permute`/`transpose`) are flattened instead of
    raising a `RuntimeError`. `reshape` still returns a view whenever one is
    possible, so contiguous inputs behave exactly as before (no copy).
    """
    return x.reshape(x.shape[0], -1)

This one takes the flat vector of size bs x 784 and puts it back as a batch of images of 28 by 28 pixels:

In [10]:
def mnist_resize(x):
    """View `x` as a batch of single-channel 28x28 images, i.e. shape `(-1, 1, 28, 28)`."""
    image_shape = (-1, 1, 28, 28)
    return x.view(*image_shape)

In [11]:
# simple CNN
def get_cnn_model(data):
    """Build the simple CNN: resize -> four stride-2 conv/ReLU pairs -> avg-pool -> linear.

    The final linear layer has `data.c` outputs. The trailing comments on the
    conv specs give the spatial size after each convolution for a 28x28 input.
    """
    layers = [Lambda(mnist_resize)]
    # (in_channels, out_channels, kernel_size, padding) for each stride-2 convolution
    conv_specs = [
        ( 1,  8, 5, 2),  # 14
        ( 8, 16, 3, 1),  #  7
        (16, 32, 3, 1),  #  4
        (32, 32, 3, 1),  #  2
    ]
    for n_in, n_out, ks, pad in conv_specs:
        layers += [nn.Conv2d(n_in, n_out, ks, padding=pad, stride=2), nn.ReLU()]
    layers += [nn.AdaptiveAvgPool2d(1), Lambda(flatten), nn.Linear(32, data.c)]
    return nn.Sequential(*layers)

In [12]:
# Instantiate the simple CNN defined above.
model = get_cnn_model(data)

In [13]:
# Callback factories later handed to Runner(cb_funcs=...): Recorder, plus
# AvgStatsCallback with `accuracy` pre-bound as its first argument via partial.
cbfs = [Recorder, partial(AvgStatsCallback,accuracy)]

In [14]:
# Plain SGD (lr=0.4) over the model's parameters, bundled with the loss and data into a Learner.
opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
# The Runner receives the callback factories collected in cbfs.
run = Runner(cb_funcs=cbfs)

In [15]:
%time run.fit(1, learn)

train: [2.21573703125, tensor(0.1890)]
valid: [1.2721822265625, tensor(0.5882)]
CPU times: user 6.91 s, sys: 265 ms, total: 7.17 s
Wall time: 5.63 s


## CUDA

This took a long time to run, so it's time to use a GPU. A simple Callback can make sure the model, inputs and targets are all on the same device.

In [16]:
# Somewhat more flexible way: keep an explicit device object (CUDA device index 0).
# NOTE(review): the cells that follow assume a CUDA-capable GPU is available — confirm before running.
device = torch.device('cuda',0)

In [17]:
class CudaCallback(Callback):
    """Callback that moves the model and each batch to an explicitly given device."""

    def __init__(self, device):
        self.device = device

    def begin_fit(self):
        """Move the model to `self.device`."""
        self.model.to(self.device)

    def begin_batch(self):
        """Replace the runner's `xb`/`yb` with the results of `.to(self.device)`."""
        xb = self.xb.to(self.device)
        yb = self.yb.to(self.device)
        self.run.xb, self.run.yb = xb, yb

In [18]:
# Somewhat less flexible, but quite convenient: make `device` the current CUDA device,
# so bare `.cuda()` calls (as in the callback below) need no device argument.
torch.cuda.set_device(device)

In [19]:
#export
class CudaCallback(Callback):
    """Callback that moves the model and each batch to the current CUDA device."""

    def begin_fit(self):
        """Move the model with `.cuda()`."""
        self.model.cuda()

    def begin_batch(self):
        """Replace the runner's `xb`/`yb` with the results of `.cuda()`."""
        xb = self.xb.cuda()
        yb = self.yb.cuda()
        self.run.xb, self.run.yb = xb, yb

In [20]:
# Register the CUDA callback for all runners created from cbfs from here on.
# NOTE(review): this appends to the existing list in place, so re-running the cell
# adds CudaCallback a second time.
cbfs.append(CudaCallback)

In [21]:
# Fresh model, so the GPU run starts from newly initialised weights rather than
# from the model already trained on the CPU above.
model = get_cnn_model(data)

In [22]:
# Same optimizer/learner setup as the CPU run, bound to the new model.
opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
# cbfs now also contains CudaCallback, so this Runner moves model and batches to the GPU.
run = Runner(cb_funcs=cbfs)

In [23]:
%time run.fit(3, learn)

train: [1.93996546875, tensor(0.3208, device='cuda:0')]
valid: [1.04010849609375, tensor(0.6211, device='cuda:0')]
train: [0.4293822265625, tensor(0.8685, device='cuda:0')]
valid: [0.2628916259765625, tensor(0.9229, device='cuda:0')]
train: [0.22102599609375, tensor(0.9330, device='cuda:0')]
valid: [0.13378681640625, tensor(0.9600, device='cuda:0')]
CPU times: user 5.42 s, sys: 1.14 s, total: 6.56 s
Wall time: 27.8 s


## Refactor model
First we can regroup all the conv/relu in a single function:



In [24]:
def conv2d(ni, nf, ks=3, stride=2):
    """Return a `Conv2d` followed by a `ReLU`, wrapped in an `nn.Sequential`.

    ni: input channels.
    nf: output channels.
    ks: kernel size; the padding is derived from it as `ks//2`.
    stride: stride of the convolution.
    """
    conv = nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride)
    act = nn.ReLU()
    return nn.Sequential(conv, act)

We can also move the MNIST resize out of the model and into a batch transform, which we can implement with a Callback.

In [25]:
#export
class BatchTransformXCallback(Callback):
    """Callback that applies a transform function to the input batch `xb`."""

    # Ordering key; presumably read by the Runner to sort callbacks — confirm in nb_04.
    _order = 2

    def __init__(self, tfm):
        self.tfm = tfm

    def begin_batch(self):
        """Replace the runner's `xb` with `self.tfm(self.xb)`."""
        transformed = self.tfm(self.xb)
        self.run.xb = transformed

def view_tfm(*size):
    """Return a function that views its input tensor as shape `(-1, *size)`."""
    target_shape = (-1,) + size

    def _inner(x):
        return x.view(*target_shape)

    return _inner

In [26]:
# Transform that views each input batch as (-1, 1, 28, 28).
mnist_view = view_tfm(1,28,28)
# Register it as a batch transform on x.
# NOTE(review): in-place append — re-running this cell adds another entry to cbfs.
cbfs.append(partial(BatchTransformXCallback, mnist_view))

With the AdaptiveAvgPool, this model can now work on any size input:

In [27]:
# Output channels of each conv layer, in order (consumed by get_cnn_layers below).
nfs = [8,16,32,32]

In [28]:
def get_cnn_layers(data, nfs):
    """Return the list of layers for a CNN with conv output channels `nfs`.

    A single input channel is prepended to `nfs`. The first conv block uses a
    kernel size of 5 and every later one a kernel size of 3. The conv blocks
    are followed by adaptive average pooling to 1x1, a flatten layer and a
    linear layer with `data.c` outputs.
    """
    channels = [1] + nfs
    layers = []
    for idx, (n_in, n_out) in enumerate(zip(channels, channels[1:])):
        ks = 5 if idx == 0 else 3
        layers.append(conv2d(n_in, n_out, ks))
    layers += [nn.AdaptiveAvgPool2d(1), Lambda(flatten), nn.Linear(channels[-1], data.c)]
    return layers

def get_cnn_model(data, nfs):
    """Assemble the layers from `get_cnn_layers(data, nfs)` into an `nn.Sequential`."""
    layers = get_cnn_layers(data, nfs)
    return nn.Sequential(*layers)

In [29]:
#export
def get_runner(model, data, lr=0.6, cbs=None, opt_func=None, loss_func = F.cross_entropy):
    """Create a `(Learner, Runner)` pair for `model` on `data`.

    model: module whose parameters are handed to the optimizer.
    data: passed through to `Learner`.
    lr: learning rate given to the optimizer constructor.
    cbs: callback factory (or list of them); passed through `listify` and
        given to `Runner` as `cb_funcs`.
    opt_func: optimizer constructor called as `opt_func(params, lr=lr)`;
        `optim.SGD` is used when this is None.
    loss_func: loss function passed to `Learner`.
    """
    make_opt = optim.SGD if opt_func is None else opt_func
    optimizer = make_opt(model.parameters(), lr=lr)
    learner = Learner(model, optimizer, loss_func, data)
    runner = Runner(cb_funcs=listify(cbs))
    return learner, runner

In [30]:
# Build the refactored model and a matching (learner, runner) pair.
# cbfs at this point also holds the CUDA and batch-view callbacks appended above.
model = get_cnn_model(data, nfs)
learn,run = get_runner(model, data, lr=0.4, cbs=cbfs)

In [31]:
model

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (3): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (4): AdaptiveAvgPool2d(output_size=1)
  (5): Lambda()
  (6): Linear(in_features=32, out_features=10, bias=True)
)

In [32]:
# Train for 3 epochs; each epoch reports a train line and a valid line in the output.
run.fit(3, learn)

train: [1.725136875, tensor(0.4070, device='cuda:0')]
valid: [0.466588818359375, tensor(0.8490, device='cuda:0')]
train: [0.32852875, tensor(0.8993, device='cuda:0')]
valid: [0.17563607177734375, tensor(0.9480, device='cuda:0')]
train: [0.17860630859375, tensor(0.9456, device='cuda:0')]
valid: [0.1442845947265625, tensor(0.9557, device='cuda:0')]


## Hooks

### Manual insertion
Let's say we want to do some telemetry, and want the mean and standard deviation of each activation in the model. First we can do it manually like this: