In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
#--export--#
from exports.e_07_Annealing import *

### Normalization

In [3]:
#--export--#
def normalize(x, m, s):
    """Standardize `x`: subtract the mean `m`, then divide by the std `s`."""
    centered = x - m
    return centered / s

def normalize_data(train, valid):
    """Normalize both splits using the mean and std of `train`.

    The statistics are computed from `train` only and reused for `valid`, so
    both splits go through the same transformation.

    Returns:
        Tuple `(normalized_train, normalized_valid)`.
    """
    mean = train.mean()
    std = train.std()
    train_normed = normalize(train, mean, std)
    valid_normed = normalize(valid, mean, std)
    return train_normed, valid_normed

In [4]:
#--export--#
def MNISTDataWrapper(batch_size=512):
    """Load MNIST, normalize it, and wrap the train/valid dataloaders.

    Both splits are normalized by `normalize_data`, i.e. with the statistics
    of the training inputs.

    Args:
        batch_size: Batch size handed to `make_dls`. Defaults to 512, the
            value that used to be hard-coded here.

    Returns:
        `DataWrapper(*make_dls(train_ds, valid_ds, batch_size), n_out)` with
        `n_out = 10`.
    """
    x_train, y_train, x_valid, y_valid = loadMNIST()
    x_train, x_valid = normalize_data(x_train, x_valid)

    train_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_valid, y_valid)

    # Number of model outputs stored on the wrapper (read by CNNModel as data_w.n_out).
    n_out = 10

    return DataWrapper(*make_dls(train_ds, valid_ds, batch_size), n_out)

In [5]:
# Build the normalized MNIST DataWrapper used by the cells below.
data_w = MNISTDataWrapper()

# Lambda Layer
A `Lambda` layer wraps a plain function as an `nn.Module`. This lets us reshape the MNIST data between flat and square (image) formats inside the model, so that we can use a convnet.

In [6]:
#--export--#
class Lambda(nn.Module):
    """Module whose forward pass simply applies a stored callable.

    Lets a plain function (for example a reshape) be used as a layer inside
    a container such as `nn.Sequential`.
    """

    def __init__(self, func):
        """Store `func`, the callable applied to the input in `forward`."""
        super().__init__()
        self.func = func

    def forward(self, x):
        """Return the result of calling the stored function on `x`."""
        result = self.func(x)
        return result
        
def flatten(x):
    """Keep dim 0 of `x` and collapse all remaining dims into one (via `view`)."""
    batch = x.shape[0]
    return x.view(batch, -1)
def mnist_square(x):
    """View `x` with shape (-1, 1, 28, 28): one channel, 28x28 per sample."""
    target_shape = (-1, 1, 28, 28)
    return x.view(*target_shape)

In [7]:
#--export--#
def CNNModel(data_w, lr=0.3):
    """Build a small CNN for flat MNIST batches together with its SGD optimizer.

    The model reshapes its input to (-1, 1, 28, 28), applies four stride-2
    convolutions each followed by ReLU, average-pools to 1x1, flattens, and
    ends with a linear layer producing `data_w.n_out` values.

    The trailing comments give the spatial size after each conv, from
    floor((in + 2*padding - kernel) / stride) + 1. The second conv uses
    padding=1: with padding=2 a 14x14 map would become 8x8 rather than the
    documented 7x7.

    Args:
        data_w: Object exposing `n_out`, the size of the final linear layer.
        lr: Learning rate for the SGD optimizer.

    Returns:
        Tuple `(model, optimizer)` where `optimizer` is `optim.SGD` over the
        model's parameters.
    """
    n_out = data_w.n_out

    model = nn.Sequential(
        Lambda(mnist_square),
        nn.Conv2d( 1, 8, 5, padding=2,stride=2), nn.ReLU(), # 14
        nn.Conv2d( 8,16, 3, padding=1,stride=2), nn.ReLU(), # 7
        nn.Conv2d(16,32, 3, padding=1,stride=2), nn.ReLU(), # 4
        nn.Conv2d(32,32, 3, padding=1,stride=2), nn.ReLU(), # 2
        nn.AdaptiveAvgPool2d(1),
        Lambda(flatten),
        nn.Linear(32,n_out)
    )

    return model, optim.SGD(model.parameters(), lr=lr)

In [8]:
# Bundle the CNN and its SGD optimizer (from CNNModel) with the loss function and the data.
model_w = ModelWrapper(*CNNModel(data_w), F.cross_entropy, data_w)
# Stats callback with acc_f as metric — presumably what prints the
# "train:/valid: [loss, acc]" lines during fit (confirm in AvgStatsCB).
cbs = [AvgStatsCB([acc_f])]
job = DLJob(cbs)

## CNNs are much slower, but more accurate

In [9]:
# NOTE: %timeit calls job.fit repeatedly on the same model_w, so training keeps
# going across timing runs — the output below shows 8 train/valid pairs with
# steadily decreasing loss, not 8 independent one-epoch fits.
%timeit job.fit(1, model_w)

train: [2.0539221875, tensor(0.2785)]
valid: [0.8967267578125, tensor(0.7219)]
train: [0.49458296875, tensor(0.8466)]
valid: [0.2300808837890625, tensor(0.9323)]
train: [0.2500250390625, tensor(0.9246)]
valid: [0.16810185546875, tensor(0.9511)]
train: [0.153403525390625, tensor(0.9535)]
valid: [0.14906611328125, tensor(0.9534)]
train: [0.11439572265625, tensor(0.9653)]
valid: [0.1084128662109375, tensor(0.9677)]
train: [0.096074375, tensor(0.9708)]
valid: [0.10416944580078125, tensor(0.9681)]
train: [0.084914716796875, tensor(0.9735)]
valid: [0.08003513793945312, tensor(0.9759)]
train: [0.0735136865234375, tensor(0.9772)]
valid: [0.08957796020507812, tensor(0.9736)]
3.7 s ± 173 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# CUDA Callback

In [15]:
#--export--#
class CudaCB(Callback):
    """Callback that moves the model and every batch to a given device."""

    def __init__(self, device):
        """Remember the target `device`."""
        self.device = device

    def begin_fit(self):
        """Call `.to(device)` on the model."""
        self.model.to(self.device)

    def begin_batch(self):
        """Replace the job's `xb` and `yb` with the results of `.to(device)`."""
        # Compute both moved tensors first, then assign them to the job together.
        xb_moved = self.xb.to(self.device)
        yb_moved = self.yb.to(self.device)
        self.job.xb, self.job.yb = xb_moved, yb_moved

In [16]:
# Stats callback plus CudaCB targeting CUDA device index 0.
cbs = [AvgStatsCB([acc_f]),
       CudaCB(torch.device('cuda', 0))]
job = DLJob(cbs)

In [None]:
# unfortunately my GPU is too old
# %timeit job.fit(1, model_w)

# Batch Transform Callback

In [9]:
#--export--#
class BatchTransformCB(Callback):
    """Callback that applies a transform function to each input batch."""

    def __init__(self, tfm):
        """Store `tfm`, the callable applied to `xb`."""
        self.tfm = tfm

    def begin_batch(self):
        """Overwrite the job's `xb` with `tfm(xb)`."""
        transformed = self.tfm(self.xb)
        self.job.xb = transformed

def view_tfm(size):
    """Return a function that views a tensor as `(-1, *size)`.

    Args:
        size: Sequence of ints giving the per-sample shape. Any iterable of
            ints is accepted (tuple, list, `torch.Size`); it is converted to a
            tuple once, up front, so a list no longer fails on concatenation.

    Returns:
        A callable `f(x)` returning `x.view(-1, *size)`.
    """
    # Build the full target shape once instead of on every call.
    target_shape = (-1,) + tuple(size)

    def _inner(x):
        return x.view(*target_shape)
    return _inner

# Transform that views a batch as (-1, 1, 28, 28).
mnist_view = view_tfm((1, 28, 28))

In [10]:
# Stats callback plus a callback that applies mnist_view to every input batch.
cbs = [AvgStatsCB([acc_f]),
       BatchTransformCB(mnist_view)]
job = DLJob(cbs)

In [11]:
# NOTE(review): model_w comes from CNNModel, whose first layer is already
# Lambda(mnist_square), so the mnist_view reshape from the callback is redundant here.
job.fit(1, model_w)

train: [2.29350875, tensor(0.1319)]
valid: [2.2261279296875, tensor(0.3111)]


In [2]:
# Convert this notebook to a Python module — presumably from the cells marked
# #--export--# (confirm in utils/export_notebook.py).
!python utils/export_notebook.py 08_LambdaLayers.ipynb

Notebook 08_LambdaLayers.ipynb has been converted to module ./exports/e_08_LambdaLayers.py!
