In [1]:
# (Re)load IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Layerwise Sequential Unit Variance (LSUV)
> An initialization technique for deep architectures


<a href="https://arxiv.org/pdf/1511.06422.pdf">All you need is a good init</a>

From the abstract:

    Layer-sequential unit-variance (LSUV) initialization – a simple method for weight initialization for deep net learning – is proposed. The method consists of the two steps. First, pre-initialize weights of each convolution or inner-product layer with orthonormal matrices. Second, proceed from the first to the final layer, normalizing the variance of the output of each layer to be equal to one.


In [2]:
#export
from exp.nb_08 import *

## Data, Model and Runner

In [3]:
# Fetch the train/validation tensors and wrap each split in a Dataset.
x_train, y_train, x_valid, y_valid = get_data()
train_ds = Dataset(x_train, y_train)
valid_ds = Dataset(x_valid, y_valid)

# nh: presumably the hidden-layer width; bs: batch size handed to get_dls.
nh = 50
bs = 512

# Class count is taken as the largest training label plus one.
c = 1 + y_train.max().item()
loss_func = F.cross_entropy

# Bundle whatever get_dls returns together with the class count.
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [4]:
# Transform built by view_tfm(1, 28, 28); applied to each x batch through
# BatchTransformXCallback below.
mnist_view = view_tfm(1, 28, 28)

# Callback classes/factories, assembled one entry at a time (order preserved).
callbacks = [Recorder]
callbacks.append(partial(AvgStatsCallback, accuracy))
callbacks.append(partial(BatchTransformXCallback, mnist_view))
callbacks.append(CudaCallback)

# Presumably the number of filters for each successive conv layer.
nfs = [8, 16, 32, 64, 64]

Let's refactor our `conv_layer` function into an `nn.Module` class.

In [None]:
class ConvLayer(nn.Module):
    """A 2-D convolution followed by a `GeneralRelu` activation.

    Exposes `weight` and `bias` so the layer can be handled as a single unit:
    `weight` is the convolution's weight tensor, while `bias` is backed by the
    activation's `sub` attribute with the sign flipped.

    Args:
        ni: number of input channels.
        nf: number of output channels (filters).
        ks: kernel size; the convolution is padded by `ks//2`.
        stride: convolution stride.
        sub: forwarded to `GeneralRelu` as its `sub` argument.
        **kwargs: forwarded unchanged to `GeneralRelu`.
    """
    def __init__(self, ni, nf, ks=3, stride=2, sub=0., **kwargs):
        super().__init__()
        # `nn.Conv2d` has no `ks` keyword: the kernel size is its third
        # positional argument (`kernel_size`), and `ks//2` is the padding.
        self.conv = nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride, bias=True)
        self.relu = GeneralRelu(sub=sub, **kwargs)

    def forward(self, x):
        """Apply the convolution, then the activation."""
        return self.relu(self.conv(x))

    @property
    def bias(self):
        """Negated `sub` of the activation (not the convolution's own bias)."""
        return -self.relu.sub

    @bias.setter
    def bias(self, v):
        # Stored with the sign flipped so that `bias` reads back as `v`.
        self.relu.sub = -v

    @property
    def weight(self):
        """The weight tensor of the wrapped convolution."""
        return self.conv.weight

In [None]:
# Presumably writes the cells tagged `#export` in this notebook out to a
# module; confirm against nb_auto_export's definition (not visible here).
nb_auto_export()