# Dense (Fully Connected) Neural Network

In [1]:
import exports.e_01_testing as tst
import exports.e_02_MNISTLoader as ldr

import math

import torch

## Normalization and Initialization

In [2]:
def normalize(x, m, s):
    """Standardize x: subtract the mean m, then divide by the std dev s."""
    centered = x - m
    return centered / s

In [3]:
x_train, y_train, x_valid, y_valid = ldr.loadMNIST()

# Capture the training statistics BEFORE x_train is overwritten: once x_train
# has been normalized its mean/std are ~0/~1, so recomputing them afterwards
# would leave the validation set effectively unnormalized.
train_mean, train_std = x_train.mean(), x_train.std()

x_train = normalize(x_train, train_mean, train_std)
# NB: Use training, not validation, statistics for the validation set
# (model parameters are sensitive to the normalization parameters - mean and stddev)
x_valid = normalize(x_valid, train_mean, train_std)

In [4]:
n_samples, n_inputs = x_train.shape
n_neurons = 50
n_outputs = 1

# Simplified Kaiming / He init: standard-normal draws scaled by 1/sqrt(fan_in).
# w1 is drawn before w2 so the random stream is consumed in the same order.
w1 = torch.randn(n_inputs, n_neurons).div(math.sqrt(n_inputs))
w2 = torch.randn(n_neurons, n_outputs).div(math.sqrt(n_neurons))
# Biases start at zero.
b1 = torch.zeros(n_neurons)
b2 = torch.zeros(n_outputs)

## Modules
Layers (linear, activation and loss layers) will be implemented as subclasses of an abstract class `Module`.

The final layer is always a loss layer, which is what makes a backward pass possible: a loss layer's gradients do not depend on any output gradient fed back into it, so it can start the chain. Hence `(value).grd` means the gradient of the loss with respect to `value`.

In [5]:
class Module():
    """ Abstract layer capable of a forward and a backward pass.

        Calling an instance stores the positional inputs in `self.args`,
        runs `forward` on them, stores the result in `self.out` and
        returns it. Subclasses implement `forward` and `backward`. """

    def __call__(self, *args):
        # Keep inputs and output around so the backward pass can reach them.
        self.args = args
        self.out  = self.forward(*args)
        return self.out

    # Accept arbitrary positional arguments so that calling an
    # unimplemented method reports NotImplementedError rather than a
    # misleading TypeError about the argument count.
    def forward(self, *args): raise NotImplementedError('Not Implemented')
    def backward(self, *args): raise NotImplementedError('Not Implemented')

class Linear(Module):
    """ Linear layer; out = in @ weights + bias.

        Gradients are stored in a `.grd` attribute on the tensors involved. """

    def __init__(self, weights, bias):
        self.weights, self.bias = weights, bias

    def forward(self, inp):
        return inp @ self.weights + self.bias

    def backward(self, inp):
        """ Propagate `self.out.grd` to the input, the weights and the bias. """
        out_grd = self.out.grd
        inp.grd = out_grd @ self.weights.t()
        self.weights.grd = inp.t() @ out_grd
        # Sum over the batch dimension only, so the bias gradient keeps one
        # entry per output unit (a bare .sum() would collapse it to a scalar).
        self.bias.grd = out_grd.sum(0)
        
class ReLU(Module):
    """ ReLU activation layer: negative inputs are replaced by zero. """

    def forward(self, inp):
        return inp.clamp(min=0.)

    def backward(self, inp):
        # The output gradient passes through only where the input was positive.
        positive = (inp > 0).float()
        inp.grd = self.out.grd * positive
        
class MSE(Module):
    """ Mean squared error loss layer. """

    def forward(self, inp, labels):
        return (inp.squeeze() - labels).pow(2).mean()

    def backward(self, inp, labels):
        """ Store the gradient of the loss wrt `inp` in `inp.grd`. """
        # d/dx mean((x - y)^2) = 2 * (x - y) / n; the factor 2 comes from
        # differentiating the square. unsqueeze(-1) restores the trailing
        # dimension removed by squeeze() in the forward pass.
        inp.grd = 2. * (inp.squeeze() - labels).unsqueeze(-1) / labels.shape[0]

## A Forward and Backward Pass

In [6]:
class DNN():
    """ Linear -> ReLU -> Linear network followed by an MSE loss layer.

        `params` is a dict holding the tensors under the keys
        'w1', 'b1', 'w2' and 'b2'. """

    def __init__(self, params):
        first = Linear(params['w1'], params['b1'])
        second = Linear(params['w2'], params['b2'])
        self.layers = [first, ReLU(), second, MSE()]
        # The loss layer is always the last entry.
        self.lossLayer = self.layers[-1]

    def __call__(self, x, labels):
        """ Run every non-loss layer in order, then return the loss. """
        activation = x
        for idx in range(len(self.layers) - 1):
            activation = self.layers[idx](activation)
        return self.lossLayer(activation, labels)

    def backward(self):
        """ Backward pass: loss layer first, then the others in reverse. """
        loss_layer = self.lossLayer
        preds, targets = loss_layer.args
        loss_layer.backward(preds, targets)
        for idx in range(len(self.layers) - 2, -1, -1):
            layer = self.layers[idx]
            # Each non-loss layer was called with a single input tensor.
            layer.backward(layer.args[0])

In [7]:
# Inputs are the normalized training images; targets are the training labels.
inp, lab = x_train, y_train
# Kaiming-initialized weights and zero biases, keyed as DNN expects.
params = dict(w1=w1, b1=b1, w2=w2, b2=b2)

dnn = DNN(params)
mse = dnn(inp, lab)
print(f'MSE: {mse}')

dnn.backward()
print(dnn.layers[0].weights.shape)

MSE: 28.699634552001953
torch.Size([784, 50])


## Pytorch Equivalent

In [8]:
#--export--#
from torch import nn

class TorchDNN(nn.Module):
    """ PyTorch version of the network: Linear -> ReLU -> Linear, MSE loss.

        Calling the model with `(x, labels)` runs the forward pass and
        returns the loss. The most recent loss is kept in `self.loss` so
        that `backward()` can be called afterwards. """

    def __init__(self, n_inputs, n_neurons, n_outputs):
        super().__init__()
        # nn.ModuleList (rather than a plain list) registers the layers as
        # sub-modules, so parameters(), state_dict() and .to() can see them.
        # nn.Linear's third positional argument is `bias`, not an output
        # size, so only (in_features, out_features) are passed.
        self.layers = nn.ModuleList([nn.Linear(n_inputs, n_neurons),
                                     nn.ReLU(),
                                     nn.Linear(n_neurons, n_outputs)])
        self.lossLayer = nn.MSELoss()

    def forward(self, x, labels):
        # Defined as `forward` so nn.Module.__call__ dispatches here and its
        # hook machinery keeps working; `model(x, labels)` is unchanged.
        for layer in self.layers:
            x = layer(x)
        self.loss = self.lossLayer(x.squeeze(), labels)
        return self.loss

    def backward(self):
        """ Backpropagate from the loss stored by the last forward pass. """
        self.loss.backward()

In [9]:
# The labels are of type long; cast them to float before computing the MSE.
y_train_float = y_train.to(torch.float32)

torchDNN = TorchDNN(n_inputs, n_neurons, n_outputs)
loss = torchDNN(x_train, y_train_float)
print(f'MSE: {loss}')
torchDNN.backward()
torchDNN.layers[0].weight.grad.shape

MSE: 27.24370574951172


torch.Size([50, 784])