# D5 - 01 - Modeling with Deep Learning

## Content
- Designing neural networks for **fitting** noisy data
- Hyperparameter optimisation
- Splitting datasets
- Regularisation with dropout

## Remember jupyter notebooks
- To run the currently highlighted cell, hold <kbd>&#x21E7; Shift</kbd> and press <kbd>&#x23ce; Enter</kbd>.
- To get help for a specific function, place the cursor within the function's brackets, hold <kbd>&#x21E7; Shift</kbd>, and press <kbd>&#x21E5; Tab</kbd>.

## A notebook "preamble"
The first code block prepares our notebook by specifying how to render plots and importing the required packages.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import torch

Let's create a noisy dataset...

In [None]:
def gen_data(length, noise=0.1):
    """Sample a noisy one-dimensional regression dataset.

    Args:
        length: Number of samples to draw.
        noise: Scale factor applied to the standard-normal noise that is
            added to the target values.

    Returns:
        A tuple ``(x, y, f)``: the sampled positions (uniform in
        ``[-1.5 * pi, 1.5 * pi)``), the noisy targets, and the noise-free
        target function ``f`` itself.
    """
    def target(t):
        # Noise-free ground truth: an odd function localised around zero.
        return np.tanh(t * 0.5) * np.exp(-t**2)

    # Shift the uniform [0, 1) samples to [-0.5, 0.5) before stretching them.
    centred = np.random.rand(length) - 0.5
    x = 2.0 * centred * np.pi * 1.5
    jitter = np.random.randn(len(x)) * noise
    y = target(x) + jitter
    return x, y, target

# Draw 5000 noisy samples together with the noise-free generating function.
x, y, f = gen_data(5000)
# Evaluate the noise-free function on 100 evenly spaced points that span
# the sampled x-range.
x2 = np.linspace(x.min(), x.max(), 100)
y2 = f(x2)

# Noisy samples as small translucent dots, noise-free curve drawn on top.
plt.scatter(x, y, s=0.5, alpha=0.3)
plt.plot(x2, y2, linewidth=2, color='C1')

... which we cast into `torch.autograd.Variable`s for training:

In [None]:
# Reshape the 1-D NumPy arrays into single-column 2-D arrays (one sample
# per row) and wrap them for use with torch.
inp_trn = torch.autograd.Variable(
    torch.Tensor(x.reshape(-1, 1)))
out_trn = torch.autograd.Variable(
    torch.Tensor(y.reshape(-1, 1)))

# Show the resulting inputs and targets.
print(inp_trn)
print(out_trn)

Here, we define a first version of our neural network for fitting functions:

In [None]:
class NeuralNetwork(torch.nn.Module):
    """Fully connected network mapping one input feature to one output.

    The layer widths are 1 -> 10 -> 30 -> 50 -> 30 -> 10 -> 1; a LeakyReLU
    follows every linear layer except the last one.
    """

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        widths = (1, 10, 30, 50, 30, 10, 1)
        # Register the layers as layer1 ... layer6, in this order, so that
        # the parameter names and their ordering stay fixed.
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), 1):
            setattr(self, 'layer%d' % idx, torch.nn.Linear(n_in, n_out))
        self.activation = torch.nn.LeakyReLU()

    def forward(self, inp):
        """Return the network output for the batch `inp`."""
        hidden = inp
        # Layers 1-5 are each followed by the activation ...
        for idx in range(1, 6):
            hidden = self.activation(getattr(self, 'layer%d' % idx)(hidden))
        # ... while the output layer stays linear.
        return self.layer6(hidden)

We instantiate the network and train it on the full set of data; then, we visualise the `loss` history and show the network's prediction for the full dataset:

In [None]:
deep_fit = NeuralNetwork()
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(deep_fit.parameters(), lr=0.0001)

# Full-batch training: every epoch performs one optimizer step on the
# complete dataset and records the loss of that step.
loss_trn = []
deep_fit.train()
for _ in range(5000):
    loss = loss_function(deep_fit(inp_trn), out_trn)
    loss.backward()
    optimizer.step()
    # Reset the accumulated gradients so the next epoch starts from zero.
    optimizer.zero_grad()
    # The loss is a 0-dim tensor: `.item()` converts it to a Python float
    # (indexing it with `[0]` raises an IndexError in current PyTorch).
    loss_trn.append(loss.item())
deep_fit.eval()

# Left panel: loss history; right panel: prediction on the training inputs.
fig, (cnv, prd) = plt.subplots(1, 2, figsize=(10, 4))
cnv.plot(loss_trn)
cnv.set_xlabel('epoch')
cnv.set_ylabel('loss')
prd.scatter(
    inp_trn.data.numpy(),
    deep_fit(inp_trn).data.numpy(),
    s=1, c='C4')
fig.tight_layout()

## Separating data for training/validation

In [None]:
# Pick 70% of the sample indices at random (without repetition) for
# training ...
trn = np.random.choice(x.size, int(0.7 * x.size), replace=False)
# ... and use all remaining indices for validation.
val = np.setdiff1d(np.arange(x.size), trn, assume_unique=True)

# Overlay both subsets to check that they cover the same x-range.
plt.scatter(x[trn], y[trn], s=0.5)
plt.scatter(x[val], y[val], s=0.5)

We have to define new `torch.autograd.Variable`s for training...

In [None]:
# Rebuild the training inputs/targets from the training indices only,
# again as single-column 2-D arrays (one sample per row).
inp_trn = torch.autograd.Variable(
    torch.Tensor(x[trn].reshape(-1, 1)))
out_trn = torch.autograd.Variable(
    torch.Tensor(y[trn].reshape(-1, 1)))

# Show the resulting inputs and targets.
print(inp_trn)
print(out_trn)

... and validation...

In [None]:
# Validation inputs/targets, built from the validation indices only.
# The former `volatile=True` flag is not passed any more: it was removed in
# PyTorch 0.4 and is ignored with a warning. Wrap a forward pass in
# `torch.no_grad()` if gradient tracking should be switched off.
inp_val = torch.autograd.Variable(
    torch.Tensor(x[val].reshape(-1, 1)))
out_val = torch.autograd.Variable(
    torch.Tensor(y[val].reshape(-1, 1)))

# Show the resulting inputs and targets.
print(inp_val)
print(out_val)

... and we redo the training for a new instance of our network:

In [None]:
deep_fit = NeuralNetwork()
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(deep_fit.parameters(), lr=0.0001)

# Full-batch training with per-epoch monitoring of the validation loss.
loss_trn, loss_val = [], []
for _ in range(5000):
    # --- training step on the training subset ---
    deep_fit.train()
    loss = loss_function(deep_fit(inp_trn), out_trn)
    loss.backward()
    optimizer.step()
    # Reset the accumulated gradients so the next epoch starts from zero.
    optimizer.zero_grad()
    # `.item()` converts the 0-dim loss tensor to a Python float (indexing
    # it with `[0]` raises an IndexError in current PyTorch).
    loss_trn.append(loss.item())
    # --- evaluation on the held-out validation subset ---
    deep_fit.eval()
    # The validation loss is only recorded, never back-propagated, so no
    # autograd graph needs to be built for it.
    with torch.no_grad():
        loss = loss_function(deep_fit(inp_val), out_val)
    loss_val.append(loss.item())

# Left panel: both loss histories; right panel: prediction on the
# validation inputs (the network is in eval mode after the last epoch).
fig, (cnv, prd) = plt.subplots(1, 2, figsize=(10, 4))
cnv.plot(loss_trn, label='training')
cnv.plot(loss_val, label='validation')
cnv.set_xlabel('epoch')
cnv.set_ylabel('loss')
cnv.legend()
prd.scatter(
    inp_val.data.numpy(),
    deep_fit(inp_val).data.numpy(),
    s=1, c='C4')
fig.tight_layout()

## Dropout

In [None]:
class NeuralNetwork(torch.nn.Module):
    """Fully connected network with dropout after every hidden layer.

    The layer widths are 1 -> 10 -> 30 -> 50 -> 30 -> 10 -> 1. Each of the
    first five linear layers is followed by a LeakyReLU and a dropout with
    probability 0.1; the output layer is purely linear.
    """

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        widths = (1, 10, 30, 50, 30, 10, 1)
        # Register the layers as layer1 ... layer6, in this order, so that
        # the parameter names and their ordering stay fixed.
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), 1):
            setattr(self, 'layer%d' % idx, torch.nn.Linear(n_in, n_out))
        self.activation = torch.nn.LeakyReLU()
        self.dropout = torch.nn.Dropout(p=0.1)

    def forward(self, inp):
        """Return the network output for the batch `inp`."""
        hidden = inp
        # Layers 1-5: linear -> activation -> dropout.
        for idx in range(1, 6):
            linear = getattr(self, 'layer%d' % idx)
            hidden = self.dropout(self.activation(linear(hidden)))
        # The output layer has neither activation nor dropout.
        return self.layer6(hidden)

In [None]:
deep_fit = NeuralNetwork()
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(deep_fit.parameters(), lr=0.0001)

# Full-batch training with per-epoch monitoring of the validation loss.
loss_trn, loss_val = [], []
for _ in range(5000):
    # --- training step (train mode: dropout is active) ---
    deep_fit.train()
    loss = loss_function(deep_fit(inp_trn), out_trn)
    loss.backward()
    optimizer.step()
    # Reset the accumulated gradients so the next epoch starts from zero.
    optimizer.zero_grad()
    # `.item()` converts the 0-dim loss tensor to a Python float (indexing
    # it with `[0]` raises an IndexError in current PyTorch).
    loss_trn.append(loss.item())
    # --- evaluation (eval mode: dropout is switched off) ---
    deep_fit.eval()
    # The validation loss is only recorded, never back-propagated, so no
    # autograd graph needs to be built for it.
    with torch.no_grad():
        loss = loss_function(deep_fit(inp_val), out_val)
    loss_val.append(loss.item())

# Left panel: both loss histories; right panel: prediction on the
# validation inputs (the network is in eval mode after the last epoch).
fig, (cnv, prd) = plt.subplots(1, 2, figsize=(10, 4))
cnv.plot(loss_trn, label='training')
cnv.plot(loss_val, label='validation')
cnv.set_xlabel('epoch')
cnv.set_ylabel('loss')
cnv.legend()
prd.scatter(
    inp_val.data.numpy(),
    deep_fit(inp_val).data.numpy(),
    s=1, c='C4')
fig.tight_layout()

Turning off the training mode for making predictions (use `.eval()`) is very important:

In [None]:
# Deliberately switch back to training mode before predicting: dropout
# stays active, so this plot shows what happens when `.eval()` is forgotten.
deep_fit.train()
plt.scatter(
    inp_val.data.numpy(),
    deep_fit(inp_val).data.numpy(),
    s=1, c='C4')