## Neural Network

In [1]:
# Let's start by importing the relevant packages
# matplotlib for plots
import matplotlib as mpl
from matplotlib import pyplot as plt
# pandas to read in some data
import pandas as pd
# numpy to build our first perceptron
import numpy as np
# Train test split to do validate our findings from the perceptron training
from sklearn.model_selection import train_test_split
# MinMaxScaler to normalise the data before inputting them to the perceptron
from sklearn.preprocessing import MinMaxScaler
# PyTorch for neural networks
import torch
import time
from torch import nn
%matplotlib inline
mpl.rcParams['figure.figsize'] = (16, 9)
import os
home = os.path.expanduser("~")
data = home + '/data/workshop_data/occupancy_data/datatraining.txt'


In [2]:
# Load the occupancy data so we have something to predict
df = pd.read_csv(data)
target = 'Occupancy'
features = [col for col in df.columns if target not in col and 'date' not in col]

Unnamed: 0,date,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
1,2015-02-04 17:51:00,23.18,27.272,426.0,721.25,0.004793,1
2,2015-02-04 17:51:59,23.15,27.2675,429.5,714.0,0.004783,1
3,2015-02-04 17:53:00,23.15,27.245,426.0,713.5,0.004779,1
4,2015-02-04 17:54:00,23.15,27.2,426.0,708.25,0.004772,1
5,2015-02-04 17:55:00,23.1,27.2,426.0,704.5,0.004757,1


In [21]:
x_train, x_val, y_train, y_val = train_test_split(df[features], df[target], shuffle=False)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

## Build the neural network
To extend our previously build neuron to a neural network, we will need to add a second (third, fourth) linear layer.

The first layer needs to output as many layers as the second one consumes. Try 10 for the time being.
You will need to update the logits and forward function as well to pass through all layers.


In [38]:
class Network(nn.Module):
    
    def __init__(self, number_of_inputs, hidden_units):
        super().__init__()
        # Build the neuron using nn.Linear
        self.linear1 = nn.Linear(number_of_inputs, hidden_units, bias=True)
        self.linear2 = nn.Linear(hidden_units, 1, bias=True)
        # use nn.Sigmoid as an activation function
        self.act1 = nn.Sigmoid()
        self.act2 = nn.Sigmoid()
    
    def logit(self, inp):
        out_layer1 = self.act1(self.linear1(inp))
        return self.linear2(out_layer1)
    
    def forward(self, inp):
        return self.act2(self.logit(inp))
    

Let us now select a random selection of the training data and calculate the gradients for the neuron:

In [39]:
loss = nn.BCEWithLogitsLoss()
net = Network(5, 10)


In [40]:
optim = torch.optim.SGD(net.parameters(), lr=5e-2)

In [42]:
def fit_batch(optim, loss, net, x, y):
    net.train()
    optim.zero_grad()
    y_pred = net.logit(x)
    #print(y, y_pred, y.sum())
    err = loss(y_pred, y)
    #err = err * (y * 3 + 1)
    err.mean().backward()
    optim.step()
    return y_pred

def eval_batch(net, x):
    net.eval()
    y_pred = net(x)
    return y_pred

print(torch.cuda.is_available())

if torch.cuda.is_available():
    net = Network(5, 10).cuda()
    optim = torch.optim.Adam(net.parameters(), lr=1e-2)
    start = time.time()
    for i in range(20):
        acc = None
        for i in range(200):
            select = np.random.randint(0, len(x_train), 2048)
            x = torch.from_numpy(x_train[select]).float().cuda()
            y = torch.from_numpy(y_train.iloc[select].values).float().unsqueeze(1).cuda()
            y_pred = fit_batch(optim, loss, net, x, y)
            if acc is None:
                acc = (y==(y_pred > .5).float()).float().mean()
            else:
                acc += (y==(y_pred > .5).float()).float().mean()
        print(f'train accuracy {acc.data.cpu().numpy()/200}')
    
    x = torch.from_numpy(x_val).float().cuda()
    y = torch.from_numpy(y_val.values).float().unsqueeze(1).cuda()
    y_pred = eval_batch(net, x)
    acc = (y==(y_pred > .5).float()).float().mean()
    print(f'val accuracy {acc.data.cpu().numpy()}')
    
    print(f'Training time: {time.time() - start}')

True
train accuracy 0.85386962890625
train accuracy 0.96170166015625
train accuracy 0.97273681640625
train accuracy 0.976767578125
train accuracy 0.97940185546875
train accuracy 0.980791015625
train accuracy 0.98091552734375
train accuracy 0.98175537109375
train accuracy 0.981826171875
train accuracy 0.98212890625
train accuracy 0.98299560546875
train accuracy 0.98311767578125
train accuracy 0.983017578125
train accuracy 0.982998046875
train accuracy 0.9829833984375
train accuracy 0.98308349609375
train accuracy 0.98350341796875
train accuracy 0.983505859375
train accuracy 0.9838427734375
train accuracy 0.984228515625
val accuracy 0.9228880405426025
Training time: 9.295231580734253


## How does the result change with a chaning network?

Now try using a bigger layersize and try adding dropout.
How can we change the training and validation loss?

- What happens if we add Dropout? [docs](https://pytorch.org/docs/stable/nn.html#dropout)
- What happens if you add momentum or weight decay to SGD? [docs](https://pytorch.org/docs/stable/optim.html#torch.optim.SGD)
- What happens if you use an Adam optimizer instead of SGD? [docs](https://pytorch.org/docs/stable/optim.html#torch.optim.Adam)
- What happens if we use other activation functions? [docs](https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity)

Hint You can add dropout using:
```
    self.dropout = nn.Dropout(how_many_percent_shall_be_dropped)
    
    def logits(x):
        out_layer1 = self.dropout(self.act1(self.linear1(x)))
        ...
```