## Neural Network

In [2]:
# Let's start by importing the relevant packages
# matplotlib for plots
import matplotlib as mpl
from matplotlib import pyplot as plt
# pandas to read in some data
import pandas as pd
# numpy to build our first perceptron
import numpy as np
# train_test_split to validate our findings from the perceptron training
from sklearn.model_selection import train_test_split
# MinMaxScaler to normalise the data before inputting them to the perceptron
from sklearn.preprocessing import MinMaxScaler
# PyTorch for neural networks
import torch
import time
from torch import nn
%matplotlib inline
mpl.rcParams['figure.figsize'] = (16, 9)
import os
# Resolve the current user's home directory so the path is not tied to one account
home = os.path.expanduser("~")
# Location of the occupancy training data file
data = f"{home}/data/workshop_data/occupancy_data/datatraining.txt"


In [3]:

# Read the occupancy data set; the `Occupancy` column is what we will predict
df = pd.read_csv(data)
target = 'Occupancy'
# Keep every column as a feature unless its name contains the target name or 'date'
features = [col for col in df.columns if not (target in col or 'date' in col)]
df.head()

Unnamed: 0,date,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
1,2015-02-04 17:51:00,23.18,27.272,426.0,721.25,0.004793,1
2,2015-02-04 17:51:59,23.15,27.2675,429.5,714.0,0.004783,1
3,2015-02-04 17:53:00,23.15,27.245,426.0,713.5,0.004779,1
4,2015-02-04 17:54:00,23.15,27.2,426.0,708.25,0.004772,1
5,2015-02-04 17:55:00,23.1,27.2,426.0,704.5,0.004757,1


In [4]:
# Check the class balance: number of rows per value of the target column
print(df[target].value_counts())

0    6414
1    1729
Name: Occupancy, dtype: int64


In [5]:
# shuffle=False keeps the rows in their original order when splitting
x_train, x_test, y_train, y_test = train_test_split(
    df[features],
    df[target],
    shuffle=False,
)
# Fit the scaler on the training rows only, then apply the same scaling to both sets
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
# Peek at the first five scaled training rows (one column per feature)
x_train[:5]

array([[1.        , 0.70391174, 0.27549041, 0.42478485, 0.98861629],
       [0.99282297, 0.70361083, 0.27775383, 0.41480207, 0.98416176],
       [0.99282297, 0.70210632, 0.27549041, 0.4141136 , 0.98230597],
       [0.99282297, 0.69909729, 0.27549041, 0.40688468, 0.97859448],
       [0.98086124, 0.69909729, 0.27549041, 0.40172117, 0.97182167]])

In [7]:
# Peek at the first five training labels
y_train[:5]

1    1
2    1
3    1
4    1
5    1
Name: Occupancy, dtype: int64

## Build the neural network
To extend our previously built neural network, you will need to add a second (third, fourth, ...) linear layer. The first layer needs to output as many features as the second one consumes. Try 10 for the time being.
You will need to update the `logit` and `forward` functions as well so that the input passes through all layers.

In [14]:
class Network(nn.Module):
    """Small feed-forward network with a single hidden layer.

    The input is mapped to ``hidden_units`` features by ``linear1``, passed
    through a sigmoid, and reduced to one output value by ``linear2``.

    Args:
        number_of_inputs: number of input features of the first linear layer.
        hidden_units: number of outputs of the first linear layer (and inputs
            of the second).
    """

    def __init__(self, number_of_inputs, hidden_units):
        super().__init__()
        # Two fully connected layers: inputs -> hidden, hidden -> single output
        self.linear1 = nn.Linear(number_of_inputs, hidden_units, bias=True)
        self.linear2 = nn.Linear(hidden_units, 1, bias=True)
        # One sigmoid per layer: act1 follows the hidden layer, act2 the output
        self.act1 = nn.Sigmoid()
        self.act2 = nn.Sigmoid()

    def logit(self, inp):
        """Return the raw (pre-sigmoid) output of the second linear layer."""
        hidden = self.act1(self.linear1(inp))
        return self.linear2(hidden)

    def forward(self, inp):
        """Return the sigmoid of ``logit(inp)``."""
        raw = self.logit(inp)
        return self.act2(raw)
    

Let us now pick random samples of the training data and calculate the gradients for the network:

In [15]:
# BCEWithLogitsLoss works on raw logits, which is why training uses Network.logit
loss = nn.BCEWithLogitsLoss()
# 5 input features, 10 hidden units
net = Network(number_of_inputs=5, hidden_units=10)


In [16]:
# Plain stochastic gradient descent over all parameters of the network
optim = torch.optim.SGD(net.parameters(), lr=0.05)

In [20]:
def fit_batch(optim, loss, neuron, x, y):
    """Run one optimisation step on a single batch.

    Args:
        optim: optimiser holding the model's parameters.
        loss: callable invoked as ``loss(prediction, target)``.
        neuron: model exposing a ``logit`` method; its output is what the
            loss receives.
        x: batch of inputs passed to ``neuron.logit``.
        y: batch of targets passed to ``loss``.

    Returns:
        The output of ``neuron.logit(x)``, computed before the parameter update.
    """
    # Clear gradients left over from the previous step
    optim.zero_grad()
    logits = neuron.logit(x)
    # .mean() collapses the loss to a scalar even if `loss` returns per-sample values
    batch_error = loss(logits, y).mean()
    batch_error.backward()
    optim.step()
    return logits

# Report whether a CUDA device is present; the timed run below only executes on a GPU
print(torch.cuda.is_available())

if torch.cuda.is_available():
    # Fresh network on the GPU, with an optimiser bound to *its* parameters
    # (the original referenced an undefined name `neuron` here and below)
    net = Network(5, 10).cuda()
    optim = torch.optim.SGD(net.parameters(), lr=5e-2)
    start = time.time()
    for epoch in range(20):
        # Sum of per-batch accuracies, kept on the GPU to avoid a sync on every batch
        acc = torch.zeros((), device='cuda')
        for batch in range(200):
            # Draw 2048 training rows (with replacement) for this batch
            select = np.random.randint(0, len(x_train), 2048)
            x = torch.from_numpy(x_train[select]).float().cuda()
            y = torch.from_numpy(y_train.iloc[select].values).float().unsqueeze(1).cuda()
            y_pred = fit_batch(optim, loss, net, x, y)
            # fit_batch returns logits, so a probability of 0.5 corresponds to a logit of 0
            acc += (y == (y_pred > 0).float()).float().mean()
        # Mean accuracy over the 200 batches of this epoch
        print(f'{acc.item() / 200}')
    print(f'Training time: {time.time() - start}')

False


## Why is the GPU version slower?

Well, we need to move the data to the GPU and back, which costs time. This normally pays off, because the computations take far longer than moving the data. In our case, however, the computation is very simple and the amount of data is very small. This is not a workload the GPU is well suited for, because it cannot use its advantage of performing a lot of computations in parallel.