# Let's build our first Deep Neural Network

All dependencies for this notebook are listed in the requirements.txt file, one directory above the nbs directory. This list will keep changing as we add to it, so be sure to rerun the cell below after every git pull.

In [None]:
!pip install -r ../requirements.txt

Let's declare our imports.

In [None]:
import numpy as np
import torch
from torch import nn
import math
from tqdm.notebook import tqdm

In [None]:
! pip install -q kaggle

In [None]:
from google.colab import files

In [None]:
# Bind the result to a name: as a bare last expression the returned mapping of
# file name -> file contents (the Kaggle API key inside kaggle.json) would be
# echoed into the notebook output and saved with the notebook.
uploaded = files.upload()

In [None]:
! mkdir ~/.kaggle

In [None]:
# Copy the uploaded credentials file from the working directory into ~/.kaggle/.
! cp kaggle.json ~/.kaggle/

In [None]:
# Mode 600: only the owning user may read or write the credentials file.
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# List datasets through the Kaggle CLI; presumably a quick check that the
# credentials installed above are accepted.
! kaggle datasets list

In [None]:
! kaggle datasets download -d ronitf/heart-disease-uci

In [None]:
class MyFirstNeuralNetwork(torch.nn.Module):
    """A two-layer fully connected network trained with hand-written backprop.

    The forward pass computes

        output = sigmoid(sigmoid(inputs @ W_Input + bias) @ W_Output + bias_out)

    and ``backward`` performs one step of plain gradient descent on the summed
    squared error ``0.5 * (labels - output) ** 2``. The weights are ordinary
    tensors (not ``nn.Parameter``), so neither autograd nor ``torch.optim`` is
    involved: every update is applied by hand.

    Args:
        in_size: number of input features.
        out_size: number of output units.
        hidden_size: number of hidden units.
    """

    def __init__(self, in_size=2, out_size=2, hidden_size=3):
        super(MyFirstNeuralNetwork, self).__init__()

        # Set the dimensionality of the network
        self.input_size = in_size
        self.output_size = out_size
        self.hidden_size = hidden_size
        # Step size used by the manual gradient-descent update in backward()
        self.learning_rate = 0.5

        # Initialize our weights
        self._init_weights()

    def _init_weights(self):
        """Draw all weights and biases from a standard normal distribution."""
        # Input -> hidden weights, shape (in_size, hidden_size)
        self.W_Input = torch.randn(self.input_size, self.hidden_size)
        # Hidden -> output weights, shape (hidden_size, out_size)
        self.W_Output = torch.randn(self.hidden_size, self.output_size)

        # One bias per hidden unit and one per output unit
        self.bias = torch.randn(self.hidden_size)
        self.bias_out = torch.randn(self.output_size)

    def forward(self, inputs):
        """Run the forward pass and cache the intermediates backward() needs.

        Args:
            inputs: float tensor of shape (n_samples, in_size).

        Returns:
            Tensor of shape (n_samples, out_size) with values in (0, 1).
        """
        # Affine transform into the hidden layer: (inputs . weights) + bias
        self.z = torch.matmul(inputs, self.W_Input) + self.bias
        # Hidden activation. The bias is added *before* the non-linearity so
        # that `state` really is a sigmoid output, which _derivative relies on.
        self.state = self._activation(self.z)
        # Affine transform into the output layer
        self.z_hidden = torch.matmul(self.state, self.W_Output) + self.bias_out
        # Finally activate the output
        output = self._activation(self.z_hidden)
        return output

    def backward(self, inputs, labels, output):
        """Backpropagate the error and update weights and biases in place.

        Args:
            inputs: the tensor that was passed to forward().
            labels: target tensor, broadcastable against ``output``.
            output: the tensor returned by forward() for ``inputs``.
        """
        # Error in the output; this is also the negative gradient of
        # 0.5 * (labels - output)^2 with respect to the output.
        self.loss = labels - output
        # Push the error back through the output sigmoid
        self.loss_delta = self.loss * self._derivative(output)
        # Share the error out over the hidden units. This must use W_Output
        # as it was during the forward pass, i.e. before it is updated below.
        self.z_loss = torch.matmul(self.loss_delta, torch.t(self.W_Output))
        # Push it back through the hidden sigmoid
        self.z_loss_delta = self.z_loss * self._derivative(self.state)
        # Gradient-descent step. The learning rate is applied exactly once per
        # parameter; the bias gradients are the deltas summed over the batch.
        self.W_Input += self.learning_rate * torch.matmul(torch.t(inputs), self.z_loss_delta)
        self.W_Output += self.learning_rate * torch.matmul(torch.t(self.state), self.loss_delta)
        self.bias += self.learning_rate * self.z_loss_delta.sum(dim=0)
        self.bias_out += self.learning_rate * self.loss_delta.sum(dim=0)

    def train(self, inputs=True, labels=None):
        """Run one training step: a forward pass followed by a backward pass.

        This method shadows ``nn.Module.train(mode)``, which ``nn.Module.eval()``
        calls internally. To keep ``.eval()`` / ``.train(True)`` working, a
        boolean first argument is forwarded to the base class.

        Args:
            inputs: float tensor of shape (n_samples, in_size), or a bool to
                switch training mode as ``nn.Module.train`` does.
            labels: targets for ``inputs``; required when ``inputs`` is a tensor.

        Returns:
            None after a training step; ``self`` when toggling training mode.

        Raises:
            TypeError: if ``inputs`` is a tensor and ``labels`` is missing.
        """
        if isinstance(inputs, bool):
            return super(MyFirstNeuralNetwork, self).train(inputs)
        if labels is None:
            raise TypeError("train() requires labels when given input data")
        # First we do the forward pass
        outputs = self.forward(inputs)
        # Then we do the backwards pass
        self.backward(inputs, labels, outputs)

    def predict(self, inputs):
        """Return the network output for ``inputs`` without tracking gradients."""
        with torch.no_grad():
            return self.forward(inputs)

    def save(self, out_path):
        """Write the weights and biases to ``out_path`` with ``torch.save``.

        The weights are plain tensors, so ``state_dict()`` would be empty;
        they are saved explicitly as a dict keyed by attribute name.

        Args:
            out_path: file path or writable binary file-like object.
        """
        torch.save(
            {
                'W_Input': self.W_Input,
                'W_Output': self.W_Output,
                'bias': self.bias,
                'bias_out': self.bias_out,
            },
            out_path,
        )

    def _activation(self, s):
        """Sigmoid non-linearity: 1 / (1 + e^-s)."""
        return 1 / (1 + torch.exp(-s))

    def _derivative(self, s):
        """Sigmoid derivative, expressed in terms of the sigmoid *output* ``s``."""
        return s * (1 - s)

In [None]:
import pandas as pd
# Load the heart-disease table from the repository's data directory; later
# cells expect it to contain a 'target' column.
df = pd.read_csv('../data/heart.csv')

In [None]:
# Preview the first 20 rows of the table.
df.head(n=20)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
# Scaler that maps every feature linearly onto the interval [-1, 1].
sc = MinMaxScaler(feature_range=(-1, 1))

Let's split our dataset into inputs and target.

In [None]:
# Separate the label column from the feature columns.
y = df['target']
X = df.drop(columns='target')
# Last expression of the cell, so the notebook displays both shapes.
X.shape, y.shape

Let's create a train and test split.

In [None]:
# Hold out 25% of the rows for testing; the fixed seed makes the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

Here we transform the features by scaling each one to a given range.
The estimator scales and translates each feature individually so that it lies in the given range on the training set; here that range is -1 to 1.

In [None]:
# Learn each feature's range from the training rows, then rescale those rows.
X_train = sc.fit(X_train).transform(X_train)

In [None]:
# Convert the scaled features to a 32-bit float tensor.
X_train = torch.tensor(X_train, dtype=torch.float)

In [None]:
# Convert the labels straight from their NumPy array and add a leading axis,
# giving shape (1, n_samples). Wrapping the array in a tuple first would send
# PyTorch down its slow sequence-of-arrays conversion path.
y_train = torch.tensor(y_train.values).unsqueeze(0)

In [None]:
# Make the labels a column of shape (n_samples, 1). reshape (unlike a
# transpose) gives the same result if this cell is run more than once.
y_train = y_train.reshape(-1, 1)

In [None]:
# Print both shapes to check that features and labels line up before training.
print(X_train.shape, y_train.shape)

Now we instantiate our neural network

In [None]:
# Seed PyTorch's RNG so the random weight initialisation is reproducible.
torch.manual_seed(42)
# NOTE: the variable name `nn` shadows the `torch.nn` module imported at the
# top of the notebook (it is re-imported further down). The name is kept
# because the training cell below refers to it.
nn = MyFirstNeuralNetwork(in_size=X_train.shape[1], out_size=1)

We train our neural network with 1000 epochs (training loops) and we measure the loss

In [None]:
# Full-batch training: each epoch measures the mean squared error and then
# takes one manual gradient step.
n_epochs = 1000
for i in tqdm(range(n_epochs)):
    outputs = nn(X_train)
    loss = torch.mean((y_train - outputs) ** 2).item()
    # Report every 100th epoch (and the last one) instead of flooding the
    # cell output with one line per epoch.
    if i % 100 == 0 or i == n_epochs - 1:
        tqdm.write("Loss: {}".format(loss))
    nn.train(X_train, y_train)

# Exercises

1. Try to initialize the weights with something better. Hint (Xavier Initialization)
2. Add a bias to the forward pass. Recall the affine transform is (inputs . weights) + bias
3. We are missing a learning rate to the backwards pass. See if you can add that in

# How would we rewrite this code using PyTorch built-in methods?

PyTorch gives us most of this functionality out of the box. First we can flag all Tensors to use Autograd. You can read more about autograd here: https://pytorch.org/docs/stable/autograd.html

In [None]:
# Fresh split for the autograd-based model; the fixed seed keeps it reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
# Fit the scaler on the training rows only and reuse it for the test rows, so
# no test-set statistics leak into training. requires_grad=True flags the
# training inputs for autograd; the model's own parameters are tracked
# regardless of this flag.
X_train = torch.tensor(sc.fit_transform(X_train), dtype=torch.float, requires_grad=True)
X_test = torch.tensor(sc.transform(X_test), dtype=torch.float)

# Labels as 1-D tensors, one entry per row.
y_train = torch.tensor(y_train.values)
y_test = torch.tensor(y_test.values)

This is the first way, using torch.nn.Sequential. In a Sequential model, modules are added in the order they are passed to the constructor. This is a quick way to write a small neural network.

In [None]:
import torch.nn as nn
from collections import OrderedDict

# Three fully connected layers with ReLU in between. The last layer emits raw
# class scores (logits): nn.CrossEntropyLoss applies log-softmax itself, so a
# Sigmoid on the output would squash the logits into (0, 1) and cap how
# confident the model can ever become.
model = torch.nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(13, 100)),   # in_features must equal X_train.shape[1]
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(100, 100)),
    ('relu2', nn.ReLU()),
    ('fc3', nn.Linear(100, 2)),    # one logit per class
]))

In [None]:
# Training bookkeeping: per-epoch loss history, the loss function, and an
# Adam optimiser over all of the model's parameters.
losses = []
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [None]:
# Full-batch training with autograd: clear old gradients, run the forward
# pass, backpropagate the loss, then let the optimiser update the parameters.
n_epochs = 1000
for epoch in tqdm(range(n_epochs)):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
    # Report every 100th epoch (and the last one); the full history is kept
    # in `losses` for plotting.
    if epoch % 100 == 0 or epoch == n_epochs - 1:
        print("Epoch {}, Loss: {}".format(epoch, loss.item()))

In [None]:
# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    prediction = model(X_test)
# Predicted class = index of the largest score in each row. (Using argmax
# also avoids assigning to `_`, which IPython uses for the last cell output.)
preds_y = prediction.argmax(dim=1)

In [None]:
# Fraction of test rows whose predicted class equals the true label.
accuracy_score(y_true=y_test, y_pred=preds_y)

In [None]:
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot 
# Set up plotly's offline notebook mode; presumably required before the
# iplot calls in the following cells can render inline.
init_notebook_mode()

In [None]:
# Loss curve for the training run above, with a title and axis labels so the
# figure can be read on its own.
iplot({
    'data': [{'y': losses, 'name': 'training loss'}],
    'layout': {
        'title': 'Training loss per epoch',
        'xaxis': {'title': 'Epoch'},
        'yaxis': {'title': 'Loss'},
    },
})

In [None]:
# Same loop as the training cell above, except that loss.backward() is
# deliberately left out: the loss is computed each epoch but never
# backpropagated, so no fresh gradients reach the optimiser.
# NOTE(review): what optimizer.step() does when gradients are cleared/absent
# depends on the optimiser and PyTorch version -- confirm the curve is flat.
losses_no_back = []
for epoch in tqdm(range(1000)):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    # (no loss.backward() here on purpose)
    optimizer.step()
    losses_no_back.append(loss.item())
    print("Epoch {}, Loss: {}".format(epoch, loss.item()))

In [None]:
# Loss curve for the run without backpropagation, with a title and axis
# labels so the figure can be read on its own.
iplot({
    'data': [{'y': losses_no_back, 'name': 'loss without backward()'}],
    'layout': {
        'title': 'Loss per epoch without loss.backward()',
        'xaxis': {'title': 'Epoch'},
        'yaxis': {'title': 'Loss'},
    },
})