# Building neural network from scratch

#### We will be using heart disease dataset from UCL Machine Learning repository to build a logistic regression model and then use the same intuition to build an aritficial neural netework.



In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
import sklearn.linear_model

%matplotlib inline

In [2]:
df = pd.read_csv('heart.csv')

In [3]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


# Standarising Dataframe 

In [8]:
X = df.iloc[:,:-1]
# notice the extra [], The concept of rows and columns applies when you have a 2D array. However, the array numpy.array([1,2,3,4]) is a 1D array and so has only one dimension, therefore shape rightly returns a single valued iterable
y = df[['target']] 

In [9]:
from sklearn import preprocessing

min_max_scaler = preprocessing.MinMaxScaler()

X_scaled = min_max_scaler.fit_transform(X)


In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=0)

Decision Tree classifier

In [11]:
from sklearn.ensemble import BaggingClassifier
bag_clf = BaggingClassifier(DecisionTreeClassifier(),n_estimators=500, max_samples=100, bootstrap= True, n_jobs=-1)
bag_clf.fit(X_train,y_train)
y_pred = bag_clf.predict(X_test)

  y = column_or_1d(y, warn=True)


In [9]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.8351648351648352

Logistic Regression

In [10]:
log_clf = sklearn.linear_model.LogisticRegression()
log_clf.fit(X_train,y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [11]:
LR_predictions = log_clf.predict(X_test)
accuracy_score(y_test, LR_predictions)

0.8131868131868132

# Neural Network

**Neural Network Structure**: The structure to build a Neural Network is to:
    1. Define the neural network structure ( # of input units,  # of hidden units, etc). 
    2. Initialize the model's parameters
    3. Loop:
        - Implement forward propagation
        - Compute loss
        - Implement backward propagation to get the gradients
        - Update parameters (gradient descent)

You often build helper functions to compute steps 1-3 and then merge them into one function we call `nn_model()`. Once you've built `nn_model()` and learnt the right parameters, you can make predictions on new data.

<img src='https://cdn-images-1.medium.com/max/1600/1*DW0Ccmj1hZ0OvSXi7Kz5MQ.jpeg'>

In [12]:
print ("train_set_x shape: " + str(X_train.shape))
print ("train_set_y shape: " + str(y_train.shape))
print ("test_set_x shape: " + str(X_test.shape))
print ("test_set_y shape: " + str(y_test.shape))

train_set_x shape: (212, 13)
train_set_y shape: (212, 1)
test_set_x shape: (91, 13)
test_set_y shape: (91, 1)


In [13]:
#sigmoid

def sigmoid(z):
    
    s = 1/(1+ np.exp(-z))
    
    return s

In [14]:
#Define the structure of neural network 

def nn_layers(X, Y):
    '''Function to defind number of input layer, number of hidden layers, number of output layer'''
    n_x = X.shape[1] #input layer
    n_h = 4 #hidden layer
    n_y = Y.shape[1] #output layer
    
    return (n_x, n_h, n_y)
    

In [15]:
(n_x, n_h, n_y) = nn_layers(X_train, y_train)
print ("Input Layer "+ str(n_x))
print ("Hidden Layer "+ str(n_h))
print ("Output Layer "+ str(n_y))


Input Layer 13
Hidden Layer 4
Output Layer 1


# Initialising parameters

In [16]:
def initialise_paramters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h,1))
    W2 = np.random.randn(n_y,n_h) * 0.01
    b2 = np.zeros((n_y,1))
    
    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [17]:
initialise_paramters(n_x, n_h, n_y)

parameters = initialise_paramters(n_x, n_h, n_y)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 2.04904518e-02 -8.87429136e-05  7.36978462e-03  7.71859575e-03
  -2.41861539e-03 -6.47651634e-03  1.06306204e-02  1.20179531e-02
  -4.72064249e-03 -7.22124654e-04  5.25179381e-03  2.04656521e-02
   4.07315589e-03]
 [ 1.91381139e-03 -4.76864875e-03 -5.36537954e-04 -1.71313964e-02
  -1.19982875e-02  2.96984116e-03  2.10150080e-02 -1.15779275e-03
   1.86604212e-02 -9.07348597e-03  1.67173251e-03 -1.77601861e-03
  -1.87511657e-02]
 [-5.60132881e-03 -3.71312428e-03  3.64144143e-03  4.45705351e-03
  -1.20789632e-02 -6.98878758e-03 -3.24595074e-03 -4.02720162e-03
  -2.85854633e-03 -1.37665124e-02  4.60394027e-03 -7.81261009e-03
   6.43868939e-03]
 [ 1.69490042e-02  1.21742338e-02  1.27578402e-02  9.77713733e-03
   2.36941486e-02  8.54505840e-03 -1.13844989e-02 -1.84610626e-03
   1.42735333e-02  8.21385269e-03 -2.52769353e-03  6.56387109e-03
   7.79263404e-03]]
b1 = [[0.]
 [0.]
 [0.]
 [0.]]
W2 = [[ 0.00211584 -0.01435216 -0.03072213 -0.00814382]]
b2 = [[0.]]


# Forward Propogation

Now that we have initialized our parameters, we are going to implement forward propagation.

There are two steps for implementing Forward propagation:  
- Retrieve each parameter from the dictionary "parameters" (which is the output of initialize_parameters()) by using parameters[".."].
- Compute $Z^{[1]}, A^{[1]}, Z^{[2]}$ and $A^{[2]}$ (the vector of all your predictions on all the examples in the training set).
- Store the values in *cache* for backpropagation 

In [18]:
def forward_propagation(X, parameters):
    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    #forward propagation
    Z1 = np.dot(W1,X.T) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2,A1) + b2
    A2 = sigmoid(Z2)
    
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    assert(A2.shape == (1, X.shape[0]))
    return A2, cache

In [19]:
#paramaters = initialise_paramters(n_x, n_h, n_y)
A2, cache = forward_propagation(X_train, parameters)

In [23]:
print(np.mean(cache['Z1']) ,np.mean(cache['A1']),np.mean(cache['Z2']),np.mean(cache['A2']))

0.012788647211885952 0.012778694357383363 0.00017208995925056182 0.5000430224891803


## Part 2 - Compute Cost
We will use Cross Entropy loss as a loss function
