In [None]:
# f(x₁,x₂,x₃) = w₁₁*x₁ + w₁₂*x₂ + w₁₃*x₃ + b₁₁
# A₁ = g(f(x₁,x₂,x₃)) = g(w₁₁*x₁ + w₁₂*x₂ + w₁₃*x₃ + b₁₁)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``.

    Args:
        z: A scalar or array-like accepted by ``np.maximum``.

    Returns:
        ``np.maximum(0, z)`` -- negative entries are replaced by 0, all other
        entries are passed through unchanged.
    """
    rectified = np.maximum(0, z)
    return rectified

In [None]:
def initialize_params(layer_sizes):
    """Create small random weights and biases for a fully connected network.

    Args:
        layer_sizes: Sequence of layer widths, input layer first,
            e.g. ``[2, 5, 3]``.

    Returns:
        dict with, for every layer ``i`` in ``1 .. len(layer_sizes) - 1``:
            ``'W<i>'``: array of shape ``(layer_sizes[i], layer_sizes[i-1])``
            ``'B<i>'``: array of shape ``(layer_sizes[i], 1)``
        All entries are drawn from ``np.random.randn`` and scaled by 0.01.
        Fewer than two layer sizes yields an empty dict.
    """
    params = {}
    for i in range(1, len(layer_sizes)):
        n_out, n_in = layer_sizes[i], layer_sizes[i-1]
        params['W' + str(i)] = np.random.randn(n_out, n_in) * 0.01
        # The bias is a column vector (one value per unit) so that it
        # broadcasts over any number of sample columns in W.X + B. Giving it
        # the weight matrix's shape only works when the batch size happens to
        # equal n_in.
        params['B' + str(i)] = np.random.randn(n_out, 1) * 0.01
    return params

In [None]:
# Sanity check: build the parameter dict for layer sizes [2, 3, 4] and display it.
initialize_params([2, 3, 4])

{'W1': array([[ 0.00495124,  0.0108736 ],
        [-0.01295884,  0.01506262],
        [-0.0096534 , -0.01010094]]),
 'B1': array([[-0.00481749, -0.00161687],
        [ 0.01538416, -0.00906149],
        [ 0.0006826 , -0.00385703]]),
 'W2': array([[-0.00331872, -0.00216686,  0.0060617 ],
        [-0.00398092,  0.00328807, -0.01600608],
        [-0.02852648, -0.01592398,  0.00909365],
        [ 0.00284954, -0.03294313, -0.01531181]]),
 'B2': array([[-0.01912949, -0.01580727, -0.00098723],
        [ 0.01258629, -0.01010359, -0.00876762],
        [-0.00051445,  0.01048728,  0.01152069],
        [ 0.00996432,  0.00283932,  0.00547873]])}

In [None]:
def forward_propagation(X_train, params):
    """Run a forward pass and collect every layer's pre-activation and activation.

    Hidden layers use ReLU; the last layer is linear (regression output). This
    holds for a single-layer network too, where layer 1 is the output layer.

    Args:
        X_train: Input features with one column per sample -- each layer
            computes ``np.dot(W, input)``, so the expected layout is
            ``(n_features, n_samples)``.
        params: dict with keys ``'W1', 'B1', ..., 'WL', 'BL'``; the number of
            layers is taken as ``len(params) // 2``.

    Returns:
        dict with ``'Z<i>'`` (``W<i> . A<i-1> + B<i>``) and ``'A<i>'`` (the
        layer's activation) for every layer ``i``, where ``A0`` is ``X_train``.
    """
    layers = len(params)//2
    values = {}
    activation = X_train  # plays the role of A0
    for i in range(1, layers+1):
        z = np.dot(params['W' + str(i)], activation) + params['B' + str(i)]
        values['Z' + str(i)] = z
        # Output layer stays linear; every earlier layer is rectified.
        activation = z if i == layers else relu(z)
        values['A' + str(i)] = activation
    return values

In [None]:
def compute_cost(values, Y_train):
    """Half mean-squared-error cost of the network output.

    Args:
        values: dict of ``'Z<i>'`` / ``'A<i>'`` arrays; the output layer is
            taken to be ``'A<len(values) // 2>'``.
        Y_train: True targets laid out with samples along the last axis,
            i.e. shape ``(m,)`` or ``(1, m)``.

    Returns:
        ``1 / (2 * m) * sum((Y_pred - Y_train) ** 2)`` where ``m`` is the
        number of samples.
    """
    layers = len(values)//2
    Y_pred = values['A' + str(layers)]
    # Samples lie along the last axis. len() would return 1 for a (1, m)
    # target and mis-scale the cost by a factor of m.
    m = np.shape(Y_train)[-1]
    cost = 1/(2*m) * np.sum(np.square(Y_pred - Y_train))
    return cost

In [None]:
def backward_propagation(params, values, X_train, Y_train):
    """Back-propagate the half-MSE cost through the network.

    The gradients correspond to the cost
    ``J = 1 / (2 * m) * sum((A_L - Y) ** 2)`` with a linear output layer and
    ReLU hidden layers.

    Args:
        params: dict with keys ``'W1', 'B1', ..., 'WL', 'BL'``.
        values: dict with the activations ``'A1' .. 'AL'`` from the forward
            pass that used these same ``params``.
        X_train: Input features with one column per sample
            (``(n_features, m)``); used as the input to layer 1.
        Y_train: True targets with samples along the last axis
            (``(m,)`` or ``(1, m)``).

    Returns:
        dict with ``'W<i>'`` and ``'B<i>'`` holding dJ/dW<i> and dJ/dB<i>.
        ``'B<i>'`` has shape ``(n_i, 1)``.
    """
    layers = len(params)//2
    # Samples lie along the last axis; len() would give 1 for a (1, m) target.
    m = np.shape(Y_train)[-1]
    grads = {}
    for i in range(layers, 0, -1):
        if i == layers:
            # Linear output layer: dJ/dZ = (A - Y) / m. The 1/m factor is
            # applied only here, so the parameter gradients below must not be
            # divided by m again.
            dZ = 1/m * (values['A' + str(i)] - Y_train)
        else:
            dA = np.dot(params['W' + str(i+1)].T, dZ)
            # ReLU derivative is 1 only where the unit is active. A = relu(Z)
            # is never negative, so the comparison must be strict: `>= 0`
            # would let gradient through every unit.
            dZ = np.multiply(dA, np.where(values['A' + str(i)] > 0, 1, 0))
        # Input to layer i: the raw features for the first layer, otherwise
        # the previous layer's activation.
        A_prev = X_train if i == 1 else values['A' + str(i-1)]
        grads['W' + str(i)] = np.dot(dZ, A_prev.T)
        grads['B' + str(i)] = np.sum(dZ, axis=1, keepdims=True)
    return grads

In [None]:
def update_params(params, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        params: dict with keys ``'W1', 'B1', ..., 'WL', 'BL'``.
        grads: dict with the same keys holding the matching gradients.
        learning_rate: Step size multiplied into each gradient.

    Returns:
        A new dict with ``param - learning_rate * grad`` for every key; the
        input ``params`` dict is not modified.
    """
    n_layers = len(params) // 2
    stepped = {}
    for idx in range(1, n_layers + 1):
        # Weights first, then biases, for each layer in turn.
        for prefix in ('W', 'B'):
            key = prefix + str(idx)
            stepped[key] = params[key] - learning_rate * grads[key]
    return stepped

In [None]:
def model(X_train, Y_train, layer_sizes, num_iters, learning_rate):
    """Train the network with plain gradient descent and return its parameters.

    Each iteration runs a forward pass, evaluates the cost, back-propagates,
    applies one parameter update and prints the cost measured before that
    update.

    Args:
        X_train: Training features; transposed (``.T``) before being fed to
            the network.
        Y_train: Training targets; transposed (``.T``) likewise.
        layer_sizes: Layer widths handed to ``initialize_params``.
        num_iters: Number of gradient-descent iterations.
        learning_rate: Step size handed to ``update_params``.

    Returns:
        The parameter dict after ``num_iters`` updates.
    """
    params = initialize_params(layer_sizes)
    for step in range(1, num_iters + 1):
        activations = forward_propagation(X_train.T, params)
        current_cost = compute_cost(activations, Y_train.T)
        gradients = backward_propagation(params, activations, X_train.T, Y_train.T)
        params = update_params(params, gradients, learning_rate)
        # Iterations are reported 1-based.
        print('Cost at iteration ' + str(step) + ' = ' + str(current_cost) + '\n')
    return params


def compute_accuracy(X_train, X_test, Y_train, Y_test, params):
    """Evaluate the trained network on the training and test sets.

    Despite the name, the score is a root-mean-squared error (lower is
    better), computed from the network's output-layer activations.

    Args:
        X_train, X_test: Feature arrays; transposed (``.T``) before the
            forward pass.
        Y_train, Y_test: True targets for the two sets.
        params: Learnt parameters, keys ``'W1', 'B1', ..., 'WL', 'BL'``.

    Returns:
        Tuple ``(train_rmse, test_rmse)``.
    """
    layers = len(params)//2
    # Predictions live in the LAST layer's activations, 'A<layers>'.
    # 'A<layers-1>' is the final hidden layer (or a missing 'A0' key for a
    # one-layer network).
    output_key = 'A' + str(layers)
    values_train = forward_propagation(X_train.T, params)
    values_test = forward_propagation(X_test.T, params)
    train_acc = np.sqrt(mean_squared_error(Y_train, values_train[output_key].T))
    test_acc = np.sqrt(mean_squared_error(Y_test, values_test[output_key].T))
    return train_acc, test_acc

def predict(X, params):
    """Predict targets for new inputs using learnt parameters.

    Args:
        X: Feature array; transposed (``.T``) before the forward pass.
        params: Learnt parameters as returned by ``model``.

    Returns:
        The transposed activations of the final layer.
    """
    activations = forward_propagation(X.T, params)
    # The forward pass stores one 'Z' and one 'A' entry per layer.
    output_layer = len(activations) // 2
    return activations['A' + str(output_layer)].T