In [0]:
# Multiclass, multilayer neural network
'''
STEPS:
1. Forward propagate
2. Backward propagate
3. Update params
'''

'\nSTEPS:\n1. Forward propagate\n2. Backward propagate\n3. Update params\n'

In [0]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split 
import matplotlib.pyplot as plt

In [0]:
import sys
import numpy as np
np.set_printoptions(threshold=sys.maxsize)

In [0]:
# Feature scaling
def x_mean(a):
  mean = np.mean(a, dtype="float128")
  func = np.vectorize(lambda t: t - mean)
  return func(a)

def std_var(a):
  std = np.std(a, dtype="float128")
  if std == 0:#hack if all features are 1 or same; ideally in that case, remove the feature
    std = 1
  func = np.vectorize(lambda t: t / std)
  return func(a)

def feature_scaling(X):
  X = np.apply_along_axis(x_mean, 0, X)
  X = np.apply_along_axis(std_var, 0, X)
  return X

In [0]:
def sigmoid(Z):
  A = 1 / (1 + np.exp(-Z))
  return A

def sigmoid_gradient(A):
#   A is sigmoid(Z) here
  return np.multiply(A, (1 - A))

In [0]:
def initialize_parameters(layerwise_neuron_dist):
# Assign weights for hidden layers, o/p layer
# For each neuron, 1 wt for each feature
  parameters = {}
  layers_num = len(layerwise_neuron_dist)
  
  for l in range(1, layers_num):
    parameters["W" + str(l)] = np.random.randn(layerwise_neuron_dist[l], layerwise_neuron_dist[l - 1])
    parameters["b" + str(l)] = np.zeros((layerwise_neuron_dist[l], 1))
  return parameters
    

In [0]:
def forward_propagate(A_prev, W, b):
#   print("W.shape:",W.shape)
#   print("A_prev.",A_prev.shape)
#   print("b shape", b.shape)
#   b = b[:, :W.shape[1]] 
#   print("b:",b.shape, "w:", W.shape)
  Z = np.dot(W, A_prev) + b
  A = sigmoid(Z)
  return Z, A

def layerwise_forward(X, parameters):
#   layers_num = len(parameters) # which will be num of hidden layers + 1
  layers_num= 3
  A_prev = X
  caches = []
  
  for l in range(1, layers_num+1):
    W = parameters["W" + str(l)]
    b = parameters["b" + str(l)]
    linear_cache = (A_prev, W, b)

    Z, A = forward_propagate(A_prev, W, b)
    z_cache = Z
    cache = (linear_cache, z_cache)
    caches.insert(l, cache)
    
    A_prev = A
    
  return A, caches

In [0]:
def cost_function(A, Y):
  c1 = Y * np.log(A) 
  c2 = (1 - Y) * np.log(1 - A) 
  final = -c1 - c2 
  me = np.mean(final)
  return me

In [0]:
def back_propagate(dA, cache):
  A_prev, W, b = cache[0] #linear_cache
  Z = cache[1] #z_cache
  m = A_prev.shape[1]
    
  A = sigmoid(Z)
  dZ = dA * sigmoid_gradient(A)

  dW = np.dot(dZ, A_prev.T)
  db = np.sum(dZ, axis=1, keepdims=True)
  dA_prev = np.dot(W.T, dZ)

  return dA_prev, dW, db

def layerwise_backward(A, Y, caches):
#   layers_num = len(caches) # num of layers = 3
  Y = Y.reshape(A.shape)
  layers_num = 3
  deltas = {}
  
  err = A - Y
  sig_gradient =  sigmoid_gradient(A)
  dA = np.multiply(err, sig_gradient)
  
#   For last layer
  cache = caches[layers_num-1]
  A_prev, W, b = cache[0]
  Z = cache[1]
  
  deltas["dA" + str(layers_num-1)] = dA
  deltas["db" + str(layers_num)] = dA
  deltas["dW" + str(layers_num)] = np.dot(dA, A_prev.T)

  for l in range(layers_num-1, 0, -1):
    cache_1 = caches[l-1]
    dA_1, dW, db = back_propagate(deltas["dA" + str(l)], cache_1)
    
    deltas["dA" + str(l - 1)] = dA_1 
    deltas["dW" + str(l)] = dW
    deltas["db" + str(l)] = db 

  return deltas


In [0]:
def update_params(params, deltas, learning_rate):
#   layers_num = len(params)
  layers_num = 3

  for l in range(1, layers_num+1):
    dW = deltas["dW" + str(l)]
    db = deltas["db" + str(l)]
    params["W" + str(l)] = params["W" + str(l)] - learning_rate * dW
    params["b" + str(l)] = params["b" + str(l)] - learning_rate * db

  return params

In [0]:
def train(X, Y, params, learning_rate = 0.01, max_epochs = 1000):
#   Gradient descent
  loss_list = []
  
  for epoch in range(max_epochs):
    A, caches = layerwise_forward(X, params)
    
    loss = cost_function(A, Y)
#     print("Epoch: ", epoch, ", loss: ", loss)
    loss_list.append([loss, epoch])
    
    deltas = layerwise_backward(A, Y, caches)
    
    params = update_params(params, deltas, learning_rate)
  
  return params, loss_list

In [0]:
def plot_loss(loss_list):
  # Plot loss over epoch
  x_axis = np.array([i[1] for i in loss_list]) # epoch
  y_axis = np.array([i[0] for i in loss_list]) # loss
  plt.figure(figsize=(10,10))
  plt.title("Loss over epochs")
  plt.xlabel("Epochs")
  plt.ylabel("Loss")
  plt.plot(x_axis, y_axis, label="training loss")
  plt.legend(loc="best")

In [0]:
if __name__ == "__main__":
  digits = load_digits()
  X = digits.data 
  y = digits.target
  
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)
  print("Initial X_train.shape: ", X_train.shape)
  print("Initial y_train.shape: ", y_train.shape)
  
  X_train_scaled = feature_scaling(X_train).T
  X_test_scaled  = feature_scaling(X_test).T
  
  print("\nAfter scaling:")
  print("X_train_scaled shape: ", X_train_scaled.shape)
  print("X_test_scaled shape: ", X_test_scaled.shape)

Initial X_train.shape:  (1257, 64)
Initial y_train.shape:  (1257,)

After scaling:
X_train_scaled shape:  (64, 1257)
X_test_scaled shape:  (64, 540)


In [0]:
# Prepare y training set
y0_train = np.array(y_train == 0, dtype= int).reshape(1, -1)
y1_train = np.array(y_train == 1, dtype= int).reshape(1, -1)
y2_train = np.array(y_train == 2, dtype= int).reshape(1, -1)
y3_train = np.array(y_train == 3, dtype= int).reshape(1, -1)
y4_train = np.array(y_train == 4, dtype= int).reshape(1, -1)
y5_train = np.array(y_train == 5, dtype= int).reshape(1, -1)
y6_train = np.array(y_train == 6, dtype= int).reshape(1, -1)
y7_train = np.array(y_train == 7, dtype= int).reshape(1, -1)
y8_train = np.array(y_train == 8, dtype= int).reshape(1, -1)
y9_train = np.array(y_train == 9, dtype= int).reshape(1, -1)

y_train_all = [y0_train, y1_train, y2_train, y3_train, y4_train, y5_train, y6_train, y7_train, y8_train, y9_train]

#   Initial values
layerwise_neuron_dist = [X_train_scaled.shape[0], 10, 10, 1]
param_list = []
classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Train for each class
for cl in classes:
  params = initialize_parameters(layerwise_neuron_dist)
  new_params, loss_list = train(X_train_scaled, y_train_all[cl], params)
#   plot_loss(loss_list)
  param_list.append(new_params)

In [0]:
# Predict based on all classifiers
def check(x):
  if x >= 0.5:
    return x
  else:
    return float("-inf")
  
def sig_under_threshold(X, params):
  A, ca = layerwise_forward(X, params)
  
  j = list(map(lambda x: check(x), A[0]))
  return j
  
def predict(X, param_list): 
    prob_mat = sig_under_threshold(X, param_list[0])
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[1])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[2])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[3])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[4])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[5])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[6])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[7])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[8])))
    prob_mat = np.vstack((prob_mat, sig_under_threshold(X, param_list[9])))
    max_prob = np.argmax(prob_mat, 0)
    return max_prob

In [0]:
def accuracy(y_true, y_estimate):
  return np.sum(y_true == y_estimate, dtype = "float")/len(y_true)

In [0]:
#   Accuracy on set
  X_true = X_train_scaled
  y_true = y_test

  y_estimate = predict(X_true, param_list)
  print(y_true[:5])
  print(y_estimate[:5])

  acc = accuracy(y_true, y_estimate)
  print(acc)

[3 5 3 7 0]
[3 5 3 7 0]
0.9992044550517104


## **END**