<a href="https://colab.research.google.com/github/harperd/machine-learning/blob/master/notebooks/neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Network

Create a neural network to recognize hand-written digits (0 to 9).

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
import pandas as pd
import google.colab as colab
import scipy.optimize as opt
import io

from scipy.io import loadmat

# Allow saving our graphs in the notebook
%matplotlib inline

style.use('dark_background')

In [2]:
mat_file = colab.files.upload()
!ls -l

Saving ex4data1.mat to ex4data1.mat
total 7340
-rw-r--r-- 1 root root 7511764 Sep 14 16:26 ex4data1.mat
drwxr-xr-x 1 root root    4096 Aug 27 16:17 sample_data


In [3]:
mat_data = loadmat('ex4data1.mat')
mat_data

{'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 '__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [287]:
def add_bias(m):
  if(len(m.shape) == 2):
    return np.concatenate(
      (np.ones(m.shape[0])[:, np.newaxis], m),
      axis = 1)
  
  if(len(m.shape) == 1):
    return np.insert(m, 0, 1, axis = 0)
    
X = add_bias(mat_data['X'])
y = mat_data['y']

print(f'X Shape: {X.shape}')
print(f'y Shape: {y.shape}')

X Shape: (5000, 401)
y Shape: (5000, 1)


![Hand written numbers](https://github.com/harperd/machine-learning/blob/master/images/ex3-1.png?raw=1)

In [230]:
def describe_network(layers_dims, parameters):
  L = len(layers_dims)
  INPUT_LAYER = 1
  OUTPUT_LAYER = L
  
  print('The following describes the configured network architecture:')
  print()
  
  for l in range(1, L + 1):
    size_l = layers_dims[l - 1]
    
    if(l == INPUT_LAYER):
      print(f'Input[ X x {size_l} ] --> ', end = '')
      print(f'W{l}[ {parameters["W" + str(l)].shape} ] --> ', end = '')
    elif(l == OUTPUT_LAYER):
      print(f'Output[ a{l} x {size_l} ]')
    else:
      print(f'Hidden[ a{l} x {size_l} ] --> ', end = '')
      print(f'W{l}[ {parameters["W" + str(l)].shape} ] --> ', end = '')

def xavier(prev_layer_size):
    return np.sqrt(1 / prev_layer_size)

def initialize_parameters(layers_dims):
    np.random.seed(1)               
    parameters = {}
    
    # Get the number of layers
    L = len(layers_dims)            
    
    # For each layer initalize the weights and bias vector
    for l in range(1, L): 
        current_layer_size = layers_dims[l - 1]
        next_layer_size    = layers_dims[l]
        
        print(f'Initializing W{l} parameters matrix with size {next_layer_size} x {current_layer_size + 1}')
        
        # Create a weight matrix with size of (m) x (n)
        # where rows (m) is the number of activation functions in the next layer
        # and the number of columns (n) is the number of activation functions 
        # from the current layer plus 1 for the bias column.
        W = np.random.randn(
              next_layer_size,       # number of rows (m)
              current_layer_size + 1 # number of columns (n)
            ) * xavier(current_layer_size)
        
        assert W.shape == (next_layer_size, current_layer_size + 1)
        
        parameters[f'W{l}'] = W
        
    print()
    describe_network(layers_dims, parameters)

    return parameters
  
layers_dims = [
    400, # Input units
    25,  # Hidden layer units
    10   # Output layer units
]

parameters = initialize_parameters(layers_dims)

Initializing W1 parameters matrix with size 25 x 401
Initializing W2 parameters matrix with size 10 x 26

The following describes the configured network architecture:

Input[ X x 400 ] --> W1[ (25, 401) ] --> Hidden[ a2 x 25 ] --> W2[ (10, 26) ] --> Output[ a3 x 10 ]


In [292]:
def sigmoid(z):
  return 1 / (1 + np.exp(-z))

def g(z):
  return sigmoid(z)
  
def feed_forward(layers_dims, parameters, X):
  L = len(layers_dims)
  INPUT_LAYER = 1
  OUTPUT_LAYER = L
  
  compute = {}
  
  example = 1
  
  for x in X:
    print(f'Processing example {example}')
    for l in range(1, L + 1):
      size_l = layers_dims[l - 1]

      if(l == INPUT_LAYER):
        print('a1 = x')
        compute['a1'] = x
      #elif(l == OUTPUT_LAYER):
      else:
        print(f'z{l} = W{l - 1} x a{l - 1}')
        print(f'a{l} = g(z{l})')
        a = compute[f'a{l - 1}']
        W = parameters[f'W{l - 1}']
        
        z = W @ a
        
        compute[f'a{l}'] = add_bias(g(z))
        
    example = example + 1
    break
        

feed_forward(layers_dims, parameters, X)

Processing example 1
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 2
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 3
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 4
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 5
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 6
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 7
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 8
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 9
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 10
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 11
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 12
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processing example 13
a1 = x
z2 = W1 x a1
a2 = g(z2)
z3 = W2 x a2
a3 = g(z3)
Processi

KeyboardInterrupt: ignored