In [1]:
# IMPORTING NECESSARY LIBRARIES
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio #For loading OCTAVE .mat files #REF: https://docs.scipy.org/doc/scipy/reference/tutorial/io.html
import matplotlib.cm as cm #Used to display images in a specific colormap
import random #To pick random images to display

plt.rcParams['figure.figsize'] = (15.0,10.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

#np.random.seed(1)
%load_ext autoreload
%autoreload 2

In [2]:
# NETWORK MODEL SETTINGS FOR THE EXERCISE
input_layer_size  = 1;  # 20x20 Input Images of Digits
hidden_layer_size = 4;   # 25 hidden units
num_labels = 1;          # 10 labels, from 1 to 10 (note that we have mapped "0" to label 10)
lambda_val = 0.01;       # Lambda value

In [3]:
nn_weights =np.array([3.1,1.6,1.5,-2.9,-1.3,-0.8,-0.7,1.3,5.4,-1.7,-1.1,-0.9,1.6]).reshape(-1,1)
X_train = np.array([1,2,3]).reshape(-1,1)
y_train = np.array([1,4,9]).reshape(-1,1)

#RESULTS CHECK

print('X_train shape:',X_train.shape)
print('y_train shape:',y_train.shape)
print('nn_weights shape:',nn_weights.shape)

X_train shape: (3, 1)
y_train shape: (3, 1)
nn_weights shape: (13, 1)


In [4]:
# INITIALIZING PARAMETERS
def initialize_parameters(nn_weights, input_layer_size, hidden_layer_size, num_labels):
    # -------------------------------------------------------------
    # Reshaping W1 and W2
    # -------------------------------------------------------------
    W1 = np.reshape(nn_weights[0:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, (input_layer_size + 1)));
    W2 = np.reshape(nn_weights[(hidden_layer_size * (input_layer_size + 1)):,], (num_labels, (hidden_layer_size + 1)));
    parameters = {"W1":W1,
                  "W2":W2}
    
    return parameters

In [5]:
parameters = initialize_parameters(nn_weights, input_layer_size, hidden_layer_size, num_labels);

#RESULTS CHECK
print('W1 shape:',parameters['W1'].shape)
print('W2 shape:',parameters['W2'].shape)

W1 shape: (4, 2)
W2 shape: (1, 5)


In [6]:
def predictLinear(x,theta):
    return np.dot(x,theta)

In [7]:
# FORWARD PROPAGATION FUNCTION

def forward_propagation(X,parameters):
    
    # -------------------------------------------------------------
    # Retrieving parameters
    # -------------------------------------------------------------
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    m = X.shape[0];
        
    # -------------------------------------------------------------
    # The Forward Propagation Implementation
    # -------------------------------------------------------------
    A1   = np.insert(X,0,1,axis=1);     # (3,2)
    Z2   = np.dot(A1,W1.T);             # (3,2).(2,4) = (3,4)
    tanh = np.tanh(Z2);                 # (3,4)
    A2   = np.insert(tanh,0,1,axis=1);  # (3,5)
    Z3   = predictLinear(A2,W2.T);      # (3,5).(5,1) = (3,1)
 
    # -------------------------------------------------------------
    # SAVING THE ACTIVATIONS TO DICTIONARY
    # -------------------------------------------------------------
    cache = {"A1":A1, 
             "Z2":Z2,
             "A2":A2,
             "Z3":Z3}

    return Z3, cache

In [8]:
#COST FUNCTION TO COMPUTE THE MEAN SQUARED ERROR

def nnCostFunctionLinear(Z3, y, parameters,lambda_val):
    #theta = parameters["W2"].T
    m = Z3.shape[0]; 
    error = Z3-y;
    error_sqrd = np.square(error);
    sum_error_sqrd = np.sum(error_sqrd) 
    unreg_cost =(1/(2*m)) * sum_error_sqrd;
    #temp = np.ones((theta.shape[0],1))
    #temp[0,0] = 0
    #reg_term = (lambda_val/(2*m)) * np.sum(np.square(np.multiply(temp, theta)))#np.sum(np.dot(theta[1:].T,theta[1:])); #lambda_t/2/m*np.sum(np.multiply(mask_array, theta)**2)
    cost = unreg_cost #+ reg_term
    return cost

In [9]:
parameters = initialize_parameters(nn_weights, input_layer_size, hidden_layer_size, num_labels);
Z3, cache = forward_propagation(X_train,parameters)
cost = nnCostFunctionLinear(Z3, y_train, parameters, lambda_val)
print('\nCost at parameters (loaded): %.6f\n' % (cost));


Cost at parameters (loaded): 7.090178



In [10]:
def randInitializeWeights(L_in, L_out):
    W = np.zeros((L_out, 1 + L_in));
    epsilon_init = np.sqrt(6)/np.sqrt(L_in + L_out); #0.12;
    W = np.random.randn(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
    
    return W

In [11]:
# Initializing NN Weights

print('\nInitializing Neural Network Parameters ...\n')
initial_W1 = randInitializeWeights(input_layer_size, hidden_layer_size);
initial_W2 = randInitializeWeights(hidden_layer_size, num_labels);

# Flattening the weights
initial_W1_flat = initial_W1.flatten()
initial_W2_flat = initial_W2.flatten()

# Unroll parameters
initial_nn_params = np.concatenate((initial_W1_flat,initial_W2_flat),axis=0).reshape(-1,1)

#RESULTS CHECK
print('Shape of initial_W1 is: ', initial_W1.shape)
print('Shape of initial_W2 is: ', initial_W2.shape)
print('Shape of nn_params is: ', initial_nn_params.shape)

#print('Initial_W1 is: ', initial_W1)
#print('Initial_W2 is: ', initial_W2)


Initializing Neural Network Parameters ...

Shape of initial_W1 is:  (4, 2)
Shape of initial_W2 is:  (1, 5)
Shape of nn_params is:  (13, 1)


In [12]:
# Backward  propagation function

def backward_propagation(parameters, cache, x, y, lambda_val):
    # -------------------------------------------------------------
    # Retrieving parameters
    # -------------------------------------------------------------
    W1 = parameters["W1"]
    W2 = parameters["W2"]

    # -------------------------------------------------------------
    # Retrieving cache parameters
    # -------------------------------------------------------------
    A1 = cache["A1"]     # (3,2)
    Z2 = cache["Z2"]     # (3,4)
    A2 = cache["A2"]     # (3,5)
    Z3 = cache["Z3"]     # (3,1)


    m = x.shape[0];

    # -------------------------------------------------------------
    # Backpropagation algorithm
    # -------------------------------------------------------------
    
    delta2 = (1/m)*np.dot((Z3 - y).T,A2)            
    u = np.tanh(Z2)
    d_tanh = 1 - np.power(u,2)
    d2a = np.dot((Z3 - y),W2[:,1:])
    d2 = np.multiply(d2a, d_tanh)
    delta1 = (1/m) * np.dot(d2.T,A1)
    temp1=W1; 
    temp2=W2;
    temp1[:,0]=0;
    temp2[:,0]=0;
    dW1 = delta1 + ((lambda_val/m) * temp1)
    dW2 = delta2 + ((lambda_val/m) * temp2)
    
    grads = {"dW1":dW1,
             "dW2":dW2}
    
    return grads

In [13]:
def update_parameters(parameters,grads, learning_rate):
    # -------------------------------------------------------------
    # Retrieving parameters
    # -------------------------------------------------------------
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    
    # -------------------------------------------------------------
    # Retrieving gradients
    # -------------------------------------------------------------
    dW1 = grads["dW1"]
    dW2 = grads["dW2"]
    # -------------------------------------------------------------
    # Update parameters
    # -------------------------------------------------------------
    W1 = W1 - learning_rate * dW1
    W2 = W2 - learning_rate * dW2
    
    
    parameters = {"W1":W1,
                  "W2":W2}
    
    return parameters

In [14]:
def nn_model(X,Y, initial_nn_params, input_layer_size, hidden_layer_size, num_labels, lambda_val, print_cost=False):
    
    parameters = initialize_parameters(initial_nn_params, input_layer_size, hidden_layer_size, num_labels)
        
    # -------------------------------------------------------------
    # Retrieving parameters
    # -------------------------------------------------------------
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    
    m = X.shape[0]
    
    for i in range(10000):
        Z3, cache = forward_propagation(X,parameters)
        cost = nnCostFunctionLinear(Z3, Y, parameters, lambda_val)
        grads = backward_propagation(parameters, cache, X, Y, lambda_val)
        parameters = update_parameters(parameters,grads, learning_rate= 0.006)
        if print_cost and i % 500 ==0:
            print("Iteration %i: Cost:%f"%(i,cost))
    
    return parameters

In [15]:
parameters = nn_model(X_train,y_train, initial_nn_params, input_layer_size, hidden_layer_size, num_labels, lambda_val, print_cost=True)

Iteration 0: Cost:18.258947
Iteration 500: Cost:5.409391
Iteration 1000: Cost:5.372220
Iteration 1500: Cost:4.837369
Iteration 2000: Cost:3.519476
Iteration 2500: Cost:2.971702
Iteration 3000: Cost:2.517666
Iteration 3500: Cost:2.142860
Iteration 4000: Cost:1.834213
Iteration 4500: Cost:1.580312
Iteration 5000: Cost:1.371420
Iteration 5500: Cost:1.199382
Iteration 6000: Cost:1.057441
Iteration 6500: Cost:0.940051
Iteration 7000: Cost:0.842680
Iteration 7500: Cost:0.761641
Iteration 8000: Cost:1.017819
Iteration 8500: Cost:0.933171
Iteration 9000: Cost:0.946310
Iteration 9500: Cost:0.956946
