In [26]:
from os.path import abspath
import numpy as np

In [27]:
def load_data(path):
    """Read a comma-separated file of numbers into a NumPy array.

    Every non-blank line of the file becomes one row; every comma-separated
    field on that line is converted with ``float``. A short summary (number of
    fields in the first row and number of rows) is printed to stdout.

    Parameters
    ----------
    path : str
        Location of the text file to read.

    Returns
    -------
    numpy.ndarray
        The parsed rows. An empty file yields an empty array.

    Raises
    ------
    ValueError
        If a field cannot be converted to ``float``.
    OSError
        If the file cannot be opened.
    """
    output_data = []
    # the context manager closes the file even when a value fails to parse
    with open(path) as input_file:
        for line in input_file:
            if not line.strip():
                # blank lines (for example a stray trailing newline) carry no data
                continue
            one_row = line.split(',')
            output_data.append([float(x) for x in one_row])

    # an empty file has no first row to measure
    features_amount = len(output_data[0]) if output_data else 0
    print('Input data info.')
    print('\tFeatures amount: %d, set size %d' % (features_amount, len(output_data)))
    return np.array(output_data)

In [28]:
def show_prediction(output_vector):
    """Return the position of the strongest output after re-ordering.

    The first entry of ``output_vector`` is dropped (it is the constant 1 that
    ``forward_propagation_step`` prepends to every layer). The remaining
    entries are rotated right by one place, so the last output unit ends up at
    index 0, and the index of the maximum of that rotated vector is returned.
    """
    unit_scores = output_vector[1:]
    rotated_scores = np.roll(unit_scores, 1)
    return np.argmax(rotated_scores)

In [29]:
def sigmoid(x):
    """Logistic function ``1 / (1 + e^(-x))``.

    ``x`` may be a scalar or a NumPy array; arrays are processed element-wise.
    """
    decay = np.exp(-x)
    return 1.0 / (1.0 + decay)

In [30]:
def unit_summarization(theta, input_vec):
    """Weighted input of a single neuron: ``z = sum(theta[i] * input_vec[i])``.

    Both arguments are converted to NumPy arrays, multiplied element-wise and
    the products are summed into a single number.
    """
    weights = np.array(theta)
    inputs = np.array(input_vec)
    weighted_inputs = np.multiply(weights, inputs)
    return weighted_inputs.sum()

In [31]:
def unit_activation(theta, input_vec):
    """Activation of a single neuron: ``a = g(z)``.

    ``z`` is the weighted input returned by ``unit_summarization`` and ``g`` is
    ``sigmoid``.
    """
    return sigmoid(unit_summarization(theta, input_vec))

In [32]:
def forward_propagation_step(theta_matrix, input_vec):
    """Activation vector of one whole network layer.

    Each row of ``theta_matrix`` holds the weights of one unit; its activation
    is computed by ``unit_activation`` against ``input_vec``. A constant 1
    (the additional bias neuron) is placed in front of the unit activations.
    """
    unit_outputs = []
    for unit_weights in theta_matrix:
        unit_outputs.append(unit_activation(unit_weights, input_vec))
    # [1] - additional neuron (bias)
    return np.concatenate(([1], unit_outputs))

In [37]:
def forward_propagation(theta_one, theta_two, input_vec):
    """Run one sample through the two weight matrices.

    ``input_vec`` gets a leading 1 (bias) and is passed through
    ``forward_propagation_step`` with ``theta_one``; that result is passed
    through ``forward_propagation_step`` again with ``theta_two``.

    Returns a two-element list ``[hidden_layer, output_layer]``. Both vectors
    start with the constant 1 that ``forward_propagation_step`` prepends.
    """
    # input layer: the raw features preceded by the bias unit
    biased_input = np.concatenate(([1], input_vec))
    # hidden layer
    hidden_layer = forward_propagation_step(theta_one, biased_input)
    # output layer
    output_layer = forward_propagation_step(theta_two, hidden_layer)
    return [hidden_layer, output_layer]

In [100]:
def compute_layer_error(theta, next_layer_error, layer_activation):
    """Propagate an error term one layer backwards.

    Computes ``(theta^T @ next_layer_error) * a * (1 - a)`` where ``a`` is
    ``layer_activation``; the products after the matrix multiplication are
    element-wise.
    """
    propagated_error = theta.T @ next_layer_error
    return propagated_error * layer_activation * (1 - layer_activation)

In [93]:
def compute_delta(prev_delta, layer_error, layer_activation):
    """Add one sample's contribution to a gradient accumulator.

    Returns ``prev_delta + layer_error @ layer_activation^T``.

    With 2-D column vectors of shapes (n, 1) and (m, 1) the product is the
    (n, m) outer product. With 1-D arrays the transpose is a no-op and ``@``
    produces a scalar inner product, which is then broadcast over
    ``prev_delta``.
    """
    contribution = layer_error @ layer_activation.T
    return prev_delta + contribution

In [113]:
def compute_partial_derivative(delta, theta, m, lm):
    """Turn accumulated deltas into regularised partial derivatives.

    For every entry ``(i, j)``::

        D[i][j] = (delta[i][j] + lm * theta[i][j]) / m    if j != 0
        D[i][j] = delta[i][j] / m                         if j == 0

    Column 0 is left unregularised.

    Parameters
    ----------
    delta : array-like, 2-D
        Accumulated gradient contributions.
    theta : array-like, 2-D
        Weights, same shape as ``delta``.
    m : int
        Number of samples the deltas were accumulated over.
    lm : float
        Regularisation strength.

    Returns
    -------
    numpy.ndarray
        A new float array of the same shape as ``delta``; the inputs are not
        modified.
    """
    delta = np.asarray(delta, dtype=float)
    theta = np.asarray(theta, dtype=float)
    coefficient = 1.0 / m

    # work on a copy so the caller's accumulator is left untouched
    derivative = delta.copy()
    # regularise every column except the first one
    derivative[:, 1:] += lm * theta[:, 1:]
    derivative *= coefficient
    return derivative

In [120]:
def _expected_output(label_row, output_count):
    """Build the target vector the output layer is compared against.

    A row that already holds ``output_count`` values is used as-is. A row
    holding a single class label is expanded to an indicator vector using the
    unit order that ``show_prediction`` decodes: unit ``k`` stands for class
    ``k + 1`` and the last unit stands for class ``output_count`` (equivalently
    class 0).
    """
    target = np.ravel(label_row).astype(float)
    if target.size == output_count:
        return target
    if target.size != 1:
        raise ValueError('expected a single label or %d target values, got %d'
                         % (output_count, target.size))
    indicator = np.zeros(output_count)
    indicator[(int(target[0]) - 1) % output_count] = 1.0
    return indicator


def back_propagation(data_x, data_y, thetas=None, lm=1):
    """Compute the partial derivatives of the cost function J by back propagation.

    Parameters
    ----------
    data_x : 2-D array-like
        One training sample per row (without the bias term).
    data_y : 2-D array-like
        One row per sample: either a single class label or a complete target
        vector with one value per output unit.
        NOTE(review): single labels are assumed to follow the encoding that
        ``show_prediction`` decodes (classes 1..N, with the last unit also
        standing for class 0) - confirm against the label file.
    thetas : sequence of two 2-D arrays, optional
        ``[theta_hidden_layer, theta_output_layer]``. When omitted, weights are
        drawn with ``np.random.rand`` for a network with 25 hidden and 10
        output units.
    lm : float, optional
        Regularisation strength handed to ``compute_partial_derivative``.

    Returns
    -------
    list
        ``[hidden_layer_derivative, output_layer_derivative]``, each shaped
        like the corresponding theta matrix.

    Raises
    ------
    ValueError
        If ``data_x`` is empty or ``data_x`` and ``data_y`` differ in length.
    """
    sample_count = len(data_x)
    if sample_count == 0:
        raise ValueError('data_x must contain at least one sample')
    if len(data_y) != sample_count:
        raise ValueError('data_x and data_y must contain the same number of rows')

    # initializing thetas (weights of each connection between neurons)
    if thetas is not None:
        theta_hidden_layer = np.asarray(thetas[0], dtype=float)
        theta_output_layer = np.asarray(thetas[1], dtype=float)
    else:
        features_amount = len(data_x[0])
        hidden_layer_neuron_amount = 25
        output_layer_neuron_amount = 10
        # weights between input and hidden layer (+1 column for the bias unit)
        theta_hidden_layer = np.random.rand(hidden_layer_neuron_amount, features_amount + 1)
        # weights between hidden and output layer (+1 column for the bias unit)
        theta_output_layer = np.random.rand(output_layer_neuron_amount, hidden_layer_neuron_amount + 1)

    output_unit_count = theta_output_layer.shape[0]

    # deltas matrix, will be used for accumulate partial derivative
    hidden_layer_delta = np.zeros(theta_hidden_layer.shape)
    output_layer_delta = np.zeros(theta_output_layer.shape)

    for i in range(sample_count):
        # every sample has to be propagated, not just the first one
        input_layer_activation = np.concatenate(([1], data_x[i]))
        hidden_layer_activation, output_layer_activation = forward_propagation(
            theta_hidden_layer, theta_output_layer, data_x[i])

        # forward_propagation_step prepends a constant 1 to the output layer too;
        # it is not a real output unit, so it is dropped before comparing
        output_layer_errors = output_layer_activation[1:] - _expected_output(data_y[i], output_unit_count)
        hidden_layer_errors = compute_layer_error(theta_output_layer, output_layer_errors, hidden_layer_activation)

        # compute_delta needs column vectors to produce an outer product; each
        # layer's error is paired with the activation that fed INTO that layer
        output_layer_delta = compute_delta(
            output_layer_delta,
            output_layer_errors[:, np.newaxis],
            hidden_layer_activation[:, np.newaxis])
        # the first hidden error belongs to the bias unit, which has no incoming weights
        hidden_layer_delta = compute_delta(
            hidden_layer_delta,
            hidden_layer_errors[1:, np.newaxis],
            input_layer_activation[:, np.newaxis])

    output_layer_derivative = compute_partial_derivative(output_layer_delta, theta_output_layer, sample_count, lm)
    hidden_layer_derivative = compute_partial_derivative(hidden_layer_delta, theta_hidden_layer, sample_count, lm)

    return [hidden_layer_derivative, output_layer_derivative]

In [56]:
# training samples, labels and the two weight matrices, read in this order
loaded_data_x, loaded_data_y, loaded_theta_one, loaded_theta_two = (
    load_data(abspath(csv_path))
    for csv_path in (
        'demo/init/coursera/neuralnetwork/data/data_x.csv',
        'demo/init/coursera/neuralnetwork/data/data_y.csv',
        'demo/init/coursera/neuralnetwork/data/theta1.csv',
        'demo/init/coursera/neuralnetwork/data/theta2.csv',
    )
)

Input data info.
	Features amount: 400, set size 5000
Input data info.
	Features amount: 1, set size 5000
Input data info.
	Features amount: 401, set size 25
Input data info.
	Features amount: 26, set size 10


In [126]:
# gradients for the loaded weights: [hidden_layer_derivative, output_layer_derivative]
x = back_propagation(
    loaded_data_x,
    loaded_data_y,
    thetas=[loaded_theta_one, loaded_theta_two],
)