In [28]:
from os.path import abspath
import numpy as np
import time

In [29]:
def load_data(path):
    """Read a comma-separated file of numbers into a 2-D NumPy array.

    Each non-blank line becomes one row; every comma-separated field is
    converted with ``float``. A short summary (fields in the first row and
    number of rows) is printed before returning.

    :param path: path of the text file to read.
    :return: ``np.array`` built from the parsed rows (empty array if the
        file contains no data rows).
    :raises ValueError: if a field cannot be parsed as a float.
    """
    output_data = []
    # `with` guarantees the handle is closed even when a row fails to parse.
    with open(path) as input_file:
        for line in input_file:
            if not line.strip():
                # Blank lines (e.g. a trailing newline at EOF) carry no data.
                continue
            output_data.append([float(x) for x in line.split(',')])

    # Guard the summary so an empty file does not raise IndexError.
    features_amount = len(output_data[0]) if output_data else 0
    print('Input data info.')
    print('\tFeatures amount: %d, set size %d' % (features_amount, len(output_data)))
    return np.array(output_data)

In [30]:
def show_prediction(output_vector):
    """Return the position of the largest output after realignment.

    The first element of ``output_vector`` is dropped, the remainder is
    rotated right by one position (so the last element becomes index 0),
    and the index of the maximum of that rotated vector is returned.
    """
    without_first = output_vector[1:]
    rotated = np.roll(without_first, 1)
    return np.argmax(rotated)

In [31]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise by NumPy."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

In [32]:
# weighted input of a single neuron
# z(i) = sum(theta[j] * input[j]) = theta' * x
def unit_summarization(theta, input_vec):
    """Sum of the element-wise products of ``theta`` and ``input_vec``."""
    weights = np.array(theta)
    inputs = np.array(input_vec)
    return np.multiply(weights, inputs).sum()

In [33]:
# activation of a single neuron
# a(i) = g(z(i)), where g is the sigmoid
def unit_activation(theta, input_vec):
    """Apply ``sigmoid`` to the weighted sum of ``input_vec`` under ``theta``."""
    return sigmoid(unit_summarization(theta, input_vec))

In [34]:
# activation vector of one whole network layer
# A = g(z(i)) for every unit of the layer
# a leading 1 is prepended as the additional (bias) neuron
def forward_propagation_step(theta_matrix, input_vec):
    """Compute one layer's activations and prepend the bias entry.

    :param theta_matrix: one weight row per unit of the layer.
    :param input_vec: activations fed into the layer.
    :return: array ``[1, a_0, a_1, ...]`` with one activation per row of
        ``theta_matrix``.
    """
    activations_vector = []
    for unit_theta in theta_matrix:
        activations_vector.append(unit_activation(unit_theta, input_vec))
    return np.concatenate(([1], activations_vector))

In [35]:
def forward_propagation(theta_one, theta_two, input_vec):
    """Run ``input_vec`` through both weight matrices.

    :param theta_one: weights between the input and the hidden layer.
    :param theta_two: weights between the hidden and the output layer.
    :param input_vec: raw feature vector (without bias entry).
    :return: ``[hidden_activations, output_activations]``; each vector
        carries a leading 1 added by ``forward_propagation_step``.
    """
    # input layer: the features with the bias entry in front
    with_bias = np.concatenate(([1], input_vec))
    # hidden layer
    hidden = forward_propagation_step(theta_one, with_bias)
    # output layer
    output = forward_propagation_step(theta_two, hidden)
    return [hidden, output]

In [36]:
def compute_regularize_coefficient(lm, m, theta):
    """Regularization term: ``lm / (2 m)`` times the sum of all squared weights.

    :param lm: regularization strength.
    :param m: number of samples.
    :param theta: pair ``[theta_one, theta_two]``; every entry of both
        matrices is included in the sum.
    """
    first_layer = np.array(theta[0])
    second_layer = np.array(theta[1])
    squared_total = (first_layer * first_layer).sum() + (second_layer * second_layer).sum()
    return squared_total * (lm / (2.0 * m))

In [37]:
def get_y(i, k, offset=500):
    """Expected label (1.0 or 0.0) of sample ``i`` for class ``k``.

    Samples are taken to be stored in consecutive runs of ``offset`` per
    class, so sample ``i`` belongs to class ``k`` exactly when
    ``k * offset <= i < (k + 1) * offset``.

    :param i: index of the sample in the data set.
    :param k: class index.
    :param offset: number of consecutive samples per class (default 500,
        the value that used to be hard-coded).
    :return: 1.0 if sample ``i`` falls into the run of class ``k``, else 0.0.
    """
    return 1.0 if k * offset <= i < k * offset + offset else 0.0


def get_output_vector(num):
    """Build the expected output vector (bias entry + 10 one-hot slots).

    Slot ``floor(num) - 1`` of the ten class slots is set to 1.0, so class 0
    wraps around to the last slot (Python index -1). The returned list has a
    leading 1.0 so it lines up with the bias-prefixed output activations.

    ``num`` is floored before use: ``int()`` truncates toward zero, which
    would send every fractional value in (0, 1) to slot 0 instead of the
    last slot when callers pass something like ``i / 500``.

    :param num: class number; fractional values are rounded down.
    :return: list of 11 floats.
    """
    out_vec = [0.0] * 10
    out_vec[int(np.floor(num)) - 1] = 1.0
    return [1.0] + out_vec

In [38]:
def compute_cost_function(x_data, theta, lm):
    """Regularized cross-entropy cost of the network over ``x_data``.

    :param x_data: sequence of input feature vectors.
    :param theta: pair ``[theta_one, theta_two]`` of weight matrices.
    :param lm: regularization strength passed to
        ``compute_regularize_coefficient``.
    :return: ``-1/m * sum(y*log(h) + (1-y)*log(1-h)) + regularization``,
        summed over 10 classes and all samples.
    """
    sample_count = len(x_data)
    coefficient = -1 / sample_count
    # Stand-in for exact zeros so np.log never receives 0.
    fake_zero = 10 ** (-9)

    regularize_coefficient = compute_regularize_coefficient(lm, sample_count, theta)

    # The network output does not depend on the class index k, so run the
    # forward pass once per sample instead of once per (k, i) pair. The
    # bias entry is dropped and the vector rotated so index k is class k.
    hypotheses = [
        np.roll(forward_propagation(theta[0], theta[1], sample)[1][1:], 1)
        for sample in x_data
    ]

    # Same k-outer / i-inner accumulation order as before, so the floating
    # point result is unchanged.
    temp_sum = 0.0
    for k in range(10):
        for i in range(sample_count):
            hyp = hypotheses[i][k]
            expected = get_y(i, k)
            temp_sum += expected * np.log(hyp if hyp != 0.0 else fake_zero) \
                + (1.0 - expected) * np.log(1 - hyp if 1 - hyp != 0.0 else fake_zero)

    return temp_sum * coefficient + regularize_coefficient

In [39]:
def compute_unit_error(theta, delta):
    """Sum of the element-wise products of ``theta`` and ``delta``."""
    weights = np.array(theta)
    deltas = np.array(delta)
    weighted = weights * deltas
    return weighted.sum()

In [40]:
# used for every layer except the last one
def compute_layer_error(layer_delta, theta):
    """Propagate the next layer's deltas back through ``theta``.

    The first entry of ``layer_delta`` is discarded; each column of
    ``theta`` is then combined with the remaining deltas via
    ``compute_unit_error``.

    :return: list with one error value per column of ``theta``.
    """
    deltas_without_first = layer_delta[1:]
    errors = []
    for outgoing_weights in np.transpose(theta):
        errors.append(compute_unit_error(outgoing_weights, deltas_without_first))
    return errors

In [41]:
def compute_layer_delta(layer_error, layer_activation):
    """Scale the layer error by the sigmoid derivative ``a * (1 - a)``."""
    complement = 1.0 - layer_activation
    return layer_error * layer_activation * complement

In [42]:
def unit_theta_correction(prev_layer_activation, delta, rate):
    """Weight corrections for one unit: ``activation * delta * rate`` per input."""
    corrections = []
    for activation in prev_layer_activation:
        corrections.append(activation * delta * rate)
    return corrections

In [43]:
def layer_theta_correction(prev_layer_activation, layer_delta, lr, additional_neuron=False):
    """Weight corrections for every unit of a layer.

    :param prev_layer_activation: activations feeding into the layer.
    :param layer_delta: deltas of the layer; the first entry is skipped.
    :param lr: learning rate.
    :param additional_neuron: when true, a 1.0 is prepended to
        ``prev_layer_activation`` before the corrections are computed.
    :return: list of correction rows, one per remaining delta.
    """
    unit_deltas = layer_delta[1:]
    if additional_neuron:
        prev_layer_activation = np.concatenate(([[1.0], prev_layer_activation]))
    return [unit_theta_correction(prev_layer_activation, unit_delta, lr) for unit_delta in unit_deltas]

In [44]:
def back_propagation(x_data, theta, learning_rate, samples_per_class=500):
    """Run one pass of per-sample gradient descent over ``x_data``.

    For every sample the network is evaluated, the output and hidden layer
    deltas are derived, and both weight matrices are corrected immediately.

    :param x_data: sequence of input feature vectors; sample ``i`` is
        assigned class ``i // samples_per_class``.
    :param theta: pair ``[theta_one, theta_two]``; the inputs are copied,
        not modified.
    :param learning_rate: step size applied to every correction.
    :param samples_per_class: number of consecutive samples per class
        (default 500, the value that used to be hard-coded).
    :return: ``[theta_one, theta_two]`` after the pass.
    """
    # np.array copies, so the caller's matrices stay untouched.
    theta_one = np.array(theta[0])
    theta_two = np.array(theta[1])

    for i in range(len(x_data)):
        # running forward propagation for getting hypothesis
        layer_activations = forward_propagation(theta_one, theta_two, x_data[i])

        # getting outputs of hidden and output layer
        output_layer_activation = layer_activations[1]
        hidden_layer_activation = layer_activations[0]

        ############### output layer section ############
        # Floor division: `i / 500` is a float in Python 3 and made the
        # one-hot target wrong for the first two classes.
        expected_output = get_output_vector(i // samples_per_class)
        # computing output layer error, it's different for all other layers
        output_layer_error = output_layer_activation - expected_output
        # computing deltas, it will be used for computing new theta (new weights)
        # delta = error * sigmoid(x)dx
        output_layer_delta = compute_layer_delta(output_layer_error, output_layer_activation)

        ############### hidden layer section ############
        # The hidden error must be propagated through the weights that
        # produced this forward pass, i.e. before theta_two is corrected.
        hidden_layer_error = compute_layer_error(output_layer_delta, theta_two)
        hidden_layer_delta = compute_layer_delta(hidden_layer_error, hidden_layer_activation)

        ############### correcting theta ############
        theta_two -= np.array(layer_theta_correction(hidden_layer_activation, output_layer_delta, learning_rate))
        theta_one -= layer_theta_correction(x_data[i], hidden_layer_delta, learning_rate, True)

    return [theta_one, theta_two]


In [18]:
# Load the training inputs, labels and reference weights from CSV.
# The relative paths are resolved by abspath against the current working
# directory, so this cell must be run from the directory containing `demo/`.
# Per the recorded output below: data_x is 5000 rows x 400 columns, data_y is
# 5000 x 1, theta1 is 25 x 401 and theta2 is 10 x 26.
# NOTE(review): loaded_data_y is not referenced by any other cell shown here;
# targets are derived from the sample index instead — confirm that is intended.
loaded_data_x = load_data(abspath('demo/init/coursera/neuralnetwork/data/data_x.csv'))
loaded_data_y = load_data(abspath('demo/init/coursera/neuralnetwork/data/data_y.csv'))
loaded_theta_one = load_data(abspath('demo/init/coursera/neuralnetwork/data/theta1.csv'))
loaded_theta_two = load_data(abspath('demo/init/coursera/neuralnetwork/data/theta2.csv'))

Input data info.
	Features amount: 400, set size 5000
Input data info.
	Features amount: 1, set size 5000
Input data info.
	Features amount: 401, set size 25
Input data info.
	Features amount: 26, set size 10


In [45]:
def check_network(expected_theta, actual_theta, x_data):
    """Print the predictions of two weight sets for a single sample.

    :param expected_theta: reference pair ``[theta_one, theta_two]``.
    :param actual_theta: pair ``[theta_one, theta_two]`` being evaluated.
    :param x_data: one input feature vector.
    """
    def output_layer(thetas):
        # forward_propagation returns [hidden, output]; keep the output.
        return forward_propagation(thetas[0], thetas[1], x_data)[1]

    expected = output_layer(expected_theta)
    actual = output_layer(actual_theta)
    predictions = (show_prediction(expected), show_prediction(actual))
    print('Expected: %d  Actual: %d' % predictions)
    print('Output: ', actual[1:])

In [48]:
def run(learning_rate=0.05, epochs=100, seed=None):
    """Train the network from small random weights and return them.

    Reads the notebook globals ``loaded_data_x`` (training inputs) and
    ``loaded_theta_one`` / ``loaded_theta_two`` (reference weights used only
    for the progress report printed every 10 epochs).

    :param learning_rate: step size passed to ``back_propagation``
        (default 0.05, the value that used to be hard-coded).
    :param epochs: number of passes over the data (default 100).
    :param seed: optional seed for NumPy's global random generator; when
        given, the initial weights are reproducible. ``None`` leaves the
        generator state untouched.
    :return: ``[theta_one, theta_two]`` after training.
    """
    features_amount = 400
    hidden_layer_neuron_amount = 25
    hidden_layer_inputs_per_neuron = features_amount + 1
    output_layer_neuron_amount = 10
    output_layer_inputs_per_neuron = hidden_layer_neuron_amount + 1

    if seed is not None:
        np.random.seed(seed)

    # initializing thetas (weights of each connection between neurons)
    computed_thetas = [
        # weights between input and hidden layer
        np.random.rand(hidden_layer_neuron_amount, hidden_layer_inputs_per_neuron) / 100,
        # weights between hidden and output layer
        np.random.rand(output_layer_neuron_amount, output_layer_inputs_per_neuron) / 100,
    ]

    for i in range(epochs):
        if i % 10 == 0:
            # progress report on one fixed sample, before this epoch's update
            check_network([loaded_theta_one, loaded_theta_two], computed_thetas, loaded_data_x[501])
        computed_thetas = back_propagation(loaded_data_x, computed_thetas, learning_rate)

    return computed_thetas

cool_thetas = run()