In [27]:
from os.path import abspath
from os.path import sep as path_sep
import numpy as np
from time import time

In [21]:
def load_data(path):
    output_data = []
    for line in open(path):
        output_data.append([float(x) for x in line.split(',')])

    print('Input data info: ', path.split(path_sep)[-1])
    print('Features amount: %d, set size %d' % (len(output_data[0]), len(output_data)))
    return np.array(output_data)

In [3]:
def show_prediction(output_vector):
    # have to roll result, because index 10 indicates prediction '0'
    index_of_max = np.argmax(np.roll(output_vector[1:], 1))
    return index_of_max

In [4]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

In [5]:
# sum of all pairs theta[i]*input[i] in one neuron
# z(i) = sum(theta * x) = theta' * x
def unit_summarization(theta, input_vec):
    return np.array(theta) @ np.array(input_vec).T

In [6]:
# computing the activation function of neuron
# a(i) = g(z(i))
def unit_activation(theta, input_vec):
    sum_result = unit_summarization(theta, input_vec)
    return sigmoid(sum_result)

In [7]:
# computing the activation vector of whole network layer
# A = g(z(i)) for all units on layer
# [1] - additional neuron (bias)
def forward_propagation_step(theta_matrix, input_vec):
    activations_vector = [unit_activation(theta_matrix[i], input_vec) for i in range(len(theta_matrix))] 
    return np.concatenate(([1.0], activations_vector))

In [8]:
def forward_propagation(theta_one, theta_two, input_vec):
    # computing input level activation vector, and concatenating additional neuron (bias)
    activation_vector_input_layer = np.concatenate(([1.0], input_vec))
    # computing second level activation vector
    activation_vector_hidden_layer = forward_propagation_step(theta_one, activation_vector_input_layer)
    # computing output level activation vector
    activation_vector_output_layer = forward_propagation_step(theta_two, activation_vector_hidden_layer)
    # return each layer activation, that will be used in back propagation 
    return [activation_vector_hidden_layer, activation_vector_output_layer]

In [9]:
def compute_regularize_coefficient(lm, m, theta):
    coefficient = lm / (2.0 * m)
    a = np.array(theta[0]) * np.array(theta[0])
    b = np.array(theta[1]) * np.array(theta[1])
    return (a.sum() + b.sum()) * coefficient

In [10]:
def get_y(i, k):
    offset = 500
    return 1.0 if k * offset <= i < k * offset + offset else 0.0

In [11]:
def get_output_vector(num):
    out_vec = [0.0] * 10
    out_vec[int(num) - 1] = 1.0
    return [1.0] + out_vec

In [12]:
def compute_cost_function(x_data, theta, lm):
    temp_sum = 0.0
    coefficient = -1 / len(x_data)
    fake_zero = 10 ** (-9)
    
    regularize_coefficient = compute_regularize_coefficient(lm, len(x_data), theta)
    for k in range(10):
        for i in range(len(x_data)):
            hyp_vector = forward_propagation(theta[0], theta[1], x_data[i])[1]
            hyp = np.roll(hyp_vector[1:], 1)[k]
            temp_sum += get_y(i, k) * np.log(hyp if hyp != 0.0 else fake_zero) \
                + (1.0 - get_y(i, k)) * np.log(1 - hyp if 1 - hyp != 0.0 else fake_zero)
 
    return temp_sum * coefficient + regularize_coefficient

In [13]:
# for all layers except last one
def compute_layer_error(layer_delta, theta):
    return theta.transpose() @ layer_delta

In [14]:
# computing weights correction layer  
def compute_layer_delta(layer_error, layer_activation):
    return (np.array(layer_error) * layer_activation * (1.0 - layer_activation))[1:]

In [15]:
def unit_theta_correction(prev_layer_activation, delta, rate):
    return prev_layer_activation * delta * rate

In [16]:
def layer_theta_correction(prev_layer_activation, layer_delta, lr, add_neuron=False):
    prev_layer_activation = np.concatenate(([[1.0], prev_layer_activation])) if add_neuron else prev_layer_activation
    return [unit_theta_correction(prev_layer_activation, layer_delta[i], lr) for i in range(len(layer_delta))]

In [17]:
def back_propagation(x_data, theta, learning_rate):
    theta_one = np.array(theta[0])
    theta_two = np.array(theta[1])

    for i in range(len(x_data)):
        # running forward propagation for getting hypothesis  
        layer_activations = forward_propagation(theta_one, theta_two, x_data[i])

        # getting outputs of hidden and output layer
        output_layer_activation = layer_activations[1]
        hidden_layer_activation = layer_activations[0]

        ############### output layer section ############
        # computing output layer error, it's different for all other layers 
        output_layer_error = output_layer_activation - get_output_vector(i / 500)
        # computing deltas, it will be used for computing new theta (new weights)
        # delta = error * sigmoid(x)dx
        output_layer_delta = compute_layer_delta(output_layer_error, output_layer_activation)
        # correcting theta
        theta_two -= np.array(layer_theta_correction(hidden_layer_activation, output_layer_delta, learning_rate))

        # ############### hidden layer section ############
        # computing layer error as unit_theta * layer_delta
        hidden_layer_error = compute_layer_error(output_layer_delta, theta_two)
        hidden_layer_delta = compute_layer_delta(hidden_layer_error, hidden_layer_activation)
        # correcting theta
        theta_one -= np.array(layer_theta_correction(x_data[i], hidden_layer_delta, learning_rate, add_neuron=True))
        
    return [theta_one, theta_two]


In [22]:
loaded_data_x = load_data(abspath('demo/init/coursera/neuralnetwork/data/data_x.csv'))
loaded_data_y = load_data(abspath('demo/init/coursera/neuralnetwork/data/data_y.csv'))
loaded_theta_one = load_data(abspath('demo/init/coursera/neuralnetwork/data/theta1.csv'))
loaded_theta_two = load_data(abspath('demo/init/coursera/neuralnetwork/data/theta2.csv'))

Input data info:  data_x.csv
Features amount: 400, set size 5000
Input data info:  data_y.csv
Features amount: 1, set size 5000
Input data info:  theta1.csv
Features amount: 401, set size 25
Input data info:  theta2.csv
Features amount: 26, set size 10


In [23]:
def check_network(expected_theta, actual_theta, input_vec):
    expected = forward_propagation(expected_theta[0], expected_theta[1], input_vec)[1]
    actual = forward_propagation(actual_theta[0], actual_theta[1], input_vec)[1]
    print('Expected: %d  Actual: %d' % (show_prediction(expected), show_prediction(actual)))
    print('Output: [', end='')
    [print("%0.3f" % f, end=' ') for f in actual[1:]]
    print(']')

In [31]:
def run():
    features_amount = 400
    hidden_layer_neuron_amount = 25
    hidden_layer_inputs_per_neuron = features_amount + 1
    output_layer_neuron_amount = 10
    output_layer_inputs_per_neuron = hidden_layer_neuron_amount + 1
    
    learning_rate = 0.01
    epochs = 100
    
    computed_thetas = [[], []]
    # initializing thetas (weights of each connection between neurons)
    # weights between input and hidden layer
    computed_thetas[0] = np.random.rand(hidden_layer_neuron_amount, hidden_layer_inputs_per_neuron) / 100
    # weights between hidden and output layer
    computed_thetas[1] = np.random.rand(output_layer_neuron_amount, output_layer_inputs_per_neuron) / 100

    start_time = time()
    for i in range(epochs):
        if i % 10 == 0:
            print('Epochs left - ', epochs - i)
        computed_thetas = back_propagation(loaded_data_x, computed_thetas, learning_rate)
    print('Estimate time: ', time() - start_time)
    
    return computed_thetas

cool_thetas = run()

[check_network([loaded_theta_one, loaded_theta_two], cool_thetas, loaded_data_x[j * 500]) for j in range(10)]

Epochs left -  100


Epochs left -  90


Epochs left -  80


Epochs left -  70


Epochs left -  60


Epochs left -  50


Epochs left -  40


Epochs left -  30


Epochs left -  20


Epochs left -  10


Estimate time:  122.79859447479248
Expected: 0  Actual: 0
Output: [0.000 0.004 0.001 0.000 0.022 0.005 0.006 0.003 0.001 0.968 ]
Expected: 1  Actual: 1
Output: [0.889 0.073 0.008 0.002 0.024 0.002 0.013 0.044 0.004 0.000 ]
Expected: 2  Actual: 2
Output: [0.000 0.553 0.007 0.000 0.025 0.001 0.004 0.027 0.074 0.005 ]
Expected: 3  Actual: 3
Output: [0.001 0.002 0.948 0.001 0.008 0.000 0.003 0.224 0.030 0.001 ]
Expected: 4  Actual: 4
Output: [0.000 0.010 0.001 0.958 0.004 0.006 0.104 0.006 0.007 0.000 ]
Expected: 5  Actual: 5
Output: [0.000 0.001 0.024 0.000 0.619 0.003 0.005 0.033 0.006 0.043 ]
Expected: 6  Actual: 6
Output: [0.000 0.022 0.000 0.008 0.001 0.985 0.000 0.016 0.015 0.001 ]
Expected: 7  Actual: 9
Output: [0.000 0.000 0.001 0.003 0.001 0.000 0.697 0.000 0.844 0.011 ]
Expected: 8  Actual: 8
Output: [0.022 0.004 0.006 0.003 0.024 0.001 0.000 0.966 0.307 0.001 ]
Expected: 9  Actual: 9
Output: [0.004 0.000 0.001 0.054 0.012 0.000 0.012 0.002 0.988 0.000 ]


[None, None, None, None, None, None, None, None, None, None]