In [30]:
from os.path import abspath
import numpy as np
import os

In [2]:
def load_data(path):
    input_file = open(path)
    output_data = []
    for line in input_file:
        one_row = line.split(',')
        output_data.append([float(x) for x in one_row])

    print('Input data info.')
    print('\tFeatures amount: %d, set size %d' % (len(output_data[0]), len(output_data)))
    return np.array(output_data)

In [3]:
def show_prediction(output_vector):
    index_of_max = np.argmax(np.roll(output_vector[1:], 1))
    return index_of_max

In [4]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

In [5]:
# sum of all pairs theta[i]*input[i] in one neuron
# z(i) = sum(theta * x) = theta' * x
def unit_summarization(theta, input_vec):
    return (np.array(theta) * np.array(input_vec)).sum()

In [6]:
# computing the activation function of neuron
# a(i) = g(z(i))
def unit_activation(theta, input_vec):
    sum_result = unit_summarization(theta, input_vec)
    return sigmoid(sum_result)

In [7]:
# computing the activation vector of whole network layer
# A = g(z(i)) for all units on layer
# [1] - additional neuron (bias)
def forward_propagation_step(theta_matrix, input_vec):
    activations_vector = [unit_activation(theta_matrix[i], input_vec) for i in range(len(theta_matrix))] 
    return np.concatenate(([1.0], activations_vector))

In [8]:
def forward_propagation(theta_one, theta_two, input_vec):
    # computing input level activation vector
    activation_vector_input_layer = np.concatenate(([1.0], input_vec))
    # computing second level activation vector
    activation_vector_hidden_layer = forward_propagation_step(theta_one, activation_vector_input_layer)
    # computing output level activation vector
    activation_vector_output_layer = forward_propagation_step(theta_two, activation_vector_hidden_layer)
    return [activation_vector_hidden_layer, activation_vector_output_layer]

In [9]:
def compute_regularize_coefficient(lm, m, theta):
    coefficient = lm / (2.0 * m)
    a = np.array(theta[0]) * np.array(theta[0])
    b = np.array(theta[1]) * np.array(theta[1])
    return (a.sum() + b.sum()) * coefficient

In [10]:
def get_y(i, k):
    offset = 500
    return 1.0 if k * offset <= i < k * offset + offset else 0.0


def get_output_vector(num):
    out_vec = [0.0] * 10
    out_vec[int(num - 1)] = 1.0
    return [1.0] + out_vec

In [11]:
def compute_cost_function(x_data, theta, lm):
    temp_sum = 0.0
    coefficient = -1 / len(x_data)
    fake_zero = 10 ** (-9)
    
    regularize_coefficient = compute_regularize_coefficient(lm, len(x_data), theta)
    for k in range(10):
        for i in range(len(x_data)):
            hyp_vector = forward_propagation(theta[0], theta[1], x_data[i])[1]
            hyp = np.roll(hyp_vector[1:], 1)[k]
            temp_sum += get_y(i, k) * np.log(hyp if hyp != 0.0 else fake_zero) \
                + (1.0 - get_y(i, k)) * np.log(1 - hyp if 1 - hyp != 0.0 else fake_zero)
 
    return temp_sum * coefficient + regularize_coefficient

In [12]:
def compute_unit_error(theta, delta):
    pass

In [13]:
# for all layers except last one
def compute_layer_error(layer_delta, theta):
    return theta.transpose() @ layer_delta

In [14]:
def compute_layer_delta(layer_error, layer_activation):
    layer_error = np.array(layer_error)
    return (layer_error * layer_activation * (1.0 - layer_activation))[1:]

In [15]:
def unit_theta_correction(prev_layer_activation, delta, rate):
    return prev_layer_activation * delta * rate

In [16]:
def layer_theta_correction(prev_layer_activation, layer_delta, lr, add_neuron=False):
    prev_layer_activation = np.concatenate(([[1.0], prev_layer_activation])) if add_neuron else prev_layer_activation
    return [unit_theta_correction(prev_layer_activation, layer_delta[i], lr) for i in range(len(layer_delta))]

In [17]:
def back_propagation(x_data, theta, learning_rate):
    theta_one = np.array(theta[0])
    theta_two = np.array(theta[1])

    for i in range(len(x_data)):
        # running forward propagation for getting hypothesis  
        layer_activations = forward_propagation(theta_one, theta_two, x_data[i])

        # getting outputs of hidden and output layer
        output_layer_activation = layer_activations[1]
        hidden_layer_activation = layer_activations[0]

        ############### output layer section ############
        # computing output layer error, it's different for all other layers 
        output_layer_error = output_layer_activation - get_output_vector(i / 500)
        # computing deltas, it will be used for computing new theta (new weights)
        # delta = error * sigmoid(x)dx
        output_layer_delta = compute_layer_delta(output_layer_error, output_layer_activation)
        # correcting theta
        theta_two -= np.array(layer_theta_correction(hidden_layer_activation, output_layer_delta, learning_rate))

        # ############### hidden layer section ############
        hidden_layer_error = compute_layer_error(output_layer_delta, theta_two)
        hidden_layer_delta = compute_layer_delta(hidden_layer_error, hidden_layer_activation)
        theta_one -= np.array(layer_theta_correction(x_data[i], hidden_layer_delta, learning_rate, True))
        
    return [theta_one, theta_two]


In [21]:
loaded_data_x = load_data(abspath('demo/init/coursera/neuralnetwork/data/data_x.csv'))
loaded_data_y = load_data(abspath('demo/init/coursera/neuralnetwork/data/data_y.csv'))
loaded_theta_one = load_data(abspath('demo/init/coursera/neuralnetwork/data/theta1.csv'))
loaded_theta_two = load_data(abspath('demo/init/coursera/neuralnetwork/data/theta2.csv'))

Input data info.
	Features amount: 400, set size 5000
Input data info.
	Features amount: 1, set size 5000
Input data info.
	Features amount: 401, set size 25
Input data info.
	Features amount: 26, set size 10


In [24]:
def check_network(expected_theta, actual_theta, x_data):
    expected = forward_propagation(expected_theta[0], expected_theta[1], x_data)[1]
    actual = forward_propagation(actual_theta[0], actual_theta[1], x_data)[1]
    print('Expected: %d  Actual: %d' % (show_prediction(expected), show_prediction(actual)))
    print('Output: ', list(actual[1:]))

In [34]:
def run():
    features_amount = 400
    hidden_layer_neuron_amount = 25
    hidden_layer_inputs_per_neuron = features_amount + 1
    output_layer_neuron_amount = 10
    output_layer_inputs_per_neuron = hidden_layer_neuron_amount + 1
    
    learning_rate = 0.01
    epochs = 200
    
    computed_thetas = [[], []]
    # initializing thetas (weights of each connection between neurons)
    # weights between input and hidden layer
    computed_thetas[0] = np.random.rand(hidden_layer_neuron_amount, hidden_layer_inputs_per_neuron) / 100
    # weights between hidden and output layer
    computed_thetas[1] = np.random.rand(output_layer_neuron_amount, output_layer_inputs_per_neuron) / 100

    for i in range(epochs):
        if i % 10 == 0:
            print('Epochs left', epochs - i)
            
        computed_thetas = back_propagation(loaded_data_x, computed_thetas, learning_rate) 
        
    return computed_thetas

cool_thetas = run()

for i in range(10):
    check_network([loaded_theta_one, loaded_theta_two], cool_thetas, loaded_data_x[i * 500])

Epochs left 200


Epochs left 190


Epochs left 180


Epochs left 170


Epochs left 160


Epochs left 150


Epochs left 140


Epochs left 130


Epochs left 120


Epochs left 110


Epochs left 100


Epochs left 90


Epochs left 80


Epochs left 70


Epochs left 60


Epochs left 50


Epochs left 40


Epochs left 30


Epochs left 20


Epochs left 10


Expected: 0  Actual: 1
Output:  [  9.86827728e-01   9.60466566e-04   7.90068648e-04   4.54337332e-07
   5.85067841e-03   1.26910517e-03   1.40480399e-03   5.68632203e-05
   2.52586035e-03   1.00130761e-03]
Expected: 1  Actual: 1
Output:  [  9.23534904e-01   7.71012092e-02   3.68133633e-03   3.22595753e-04
   1.09813074e-03   1.63443507e-03   4.13746980e-03   1.09429526e-02
   2.90433429e-04   3.59163879e-03]
Expected: 2  Actual: 2
Output:  [  2.37963051e-04   8.52696956e-01   2.03403673e-04   8.49851518e-05
   6.35927850e-03   1.70590425e-04   1.38252695e-04   2.69786717e-01
   4.25687553e-03   1.70053593e-03]
Expected: 3  Actual: 3
Output:  [  6.21395702e-03   5.99368108e-04   9.90101975e-01   1.64850914e-04
   5.79272659e-03   2.55010992e-06   1.18093531e-03   5.49118699e-02
   2.69313162e-02   2.08443358e-03]
Expected: 4  Actual: 4
Output:  [  4.14678493e-04   2.13135222e-02   8.82159198e-04   9.81866947e-01
   1.39708701e-04   2.60858187e-03   5.26992834e-02   2.08824141e-03
   1.9