In [None]:
# notebook using Julia replicating the Python notebook for Make Your Own Neural Network
# working with the MNIST data set
#
# Quick aside about Julia: by default it prints the result of every command (in a notebook, the result of the last command in a cell).
# To stop this we place a semicolon after the last command.
#
# (c) Alex Glaser, 2017
# license is GPLv2

In [172]:
# Whilst you could create a class-like type in Julia, that is a slightly advanced topic for someone taking their first steps in Julia.
# Instead we stick with plain functions.
"""
    activate(x)

Logistic sigmoid activation, `1 / (1 + exp(-x))`.

`x` may be a single number or an array. For an array the sigmoid is applied to every
element and an array of the same shape is returned, so existing calls such as
`activate(wih * inputs)` keep working.
"""
activate(x::Number) = 1 / (1 + exp(-x))
# Explicit broadcast: `exp` is not element-wise on arrays, so apply the scalar method per element.
activate(x::AbstractArray) = activate.(x)
"""
    train(wih, who, lr, inputs, targets)

Run one training step on a single example and return the updated weights.

# Arguments
- `wih`: weights applied to `inputs` (`wih * inputs`) to obtain the hidden layer signals.
- `who`: weights applied to the hidden outputs to obtain the final layer signals.
- `lr`: learning rate multiplying each weight correction.
- `inputs`: input signals for this example.
- `targets`: desired output signals for this example.

# Returns
The tuple `(wih, who)` holding the updated weights. The arrays passed in are not
modified, so the caller has to rebind them:
`wih, who = train(wih, who, lr, inputs, targets)`.
"""
function train(wih, who, lr, inputs, targets)
    # forward pass: matrix multiplication is `*`, the activation is broadcast element-wise
    hidden_outputs = activate.(wih * inputs)
    final_outputs = activate.(who * hidden_outputs)

    # output layer error is (target - actual)
    output_errors = targets - final_outputs
    # hidden layer error is the output error split by the weights and recombined at the
    # hidden nodes; it must be computed with `who` as it was BEFORE the update below
    hidden_errors = who' * output_errors

    # error scaled by the sigmoid slope, out .* (1 - out); the dotted operators make the
    # scalar/array arithmetic explicit
    output_delta = output_errors .* final_outputs .* (1.0 .- final_outputs)
    hidden_delta = hidden_errors .* hidden_outputs .* (1.0 .- hidden_outputs)

    # update the weights for the links between the hidden and output layers
    who = who + lr * output_delta * hidden_outputs'
    # update the weights for the links between the input and hidden layers
    wih = wih + lr * hidden_delta * inputs'

    return (wih, who)
end
"""
    query(wih, who, inputs)

Query the neural network: propagate `inputs` forward through both layers and return
the signals emerging from the final output layer.

`wih` is applied to `inputs` and `who` to the resulting hidden outputs; the activation
function is applied element-wise after each multiplication.
"""
function query(wih, who, inputs)
    # signals emerging from the hidden layer (activation broadcast over every node)
    hidden_outputs = activate.(wih * inputs)
    # signals emerging from the final output layer
    final_outputs = activate.(who * hidden_outputs)

    return final_outputs
end;



In [86]:
# network topology: node counts for the input, hidden and output layers
input_nodes, hidden_nodes, output_nodes = 784, 200, 10

# learning rate used when the weights are updated
learning_rate = 0.1;

In [87]:
# set up the state of the neural network
# There is no neural-network class here, so every piece of state is a plain variable.
lr = learning_rate
inodes, hnodes, onodes = input_nodes, hidden_nodes, output_nodes
# randn draws from the standard normal distribution, so each matrix is multiplied by the
# standard deviation we want.
# NOTE(review): the scale factors use the row count of each matrix (hnodes, onodes);
# confirm this matches the reference Python notebook.
wih = hnodes^(-0.5) * randn(hnodes, inodes)
who = onodes^(-0.5) * randn(onodes, hnodes);
# Add-on packages exist in Julia for sampling from more complex distributions, but we leave that for some other time.

In [159]:
# load the mnist training data CSV file into a list of lines
# (alternative data file: "mnist_dataset/mnist_train_100.csv")
# The do-block form of open closes the file even if reading throws an error.
training_data_list = open("mnist_dataset/mnist_train.csv", "r") do training_data_file
    readlines(training_data_file)
end;

In [None]:
# train the neural network

"""
    train_epochs(wih, who, lr, records, epochs, n_outputs)

Train on every record in `records`, repeating the whole pass `epochs` times, and return
the updated `(wih, who)`.

Each record is one comma-separated line whose first field is the integer label and whose
remaining fields are the input values. Blank lines are skipped. `n_outputs` is the length
of the target vector built for each record.
"""
function train_epochs(wih, who, lr, records, epochs, n_outputs)
    for _ in 1:epochs
        # go through all records in the training data set
        for record in records
            line = strip(record)
            isempty(line) && continue
            # split the record by the ',' commas
            fields = split(line, ',')
            # the first field is the target label for this record
            label = parse(Int, fields[1])
            # scale and shift the inputs; the number of values follows the record itself
            inputs = parse.(Float64, fields[2:end]) ./ 255.0 .* 0.99 .+ 0.01
            # create the target output values (all 0.01, except the desired label which is 0.99)
            targets = fill(0.01, n_outputs)
            # labels start at 0 but Julia arrays are indexed from 1
            targets[label + 1] = 0.99
            wih, who = train(wih, who, lr, inputs, targets)
        end
    end
    return (wih, who)
end

# epochs is the number of times the training data set is used for training
epochs = 5
# Running the loop inside a function keeps the rebinding of wih/who out of global loop scope.
wih, who = train_epochs(wih, who, lr, training_data_list, epochs, output_nodes);

In [179]:
# load the mnist test data CSV file into a list of lines
# The do-block form of open closes the file even if reading throws an error.
test_data_list = open("mnist_dataset/mnist_test.csv", "r") do test_data_file
    readlines(test_data_file)
end;

In [183]:
# test the neural network

# scorecard for how well the network performs, initially empty
# (1 is recorded for a correct answer, 0 for an incorrect one)
scorecard = Int[]

# go through all the records in the test data set
for record in test_data_list
    line = strip(record)
    isempty(line) && continue
    # split the record by the ',' commas
    fields = split(line, ',')
    # correct answer is first value
    correct_label = parse(Int, fields[1])
    # scale and shift the inputs
    inputs = parse.(Float64, fields[2:end]) ./ 255.0 .* 0.99 .+ 0.01
    # query the network
    outputs = query(wih, who, inputs)
    # the index of the highest value corresponds to the label
    # In Julia findmax returns two values: the maximum value and the first index of this maximum value.
    # Julia indexes from 1 while the labels start at 0, hence the "- 1".
    label = findmax(outputs)[2] - 1
    # append 1 (match) or 0 (no match) to the scorecard
    # Julia's functions that modify their argument carry an exclamation mark at the end, e.g. push!(scorecard, 1).
    # push! grows the existing array in place instead of building a new array for every record.
    push!(scorecard, label == correct_label ? 1 : 0)
end

In [184]:
# report the performance score: correct answers divided by the number of scored records
let n_correct = sum(scorecard), n_total = length(scorecard)
    println("performance = ", n_correct / n_total)
end

performance = 0.9736
