In [2]:
mutable struct NeuralNetwork
    weights::Array
    biases::Array
end

In [3]:
function createNetwork(layers::Tuple)::NeuralNetwork
    network = NeuralNetwork([],[])
    for i in 1:length(layers)-1
        weightMatrix = randn(layers[i+1],layers[i])
        push!(network.weights,weightMatrix)
        biasVector = randn(layers[i+1])
        push!(network.biases,biasVector)
    end
    return network
 end

createNetwork (generic function with 1 method)

In [15]:
# Function takes a network and feeds an input vector through
# to return lists of activations and z = (W*a + b)
function feedForward(network::NeuralNetwork, input::Array)
    
    a = input
    activations = [a]
    zList = []
    
    for (W,b) in zip(network.weights, network.biases)
        z = W*a + b
        push!(zList, z)
        a = sigmoid.(z)
        push!(activations, a)
    end
    
    return activations, zList
end

# Function passes single input through network and returns answer
function ffInput(network::NeuralNetwork, x::Array)::Array
    for (W,b) in zip(network.weights, network.biases)
        x = sigmoid.(W*x + b)
    end
    return x
end

ffInput (generic function with 1 method)

In [4]:
# function takes an image label number and creates a 10x1 vector 
# with a 1 in the position of the number
 
function labelToVector(label::Int64)::Array
    labelVector = zeros(10)
    labelVector[label + 1] = 1
    return labelVector
end

labelToVector (generic function with 1 method)

In [5]:
sigmoid(x::Number) = 1/(1 + exp(-x))
sigmoidPrime(x::Number) = sigmoid(x)*(1-sigmoid(x))

sigmoidPrime (generic function with 1 method)

In [6]:
using LinearAlgebra

# Function takes a network, input, and label to compute gradients
# for the network's weights and biases and returns these gradients 
# in lists

function backPropagation(network::NeuralNetwork, input::Array, label::Int64)
    label = labelToVector(label)
    nabla_w = [] # Array to hold weight gradients
    nabla_b = [] # Array to hold bias gradients
    aList, zList = feedForward(network, input)
    delta = (aList[end] - label) .* sigmoidPrime.(zList[end])
    pushfirst!(nabla_b, delta)
    wDelta = delta * aList[end - 1]'
    pushfirst!(nabla_w, wDelta)
    
    for i in 0:length(network.weights)-2
        delta = (net.weights[end - i]' * delta) .* sigmoidPrime.(zList[end - i - 1])
        pushfirst!(nabla_b, delta)
        wDelta = delta * aList[end - i - 2]'
        pushfirst!(nabla_w, wDelta)
    end
    
    return nabla_b, nabla_w
end

backPropagation (generic function with 1 method)

In [7]:
function updateMiniBatch!(network::NeuralNetwork, miniBatch::Array, eta::Number)
    m = length(miniBatch)
    nablaB = [zero(b) for b in network.biases]
    nablaW = [zero(w) for w in network.weights]
    for (input, label) in miniBatch
        deltaB, deltaW = backPropagation(network, input, label)
        nablaB = [nb+dnb for (nb, dnb) in zip(nablaB, deltaB)]
        nablaW = [nw+dnw for (nw, dnw) in zip(nablaW, deltaW)]
    end
    network.weights = [W-(eta/m)*nW for (W, nW) in zip(network.weights,nablaW)]
    network.biases = [b-(eta/m)*nb for (b, nb) in zip(network.biases,nablaB)]
end

updateMiniBatch! (generic function with 1 method)

In [8]:
# Function returns the number of test inputs for which the network
# outputs the correct result

function evaluate(network::NeuralNetwork, testData::Array)::Number
    testResults = [(argmax(ffInput(network,input))-1,label) for (input,label) in testData]
    return sum(Int64(x==y) for (x,y) in testResults)
end

evaluate (generic function with 1 method)

In [9]:
using Random

# Function performs stochastic gradient descent
function SGD!(network::NeuralNetwork, trainingData::Array, testData::Array, epochs::Number, miniBatchSize::Number, eta::Number)
    n = length(trainingData)
    nTest = length(testData)
    for i in 1:epochs
        shuffle!(trainingData)
        miniBatches = [trainingData[k:k+miniBatchSize-1] for k in 1:miniBatchSize:n]
        for mb in miniBatches
            updateMiniBatch!(network,mb,eta)
        end
        numCorrect = evaluate(network,testData)
        println("Epoch $i: $numCorrect/$nTest")
    end
end

SGD! (generic function with 1 method)

In [22]:
# Function converts mnist data into workable format
function mnistConverter(inputs,labels)
    vectors = [convert(Array{Float64,1},vec(inputs[:, :, i])) for i in 1:size(inputs,3)]
    return collect(zip(vectors,convert(Array{Int64,1},labels)))
end

mnistConverter (generic function with 1 method)

In [33]:
using MLDatasets
train_x, train_y = MNIST.traindata(1:5)
test_x,  test_y  = MNIST.testdata(1:5)

(FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [7, 2, 1, 0, 4])

In [34]:
trainingData = mnistConverter(train_x,train_y)
testData = mnistConverter(test_x,test_y)

5-element Array{Tuple{Array{Float64,1},Int64},1}:
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 7)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 2)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 1)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 0)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 4)

In [13]:
net = createNetwork((784,30,10))

NeuralNetwork(Any[[0.875315 -0.171041 … 1.53242 0.217799; -0.971041 0.644642 … 0.208482 -0.537842; … ; -1.89493 -0.0719117 … 2.00529 1.6683; -1.66609 -0.646334 … 1.33292 -0.509891], [-0.529806 0.168624 … -1.23484 -0.745847; -0.191047 -1.09056 … -1.11786 -1.01957; … ; -0.179932 -0.883614 … 1.02452 0.183943; 0.339666 -1.11269 … 0.0100294 -0.220737]], Any[[-0.676031, 0.0818956, 0.374857, 1.11205, -1.46125, -0.109917, 0.919051, 0.884505, -0.799572, -0.180999  …  1.66154, 0.323723, -0.887067, -0.5225, -0.152034, -0.431007, 0.603144, -0.490588, -1.97647, -0.479901], [0.69402, -1.46159, -0.0201361, -0.982483, -1.69132, 0.368173, 1.13283, 0.790487, 0.795625, -0.186452]])

In [16]:
SGD!(net,trainingData,testData,1,1000,1.0)

Epoch 1: 1459/10000
