In [1]:
mutable struct NeuralNetwork
    weights::Array
    biases::Array
end

In [2]:
function createNetwork(layers::Tuple)::NeuralNetwork
    network = NeuralNetwork([],[])
    for i in 1:length(layers)-1
        weightMatrix = randn(layers[i+1],layers[i])
        push!(network.weights,weightMatrix)
        biasVector = randn(layers[i+1])
        push!(network.biases,biasVector)
    end
    return network
 end

createNetwork (generic function with 1 method)

In [23]:
# Function takes a network and feeds an input vector through
# to return lists of activations and z = (W*a + b)
function feedForward(network::NeuralNetwork, input::Array)
    
    a = input
    activations = [a]
    zList = []
    
    for (W,b) in zip(network.weights, network.biases)
        z = W*a + b
        push!(zList, z)
        a = sigmoid.(z)
        push!(activations, a)
    end
    
    return activations, zList
end

# Function passes single input through network and returns answer
function ffInput(network::NeuralNetwork, input::Array)::Array
    a = input
    for (W,b) in zip(network.weights, network.biases)
        a = sigmoid.(W*a + b)
    end
    return a
end

ffInput (generic function with 1 method)

In [18]:
# function takes an image label number and creates a 10x1 vector 
# with a 1 in the position of the number
 
function labelToVector(label::Int64)::Array
    labelVector = zeros(10)
    labelVector[label + 1] = 1
    return labelVector
end

labelToVector (generic function with 1 method)

In [22]:
sigmoid(x::Number) = 1/(1 + exp(-x))
sigmoidPrime(x::Number) = sigmoid(x)*(1-sigmoid(x))

sigmoidPrime (generic function with 1 method)

In [16]:
using LinearAlgebra

# Function takes a network, input, and label to compute gradients
# for the network's weights and biases and returns these gradients 
# in lists

function backPropagation(network::NeuralNetwork, input::Array, label::Int64)
    label = labelToVector(label)
    nabla_w = [] # Array to hold weight gradients
    nabla_b = [] # Array to hold bias gradients
    aList, zList = feedForward(network, input)
    delta = (aList[end] - label) .* sigmoidPrime.(zList[end])
    pushfirst!(nabla_b, delta)
    wDelta = delta * aList[end - 1]'
    pushfirst!(nabla_w, wDelta)
    
    for i in 0:length(network.weights)-2
        delta = (net.weights[end - i]' * delta) .* sigmoidPrime.(zList[end - i - 1])
        pushfirst!(nabla_b, delta)
        wDelta = delta * aList[end - i - 2]'
        pushfirst!(nabla_w, wDelta)
    end
    
    return nabla_b, nabla_w
end

backPropagation (generic function with 1 method)

In [11]:
function updateMiniBatch!(network::NeuralNetwork, miniBatch::Array, eta::Number)
    m = length(miniBatch)
    nablaB = [zero(b) for b in network.biases]
    nablaW = [zero(w) for w in network.weights]
    for (input, label) in miniBatch
        deltaB, deltaW = backPropagation(network, input, label)
        nablaB = [nb+dnb for (nb, dnb) in zip(nablaB, deltaB)]
        nablaW = [nw+dnw for (nw, dnw) in zip(nablaW, deltaW)]
    end
    network.weights = [W-(eta/m)*nW for (W, nW) in zip(network.weights,nablaW)]
    network.biases = [b-(eta/m)*nb for (b, nb) in zip(network.biases,nablaB)]
end

updateMiniBatch! (generic function with 1 method)

In [20]:
# Function returns the number of test inputs for which the network
# outputs the correct result

function evaluate(network::NeuralNetwork, testData::Array)::Number
    testResults = [(argmax(ffInput(network,input)),label) for (input,label) in testData]
    return sum(Int64(x==y) for (x,y) in testResults)
end

evaluate (generic function with 1 method)

In [21]:
using Random

# Function performs stochastic gradient descent
function SGD!(network::NeuralNetwork, trainingData::Array, testData::Array, epochs::Number, miniBatchSize::Number, eta::Number)
    n = length(trainingData)
    nTest = length(testData)
    for i in 1:epochs
        shuffle!(trainingData)
        miniBatches = [trainingData[k:k+miniBatchSize-1] for k in 1:miniBatchSize:n]
        for mb in miniBatches
            updateMiniBatch!(network,mb,eta)
        end
        numCorrect = evaluate(network,testData)
        println("Epoch $i: $numCorrect/$nTest")
    end
end

SGD! (generic function with 1 method)

In [7]:
# Function converts mnist data into workable format
function mnistConverter(inputs,labels)
    vectors = []
    for i in 1:size(inputs,3)
        push!(vectors,convert(Array{Float64,1},vec(inputs[:, :, i])))
    end
    return collect(zip(vectors,convert(Array{Int64,1},labels)))
end

mnistConverter (generic function with 1 method)

In [5]:
using MLDatasets
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata()

(FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [7, 2, 1, 0, 4, 1, 4, 9, 5, 9  …  7, 8, 9, 0, 1, 2, 3, 4, 5, 6])

In [8]:
trainingData = mnistConverter(train_x,train_y)
testData = mnistConverter(test_x,test_y)

10000-element Array{Tuple{Any,Int64},1}:
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 7)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 2)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 1)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 0)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 4)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 1)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 4)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 9)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0

In [13]:
trainingData[1][2]

5

In [9]:
net = createNetwork((784,30,10))

NeuralNetwork(Any[[-0.340692 0.0299267 … -0.0349842 -0.0463948; -0.79672 0.770344 … 0.393467 -0.0573069; … ; -0.459227 -0.329184 … -0.31305 -0.435092; -0.807036 0.567519 … -2.08624 0.126429], [-2.14727 0.334129 … 0.596879 -1.08919; -0.684495 -0.409773 … -0.450078 -0.634077; … ; -1.82799 1.78653 … 1.16692 -1.15122; 0.493381 0.204793 … -0.824854 0.547131]], Any[[-0.183656, 0.608764, -0.0441905, -0.568904, -0.616052, -0.228219, -0.778119, -0.168107, 0.524407, -0.168679  …  -0.263613, 0.707805, -0.344857, 0.86762, -1.24668, -1.89003, 0.102931, 2.51901, -0.695611, 1.18696], [0.164216, 0.0498595, -0.27577, 0.284605, -0.891043, 0.115293, -0.37611, -0.920316, -0.496749, 1.35963]])

In [25]:
SGD!(net,trainingData,testData,5,10,100.0)

Epoch 1: 654/10000
Epoch 2: 1075/10000
Epoch 3: 1032/10000
Epoch 4: 1032/10000
Epoch 5: 1032/10000
