In [1]:
using BenchmarkTools
using Test
using Profile
using Traceur
#using CuArrays

In [2]:
using Pkg
Pkg.build("CUDAdrv")

[32m[1m  Building[22m[39m CUDAdrv → `C:\Users\Andre\.julia\packages\CUDAdrv\lu32K\deps\build.log`


┌ Error: Error building `CUDAdrv`: 
│ Initializing CUDA driver failed: unknown error (code 999)..
└ @ Pkg.Operations C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Pkg\src\Operations.jl:1097


In [2]:
mutable struct NeuralNetwork
    weights::Array
    biases::Array
end

In [3]:
function createNetwork(layers::Tuple)::NeuralNetwork
    network = NeuralNetwork([],[])
    for i in 1:length(layers)-1
        weightMatrix = randn(layers[i+1],layers[i])
        push!(network.weights,weightMatrix)
        biasVector = randn(layers[i+1])
        push!(network.biases,biasVector)
    end
    return network
 end

createNetwork (generic function with 1 method)

In [4]:
# Function takes a network and feeds an input vector through
# to return lists of activations and z = (W*a + b)
function feedForward(network::NeuralNetwork, x::Array)
    activations = [x]
    zList = []

    for (W,b) in zip(network.weights, network.biases)
        z = W*x + b
        push!(zList, z)
        x = sigmoid.(z)
        push!(activations, x)
    end

    return activations, zList
end

# Function passes single input through network and returns answer
function ffInput(network::NeuralNetwork, x::Array)::Array
    for (W,b) in zip(network.weights, network.biases)
        x = sigmoid.(W*x + b)
    end
    return x
end

ffInput (generic function with 1 method)

In [5]:
# function takes an image label number and creates a 10x1 vector 
# with a 1 in the position of the number
 
function labelToVector(label::Int64)::Array
    labelVector = zeros(10)
    labelVector[label + 1] = 1
    return labelVector
end

labelToVector (generic function with 1 method)

In [6]:
sigmoid(x::Number) = 1/(1 + exp(-x))
sigmoidPrime(x::Number) = sigmoid(x)*(1-sigmoid(x))

sigmoidPrime (generic function with 1 method)

In [7]:
using LinearAlgebra

# Function takes a network, input, and label to compute gradients
# for the network's weights and biases and returns these gradients 
# in lists

function backPropagation(network::NeuralNetwork, input::Array, label::Int64)
    label = labelToVector(label)
    nabla_w = [] # Array to hold weight gradients
    nabla_b = [] # Array to hold bias gradients
    aList, zList = feedForward(network, input)
    delta = (aList[end] - label) .* sigmoidPrime.(zList[end])
    pushfirst!(nabla_b, delta)
    wDelta = delta * aList[end - 1]'
    pushfirst!(nabla_w, wDelta)

    for i in 0:length(network.weights)-2
        delta = (net.weights[end - i]' * delta) .* sigmoidPrime.(zList[end - i - 1])
        pushfirst!(nabla_b, delta)
        wDelta = delta * aList[end - i - 2]'
        pushfirst!(nabla_w, wDelta)
    end
    
    return nabla_b, nabla_w
end

backPropagation (generic function with 1 method)

In [8]:
function updateMiniBatch!(network::NeuralNetwork, miniBatch::Array, eta::Number)
    m = length(miniBatch)
    nablaB = [zero(b) for b in network.biases]
    nablaW = [zero(w) for w in network.weights]
    for (input, label) in miniBatch
        deltaB, deltaW = backPropagation(network, input, label)
        nablaB = [nb+dnb for (nb, dnb) in zip(nablaB, deltaB)]
        nablaW = [nw+dnw for (nw, dnw) in zip(nablaW, deltaW)]
    end
    network.weights = [W-(eta/m)*nW for (W, nW) in zip(network.weights,nablaW)]
    network.biases = [b-(eta/m)*nb for (b, nb) in zip(network.biases,nablaB)]
end

updateMiniBatch! (generic function with 1 method)

In [9]:
# Function returns the number of test inputs for which the network
# outputs the correct result

function evaluate(network::NeuralNetwork, testData::Array)::Number
    testResults = [(argmax(ffInput(network,input))-1,label) for (input,label) in testData]
    return sum(Int64(x==y) for (x,y) in testResults)
end

evaluate (generic function with 1 method)

In [10]:
using Random

# Function performs stochastic gradient descent
function SGD!(network::NeuralNetwork, trainingData::Array, testData::Array, epochs::Number, miniBatchSize::Number, eta::Number)
    n = length(trainingData)
    nTest = length(testData)
    for i in 1:epochs
        shuffle!(trainingData)
        miniBatches = [trainingData[k:k+miniBatchSize-1] for k in 1:miniBatchSize:n]
        for mb in miniBatches
            updateMiniBatch!(network,mb,eta)
        end
        numCorrect = evaluate(network,testData)
        println("Epoch $i: $numCorrect/$nTest")
    end
end

SGD! (generic function with 1 method)

In [11]:
# Function converts mnist data into workable format
function mnistConverter(inputs,labels)
    vectors = [convert(Array{Float64,1},vec(inputs[:, :, i])) for i in 1:size(inputs,3)]
    return collect(zip(vectors,convert(Array{Int64,1},labels)))
end

mnistConverter (generic function with 1 method)

In [12]:
using MLDatasets
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata()

(FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [7, 2, 1, 0, 4, 1, 4, 9, 5, 9  …  7, 8, 9, 0, 1, 2, 3, 4, 5, 6])

In [13]:
trainingData = mnistConverter(train_x,train_y)
testData = mnistConverter(test_x,test_y)

10000-element Array{Tuple{Array{Float64,1},Int64},1}:
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 7)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 2)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 1)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 0)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 4)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 1)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 4)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 9)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 

In [14]:
net = createNetwork((784,30,10))

NeuralNetwork(Any[[-0.61827 -0.00645018 … 0.506456 0.311127; 1.06463 -2.10176 … -0.471339 1.99114; … ; 0.502761 -1.24851 … 1.2198 -0.283598; -0.872706 0.446547 … -0.412292 -1.0104], [0.416856 2.07815 … -0.352516 1.01687; -0.102343 0.981661 … -0.389077 1.19999; … ; -0.37257 0.722652 … 0.652613 0.398809; 0.320502 0.396477 … 0.0241236 -0.254712]], Any[[-0.901046, 0.171992, -0.53581, -0.301479, 0.0447115, -0.519136, -1.67725, 1.244, -0.947674, 1.03363  …  -0.690993, -0.178006, 0.79196, -2.81532, -0.309629, 2.76725, -0.302805, 1.04048, -2.01615, 1.55649], [-0.905893, -0.35701, 1.42227, -0.0506743, -0.296568, -0.165196, 1.31754, 1.60571, 0.988851, -0.668661]])

In [18]:
@btime SGD!(net,trainingData,testData,1,10,3.0)

Epoch 1: 9351/10000
Epoch 1: 9386/10000
Epoch 1: 9401/10000
Epoch 1: 9437/10000
  38.280 s (4292188 allocations: 24.82 GiB)
