In [4]:
using BenchmarkTools
using Test
using Profile
using Traceur
using LinearAlgebra
using Random
#using CuArrays

In [2]:
using Pkg
Pkg.build("CUDAdrv")

[32m[1m  Building[22m[39m CUDAdrv → `C:\Users\Andre\.julia\packages\CUDAdrv\lu32K\deps\build.log`


┌ Error: Error building `CUDAdrv`: 
│ Initializing CUDA driver failed: unknown error (code 999)..
└ @ Pkg.Operations C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Pkg\src\Operations.jl:1097


In [3]:
mutable struct NeuralNetwork
    weights::Array
    biases::Array
end

In [3]:
function createNetwork(layers::Tuple)::NeuralNetwork
    network = NeuralNetwork([],[])
    for i in 1:length(layers)-1
        weightMatrix = randn(layers[i+1],layers[i])
        push!(network.weights,weightMatrix)
        biasVector = randn(layers[i+1])
        push!(network.biases,biasVector)
    end
    return network
 end

createNetwork (generic function with 1 method)

In [5]:
# Function takes a network and feeds an input vector through
# to return lists of activations and z = (W*a + b)
function feedForward(network::NeuralNetwork, x::Array)
    activations = [x]
    zList = []

    for (W,b) in zip(network.weights, network.biases)
        z = W*x + b
        push!(zList, z)
        x = sigmoid.(z)
        push!(activations, x)
    end

    return activations, zList
end

# Function passes single input through network and returns answer
function ffInput(network::NeuralNetwork, x::Array)::Array
    for (W,b) in zip(network.weights, network.biases)
        x = sigmoid.(W*x + b)
    end
    return x
end

ffInput (generic function with 1 method)

In [6]:
# function takes an image label number and creates a 10x1 vector 
# with a 1 in the position of the number
 
function labelToVector(label::Int64)::Array
    labelVector = zeros(10)
    labelVector[label + 1] = 1
    return labelVector
end

labelToVector (generic function with 1 method)

In [7]:
sigmoid(x::Number) = 1/(1 + exp(-x))
sigmoidPrime(x::Number) = sigmoid(x)*(1-sigmoid(x))

sigmoidPrime (generic function with 1 method)

In [8]:
# Function takes a network, input, and label to compute gradients
# for the network's weights and biases and returns these gradients 
# in lists
function backPropagation(network::NeuralNetwork, input::Array, label::Int64)
    label = labelToVector(label)
    nabla_w = [] # Array to hold weight gradients
    nabla_b = [] # Array to hold bias gradients
    aList, zList = feedForward(network, input)
    delta = (aList[end] - label) .* sigmoidPrime.(zList[end])
    pushfirst!(nabla_b, delta)
    wDelta = delta * aList[end - 1]'
    pushfirst!(nabla_w, wDelta)

    for i in 0:length(network.weights)-2
        delta = (net.weights[end - i]' * delta) .* sigmoidPrime.(zList[end - i - 1])
        pushfirst!(nabla_b, delta)
        wDelta = delta * aList[end - i - 2]'
        pushfirst!(nabla_w, wDelta)
    end
    
    return nabla_b, nabla_w
end

#Function does the same as above with cross entropy instead of quadratic cost
function backPropagation2(network::NeuralNetwork, input::Array, label::Int64)
    label = labelToVector(label)
    nabla_w = [] # Array to hold weight gradients
    nabla_b = [] # Array to hold bias gradients
    aList, zList = feedForward(network, input)
    delta = crossEntropy.(aList[end], label) .* (aList[end]-)
    pushfirst!(nabla_b, delta)
    wDelta = delta * aList[end - 1]'
    pushfirst!(nabla_w, wDelta)

    for i in 0:length(network.weights)-2
        delta = (net.weights[end - i]' * delta) .* sigmoidPrime.(zList[end - i - 1])
        pushfirst!(nabla_b, delta)
        wDelta = delta * aList[end - i - 2]'
        pushfirst!(nabla_w, wDelta)
    end
    
    return nabla_b, nabla_w
end

#Function computes cross entropy and returns 0 if calculation results in nan or inf
function crossEntropy(a::Number, y::Number)
    cost = -y*log(a) - (1-y)*log(1-a)
    if isinf(cost) || isnan(cost)
        return 0.0
    else
        return cost
    end
end

backPropagation (generic function with 1 method)

In [9]:
function updateMiniBatch!(network::NeuralNetwork, miniBatch::Array, eta::Number)
    m = length(miniBatch)
    nablaB = [zero(b) for b in network.biases]
    nablaW = [zero(w) for w in network.weights]
    for (input, label) in miniBatch
        deltaB, deltaW = backPropagation(network, input, label)
        nablaB = [nb+dnb for (nb, dnb) in zip(nablaB, deltaB)]
        nablaW = [nw+dnw for (nw, dnw) in zip(nablaW, deltaW)]
    end
    network.weights = [W-(eta/m)*nW for (W, nW) in zip(network.weights,nablaW)]
    network.biases = [b-(eta/m)*nb for (b, nb) in zip(network.biases,nablaB)]
end

updateMiniBatch! (generic function with 1 method)

In [10]:
# Function returns the number of test inputs for which the network
# outputs the correct result

function evaluate(network::NeuralNetwork, testData::Array)::Number
    testResults = [(argmax(ffInput(network,input))-1,label) for (input,label) in testData]
    return sum(Int64(x==y) for (x,y) in testResults)
end

evaluate (generic function with 1 method)

In [12]:
# Function performs stochastic gradient descent
function SGD!(network::NeuralNetwork, trainingData::Array, testData::Array, epochs::Number, miniBatchSize::Number, eta::Number)
    n = length(trainingData)
    nTest = length(testData)
    for i in 1:epochs
        shuffle!(trainingData)
        miniBatches = [trainingData[k:k+miniBatchSize-1] for k in 1:miniBatchSize:n]
        for mb in miniBatches
            updateMiniBatch!(network,mb,eta)
        end
        numCorrect = evaluate(network,testData)
        println("Epoch $i: $numCorrect/$nTest")
    end
end

SGD! (generic function with 1 method)

In [13]:
# Function converts mnist data into workable format
function mnistConverter(inputs,labels)
    vectors = [convert(Array{Float64,1},vec(inputs[:, :, i])) for i in 1:size(inputs,3)]
    return collect(zip(vectors,convert(Array{Int64,1},labels)))
end

mnistConverter (generic function with 1 method)

In [14]:
using MLDatasets
train_x, train_y = MNIST.traindata(1:2)
test_x,  test_y  = MNIST.testdata(1:2)

(FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

FixedPointNumbers.Normed{UInt8,8}[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [7, 2])

In [15]:
trainingData = mnistConverter(train_x,train_y)
testData = mnistConverter(test_x,test_y)

2-element Array{Tuple{Array{Float64,1},Int64},1}:
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 7)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 2)

In [16]:
net = createNetwork((784,30,10))

NeuralNetwork(Any[[1.44444 0.768688 … -0.512279 -0.603474; -1.2145 1.4125 … -0.854687 1.2306; … ; 1.87387 -0.436123 … 0.938041 -0.246723; -1.78963 -0.773634 … 1.03086 -0.972938], [1.26145 -0.580334 … 0.947361 1.01339; 0.502065 -0.310575 … -0.982824 -0.191458; … ; -1.08009 0.00504403 … -1.05233 -0.0585738; 0.400818 0.530568 … 0.765188 -0.838112]], Any[[1.51529, 0.049334, 0.967461, 0.457381, 0.55865, 0.486873, -0.419933, 0.640075, 1.72451, -1.37599  …  0.514081, -0.873241, 1.0894, 0.214044, 1.21756, 1.02496, 0.305114, 0.240682, -0.339459, -0.656797], [0.0387723, -0.340905, 0.202012, 0.261148, 0.45623, 0.658789, 0.0618018, -1.42886, 0.418539, -1.06461]])

In [None]:
SGD!(net,trainingData,testData,1,1,3.0)

In [9]:
isnan(log(0))

false