In [1]:
# Common supertype of all network layers.
# Other code in this file reads `numberOfNeurons`, `parameters` and `transform`
# from layers, so every concrete subtype is expected to provide those fields.
abstract Layer

# Append a row of ones at the bottom of `a` (vertical concatenation).
# Despite the function's name, the ones are added as a new ROW, so every
# column of `a` gains one trailing 1. The layer transforms use this to line the
# input up with the extra bias column of their parameter matrix.
#
# a:       any matrix, rows x columns
# returns: a (rows + 1) x columns matrix whose last row is all ones
function appendColumnOfOnes(a::AbstractMatrix)
  # ones(eltype(a), ...) keeps the element type of `a`; Float64 input behaves as before
  return vcat(a, ones(eltype(a), 1, size(a, 2)))
end

# Logistic (sigmoid) activation of a fully connected layer.
# params: parameter matrix; it multiplies the input after appendColumnOfOnes
#         has added a row of ones, so it needs one more column than `input`
#         has rows (that extra column acts as the bias)
# input:  matrix whose columns are transformed independently
function sigmoidNeuronTransformFunction(params, input)
  negatedActivation = -params * appendColumnOfOnes(input)
  # element-wise 1 / (1 + e^(-z))
  return 1.0 ./ (1.0 .+ exp.(negatedActivation))
end

# Affine (linear) layer output: params times the input extended with a row of
# ones, so the last column of `params` acts as the bias.
function linearNeuronTransformFunction(params, input)
  inputWithBiasRow = appendColumnOfOnes(input)
  return params * inputWithBiasRow
end

# Column-wise softmax: exponentiate every entry of `input` and divide each
# column by its own sum, so every column of the result adds up to 1.
# params: unused; accepted so the call shape matches the other layer transforms
# input:  matrix of scores, normalized independently per column
function exponentialNormalizer(params, input)
  # With no rows there is no column maximum to subtract; keep the plain formula.
  size(input, 1) == 0 && return exp.(input) ./ sum(exp.(input), 1)
  # Subtracting the column maximum leaves the result mathematically unchanged
  # but keeps exp from overflowing to Inf (Inf ./ Inf would produce NaN).
  shifted = input .- maximum(input, 1)
  exponentials = exp.(shifted)
  return exponentials ./ sum(exponentials, 1)
end

# Fully connected layer: a parameter matrix plus the functions used to
# evaluate it and to back-propagate through it.
type FullyConnectedComputingLayer <: Layer
  inputSize::Int64              # number of inputs, bias not counted
  numberOfNeurons::Int64        # number of outputs
  parameters::Array{Float64,2}  # numberOfNeurons x (inputSize + 1); last column is the bias
  transform::Function           # called as transform(parameters, input)
  derivative::Function          # called on this layer's outputs during the backward pass

  # Parameters start as standard-normal draws scaled by 0.1; the "+ 1" adds the bias column.
  FullyConnectedComputingLayer(inputSize, numberOfNeurons, transform::Function, derivative::Function) =
    new(inputSize, numberOfNeurons, 0.1 * randn(numberOfNeurons, inputSize + 1), transform, derivative)
end

# Parameter-free layer that normalizes its input with exponentialNormalizer.
type SoftMaxLayer <: Layer
  numberOfNeurons::Int64  # number of rows it expects and produces
  parameters::Any         # always an empty array; present so every layer has the field
  transform::Function     # always exponentialNormalizer

  SoftMaxLayer(numberOfNeurons) = new(numberOfNeurons, Any[], exponentialNormalizer)
end

# Ordered list of layers, evaluated first to last; it always starts with
# exactly one layer and grows through the add...Layer helpers.
type NetworkArchitecture
  layers::Array{Layer}

  NetworkArchitecture(firstLayer::Layer) = new(Layer[firstLayer])
end

# Append a softmax layer as wide as the current last layer.
# Returns the architecture's (mutated) layer array, as push! does.
function addSoftMaxLayer(architecture::NetworkArchitecture)
  width = architecture.layers[end].numberOfNeurons
  return push!(architecture.layers, SoftMaxLayer(width))
end

# Append a sigmoid layer with `numberOfNeurons` neurons whose input size is the
# width of the current last layer.
# Returns the architecture's (mutated) layer array, as push! does.
function addFullyConnectedSigmoidLayer(arch::NetworkArchitecture, numberOfNeurons::Int64)
  fanIn = arch.layers[end].numberOfNeurons
  # Sigmoid derivative written in terms of the sigmoid's own output y: y * (1 - y)
  sigmoidDerivative = y -> y .* (1 .- y)
  newLayer = FullyConnectedComputingLayer(fanIn, numberOfNeurons, sigmoidNeuronTransformFunction, sigmoidDerivative)
  return push!(arch.layers, newLayer)
end

# Append a linear layer with `numberOfNeurons` neurons whose input size is the
# width of the current last layer.
# Returns the architecture's (mutated) layer array, as push! does.
function addFullyConnectedLinearLayer(architecture::NetworkArchitecture, numberOfNeurons::Int64)
  fanIn = architecture.layers[end].numberOfNeurons
  # A linear unit has slope 1 everywhere, so its derivative is the constant 1.
  constantSlope = output -> 1
  newLayer = FullyConnectedComputingLayer(fanIn, numberOfNeurons, linearNeuronTransformFunction, constantSlope)
  return push!(architecture.layers, newLayer)
end

# Run `input` through every layer in order and return the last layer's output.
# Each layer is evaluated as layer.transform(layer.parameters, previousOutput).
function infer(architecture::NetworkArchitecture, input)
  activation = input
  for layer in architecture.layers
    activation = layer.transform(layer.parameters, activation)
  end
  return activation
end

# Mean negative log of the network output selected by `labels`.
# labels: multiplied element-wise with the network output and summed down each
#         column -- presumably a one-hot indicator per column (TODO confirm
#         against the caller)
function crossEntropyError(architecture::NetworkArchitecture, input, labels)
  predictions = infer(architecture, input)
  # The element-wise product keeps only the entries that `labels` selects.
  selectedPerColumn = sum(predictions .* labels, 1)
  return -mean(log(selectedPerColumn))
end

# Everything needed for back-propagation on one batch: the network, the batch
# with its labels, and per-layer buffers filled by the forward/backward passes.
type BackPropagationBatchLearningUnit
  networkArchitecture::NetworkArchitecture
  dataBatch::Array{Float64,2}       # size(dataBatch, 2) is used as the batch size
  labels::AbstractSparseMatrix
  outputs::Array{Array{Float64,2}}  # output of every layer, written by forwardPass!
  deltas::Array{Array{Float64,2}}   # delta of every layer, written by backwardPass!

  function BackPropagationBatchLearningUnit(arch::NetworkArchitecture, dataBatch::Array{Float64,2}, labels::AbstractSparseMatrix)
    batchSize = size(dataBatch, 2)
    # One zeroed (neurons x batchSize) matrix per layer; called twice so that
    # outputs and deltas get separate storage.
    zeroBuffers() = [ zeros(layer.numberOfNeurons, batchSize) for layer in arch.layers ]
    return new(arch, dataBatch, labels, zeroBuffers(), zeroBuffers())
  end
end

# Push the stored data batch through the network, remembering the output of
# every layer in learningUnit.outputs. Returns nothing.
function forwardPass!(learningUnit::BackPropagationBatchLearningUnit)
  activation = learningUnit.dataBatch
  for (index, layer) in enumerate(learningUnit.networkArchitecture.layers)
    activation = layer.transform(layer.parameters, activation)
    learningUnit.outputs[index] = activation
  end
  return nothing
end

# Fill learningUnit.deltas from the second-to-last layer down to the first,
# using the outputs stored by forwardPass!. The last layer gets no delta of its
# own; its output minus the labels seeds the layer just below it.
# Returns nothing.
function backwardPass!(learningUnit::BackPropagationBatchLearningUnit)
  layers = learningUnit.networkArchitecture.layers
  outputs = learningUnit.outputs
  deltas = learningUnit.deltas

  # Seed: (network output - labels), scaled by the derivative of the layer below the last one.
  outputError = outputs[end] - learningUnit.labels
  deltas[end-1] = layers[end-1].derivative(outputs[end-1]) .* outputError

  for offset in 2:(length(layers) - 1)
    # Drop the bias column: the bias does not connect to the layer below.
    weightsAbove = transpose(layers[end - offset + 1].parameters[:, 1:end-1])
    propagated = weightsAbove * deltas[end - offset + 1]
    deltas[end-offset] = layers[end - offset].derivative(outputs[end-offset]) .* propagated
  end
  return nothing
end

# One gradient-descent step on the stored batch: run the forward and backward
# passes, then move the parameters of every layer except the last one against
# the batch-averaged gradient, scaled by `learningRate`. Returns nothing.
function updateParameters!(unit::BackPropagationBatchLearningUnit, learningRate)
  forwardPass!(unit)
  backwardPass!(unit)

  layers = unit.networkArchitecture.layers
  batchSize = size(unit.dataBatch, 2)
  for i in 1:(length(layers) - 1)
    # The first layer is fed by the data batch, every other one by the output of the layer below.
    layerInput = i == 1 ? unit.dataBatch : unit.outputs[i - 1]
    delta = unit.deltas[i]
    parameters = layers[i].parameters  # same array as the layer's field, so the writes below update the layer

    weightGradient = (delta * transpose(layerInput)) / batchSize
    parameters[:, 1:(end-1)] = parameters[:, 1:(end-1)] - learningRate * weightGradient

    # The bias lives in the last column; its gradient is the mean delta over the batch.
    biasGradient = mean(delta, 2)
    parameters[:, end] = parameters[:, end] - learningRate * biasGradient
  end
  return nothing
end

# Build the smallest network in this file: one linear layer followed by softmax.
# sizes: sizes[1] is the input size, sizes[2] the number of output neurons
function buildNetworkArchitectureSoftMax(sizes)
  inputSize, outputSize = sizes[1], sizes[2]
  linearLayer = FullyConnectedComputingLayer(inputSize, outputSize, linearNeuronTransformFunction, output -> 1)
  network = NetworkArchitecture(linearLayer)
  addSoftMaxLayer(network)
  return network
end

# Build sigmoid hidden layers -> linear layer -> softmax.
# sizes: sizes[1] is the input size, sizes[2] through sizes[end-1] are the
#        widths of the sigmoid layers (one layer per entry, despite the "One"
#        in the name), and sizes[end] is the width of the final linear layer
function buildNetworkArchitectureWithOneHiddenSigmoids(sizes)
  # Sigmoid derivative written in terms of the sigmoid's own output y.
  sigmoidDerivative = y -> y .* (1 .- y)
  inputLayer = FullyConnectedComputingLayer(sizes[1], sizes[2], sigmoidNeuronTransformFunction, sigmoidDerivative)
  network = NetworkArchitecture(inputLayer)
  for position in 3:(length(sizes) - 1)
    addFullyConnectedSigmoidLayer(network, sizes[position])
  end
  addFullyConnectedLinearLayer(network, sizes[end])
  addSoftMaxLayer(network)
  return network
end

buildNetworkArchitectureWithOneHiddenSigmoids (generic function with 1 method)

In [2]:
# NOTE: this cell fails with the MethodError recorded below. The constructor
# takes four arguments (the last one is the derivative function), and only
# three are passed here.
firstLayer = FullyConnectedComputingLayer(784, 100, sigmoidNeuronTransformFunction);
architecture = NetworkArchitecture(firstLayer);
addFullyConnectedSigmoidLayer(architecture, 50);
addFullyConnectedSigmoidLayer(architecture, 30);
addFullyConnectedLinearLayer(architecture, 9);
addSoftMaxLayer(architecture);

outputVector = infer(architecture , randn(784,1)) # try inference on a single random input column
sum(outputVector)


LoadError: MethodError: no method matching FullyConnectedComputingLayer(::Int64, ::Int64, ::#sigmoidNeuronTransformFunction)[0m
Closest candidates are:
  FullyConnectedComputingLayer(::Any, ::Any, ::Function, [1m[31m::Function[0m) at In[1]:31
  FullyConnectedComputingLayer{T}(::Any) at sysimg.jl:53[0m

In [3]:
# NOTE: fails with the same MethodError as the previous attempt, because the
# derivative function (fourth constructor argument) is still missing.
firstLayer = FullyConnectedComputingLayer(784, 100, sigmoidNeuronTransformFunction)
architecture = NetworkArchitecture(firstLayer)
addFullyConnectedSigmoidLayer(architecture, 50)
addFullyConnectedSigmoidLayer(architecture, 10)
addFullyConnectedSigmoidLayer(architecture, 5)
infer(architecture , randn(784,1)) # try inference on a single random input column


LoadError: MethodError: no method matching FullyConnectedComputingLayer(::Int64, ::Int64, ::#sigmoidNeuronTransformFunction)[0m
Closest candidates are:
  FullyConnectedComputingLayer(::Any, ::Any, ::Function, [1m[31m::Function[0m) at In[1]:31
  FullyConnectedComputingLayer{T}(::Any) at sysimg.jl:53[0m

In [6]:
# Quick check of the sigmoid-derivative lambda on a scalar: 2 * (1 - 2) == -2
(x -> x .* (1 - x))(2)

-2

In [5]:
# No global named `x` has been defined in this session, so this call raises
# the UndefVarError recorded below.
x(2)

LoadError: UndefVarError: x not defined

In [12]:
# Working version of the earlier attempts: the sigmoid derivative is now passed
# as the fourth constructor argument.
firstLayer = FullyConnectedComputingLayer(784, 100, sigmoidNeuronTransformFunction, x -> x .* (1 - x))
architecture = NetworkArchitecture(firstLayer)
addFullyConnectedSigmoidLayer(architecture, 50)
addFullyConnectedSigmoidLayer(architecture, 10)
addFullyConnectedSigmoidLayer(architecture, 5)
infer(architecture , randn(784,1)) # one random input column in, one 5-row output column back

5×1 Array{Float64,2}:
 0.419535
 0.401596
 0.538738
 0.604009
 0.513639

In [13]:
architecture = buildNetworkArchitectureWithOneHiddenSigmoids([4,128,20, 5]) # 4 inputs, sigmoid layers of 128 and 20 neurons, then a 5-neuron linear layer and softmax

NetworkArchitecture(Layer[FullyConnectedComputingLayer(4,128,[-0.0404926 -0.0428208 … 0.097584 -0.00302284; 0.094352 -0.0578039 … -0.00680203 0.177206; … ; 0.00127318 0.0742024 … -0.204414 0.0700946; -0.0794905 0.0466298 … -0.0695677 0.0198188],sigmoidNeuronTransformFunction,#11),FullyConnectedComputingLayer(128,20,[0.0503732 0.306608 … 0.0590196 0.0322511; -0.0874081 0.186486 … 0.0992642 -0.0672814; … ; 0.148788 0.0063104 … -0.0334107 0.0506905; -0.0909246 0.000661362 … 0.114618 0.0169652],sigmoidNeuronTransformFunction,#1),FullyConnectedComputingLayer(20,5,[-0.111164 0.18771 … -0.10185 0.0951952; 0.0516701 -0.155717 … 0.0280639 -0.13353; … ; 0.0142033 -0.0211878 … -0.145402 0.10406; 0.190332 -0.147278 … 0.146077 0.184218],linearNeuronTransformFunction,#3),SoftMaxLayer(5,Any[],exponentialNormalizer)])