In [1]:
using LinearAlgebra

In [2]:
"Activation function: `exp(-x)`."
function h(x)
    return exp(-x)
end

"Derivative of `h` at `x`, expressed through the already computed value `y = h(x)`."
function h′(x, y)
    return -y
end

"Loss: half the sum of squared entries of `x - y`."
function 𝓁(x, y)
    return sum(abs2, x - y) / 2
end

"Derivative of `𝓁` with respect to its first argument."
function 𝓁′(x, y)
    return x - y
end

"Standard-normal random values scaled by 0.1; `sizes` are forwarded to `randn`."
function init(sizes...)
    return 0.1 * randn(sizes...)
end

init (generic function with 1 method)

In [3]:
𝜀 = 1e-4            # step used by the central finite-difference checks
n = [5, 4, 3, 1]    # layer widths: n[1] is the input width, n[end] the output width
N = length(n) - 1   # number of layers
B = 7               # number of columns in the input and target of the matrix networks

7

### Scalar Neural Network

In [4]:
"""
    neural_net(params, input; h=h, h′=h′, N=length(params))

Forward pass of a chain of `N` scalar layers.

`params[i]` is a two-element `[weight, bias]` pair. Layer `i` computes
`h(weight * X[i] + bias)`.

Returns `(X, δ)`: `X` starts with `input` and gains one activation per layer;
`δ[i]` is `h′` evaluated at layer `i`'s pre-activation and activation.
"""
function neural_net(params, input; h=h, h′=h′, N=length(params))
    X = [input]
    δ = Any[]
    for layer in 1:N
        augmented = [X[layer], 1]               # layer input followed by 1 for the bias
        pre = sum(params[layer] .* augmented)   # weight * input + bias
        push!(X, h(pre))
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end


neural_net (generic function with 1 method)

In [5]:
params = map(_ -> [init(), init()], 1:N)   # one [weight, bias] pair per layer
x, y = init(), init()                      # scalar input and target

(0.021209288237944407, 0.045845020680530646)

In [6]:
# Gradient of the loss with respect to every [weight, bias] pair, obtained by
# solving one triangular linear system instead of running an explicit backward loop.
X,δ = neural_net(params,x)
# L: the sub-diagonal entry for layer i is δ[i] * (weight of layer i), i.e. the
# derivative of layer i's output with respect to the previous layer's output.
L   = Bidiagonal(zeros(N),[δ[i] * params[i][1] for i=2:N],:L)
# D: entry i is the row vector δ[i] * [X[i], 1], i.e. the derivative of layer i's
# output with respect to its own [weight, bias].
D   = Diagonal(δ.*[[X[i],1]' for i=1:N])
# f: the loss derivative enters only through the last layer's output; the other
# N-1 entries are zero.
f   = [zeros(N-1);𝓁′(X[N+1],y)]
# Solve (I - L') z = f, then map z onto the parameters with D'.
# The trailing `;` suppresses the cell output.
∇J  = D'*((I-L')\f);

In [7]:
# Central finite-difference estimate of the gradient, for comparison with ∇J.
∇Jfd = ∇J * 0   # zero-filled container with the same nested shape as ∇J
ϵ = ∇J * 0      # perturbation; exactly one entry is nonzero at a time
for i in 1:N, j in 1:2
    ϵ[i][j] = 𝜀
    Jplus = 𝓁(neural_net(params .+ ϵ, x)[1][N+1], y)
    Jminus = 𝓁(neural_net(params .- ϵ, x)[1][N+1], y)
    ∇Jfd[i][j] = (Jplus - Jminus) / 2𝜀
    ϵ[i][j] = 0.0
end
∇Jfd

3-element Array{Array{Float64,1},1}:
 [-0.000121919, -0.00574836]
 [0.04491, 0.0436309]       
 [-0.613332, -0.761998]     

### Simple Matrix Neural Network
- [] needed to create a box type because operations on matrices of abstract types cause problems

In [8]:
import Base: +,-,*,/,zero,one,adjoint,convert,inv,size,iszero,transpose,length

# Linear maps acting on arrays or numbers. A map `K` is applied with `K * X`;
# `K'` is the map used in its place in a transposed system and `-K` is its negation.
abstract type Map; end

# RM(A): multiplication by A from the right, X ↦ X * A.
struct RM <: Map; A; end
-(K::RM) = RM(-K.A)
*(K::RM, X::Union{AbstractArray,Number}) =  X * K.A
adjoint(K::RM) = RM(K.A')

# A ⦿ B: composition, X ↦ A * (B * X). A and B may be Maps or plain arrays.
struct (⦿) <: Map; A; B; end
*(C::⦿, X::Union{AbstractArray,Number}) = C.A*(C.B*X)
adjoint(K::⦿) = K.B' ⦿ K.A'   # the adjoint of a composition reverses the factors
-(K::⦿) = -K.A ⦿ K.B          # negating one factor negates the composition

# Δ(A): elementwise multiplication by A, X ↦ A .* X.
struct (Δ)   <: Map; A; end
struct Zero  <: Map; end # zero of the vector space
struct One   <: Map; sign::Bool; end # identity map of the vector space (sign == false: its negation)

-(X::Δ) = Δ(-X.A)
*(X::Δ,Y::Union{AbstractArray,Number}) = X.A .* Y
*(Y::Union{AbstractArray,Number},X::Δ) = Y .* X.A
adjoint(X::Δ) = Δ(X.A)   # returned unchanged (no conjugation is applied to A)

# Zero absorbs every product ...
*(X::Zero,Y) = Zero()
*(Y,X::Zero) = Zero()
*(::Zero,::Zero) = Zero()

# ... and is the neutral element of addition and subtraction.
+(X::Zero,Y) = Y
+(Y,X::Zero) = Y
+(::Zero,::Zero) = Zero()

-(X::Zero)   = X
-(X::Zero,Y) = -Y
-(Y,X::Zero) = Y
-(::Zero,::Zero) = Zero()
adjoint(X::Zero) = X

# One leaves its argument unchanged, or negates it when `sign` is false.
*(Y,X::One) = X.sign ?  Y : -Y
*(X::One,Y) = Y*X
-(X::One) = One(!X.sign)
One() = One(true)
adjoint(X::One) = X

# Tie-breakers: without these, One()*One(), Zero()*One() and One()*Zero() match two
# of the generic methods above equally well and raise an ambiguity MethodError.
*(X::One,Y::One) = Y.sign ? X : -X
*(::Zero,::One) = Zero()
*(::One,::Zero) = Zero()

iszero(X::Map) = isa(X,Zero)

iszero (generic function with 12 methods)

In [9]:
# Box wraps any value (array, number or Map) in a single concrete type, so that
# arrays of mixed payloads have one element type with `zero`/`one` defined.
struct Box; X; end

value(R::Box) = R.X                  # unwrap a single Box
value(A::Array) = value.(A)          # unwrap every element of an array
zero(::Type{Box}) = Box(Zero())
zero(::Box) = zero(Box)
iszero(R::Box) = isa(value(R),Zero)
one(::Type{Box})  = Box(One())
one(::Box) = one(Box)
adjoint(R::Box)   = Box(adjoint(value(R)))
inv(R::Box) = Box(inv(value(R)))
convert(::Type{Box},x) = Box(x)
convert(::Type{Box},x::Box) = x

# Products: operate on the payloads and re-wrap the result.
*(R::Box, X) = Box(value(R) * X)
*(X, R::Box) = Box(X * value(R))
*(R1::Box, R2::Box) = Box(value(R1) * value(R2))

-(R::Box)    = Box(-value(R))
-(R::Box, X) = Box(value(R)-X)
-(X,R::Box)  = Box(X-value(R))
-(R1::Box,R2::Box) = Box(value(R1) - value(R2))

+(R1::Box,R2::Box) = Box(value(R1) + value(R2))

# `X * inv(R)` already yields a Box (via the `*` methods above); wrapping it again
# would produce a Box containing a Box, so the product is returned as is.
/(X,R::Box)  = X*inv(R)

# Wrap a value in a 1×1 matrix.
array(x) = fill(x,1,1)

array (generic function with 1 method)

In [19]:
"""
    neural_net(params, input; h=h, h′=h′)

Forward pass of a chain of layers with matrix weights.

Each `params[i]` holds a weight as its first entry and a bias as its second. A
layer computes `h.(W * X[i] .+ b)`.

Returns `(X, δ)`: `X` starts with `input` and gains one activation per layer;
`δ[i]` is `h′` broadcast over layer `i`'s pre-activation and activation.
"""
function neural_net(params, input; h=h, h′=h′)
    X = [input]
    δ = Any[]
    for (W, b) in params
        pre = W * X[end] .+ b
        push!(X, h.(pre))
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end


neural_net (generic function with 1 method)

In [20]:
params = map(1:N) do i
    W = init(n[i+1], n[i])   # weight matrix of layer i
    b = init(n[i+1])         # bias vector of layer i
    [W, b]
end
x, y = init(n[1], B), init(1, B)   # input (n[1]×B) and target (1×B)

([0.0672977 0.0504554 … -0.0161737 -0.230778; 0.000407993 0.0147234 … 0.00234037 -0.00571761; … ; -0.0744498 0.139966 … -0.0630036 0.191786; 0.136124 0.0865397 … 0.0344445 -0.0481269], [-0.0741631 0.09068 … -0.0214639 0.0668393])

In [21]:
# Gradient via a triangular solve, as in the scalar network, but with linear maps
# (Map objects, wrapped in Box where needed) as the matrix entries.
X,δ = neural_net(params,x)
# D: block i is the 1×2 row [∂/∂W  ∂/∂b] for layer i. The weight map sends dW to
# δ[i] .* (dW * X[i]); the bias map is elementwise multiplication by δ[i].
D = Diagonal([[Δ(δ[i]) ⦿ RM(X[i]) Δ(δ[i])] for i=1:N])
# L: the sub-diagonal entry for layer i sends a perturbation dX of that layer's input
# to (params[i][1] * dX) .* δ[i]. The diagonal is filled with zero(Box).
L = Bidiagonal(zeros(Box,N), [Box(RM(Δ(δ[i])) ⦿ params[i][1]) for i=2:N] , :L)
# f: only the last layer's output enters the loss; the other entries are Box(Zero()).
f = [[zero(Box) for i=1:N-1]; Box(𝓁′(X[N+1],y))]
# UnitUpperTriangular supplies the identity diagonal, so this solves (I - L') z = f.
# array.(...) wraps each solution entry in a 1×1 matrix before it is multiplied by D'.
# NOTE(review): the bias block of ∇J appears to keep one column per input column;
# its column sums should match the bias entries of the finite-difference gradient — confirm.
∇J = D'*array.((UnitUpperTriangular(-L')\f))

3-element Array{Array{Any,2},1}:
 [Box([0.00618882 0.00361767 … -0.00908024 -0.00422349; 0.0132648 0.00733459 … -0.0193456 -0.00852639; 0.00908938 0.0051391 … -0.0127821 -0.00576116; 0.00313324 0.00182513 … -0.00457784 -0.00218063]); Box([-0.0189083 -0.0175019 … -0.0181811 -0.0174098; -0.038867 -0.0361953 … -0.0365819 -0.0371096; -0.0269447 -0.0229572 … -0.0263581 -0.0254893; -0.0098281 -0.00887297 … -0.00946017 -0.00884129])]
 [Box([-0.460535 -0.53776 -0.449085 -0.429307; -1.54289 -1.80172 -1.50454 -1.43826; -1.57203 -1.83583 -1.53297 -1.4654]); Box([-0.0680285 -0.0603105 … -0.0655664 -0.0614248; -0.227591 -0.202145 … -0.218949 -0.206563; -0.231238 -0.205986 … -0.22242 -0.211182])]                                                                                                                                         
 [Box([-14.7748 -13.7827 -15.2145]); Box([-1.95044 -1.7291 … -1.87804 -1.7641])]                                                                                           

In [22]:
# Central finite-difference gradient, one parameter entry at a time.
∇Jfd = params * 0   # zero-filled container with the same nested shape as params
ϵ = params * 0      # perturbation; exactly one entry is nonzero at a time
for i in eachindex(params), wb in 1:2, j in eachindex(ϵ[i][wb])
    ϵ[i][wb][j] = 𝜀
    Jplus = 𝓁(neural_net(params + ϵ, x)[1][N+1], y)
    Jminus = 𝓁(neural_net(params - ϵ, x)[1][N+1], y)
    ∇Jfd[i][wb][j] = (Jplus - Jminus) / 2𝜀
    ϵ[i][wb][j] = 0.0
end
∇Jfd

3-element Array{Array{Array{Float64,N} where N,1},1}:
 [[0.00618882 0.00361767 … -0.00908024 -0.00422349; 0.0132648 0.00733459 … -0.0193456 -0.00852639; 0.00908938 0.0051391 … -0.0127821 -0.00576116; 0.00313324 0.00182513 … -0.00457784 -0.00218063], [-0.124568, -0.258205, -0.176349, -0.063644]]
 [[-0.460535 -0.53776 -0.449085 -0.429307; -1.54289 -1.80172 -1.50454 -1.43826; -1.57203 -1.83583 -1.53297 -1.4654], [-0.441199, -1.47809, -1.50599]]                                                                                            
 [[-14.7748 -13.7827 -15.2145], [-12.6528]]                                                                                                                                                                                                      

In [23]:
# Finite-difference gradient for the first entry of the first layer's parameters.
first(first(∇Jfd))

4×5 Array{Float64,2}:
 0.00618882  0.00361767  0.0100704   -0.00908024  -0.00422349
 0.0132648   0.00733459  0.0206598   -0.0193456   -0.00852639
 0.00908938  0.0051391   0.0138016   -0.0127821   -0.00576116
 0.00313324  0.00182513  0.00512099  -0.00457784  -0.00218063

In [24]:
# Unboxed analytic gradient for the same entry, to compare with the cell above.
value(first(first(∇J)))

4×5 Array{Float64,2}:
 0.00618882  0.00361767  0.0100704   -0.00908024  -0.00422349
 0.0132648   0.00733459  0.0206598   -0.0193456   -0.00852639
 0.00908938  0.0051391   0.0138016   -0.0127821   -0.00576116
 0.00313324  0.00182513  0.00512099  -0.00457784  -0.00218063

### Densely Connected Matrix Network

In [13]:
"""
    neural_net(params, input; h=h, h′=h′)

Forward pass of a densely connected network: every layer sees the input and the
outputs of all earlier layers.

The entries of `params[i]` are multiplied pairwise with `[X; One()]`, so they
are the weights for each element of `X` so far, followed by the bias. The
products are summed with a broadcasting `+`.

Returns `(X, δ)`: `X` starts with `input` and gains one activation per layer;
`δ[i]` is `h′` broadcast over layer `i`'s pre-activation and activation.
"""
function neural_net(params, input; h=h, h′=h′)
    X = [input]
    δ = Any[]
    for layer in params
        terms = layer .* [X; One()]    # W_j * X[j] for every j, then the bias
        pre = broadcast(+, terms...)
        push!(X, h.(pre))
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end
# Wrap a value in a 1×1 matrix.
array(x) = fill(x, (1, 1));

In [14]:
# Layer i gets one weight matrix for each of X[1], …, X[i], followed by a bias column.
params = [[[init(n[i+1], n[j]) for j in 1:i]; [init(n[i+1], 1)]] for i in 1:N]
x, y = init(n[1], B), init(1, B);

In [15]:
# Gradient of the densely connected network via a triangular solve with Map entries.
X,δ = neural_net(params,x)
# D: block i is a row with one weight map per earlier activation X[1..i] followed by
# the bias map Δ(δ[i]). The adjoint inside the comprehension and the adjoint applied
# to the resulting vector presumably cancel, leaving a row of the original maps.
D = Diagonal([[[ (Δ(δ[i]) ⦿ RM(X[j]))' for j=1:i]' Δ(δ[i])] for i=1:N])
# L: strictly lower-triangular. L[i,j] is the derivative of layer i's output with
# respect to layer j's output, which is X[j+1] and is weighted by params[i][j+1].
L = LowerTriangular(zeros(Box,N,N)) 
for i=2:N, j=1:i-1
    L[i,j] = Box(RM(Δ(δ[i])) ⦿ params[i][j+1]) 
end
# g: only the last layer's output enters the loss; the other entries are Box(Zero()).
g  = [[zero(Box) for i=1:N-1]; Box(𝓁′(X[N+1],y))]
# UnitUpperTriangular supplies the identity diagonal, so this solves (I - L') z = g.
# array.(...) wraps each solution entry in a 1×1 matrix before it is multiplied by D'.
∇J = D'*array.(UnitUpperTriangular(-L')\g)

3-element Array{Array{Any,2},1}:
 [Box([-0.0128427 0.0198229 … -0.0126271 -0.0135073; 0.00629759 -0.0103409 … 0.00734165 0.00739555; 0.0401913 -0.0629629 … 0.0387595 0.0470523; -0.00133792 0.00133275 … -0.0033493 -0.000352624]); Box([-0.0603623 -0.0652655 … -0.0667794 -0.0447372; 0.0317128 0.0317886 … 0.0336059 0.0251691; 0.195772 0.199804 … 0.219218 0.148352; -0.00380126 -0.00445663 … -0.0034967 -0.00346621])]            
 [Box([-0.0312327 0.0490034 … -0.0300554 -0.0364347; 0.0291568 -0.0426152 … 0.0279949 0.0326496; -0.0853052 0.126531 … -0.0956695 -0.0879409]); Box([-1.26516 -0.912982 -1.16291 -0.880338; 1.11376 0.804179 1.02422 0.775654; -3.31195 -2.39183 -3.04496 -2.30811]); Box([-0.151701 -0.157199 … -0.171871 -0.115664; 0.134598 0.134879 … 0.152597 0.0989407; -0.389885 -0.40483 … -0.425935 -0.293539])]
 [Box([-0.245953 0.385063 … -0.239604 -0.27517]); Box([-9.79772 -7.07042 -9.00471 -6.81814]); Box([-9.73722 -7.45647 -8.84728]); Box([-1.18703 -1.23127 … -1.30091 -0.88588])]     

In [16]:
# Central finite-difference gradient, one parameter entry at a time.
∇Jfd = params * 0   # zero-filled container with the same nested shape as params
ϵ = params * 0      # perturbation; exactly one entry is nonzero at a time
for i in eachindex(ϵ), j in eachindex(ϵ[i]), k in eachindex(ϵ[i][j])
    ϵ[i][j][k] = 𝜀
    Jplus = 𝓁(neural_net(params + ϵ, x)[1][N+1], y)
    Jminus = 𝓁(neural_net(params - ϵ, x)[1][N+1], y)
    ∇Jfd[i][j][k] = (Jplus - Jminus) / 2𝜀
    ϵ[i][j][k] = 0.0
end

In [17]:
# Finite-difference gradient for the first weight matrix of the first layer.
first(first(∇Jfd))

4×5 Array{Float64,2}:
 -0.0128427    0.0198229    0.011221    -0.0126271   -0.0135073  
  0.00629759  -0.0103409   -0.0050128    0.00734165   0.00739555 
  0.0401913   -0.0629629   -0.034096     0.0387595    0.0470523  
 -0.00133792   0.00133275   7.00921e-5  -0.0033493   -0.000352624

In [None]:
# Unboxed analytic gradient for the same weight matrix, to compare with the cell above.
value(first(first(∇J)))