In [1]:
using LinearAlgebra

In [2]:
# Activation function: decaying exponential.
function h(x)
    return exp(-x)
end

# Derivative of `h`, expressed through the already computed activation
# `y = h(x)`: d/dx exp(-x) = -exp(-x) = -y. The pre-activation `x` is unused.
function h′(x, y)
    return -y
end

# Loss: half the sum of squared entries of `x - y`.
function 𝓁(x, y)
    return sum(abs2, x - y) / 2
end

# Derivative of the loss `𝓁` with respect to its first argument.
function 𝓁′(x, y)
    return x - y
end

# Random initialiser: standard-normal samples scaled by 0.1.
# `sizes` is forwarded to `randn`, so `init()` yields a scalar.
function init(sizes...)
    return 0.1 * randn(sizes...)
end

init (generic function with 1 method)

In [3]:
# Step size used by the central finite-difference gradient checks.
𝜀 = 1e-4
# Layer widths, input first; the network has one layer per consecutive pair.
n = [5, 4, 3, 1]
N = length(n) - 1
# Number of columns of the input and target matrices built with `init(n[1], B)`.
B = 7

7

### Scalar Neural Network

In [4]:
# Forward pass through a chain of scalar layers.
#
# `params[i]` is a two-element collection (weight, bias). Layer `i` forms
# `x = weight * X[i] + bias` and stores `h(x)` as the next activation.
# Only the first `N` layers are evaluated (all of them by default).
#
# Returns `(X, δ)`: `X` is the input followed by the `N` activations and
# `δ[i]` is `h′(x, h(x))` for layer `i`.
function neural_net(params, input; h=h, h′=h′, N=length(params))
    activations = [input]
    slopes = Any[]
    for layer in 1:N
        # Affine map written as an elementwise product with [activation, 1].
        pre = sum(params[layer] .* [activations[end], 1])
        push!(activations, h(pre))
        # `activations[end]` is now the value that was just pushed, i.e. h(pre).
        push!(slopes, h′.(pre, activations[end]))
    end
    return activations, slopes
end


neural_net (generic function with 1 method)

In [5]:
# One random [weight, bias] pair of scalars per layer.
params = [[init(), init()] for _ in 1:N]
# Random scalar input and target.
x, y = init(), init()

(-0.23130081044702488, -0.030923296815536468)

In [6]:
# Forward pass: X holds the input followed by every layer's activation,
# δ holds the h′ value of every layer.
X,δ = neural_net(params,x)
# Lower-bidiagonal matrix with zero diagonal; the subdiagonal entry for layer
# i (i = 2:N) is δ[i] times that layer's weight params[i][1].
L   = Bidiagonal(zeros(N),[δ[i] * params[i][1] for i=2:N],:L)
# Diagonal matrix whose i-th entry is δ[i] times the row vector [X[i], 1]'.
D   = Diagonal(δ.*[[X[i],1]' for i=1:N])
# Right-hand side: zero everywhere except the last entry, which is the loss
# derivative 𝓁′ at the network output X[N+1].
f   = [zeros(N-1);𝓁′(X[N+1],y)]
# Solve with I - L' and apply D'. The result has one two-element entry per
# layer and is compared against the finite-difference estimate ∇Jfd below.
∇J  = D'*((I-L')\f);

In [7]:
# Central finite-difference estimate of the gradient, one scalar parameter
# at a time: (J(params + ϵ) - J(params - ϵ)) / (2𝜀).
∇Jfd = ∇J * 0   # same nested shape as ∇J, filled with zeros
ϵ = ∇J * 0      # perturbation; exactly one entry is non-zero at a time
for i in 1:N
    for j in 1:2
        ϵ[i][j] = 𝜀
        local loss_hi = 𝓁(neural_net(params .+ ϵ, x)[1][N+1], y)
        local loss_lo = 𝓁(neural_net(params .- ϵ, x)[1][N+1], y)
        ∇Jfd[i][j] = (loss_hi - loss_lo) / 2𝜀
        ϵ[i][j] = 0.0   # restore before perturbing the next entry
    end
end
∇Jfd

3-element Array{Array{Float64,1},1}:
 [1.78643e-5, -7.72338e-5]
 [0.0109472, 0.0100777]   
 [-1.1834, -1.07989]      

### Simple Matrix Neural Network
- [] A `Box` wrapper type is needed because operations on matrices with abstract element types cause problems

In [8]:
import Base: +,-,*,/,zero,one,adjoint,convert,inv,size,iszero,transpose,length

# Symbolic linear maps. They are stored as entries of structured matrices and
# applied to an array or number with `*`.
abstract type Map; end
# `A ⊗ B` represents X ↦ B * (X * A').
struct (⊗)   <: Map; A; B; end
# `A ⊗′ B` represents X ↦ (A * X) * B'.
struct (⊗′)  <: Map; A; B; end
# `Δ(A)` represents elementwise scaling X ↦ A .* X.
struct (Δ)   <: Map; A; end
struct Zero  <: Map; end # zero of the vector space
# `sign == true` is the identity, `sign == false` its negation.
struct One   <: Map; sign::Bool; end # identity map of the vector space

# --- ⊗′ : X ↦ (A * X) * B' ---
-(K::⊗′) = (-K.A) ⊗′ K.B
*(K::⊗′,X::Union{AbstractArray,Number}) = (K.A * X) * K.B'
# The adjoint of X ↦ A X B' is Y ↦ A' Y B, which is `B' ⊗ A'`.
adjoint(K::⊗′) = K.B' ⊗ K.A'

# --- ⊗ : X ↦ B * (X * A') ---
-(K::⊗) = (-K.A) ⊗ K.B
*(K::⊗,X::Union{AbstractArray,Number}) =  K.B * (X * K.A')
# The adjoint of X ↦ B X A' is Y ↦ B' Y A, which is `B' ⊗′ A'`.
adjoint(K::⊗) = K.B' ⊗′ K.A'

# --- Δ : elementwise scaling ---
-(X::Δ) = Δ(-X.A)
*(X::Δ,Y::Union{AbstractArray,Number}) = X.A .* Y
*(Y::Union{AbstractArray,Number},X::Δ) = Y .* X.A
# NOTE(review): returns the same scaling unchanged; this is the adjoint only
# when A is real-valued. Confirm complex A is never used.
adjoint(X::Δ) = Δ(X.A)

# --- Zero : absorbing for *, neutral for + and - ---
*(X::Zero,Y) = Zero()
*(Y,X::Zero) = Zero()
*(::Zero,::Zero) = Zero()

+(X::Zero,Y) = Y
+(Y,X::Zero) = Y
+(::Zero,::Zero) = Zero()

-(X::Zero)   = X
-(X::Zero,Y) = -Y
-(Y,X::Zero) = Y
-(::Zero,::Zero) = Zero()
adjoint(X::Zero) = X

# --- One : signed identity ---
*(Y,X::One) = X.sign ?  Y : -Y
*(X::One,Y) = Y*X
# Disambiguation: without these three methods the generic left/right methods
# for `One` and `Zero` overlap, and One()*One(), Zero()*One() and One()*Zero()
# fail with an ambiguity MethodError.
*(X::One,Y::One) = One(X.sign == Y.sign)
*(::Zero,::One) = Zero()
*(::One,::Zero) = Zero()
-(X::One) = One(!X.sign)
One() = One(true)
adjoint(X::One) = X

iszero(X::Map) = isa(X,Zero)

iszero (generic function with 12 methods)

In [9]:
# Concrete wrapper around an arbitrary value, so containers can have the
# single element type `Box` while holding numbers, arrays or maps.
struct Box; X; end

# Unwrap a Box; for an Array, unwrap every element.
value(R::Box) = R.X
value(A::Array) = value.(A)
# Additive and multiplicative identities wrap the symbolic `Zero()` / `One()`.
zero(::Type{Box}) = Box(Zero())
zero(::Box) = zero(Box)
iszero(R::Box) = isa(value(R),Zero)
one(::Type{Box})  = Box(One())
one(::Box) = one(Box)
adjoint(R::Box)   = Box(adjoint(value(R)))
inv(R::Box) = Box(inv(value(R)))
# Anything that is not already a Box is wrapped on conversion.
convert(::Type{Box},x) = Box(x)
convert(::Type{Box},x::Box) = x

# Arithmetic forwards to the wrapped values and re-wraps the result.
*(R::Box, X) = Box(value(R) * X)
*(X, R::Box) = Box(X * value(R))
*(R1::Box, R2::Box) = Box(value(R1) * value(R2))

-(R::Box)    = Box(-value(R))
-(R::Box, X) = Box(value(R)-X)
-(X,R::Box)  = Box(X-value(R))
-(R1::Box,R2::Box) = Box(value(R1) - value(R2))

+(R1::Box,R2::Box) = Box(value(R1) + value(R2))

# `X * inv(R)` already returns a Box (through the `*` methods above), so it is
# returned as is; wrapping it again would produce a Box nested inside a Box.
/(X,R::Box)  = X*inv(R)

# Wrap `x` in a 1×1 matrix.
array(x) = fill(x,1,1)

array (generic function with 1 method)

In [10]:
# Forward pass through a chain of dense layers.
#
# Each `params[i]` holds a weight as its first element and a bias as its
# second. Layer `i` forms `x = weight * X[i] .+ bias` and stores `h.(x)` as
# the next activation.
#
# Returns `(X, δ)`: `X` is the input followed by every activation and `δ[i]`
# is `h′.(x, h.(x))` for layer `i`.
function neural_net(params,input;h=h,h′= h′)
    X = [input]
    δ = Any[]
    for layer in params
        pre = layer[1] * X[end] .+ layer[2]
        push!(X, h.(pre))
        # `X[end]` is now the activation that was just pushed.
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end


neural_net (generic function with 1 method)

In [11]:
# Per layer: an n[i+1]×n[i] weight matrix followed by a length-n[i+1] bias.
params = [[init(n[i+1], n[i]), init(n[i+1])] for i in 1:N]
# Random input with n[1] rows and random target with one row, B columns each.
x, y = init(n[1], B), init(1, B)

([-0.0010061 0.0351535 … 0.0220259 0.111667; 0.121242 -0.0530323 … -0.00441481 -0.0337637; … ; 0.0984444 0.0966088 … -0.0340291 -0.0465568; 0.196182 0.0754752 … 0.150032 -0.0689269], [0.020698 -0.050361 … -0.00134641 0.0956486])

In [12]:
# Forward pass: X holds the input followed by every layer's activation,
# δ holds the elementwise h′ values of every layer.
X,δ = neural_net(params,x)
# Block-diagonal operator: the entry for layer i is the 1×2 block row
# [X[i]' ⊗ Δ(δ[i])   Δ(δ[i])], one map for the weight and one for the bias.
# NOTE(review): the bias map Δ(δ[i]) multiplies elementwise (`X.A .* Y`), so
# the bias entry of ∇J keeps one column per input column instead of a single
# vector like the matching ∇Jfd entry. Confirm whether a reduction over
# columns is intended.
D = Diagonal([[X[i]' ⊗  Δ(δ[i]) Δ(δ[i])] for i=1:N])
# Box-valued lower-bidiagonal operator with zero diagonal; the subdiagonal
# entry for layer i (i = 2:N) is the map params[i][1] ⊗′ Δ(δ[i]).
L = Bidiagonal(zeros(Box,N), [Box(params[i][1] ⊗′ Δ(δ[i])) for i=2:N] , :L)
# Right-hand side: zero Boxes except the last entry, which wraps the loss
# derivative 𝓁′ at the network output X[N+1].
f = [[zero(Box) for i=1:N-1]; Box(𝓁′(X[N+1],y))]
# UnitUpperTriangular supplies an implicit unit diagonal on top of -L'. Each
# solution entry is wrapped in a 1×1 matrix by `array` before D' is applied,
# presumably so the block product with the 2×1 blocks of D' is defined.
∇J = D'*array.((UnitUpperTriangular(-L')\f))

3-element Array{Array{Any,2},1}:
 [Box([0.00617418 0.0024111 … 0.00990453 0.00870872; 0.00030609 0.000121833 … 0.000488548 0.000434896; -0.00579272 -0.00208721 … -0.00910052 -0.00751858; 0.00430838 0.00157069 … 0.00676649 0.00570556]); Box([0.0193859 0.0202149 … 0.0192713 0.016717; 0.000956741 0.00101638 … 0.000986411 0.000829209; -0.0162937 -0.0189491 … -0.0169781 -0.0158765; 0.0125041 0.0139799 … 0.0128539 0.0118898])]
 [Box([0.802546 0.774573 0.763389 0.783755; 0.959815 0.926385 0.912982 0.937334; -0.8327 -0.803739 -0.792208 -0.813297]); Box([0.112519 0.122063 … 0.115313 0.102747; 0.134617 0.146077 … 0.138194 0.122607; -0.115949 -0.126959 … -0.119406 -0.106692])]                                                                                                                                              
 [Box([-6.83499 -6.08048 -6.58724]); Box([-0.732917 -0.79755 … -0.753592 -0.671046])]                                                                                                  

In [13]:
# Central finite-difference estimate of the gradient, one scalar entry of one
# parameter array at a time: (J(params + ϵ) - J(params - ϵ)) / (2𝜀).
∇Jfd = params * 0   # same nested shape as params, filled with zeros
ϵ = params * 0      # perturbation; exactly one entry is non-zero at a time
for i in 1:length(params), wb in 1:2, j in 1:length(ϵ[i][wb])
    ϵ[i][wb][j] = 𝜀
    local loss_hi = 𝓁(neural_net(params + ϵ, x)[1][N+1], y)
    local loss_lo = 𝓁(neural_net(params - ϵ, x)[1][N+1], y)
    ∇Jfd[i][wb][j] = (loss_hi - loss_lo) / 2𝜀
    ϵ[i][wb][j] = 0.0   # restore before perturbing the next entry
end
∇Jfd

3-element Array{Array{Array{Float64,N} where N,1},1}:
 [[0.00617418 0.0024111 … 0.00990453 0.00870872; 0.00030609 0.000121833 … 0.000488548 0.000434896; -0.00579272 -0.00208721 … -0.00910052 -0.00751858; 0.00430838 0.00157069 … 0.00676649 0.00570556], [0.136318, 0.00679685, -0.125152, 0.0933989]]
 [[0.802546 0.774573 0.763389 0.783755; 0.959815 0.926385 0.912982 0.937334; -0.8327 -0.803739 -0.792208 -0.813297], [0.819982, 0.980682, -0.850867]]                                                                                              
 [[-6.83499 -6.08048 -6.58724], [-5.35458]]                                                                                                                                                                                                        

In [14]:
# Finite-difference gradient with respect to the first layer's weight matrix.
∇Jfd[1][1]

4×5 Array{Float64,2}:
  0.00617418   0.0024111     0.00167286   0.00990453    0.00870872 
  0.00030609   0.000121833   9.11259e-5   0.000488548   0.000434896
 -0.00579272  -0.00208721   -0.00182361  -0.00910052   -0.00751858 
  0.00430838   0.00157069    0.00127598   0.00676649    0.00570556 

In [15]:
# The matching entry of the operator-based gradient, unwrapped from its Box
# so it can be compared with the finite-difference result.
value(∇J[1][1])

4×5 Array{Float64,2}:
  0.00617418   0.0024111     0.00167286   0.00990453    0.00870872 
  0.00030609   0.000121833   9.11259e-5   0.000488548   0.000434896
 -0.00579272  -0.00208721   -0.00182361  -0.00910052   -0.00751858 
  0.00430838   0.00157069    0.00127598   0.00676649    0.00570556 

### Densely Connected Matrix Network

In [16]:
# Forward pass through a densely connected network: every layer sees the
# input and the activations of all earlier layers.
#
# `params[i]` must have one more element than the number of activations
# available at layer `i`: one block per earlier activation plus a final block
# that is paired with `One()` (the bias term).
#
# Returns `(X, δ)`: `X` is the input followed by every activation and `δ[i]`
# is `h′.(x, h.(x))` for layer `i`.
function neural_net(params,input;h=h, h′= h′)
    X = [input]
    δ = Any[]
    for blocks in params
        # Multiply each block with its activation; the last block meets One().
        terms = blocks .* [X; One()]
        # Broadcasting `+` lets the bias term combine with the matrix terms.
        pre = broadcast(+, terms...)
        push!(X, h.(pre))
        # `X[end]` is now the activation that was just pushed.
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end
# Wrap `x` in a 1×1 matrix.
function array(x)
    return fill(x, 1, 1)
end;

In [17]:
# Layer i gets one n[i+1]×n[j] block for every j in 1:i, followed by an
# n[i+1]×1 block in the last position (j == i+1).
params = [[init(n[i+1], j == i+1 ? 1 : n[j]) for j in 1:i+1] for i in 1:N]
# Random input with n[1] rows and random target with one row, B columns each.
x, y = init(n[1], B), init(1, B);

In [18]:
# Forward pass: X holds the input followed by every layer's activation,
# δ holds the elementwise h′ values of every layer.
X,δ = neural_net(params,x)
# Block-diagonal operator: the entry for layer i is a block row with one map
# X[j]' ⊗ Δ(δ[i]) for each j = 1:i, followed by Δ(δ[i]) for the last block.
# The inner adjoint presumably cancels the elementwise adjoint applied when
# the comprehension is turned into a row by the outer `'` (verify).
D = Diagonal([[[(X[j]' ⊗  Δ(δ[i]))' for j=1:i]' Δ(δ[i])] for i=1:N])
# Box-valued strictly lower-triangular operator: entry (i, j) with j < i is
# built from params[i][j+1], the block that multiplies X[j+1] in layer i.
L = LowerTriangular(zeros(Box,N,N)) 
for i=2:N, j=1:i-1
    L[i,j] = Box(params[i][j+1] ⊗′ Δ(δ[i]))
end
# Right-hand side: zero Boxes except the last entry, which wraps the loss
# derivative 𝓁′ at the network output X[N+1].
g  = [[zero(Box) for i=1:N-1]; Box(𝓁′(X[N+1],y))]
# UnitUpperTriangular supplies an implicit unit diagonal on top of -L'. Each
# solution entry is wrapped in a 1×1 matrix by `array` before D' is applied.
∇J = D'*array.(UnitUpperTriangular(-L')\g)

3-element Array{Array{Any,2},1}:
 [Box([0.00147263 0.000878007 … 0.00317761 0.00134016; 0.0107609 0.00476976 … 0.0233219 0.0101893; 0.00530489 0.00308752 … 0.0116158 0.00459378; 0.00837458 0.00152393 … 0.0169338 0.0075329]); Box([0.00844684 0.00778185 … 0.00799274 0.00695372; 0.0581577 0.0566132 … 0.0620599 0.0480891; 0.0298291 0.0277436 … 0.0297046 0.0259149; 0.0420299 0.0424021 … 0.0498694 0.0369281])]                                                           
 [Box([0.00522154 0.0029278 … 0.0113374 0.00447353; -0.00380147 -0.00147879 … -0.00823059 -0.00357568; -0.00140382 -0.000515823 … -0.00292652 -0.00123515]); Box([0.183875 0.176399 0.198902 0.199122; -0.129887 -0.124764 -0.140498 -0.140885; -0.0478206 -0.0459046 -0.0517189 -0.0519111]); Box([0.0306026 0.0278889 … 0.0287812 0.025857; -0.0209272 -0.0204659 … -0.0220021 -0.0170939; -0.00758397 -0.00721398 … -0.00806272 -0.00653656])]
 [Box([-0.0516927 -0.0225824 … -0.110282 -0.0485074]); Box([-1.77215 -1.70112 -1.91629 -1.92118]); 

In [19]:
# Central finite-difference estimate of the gradient, one scalar entry of one
# parameter block at a time: (J(params + ϵ) - J(params - ϵ)) / (2𝜀).
∇Jfd = params * 0   # same nested shape as params, filled with zeros
ϵ = params * 0      # perturbation; exactly one entry is non-zero at a time
for i in 1:length(ϵ)
    for j in 1:length(ϵ[i])
        for k in 1:length(ϵ[i][j])
            ϵ[i][j][k] = 𝜀
            local loss_hi = 𝓁(neural_net(params + ϵ, x)[1][N+1], y)
            local loss_lo = 𝓁(neural_net(params - ϵ, x)[1][N+1], y)
            ∇Jfd[i][j][k] = (loss_hi - loss_lo) / 2𝜀
            ϵ[i][j][k] = 0.0   # restore before perturbing the next entry
        end
    end
end

In [20]:
# Finite-difference gradient with respect to the first layer's first block,
# the one that multiplies the network input.
∇Jfd[1][1]

4×5 Array{Float64,2}:
 0.00147263  0.000878007  -8.48615e-5   0.00317761  0.00134016
 0.0107609   0.00476976   -0.000160236  0.0233219   0.0101893 
 0.00530489  0.00308752    3.19438e-5   0.0116158   0.00459378
 0.00837458  0.00152393    0.000896062  0.0169338   0.0075329 

In [21]:
# The matching entry of the operator-based gradient, unwrapped from its Box
# so it can be compared with the finite-difference result.
value(∇J[1][1])

4×5 Array{Float64,2}:
 0.00147263  0.000878007  -8.48615e-5   0.00317761  0.00134016
 0.0107609   0.00476976   -0.000160236  0.0233219   0.0101893 
 0.00530489  0.00308752    3.19438e-5   0.0116158   0.00459378
 0.00837458  0.00152393    0.000896062  0.0169338   0.0075329 