In [1]:
using LinearAlgebra

In [2]:
# Fixes needed in Julia
import Base: \

# Constructor for the case where the diagonal `dv` and the off-diagonal `ev` have
# different element types: the result uses their promoted element type.
function LinearAlgebra.Bidiagonal(dv::Vector{T}, ev::Vector{S}, uplo::Symbol) where {T,S}
    E = promote_type(T, S)
    return Bidiagonal{E,Vector{E}}(dv, ev, uplo)
end

## The base method narrows the type too much. We'll have to ensure that it's at least as wide as the input
# Solve `adjA \ B` for the adjoint of a unit triangular matrix.
# The right-hand side is copied into a buffer whose element type is the promotion of
# both input element types and of a sum of products of their zeros; the solve then
# runs in place on that buffer, which is returned.
function \(adjA::Adjoint{<:Any,<:Union{UnitUpperTriangular,UnitLowerTriangular}}, B::AbstractVector)
    tri = adjA.parent
    TA, TB = eltype(tri), eltype(B)
    TAB = promote_type(TA, TB, typeof(zero(TA) * zero(TB) + zero(TA) * zero(TB)))
    rhs = copyto!(similar(B, TAB, size(B)), B)
    return ldiv!(adjoint(convert(AbstractArray{TAB}, tri)), rhs)
end

\ (generic function with 152 methods)

In [3]:
# Activation function: x ↦ exp(-x).
function h(x)
    return exp(-x)
end

# Derivative of `h` expressed through the already computed output y = h(x)
# (d/dx exp(-x) = -exp(-x) = -y); `x` is accepted but not used.
function h′(x, y)
    return -y
end

# Loss: half the sum of squared entries of `x - y`.
function 𝓁(x, y)
    return sum(abs2, x - y) / 2
end

# Derivative of `𝓁` with respect to its first argument.
function 𝓁′(x, y)
    return x - y
end

# Random initial values: `randn` samples of the requested size, scaled by 0.1.
function init(sizes...)
    return 0.1 * randn(sizes...)
end

# Wrap every element of `A` in a 1×1 array.
function array(A)
    return fill.(A, 1, 1)
end

array (generic function with 1 method)

In [6]:
𝜀 = 1e-4           # step size used by the finite-difference gradient checks
n = [5, 4, 3, 1]   # sizes used to shape the parameters of the matrix networks
N = length(n) - 1  # number of parameter layers
B = 7              # number of columns of the matrix inputs and targets

7

### Scalar Neural Network

In [7]:
# Forward pass of the scalar network.
#
# `params[k]` holds the two coefficients of layer k: the first multiplies the layer
# input, the second is added. Returns `(X, δ)`, where `X[1]` is `input`, `X[k+1]` is
# the output of layer k, and `δ[k]` is `h′` evaluated at layer k's pre-activation
# and output. Only the first `N` layers are evaluated.
function neural_net(params, input; h=h, h′=h′, N=length(params))
    X = [input]
    δ = []
    for k in 1:N
        # X[end] is the previous layer's output (or the input when k == 1)
        pre = sum(params[k] .* [X[end], 1])
        push!(X, h(pre))
        # after the push, X[end] is this layer's output
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end


neural_net (generic function with 1 method)

In [8]:
# One [weight, bias] pair of random scalars per layer.
params = [[init(), init()] for layer in 1:N]
# Random scalar input `x` and target output `y`.
x, y = init(), init()

(-0.2627594087878217, -0.035432865331720395)

In [9]:
# Gradient of the loss with respect to all parameters, obtained from one
# triangular solve instead of an explicit backward loop.
X, δ = neural_net(params, x)
# L: zero diagonal; sub-diagonal entries δ[i] * (weight of layer i) for i = 2..N.
L = Bidiagonal(zeros(N), [δ[i] * params[i][1] for i in 2:N], :L)
# D: per layer, δ[i] times the row [X[i] 1].
D = Diagonal(δ .* [[X[i], 1]' for i in 1:N])
# g: zero everywhere except the last entry, which is the loss derivative at the output.
g = vcat(zeros(N - 1), 𝓁′(X[N+1], y))
∇J = D' * ((I - L') \ g)

3-element Array{Array{Float64,1},1}:
 [0.00196596, -0.00748199]
 [-0.11963, -0.130996]    
 [-1.1859, -1.0867]       

In [10]:
# ∇Jfd: the gradient estimated with central finite differences, one parameter at a time
∇Jfd = 0 * ∇J
ϵ    = 0 * ∇J
for i in 1:N, j in 1:2
    ϵ[i][j] = 𝜀    # perturb a single parameter
    loss_plus  = 𝓁(neural_net(params .+ ϵ, x)[1][N+1], y)
    loss_minus = 𝓁(neural_net(params .- ϵ, x)[1][N+1], y)
    ∇Jfd[i][j] = (loss_plus - loss_minus) / 2𝜀
    ϵ[i][j] = 0.0  # undo the perturbation before moving on
end
∇Jfd

3-element Array{Array{Float64,1},1}:
 [0.00196596, -0.00748199]
 [-0.11963, -0.130996]    
 [-1.1859, -1.0867]       

In [11]:
using LinearAlgebra
import Base: +,-,*,/,∘

# Abstract supertype for map objects such as `RightMul`.
abstract type Map{T} end

# Map that multiplies its argument from the right by the stored `A`:
# `RightMul(A) * X == X * A`.
struct RightMul{T} <: Map{T}
    A::T
end

function Base.copy(K::RightMul)
    return RightMul(copy(K.A))
end

function -(K::RightMul)
    return RightMul(-K.A)
end

function *(K::RightMul, X::Union{AbstractArray,Number})
    return X * K.A
end

# The adjoint map right-multiplies by the adjoint of `A`.
function Base.adjoint(K::RightMul)
    return RightMul(adjoint(K.A))
end


# Map that multiplies its argument elementwise (Hadamard product) by the stored `A`.
struct HadMul{T} <: Map{T}
    A::T
end

function Base.copy(K::HadMul)
    return HadMul(copy(K.A))
end

function -(X::HadMul)
    return HadMul(-X.A)
end

# Elementwise multiplication is provided with the map on either side.
function *(X::HadMul, Y::Union{AbstractArray,Number})
    return X.A .* Y
end

function *(Y::Union{AbstractArray,Number}, X::HadMul)
    return Y .* X.A
end

# The adjoint wraps the same `A` unchanged (no transposition or conjugation).
function Base.adjoint(X::HadMul)
    return HadMul(X.A)
end

# A symbolic zero: a field-less placeholder with just enough arithmetic defined.
struct Zero end

# Make `zero(Any)` return the symbolic zero.
function Base.zero(::Type{Any})
    return Zero()
end

function +(::Zero, ::Zero)
    return Zero()
end

# Subtracting anything from the symbolic zero gives its negation.
function -(::Zero, A)
    return -A
end

function *(::Zero, ::Zero)
    return Zero()
end

# Composition of two mappings, applied right to left:
# `(A ∘ B) * X == A * (B * X)`.
struct Composition{TA,TB} <: Map{Union{TA,TB}}
    A::TA
    B::TB
end

# `∘` builds a Composition when either operand is a Map;
# the third method handles the case where both are.
function ∘(A::Map, B)
    return Composition(A, B)
end

function ∘(A, B::Map)
    return Composition(A, B)
end

function ∘(A::Map, B::Map)
    return Composition(A, B)
end

function *(C::Composition, X::Union{AbstractArray,Number})
    inner = C.B * X
    return C.A * inner
end

# The adjoint reverses the order of the factors and adjoints each of them.
function Base.adjoint(K::Composition)
    return adjoint(K.B) ∘ adjoint(K.A)
end

# Negation is applied to the outer factor only.
function -(K::Composition)
    return Composition(-K.A, K.B)
end

function Base.copy(K::Composition)
    return Composition(copy(K.A), copy(K.B))
end

### Simple Matrix Neural Network

In [12]:
# Forward pass of the simple matrix network.
#
# Each element of `params` supplies a layer's weight as its first entry and its
# bias as its second. Returns `(X, δ)`, where `X[1]` is `input`, `X[k+1]` is the
# output of layer k, and `δ[k]` is `h′` broadcast over layer k's pre-activation
# and output.
function neural_net(params, input; h=h, h′=h′)
    X = [input]
    δ = []
    for (W, b) in params
        # X[end] is the previous layer's output (or the input for the first layer)
        pre = W * X[end] .+ b
        push!(X, h.(pre))
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end


neural_net (generic function with 1 method)

In [13]:
# params[i] = [Wi, Bi]: an n[i+1]×n[i] weight matrix and a length-n[i+1] bias vector
params = [[init(n[i+1], n[i]), init(n[i+1])] for i in 1:N]
# Random input with n[1] rows and B columns, and a random 1×B target.
x, y = init(n[1], B), init(1, B)

([0.0827806 0.0546721 … 0.211481 0.0421459; -0.0312396 -0.0393926 … -0.0730213 0.0210276; … ; 0.116678 0.0724615 … -0.0410583 0.0759445; 0.210051 -0.0300454 … -0.158249 0.234255], [0.137431 0.149247 … -0.0285414 -0.0848659])

In [14]:
# Gradient of the loss for the simple matrix network, from one triangular solve.
# NOTE(review): the recorded run of this cell raised the "Element type mismatch"
# ErrorException shown below; that failure is not addressed here and should be
# re-checked on the Julia version in use.
X, δ = neural_net(params, x)
# D: per layer, the block row [HadMul(δ[i]) ∘ RightMul(X[i])  HadMul(δ[i])].
D   = Diagonal([[HadMul(δ[i]) ∘ RightMul(X[i]) HadMul(δ[i])] for i in 1:N])
# ImL: identity on the diagonal, -(HadMul(δ[i]) ∘ Wi) on the sub-diagonal.
ImL = Bidiagonal([I for i in 1:N], -[HadMul(δ[i]) ∘ params[i][1] for i in 2:N], :L)
# g: N-1 symbolic zeros followed by the loss derivative at the output.
# Built from N rather than written out by hand, so it stays valid if the depth changes.
g   = [[Zero() for i in 1:N-1]..., 𝓁′(X[N+1], y)]
∇J  = D' * fill.(ImL' \ g, 1, 1)

ErrorException: Element type mismatch. Tried to create an `Adjoint{Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}}` from an object with eltype `Map`, but the element type of the adjoint of an object with eltype `Map` must be `Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}`.

In [16]:
# ∇Jfd: the gradient estimated with central finite differences,
# perturbing one entry of one weight matrix or bias vector at a time
∇Jfd = 0 * params
ϵ = 0 * params
for i in eachindex(params), wb in 1:2, j in eachindex(ϵ[i][wb])
    ϵ[i][wb][j] = 𝜀    # perturb a single entry
    loss_plus  = 𝓁(neural_net(params + ϵ, x)[1][N+1], y)
    loss_minus = 𝓁(neural_net(params - ϵ, x)[1][N+1], y)
    ∇Jfd[i][wb][j] = (loss_plus - loss_minus) / 2𝜀
    ϵ[i][wb][j] = 0.0  # undo the perturbation before moving on
end
∇Jfd

3-element Array{Array{Array{Float64,N} where N,1},1}:
 [[-0.00223817 0.003415 … -0.00406045 -0.00840857; -3.64179e-5 3.76352e-5 … -4.31771e-5 -7.77601e-5; 0.00325625 -0.00543696 … 0.00662553 0.0140046; -0.000646001 0.000998548 … -0.00125042 -0.00258659], [-0.0813833, -0.000935114, 0.13075, -0.0247087]]
 [[0.0908113 0.088169 0.0936524 0.103466; 0.840605 0.816417 0.867172 0.957789; 0.0855697 0.0830889 0.0882575 0.0974874], [0.0821645, 0.760645, 0.0774244]]                                                                                               
 [[-4.99603 -5.81764 -4.03329], [-5.26953]]                                                                                                                                                                                                              

In [17]:
# Finite-difference gradient for params[1][1], the first layer's weight matrix.
∇Jfd[1][1]

4×5 Array{Float64,2}:
 -0.00223817    0.003415      0.000465945  -0.00406045  -0.00840857
 -3.64179e-5    3.76352e-5    9.1481e-6    -4.31771e-5  -7.77601e-5
  0.00325625   -0.00543696   -0.000728925   0.00662553   0.0140046 
 -0.000646001   0.000998548   0.000189926  -0.00125042  -0.00258659

In [18]:
# Analytic gradient entry, for comparison with ∇Jfd[1][1].
# NOTE(review): the recorded output below is a scalar equal to the scalar-network
# result from In [9], presumably because In [14] raised an error and ∇J was never
# reassigned — confirm by re-running.
∇J[1][1]

0.0019659633544817966

### Densely Connected Matrix Network

In [19]:
# Forward pass of the densely connected matrix network: every layer receives the
# input and the outputs of all earlier layers.
#
# For layer i, `params[i][j]` multiplies `X[j]` for each value already in `X`, and
# the final entry of `params[i]` is multiplied by the identity `I`; the products
# are combined with a broadcast sum. Returns `(X, δ)` as in the other networks.
function neural_net(params, input; h=h, h′=h′)
    X = [input]
    δ = []
    for p in params
        terms = p .* [X..., I]
        pre = broadcast(+, terms...)
        push!(X, h.(pre))
        # after the push, X[end] is this layer's output
        push!(δ, h′.(pre, X[end]))
    end
    return X, δ
end;

In [20]:
# params[i][j] for j ≤ i is an n[i+1]×n[j] matrix; the last entry (j == i+1) is n[i+1]×1.
params = [[init(n[i+1], j == i + 1 ? 1 : n[j]) for j in 1:i+1] for i in 1:N]
# Random input with n[1] rows and B columns, and a random 1×B target.
x, y = init(n[1], B), init(1, B);

In [21]:
# Gradient of the loss for the densely connected network, from one triangular solve.
# NOTE(review): the recorded run of this cell raised the "Element type mismatch"
# ErrorException shown below — re-check on the Julia version in use.
X, δ = neural_net(params, x)
# D: per layer i, one HadMul(δ[i]) ∘ RightMul(X[j]) entry for each j ≤ i, then HadMul(δ[i]).
D = Diagonal([[[(HadMul(δ[i]) ∘ RightMul(X[j]))' for j in 1:i]' HadMul(δ[i])] for i in 1:N])
# ImL: unit lower triangular; only the strictly lower entries are assigned.
ImL = UnitLowerTriangular(Matrix{Any}(undef, N, N))
for j in 1:N-1, i in j+1:N
    ImL[i, j] = -HadMul(δ[i]) ∘ params[i][j+1]
end
# g: N-1 symbolic zeros followed by the loss derivative at the output.
g = [[Zero() for i in 1:N-1]..., 𝓁′(X[N+1], y)]
∇J = D' * fill.(ImL' \ g, 1, 1)

ErrorException: Element type mismatch. Tried to create an `Adjoint{Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}}` from an object with eltype `Map`, but the element type of the adjoint of an object with eltype `Map` must be `Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}`.

In [22]:
# ∇Jfd: the gradient estimated with central finite differences,
# perturbing one entry of one parameter array at a time
∇Jfd = 0 * params
ϵ = 0 * params
for i in eachindex(ϵ), j in eachindex(ϵ[i]), k in eachindex(ϵ[i][j])
    ϵ[i][j][k] = 𝜀    # perturb a single entry
    loss_plus  = 𝓁(neural_net(params + ϵ, x)[1][N+1], y)
    loss_minus = 𝓁(neural_net(params - ϵ, x)[1][N+1], y)
    ∇Jfd[i][j][k] = (loss_plus - loss_minus) / 2𝜀
    ϵ[i][j][k] = 0.0  # undo the perturbation before moving on
end

In [23]:
# Finite-difference gradient for params[1][1] of the densely connected network.
∇Jfd[1][1]

4×5 Array{Float64,2}:
 -0.0355113    0.02095      0.00312179    0.0569558    0.0470081 
  0.0022544   -0.00118706  -0.000506951  -0.00428202  -0.00329552
 -0.00561847   0.00301074   0.000444557   0.00901663   0.00711608
  0.0148411   -0.0103891   -0.00306082   -0.0278954   -0.0234659 

In [24]:
# Analytic gradient entry, for comparison with ∇Jfd[1][1].
# NOTE(review): the recorded output below is again the scalar from In [9],
# presumably because In [21] raised an error and ∇J was never reassigned —
# confirm by re-running.
∇J[1][1]

0.0019659633544817966