In [1]:
using LinearAlgebra

In [2]:
# Fixes needed in Julia
import Base: \

# Build a `Bidiagonal` from a diagonal and an off-diagonal vector whose element types may
# differ; both are stored with the promoted element type.
# NOTE(review): this adds a method to a LinearAlgebra constructor using only stdlib
# argument types (type piracy); kept because later cells rely on it.
function LinearAlgebra.Bidiagonal(dv::Vector{T}, ev::Vector{S}, uplo::Symbol) where {T,S}
    ElT = promote_type(T, S)
    StoreT = Vector{ElT}
    return Bidiagonal{ElT,StoreT}(dv, ev, uplo)
end

## The base method narrows the element type too much, so make sure the result is at least
## as wide as the input.
function \(adjA::Adjoint{<:Any,<:Union{UnitUpperTriangular,UnitLowerTriangular}}, B::AbstractVector)
    tri = parent(adjA)
    TA, TB = eltype(tri), eltype(B)
    # Element type of a sum of products of entries, widened so it can also hold the inputs.
    Tsum = typeof(zero(TA) * zero(TB) + zero(TA) * zero(TB))
    Tout = promote_type(TA, TB, Tsum)
    # Copy the right-hand side into a buffer of the widened type and solve in place.
    rhs = copyto!(similar(B, Tout, size(B)), B)
    return ldiv!(adjoint(convert(AbstractArray{Tout}, tri)), rhs)
end

\ (generic function with 152 methods)

In [3]:
# Default activation function.
function h(x)
    return exp(-x)
end

# Derivative of `h`, written in terms of its output `y = h(x)`; `x` itself is unused.
function h′(x, y)
    return -y
end

# Loss: half the sum of squared differences between `x` and `y`.
function 𝓁(x, y)
    residual = x - y
    return sum(abs2, residual) / 2
end

# Derivative of `𝓁` with respect to its first argument.
function 𝓁′(x, y)
    return x - y
end

# Normally distributed draws scaled by 0.1; `sizes` is forwarded to `randn`.
function init(sizes...)
    return 0.1 * randn(sizes...)
end

# Wrap every element of `A` in a 1×1 array.
function array(A)
    return fill.(A, 1, 1)
end

array (generic function with 1 method)

In [4]:
𝜀 = 1e-4            # step used by the finite-difference gradient checks
n = [5, 4, 3, 1]    # layer widths; n[1] is the input width
N = length(n) - 1   # number of layers
B = 7               # number of input columns (batch size)

7

### Scalar Neural Network

In [5]:
# Forward pass of the scalar network.
#
# For each of the first `N` layers, `params[i]` is multiplied elementwise with
# `[X[i], 1]` and summed to form the pre-activation, which is passed through `h`.
# Returns `(X, δ)`: `X` holds the input followed by every activation, and `δ` holds
# `h′` evaluated at each pre-activation/activation pair.
function neural_net(params, input; h=h, h′=h′, N=length(params))
    X = [input]
    δ = []
    for layer in 1:N
        preact = sum(params[layer] .* [X[layer], 1])
        push!(X, h(preact))
        # `last(X)` is the activation that was just stored for this layer.
        push!(δ, h′.(preact, last(X)))
    end
    return X, δ
end


neural_net (generic function with 1 method)

In [6]:
# One [weight, bias] pair of scalars per layer.
params = map(1:N) do _
    w = init()
    b = init()
    [w, b]
end
# Scalar input and target.
x, y = init(), init()

(-0.06001308766139209, -0.04019876163516568)

In [7]:
# Forward pass: X holds the input followed by each layer's activation, δ the h′ value per layer.
X,δ = neural_net(params,x)
# Lower-bidiagonal matrix with a zero diagonal and subdiagonal entries δ[i] * params[i][1], i = 2:N.
L   = Bidiagonal(zeros(N),[δ[i] * params[i][1] for i=2:N],:L)
# Diagonal whose i-th entry is δ[i] times the row vector [X[i], 1].
D   = Diagonal(δ.*[[X[i],1]' for i=1:N])
# Right-hand side: zeros except for the last entry, 𝓁′ evaluated at the network output and y.
g   = [zeros(N-1);𝓁′(X[N+1],y)]
# Solve (I - L') z = g, then apply D' to obtain one two-element gradient entry per layer.
∇J  = D'*((I-L')\g)

3-element Array{Array{Float64,1},1}:
 [-1.33566e-5, 0.000222562]
 [0.0222118, 0.0187229]    
 [-0.807126, -0.802968]    

In [8]:
# ∇Jfd is the gradient computed with central finite differences
∇Jfd = ∇J * 0
ϵ    = ∇J * 0
for i in 1:N
    for j in 1:2
        # Perturb a single parameter by ±𝜀 and difference the two losses.
        ϵ[i][j] = 𝜀
        ∇Jfd[i][j] = (
            𝓁(neural_net(params .+ ϵ, x)[1][N+1], y) -
            𝓁(neural_net(params .- ϵ, x)[1][N+1], y)
        ) / (2 * 𝜀)
        # Reset the perturbation before moving to the next parameter.
        ϵ[i][j] = 0.0
    end
end
∇Jfd

3-element Array{Array{Float64,1},1}:
 [-1.33566e-5, 0.000222562]
 [0.0222118, 0.0187229]    
 [-0.807126, -0.802968]    

In [9]:
using LinearAlgebra
import Base: +,-,*,/,∘

# Common supertype of the operator wrappers defined in this cell.
abstract type Map{T} end

# Map that multiplies its argument by `A` from the right: `K * X == X * K.A`.
struct RightMul{T} <: Map{T}
    A::T
end
Base.:*(K::RightMul, X::Union{AbstractArray,Number}) = X * K.A
Base.:-(K::RightMul) = RightMul(-K.A)
Base.adjoint(K::RightMul) = RightMul(adjoint(K.A))
Base.copy(K::RightMul) = RightMul(copy(K.A))


# Map that multiplies its argument elementwise (Hadamard product) by `A`.
# It can be applied from either side: `X * Y` and `Y * X` both broadcast `.*` with `A`.
struct HadMul{T} <: Map{T}
    A::T
end
Base.copy(K::HadMul) = HadMul(copy(K.A))
-(X::HadMul) = HadMul(-X.A)
*(X::HadMul, Y::Union{AbstractArray,Number}) = X.A .* Y
*(Y::Union{AbstractArray,Number}, X::HadMul) = Y .* X.A
# The adjoint of `Y -> A .* Y` is `Y -> conj(A) .* Y`. For real `A` this is the same map,
# so real-valued uses are unchanged; for complex `A` the conjugate is required, which
# matches `RightMul`, whose adjoint uses `A'`.
Base.adjoint(X::HadMul) = HadMul(conj(X.A))

# A placeholder zero for containers whose element type is `Any`.
struct Zero end
# NOTE(review): defining `zero(Any)` is type piracy on Base; kept because the solves rely on it.
Base.zero(::Type{Any}) = Zero()
Base.:+(::Zero, ::Zero) = Zero()
Base.:*(::Zero, ::Zero) = Zero()
# Subtracting anything from the placeholder yields the negated right-hand operand.
Base.:-(::Zero, rhs) = -rhs

# Composition of mappings: a chain of two operations, applying `B` first and then `A`.
struct Composition{TA,TB} <: Map{Union{TA,TB}}
    A::TA
    B::TB
end
# `∘` builds a Composition whenever at least one operand is a Map.
Base.:∘(A::Map, B::Map) = Composition(A, B)
Base.:∘(A::Map, B) = Composition(A, B)
Base.:∘(A, B::Map) = Composition(A, B)
Base.:*(C::Composition, X::Union{AbstractArray,Number}) = C.A * (C.B * X)
# Negating the outer factor negates the whole chain.
Base.:-(K::Composition) = Composition(-K.A, K.B)
# The adjoint reverses the order of the factors.
Base.adjoint(K::Composition) = adjoint(K.B) ∘ adjoint(K.A)
Base.copy(K::Composition) = Composition(copy(K.A), copy(K.B))

### Simple Matrix Neural Network

In [10]:
# Forward pass of the simple matrix network.
#
# Layer `i` computes `params[i][1] * X[i] .+ params[i][2]` and applies `h` elementwise.
# Returns `(X, δ)`: `X` holds the input followed by every activation, and `δ` holds
# `h′` broadcast over each pre-activation/activation pair.
function neural_net(params,input;h=h,h′= h′)
    X = [input]
    δ = []
    for i in 1:length(params)
        layer = params[i]
        preact = layer[1] * X[i] .+ layer[2]
        push!(X, h.(preact))
        # `last(X)` is the activation that was just stored for this layer.
        push!(δ, h′.(preact, last(X)))
    end
    return X, δ
end


neural_net (generic function with 1 method)

In [11]:
# params: one [Wi, Bi] pair per layer; Wi is n[i+1]×n[i] and Bi has length n[i+1]
params = map(1:N) do i
    W = init(n[i+1], n[i])
    b = init(n[i+1])
    [W, b]
end
# x is n[1]×B (one input per column); y is 1×B
x, y = init(n[1], B), init(1, B)

([-0.0313474 0.143545 … -0.0634659 0.0838473; 0.0202179 0.0994812 … 0.120648 0.0772788; … ; 0.0502375 -0.0156043 … -0.0237093 0.0382377; -0.0409106 -0.0359764 … -0.0715288 0.0407063], [-0.021889 -0.0226897 … -0.0504058 -0.0171547])

In [23]:
# Forward pass: X holds the input followed by each layer's activation, δ the h′ values per layer.
X,δ = neural_net(params,x)
# Diagonal of 1×2 block rows: [HadMul(δ[i]) ∘ RightMul(X[i])  HadMul(δ[i])] for each layer i.
D   = Diagonal([[HadMul(δ[i]) ∘ RightMul(X[i]) HadMul(δ[i])] for i in 1:N])
# Lower-bidiagonal operator with I on the diagonal and the negated maps
# HadMul(δ[i]) ∘ params[i][1] (i = 2:N) on the subdiagonal.
ImL = Bidiagonal([I for i in 1:N], -[HadMul(δ[i]) ∘ params[i][1] for i in 2:N] , :L)
# Right-hand side: N-1 Zero() placeholders followed by 𝓁′ at the network output.
# (Previously two Zero() entries were hard-coded, which is only right for N == 3.)
g   = [[Zero() for i in 1:N-1]..., 𝓁′(X[N+1],y)]
# Solve with the adjoint of ImL, wrap each entry of the solution in a 1×1 array, then apply D'.
∇J  = D'*fill.(ImL'\g, 1, 1)

3-element Array{Any,1}:
 Zero()                              
 Zero()                              
 [0.946642 0.940773 … 1.22217 1.0527]

In [32]:
# Display the adjoint of D.
# NOTE(review): the recorded run of this cell raised an element-type mismatch error.
D'

ErrorException: Element type mismatch. Tried to create an `Adjoint{Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}}` from an object with eltype `Map`, but the element type of the adjoint of an object with eltype `Map` must be `Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}`.

In [14]:
# ∇Jfd is the gradient computed with central finite differences
∇Jfd = params * 0
ϵ = params * 0
for i in 1:length(params)
    # wb selects the weight (1) or bias (2) array; j runs over its entries linearly.
    for wb in 1:2, j in 1:length(ϵ[i][wb])
        ϵ[i][wb][j] = 𝜀
        ∇Jfd[i][wb][j] = (
            𝓁(neural_net(params + ϵ, x)[1][N+1], y) -
            𝓁(neural_net(params - ϵ, x)[1][N+1], y)
        ) / (2 * 𝜀)
        # Reset the perturbation before moving to the next parameter.
        ϵ[i][wb][j] = 0.0
    end
end
∇Jfd

3-element Array{Array{Array{Float64,N} where N,1},1}:
 [[-0.00144853 -0.00326196 … 0.00214023 0.00110784; 0.00494567 0.0107853 … -0.00799231 -0.00438035; -0.002618 -0.00581151 … 0.0042953 0.00241103; 0.00345424 0.00745893 … -0.00545911 -0.00306579], [-0.0507631, 0.16802, -0.0906874, 0.116496]]
 [[0.403368 0.458488 0.407885 0.427451; 0.861356 0.979078 0.871023 0.912858; -0.483047 -0.548904 -0.488334 -0.511758], [0.453673, 0.968823, -0.543186]]                                                                                         
 [[-15.1995 -6.84618 -6.62077], [-7.87231]]                                                                                                                                                                                                     

In [15]:
# Finite-difference gradient with respect to the first layer's weight matrix.
∇Jfd[1][1]

4×5 Array{Float64,2}:
 -0.00144853  -0.00326196   0.00127972   0.00214023   0.00110784
  0.00494567   0.0107853   -0.00354377  -0.00799231  -0.00438035
 -0.002618    -0.00581151   0.00189133   0.0042953    0.00241103
  0.00345424   0.00745893  -0.00244342  -0.00545911  -0.00306579

In [16]:
# NOTE(review): the recorded output is a scalar equal to the scalar-network gradient entry,
# so ∇J appears to be stale here (cells were run out of order) — confirm before comparing.
∇J[1][1]

-1.33566043589133e-5

### Densely Connected Matrix Network

In [17]:
# Forward pass of the densely connected network.
#
# Layer `i` multiplies the entries of `params[i]` elementwise with `[X..., I]`, i.e. with
# every stored activation so far and finally with the identity, then adds the products
# with broadcasting. Returns `(X, δ)` as in the other `neural_net` definitions.
function neural_net(params,input;h=h, h′= h′)
    X = [input]
    δ = []
    for i in 1:length(params)
        terms = params[i] .* [X..., I]
        preact = broadcast(+, terms...)
        push!(X, h.(preact))
        # `last(X)` is the activation that was just stored for this layer.
        push!(δ, h′.(preact, last(X)))
    end
    return X, δ
end;

In [18]:
# Layer i has one weight block per earlier activation (n[i+1]×n[j] for j = 1:i),
# followed by an n[i+1]×1 bias block in position i+1.
params = [
    [j <= i ? init(n[i+1], n[j]) : init(n[i+1], 1) for j in 1:i+1]
    for i in 1:N
]
x, y = init(n[1], B), init(1, B);

In [19]:
# Forward pass of the densely connected network.
X,δ = neural_net(params,x)
# Diagonal of block rows: entry i holds the maps HadMul(δ[i]) ∘ RightMul(X[j]) for j = 1:i,
# followed by HadMul(δ[i]).
# NOTE(review): the recorded run of this cell failed with an element-type mismatch while
# creating an Adjoint — confirm which `'` in this cell triggers it.
D   = Diagonal([[[ (HadMul(δ[i]) ∘ RightMul(X[j]))' for j in 1:i]' HadMul(δ[i])] for i in 1:N])
# Unit lower-triangular operator over an uninitialised Any matrix; only the strictly
# lower entries are assigned in the loop below.
ImL = UnitLowerTriangular(Matrix{Any}(undef,N,N))
for i in 2:N, j in 1:i-1
    ImL[i,j] = -HadMul(δ[i]) ∘ params[i][j+1]
end
# Right-hand side: N-1 Zero() placeholders followed by 𝓁′ at the network output.
g  = [[Zero() for i=1:N-1]..., 𝓁′(X[N+1],y)]
# Solve with the adjoint of ImL, wrap each entry of the solution in a 1×1 array, then apply D'.
∇J = D'*fill.(ImL'\g, 1, 1)

ErrorException: Element type mismatch. Tried to create an `Adjoint{Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}}` from an object with eltype `Map`, but the element type of the adjoint of an object with eltype `Map` must be `Union{##56#57{_1,_2} where _2 where _1, Composition{_1,_2} where _2 where _1, HadMul{_1} where _1, RightMul{_1} where _1}`.

In [20]:
# ∇Jfd is the gradient computed with central finite differences
∇Jfd = params * 0
ϵ = params * 0
for i in 1:length(ϵ)
    for j in 1:length(ϵ[i])
        for k in 1:length(ϵ[i][j])
            # Perturb a single parameter by ±𝜀 and difference the two losses.
            ϵ[i][j][k] = 𝜀
            ∇Jfd[i][j][k] = (
                𝓁(neural_net(params + ϵ, x)[1][N+1], y) -
                𝓁(neural_net(params - ϵ, x)[1][N+1], y)
            ) / (2 * 𝜀)
            # Reset the perturbation before moving to the next parameter.
            ϵ[i][j][k] = 0.0
        end
    end
end

In [21]:
# Finite-difference gradient with respect to the first weight block of the first layer.
∇Jfd[1][1]

4×5 Array{Float64,2}:
  0.021951    -0.096897    -0.032047    -0.0250698   -0.0334673 
  0.00560765  -0.0276877   -0.00867713  -0.00620658  -0.00929643
  0.00198238  -0.00928748  -0.00295774  -0.00227595  -0.00317577
 -0.019175     0.0913346    0.0294824    0.022319     0.0309022 

In [22]:
# NOTE(review): the recorded output is a scalar equal to the scalar-network gradient entry;
# the dense-network cell that assigns ∇J errored in the recorded run, so ∇J looks stale — confirm.
∇J[1][1]

-1.33566043589133e-5