In [3]:
using LinearAlgebra
import Base: \
# Build a `Bidiagonal` from a diagonal `dv` and an off-diagonal `ev` whose element types
# differ: both are stored as vectors of the promoted element type.
function LinearAlgebra.Bidiagonal(dv::Vector{T}, ev::Vector{S}, uplo::Symbol) where {T,S}
    E = promote_type(T, S)
    return Bidiagonal{E,Vector{E}}(dv, ev, uplo)
end


## The base method narrows the result type too much, so this override makes sure the
## solution vector is at least as wide as the inputs before solving in place.
function \(adjA::Adjoint{<:Any,<:Union{UnitUpperTriangular,UnitLowerTriangular}}, B::AbstractVector)
    tri = parent(adjA)
    # Element type able to hold a sum of products of entries of the matrix and of B.
    z = zero(eltype(tri)) * zero(eltype(B))
    Twide = promote_type(eltype(tri), eltype(B), typeof(z + z))
    # Widened copy of the right-hand side; `ldiv!` overwrites it with the solution.
    rhs = copyto!(similar(B, Twide, size(B)), B)
    return ldiv!(adjoint(convert(AbstractArray{Twide}, tri)), rhs)
end

\ (generic function with 152 methods)

In [4]:
# Activation function.
function h(x)
    return exp(-x)
end

# Derivative of `h` at `x`, written in terms of the already computed output `y = h(x)`.
function h′(x, y)
    return -y
end

# Loss: half the sum of squared differences between `x` and `y`.
function 𝓁(x, y)
    return sum(abs2, x - y) / 2
end

# Derivative of `𝓁` with respect to its first argument.
function 𝓁′(x, y)
    return x - y
end

# Gaussian random scalar (no arguments) or array of the given size, scaled by 0.01.
function init(sizes...)
    return 0.01 * randn(sizes...)
end

init (generic function with 1 method)

In [5]:
# Step size used by the finite-difference gradient checks below.
𝜀 = .0001
# Layer widths: the cells below build layer i with n[i] inputs and n[i+1] outputs.
n = [5,4,3,1]
# Number of layers.
N = length(n)-1
# Number of columns of the random input and target matrices.
B = 7

7

### Scalar Neural Network

In [6]:
# Forward pass through a chain of scalar neurons.
# `params[i]` holds the weight and bias of layer `i`. Returns the activations (starting
# with `input`) and, for every layer, the activation derivative `h′` evaluated there.
function neural_net(params, input; h=h, h′=h′, N=length(params))
    acts = [input]
    slopes = []
    for layer in 1:N
        # weight * previous activation + bias * 1
        pre = sum(params[layer] .* [acts[layer], 1])
        push!(acts, h(pre))
        push!(slopes, h′.(pre, last(acts)))
    end
    return acts, slopes
end


neural_net (generic function with 1 method)

In [7]:
# One [weight, bias] pair of small random scalars per layer.
params =[[init(),init()] for i=1:N] # W and B
# Random scalar input and target.
x,y = init(),init() # input and output

(0.0168052294474895, 0.00869503242558762)

In [8]:
# Analytic gradient of the loss with respect to every [weight, bias] pair.
# X holds the activations X[1..N+1], δ the activation derivatives δ[1..N].
X,δ = neural_net(params,x)
# Lower bidiagonal matrix with zero diagonal; subdiagonal entries are δ[i] times the weight of layer i (i = 2..N).
L   = Bidiagonal(zeros(N),[δ[i] * params[i][1] for i=2:N],:L)
# Diagonal entry i is δ[i] times the row [X[i] 1] (one column for the weight, one for the bias).
D   = Diagonal(δ.*[[X[i],1]' for i=1:N])
# Loss derivative at the final activation, zero for the other entries.
g   = [zeros(N-1);𝓁′(X[N+1],y)]
# Solve the triangular system with I-L', then apply D'.
∇J  = D'*((I-L')\g)

3-element Array{Array{Float64,1},1}:
 [-1.85356e-6, -0.000110297]
 [-0.00603026, -0.0059671]  
 [-1.04597, -1.02742]       

In [9]:
# ∇Jfd is the gradient computed with central finite differences
# Both start as zero-filled copies with the same nested shape as ∇J.
∇Jfd = ∇J * 0
ϵ    = ∇J * 0
for i=1:N, j=1:2       
    # Perturb one parameter at a time by ±𝜀 and difference the two losses.
    ϵ[i][j] = 𝜀
    ∇Jfd[i][j]=(𝓁(neural_net(params.+ϵ,x)[1][N+1],y)-𝓁(neural_net(params.-ϵ,x)[1][N+1],y))/2𝜀
    # Reset the perturbation before moving to the next parameter.
    ϵ[i][j] = .0
end
∇Jfd

3-element Array{Array{Float64,1},1}:
 [-1.85356e-6, -0.000110297]
 [-0.00603026, -0.0059671]  
 [-1.04597, -1.02742]       

### Matrix Neural Network

In [13]:
import Base: +,-,*,/,∘

# A linear operator acting on matrices, stored as two closures: `f` applies the operator,
# `fadj` applies its adjoint. The fields are deliberately untyped: a parametric type gave
# unreadable error messages and caused some other issues.
struct LinearMatrixOp
    f
    fadj
end
# An operator described by one closure, which is used for both `f` and `fadj`.
LinearMatrixOp(f::Function) = LinearMatrixOp(f, f)

# Elementary operators.
LeftMul(A::AbstractMatrix)  = LinearMatrixOp(M -> A * M, M -> A' * M)   # M ↦ A*M
RightMul(A::AbstractMatrix) = LinearMatrixOp(M -> M * A, M -> M * A')   # M ↦ M*A
HadMul(A::AbstractMatrix)   = LinearMatrixOp(M -> M .* A)               # M ↦ M.*A
ZeroMul()  = LinearMatrixOp(M -> Zero())                                # M ↦ symbolic zero
IdentMul() = LinearMatrixOp(M -> M)                                     # identity, used by `one`

# Generic-programming hooks.
Base.zero(::Type{LinearMatrixOp}) = ZeroMul()
Base.one(::Type{LinearMatrixOp}) = IdentMul()
Base.adjoint(op::LinearMatrixOp) = LinearMatrixOp(op.fadj, op.f)   # swap the two closures
Base.copy(op::LinearMatrixOp) = LinearMatrixOp(op.f, op.fadj)

# Applying an operator, negating it, and composing two of them. The adjoint of a
# composition applies the adjoints in reverse order.
Base.:*(op::LinearMatrixOp, X::Union{AbstractArray,Number}) = op.f(X)
Base.:-(op::LinearMatrixOp) = LinearMatrixOp(M -> -op.f(M), M -> -op.fadj(M))
function Base.:∘(A::LinearMatrixOp, B::LinearMatrixOp)
    return LinearMatrixOp(A.f ∘ B.f, B.fadj ∘ A.fadj)
end

# A symbolic zero: it disappears under addition and subtraction and absorbs anything that
# multiplies it from the left, so it needs no concrete size or element type.
struct Zero end
Base.zero(::Type{Any}) = Zero()

# Addition / subtraction with the zero on the left.
Base.:+(::Zero, ::Zero) = Zero()
Base.:+(::Zero, A) = A
Base.:-(::Zero, A) = -A

# Multiplication with the zero on the right (the Zero*Zero method avoids an ambiguity).
Base.:*(::Zero, ::Zero) = Zero()
Base.:*(X, ::Zero) = Zero()

* (generic function with 349 methods)

In [14]:
# Forward pass of a layered network with matrix weights.
# `params[i][1]` is the weight matrix and `params[i][2]` the bias of layer `i`.
# Returns the list of activations (starting with `input`) and the list of activation
# derivatives, one entry per layer.
function neural_net(params, input; h=h, h′=h′)
    acts = [input]
    slopes = []
    for layer in params
        pre = layer[1] * last(acts) .+ layer[2]   # W * x .+ b
        push!(acts, h.(pre))
        push!(slopes, h′.(pre, last(acts)))
    end
    return acts, slopes
end
# Wrap a value in a 1×1 matrix.
array(x) = fill(x, 1, 1)

array (generic function with 1 method)

In [15]:
# params: `W_i` and `b_i`s: x_{i+1} <- Wi*x_i .+ b_i
# W_i is n[i+1]×n[i] and b_i has length n[i+1].
params =[[init(n[i+1],n[i]),init(n[i+1])] for i=1:N]
# Random input (n[1]×B) and target (1×B).
x, y = init(n[1],B), init(1,B);

In [17]:
# Analytic gradient, assembled from linear operators on matrices.
X,δ = neural_net(params,x)
# Diagonal entry i is a 1×2 row of operators: left for the weight of layer i, right for its bias.
D = Diagonal([[HadMul(δ[i]) ∘ RightMul(X[i]) HadMul(δ[i])] for i=1:N])
# Identity on the diagonal; subdiagonal entries are the negated operators built from δ[i] and W_i, i = 2..N.
ImL = Bidiagonal([I for i in 1:N], -[HadMul(δ[i]) ∘ LeftMul(params[i][1]) for i=2:N] , :L)
# Loss derivative at the final activation, symbolic zeros for the other entries.
g = [ [Zero() for i=1:N-1]; [𝓁′(X[N+1],y)] ] 
# Solve with ImL', wrap every entry in a 1×1 matrix, then apply D'.
∇J = D'*array.(ImL'\g)

3-element Array{Array{Any,2},1}:
 [[-1.59067e-7 -2.09214e-7 … 4.62421e-8 -3.61611e-8; -6.0108e-7 -7.90725e-7 … 1.74694e-7 -1.36698e-7; 1.24805e-6 1.64242e-6 … -3.6276e-7 2.84086e-7; -1.56557e-7 -2.06013e-7 … 4.55584e-8 -3.55725e-8]; [5.61827e-6 5.65542e-6 … 5.59032e-6 5.65695e-6; 2.12416e-5 2.13802e-5 … 2.11266e-5 2.13824e-5; -4.41431e-5 -4.44135e-5 … -4.3878e-5 -4.44167e-5; 5.53313e-6 5.56989e-6 … 5.50453e-6 5.57091e-6]]
 [[0.00810725 0.00812535 0.00810091 0.008148; -0.0163362 -0.0163727 -0.0163235 -0.0164184; -0.00707229 -0.00708808 -0.00706676 -0.00710784]; [0.00115084 0.00115825 … 0.00114448 0.00115812; -0.00231897 -0.0023339 … -0.00230613 -0.00233362; -0.00100392 -0.00101038 … -0.000998366 -0.00101028]]                                                                                                     
 [[-7.12786 -7.13722 -7.77584]; [-1.04516 -1.05189 … -1.03938 -1.05178]]                                                                                                             

In [18]:
# ∇Jfd is the gradient computed with central finite differences
# Both start as zero-filled copies with the same nested shape as params.
∇Jfd = params*0
ϵ=params*0
# wb selects the weight matrix (1) or the bias (2) of layer i; j runs over its entries.
for i=1:length(params), wb=1:2
    for j=1:length(ϵ[i][wb])
        ϵ[i][wb][j] = 𝜀
        ∇Jfd[i][wb][j] =(𝓁(neural_net(params+ϵ,x)[1][N+1],y)-𝓁(neural_net(params-ϵ,x)[1][N+1],y))/2𝜀
        # Reset the perturbation before moving to the next entry.
        ϵ[i][wb][j] = .0
     end
end
∇Jfd;

In [19]:
# Finite-difference gradient for the weight matrix of the first layer.
∇Jfd[1][1]

4×5 Array{Float64,2}:
 -1.59068e-7  -2.09217e-7   6.68221e-8   4.62386e-8  -3.616e-8  
 -6.01079e-7  -7.90725e-7   2.52556e-7   1.74696e-7  -1.36697e-7
  1.24805e-6   1.64242e-6  -5.24329e-7  -3.62761e-7   2.84084e-7
 -1.56553e-7  -2.06015e-7   6.57674e-8   4.55613e-8  -3.55715e-8

In [20]:
# Analytic gradient for the weight matrix of the first layer, for comparison with ∇Jfd[1][1].
∇J[1][1]

4×5 Array{Float64,2}:
 -1.59067e-7  -2.09214e-7   6.68235e-8   4.62421e-8  -3.61611e-8
 -6.0108e-7   -7.90725e-7   2.52558e-7   1.74694e-7  -1.36698e-7
  1.24805e-6   1.64242e-6  -5.2433e-7   -3.6276e-7    2.84086e-7
 -1.56557e-7  -2.06013e-7   6.57683e-8   4.55584e-8  -3.55725e-8

### A Showcase: Densely Connected Matrix Network

In [21]:
# Forward pass of a densely connected network: layer `i` receives the network input and
# the outputs of all earlier layers. `params[i]` lists one factor per incoming activation
# followed by a final entry that is multiplied by `I` (the bias).
function neural_net(params, input; h=h, h′=h′)
    acts = [input]
    slopes = []
    for i in eachindex(params)
        # One product per incoming activation, plus the bias term.
        terms = params[i] .* [acts..., I]
        pre = broadcast(+, terms...)
        push!(acts, h.(pre))
        push!(slopes, h′.(pre, acts[i+1]))
    end
    return acts, slopes
end;
# Wrap a value in a 1×1 matrix.
array(x) = fill(x, 1, 1);

In [22]:
# Layer i gets one n[i+1]×n[j] weight matrix for each j = 1..i, followed by an n[i+1]×1 bias column.
params = [[j==i+1 ?  init(n[i+1],1) : init(n[i+1],n[j])  for j=1:i+1] for i=1:N]
# Random input (n[1]×B) and target (1×B).
x,y = init(n[1],B), init(1,B);

In [23]:
# Analytic gradient for the densely connected network.
X,δ = neural_net(params,x)
# Diagonal entry i is a row of operators: one per incoming activation X[1..i], then one for the bias.
D = Diagonal([[[(HadMul(δ[i]) ∘ RightMul(X[j]))' for j=1:i]' HadMul(δ[i])] for i=1:N])
# Unit lower triangular operator matrix; only the entries below the diagonal are assigned.
ImL = UnitLowerTriangular(Matrix{Any}(undef,N,N))
for i=2:N, j=1:i-1
    # params[i][j+1] is the factor layer i applies to X[j+1], the output of layer j.
    ImL[i,j] = -HadMul(δ[i]) ∘ LeftMul(params[i][j+1]) 
end
# Loss derivative at the final activation, symbolic zeros for the other entries.
g =[ [Zero() for i=1:N-1]; [𝓁′(X[N+1],y)] ] 
# Solve with ImL', wrap every entry in a 1×1 matrix, then apply D'.
∇J = D'*array.(ImL'\g)

3-element Array{Array{Any,2},1}:
 [[4.86838e-6 -0.00021486 … 6.18163e-5 -0.000185313; -1.37943e-6 6.11353e-5 … -1.75969e-5 5.27272e-5; -4.40213e-6 0.000195394 … -5.61808e-5 0.000168519; -5.65885e-6 0.000250325 … -7.20352e-5 0.000215918]; [0.00664594 0.00671516 … 0.00678023 0.00678137; -0.00189094 -0.00191047 … -0.00192941 -0.00192953; -0.00604304 -0.00610233 … -0.00616307 -0.00616435; -0.00774329 -0.00781828 … -0.00789594 -0.00790021]]                            
 [[-8.56756e-6 0.000377095 … -0.000108513 0.00032528; 3.48308e-6 -0.000154476 … 4.44189e-5 -0.000133239; -1.22994e-6 5.40883e-5 … -1.55688e-5 4.66486e-5]; [-0.0843554 -0.0828968 -0.0837584 -0.0824644; 0.0345309 0.0339338 0.0342865 0.0337568; -0.0120975 -0.0118883 -0.0120119 -0.0118263]; [-0.0116644 -0.0117983 … -0.0119111 -0.0119092; 0.00477761 0.00482704 … 0.00487509 0.00487497; -0.00167314 -0.00169188 … -0.00170797 -0.00170759]]
 [[-0.00075029 0.0331917 … -0.00955322 0.0286274]; [-7.42108 -7.29276 -7.36856 -7.25472]; [-7.337

In [24]:
# ∇Jfd is the gradient computed with central finite differences
# Both start as zero-filled copies with the same nested shape as params.
∇Jfd = params*0
ϵ=params*0
# i: layer, j: which parameter array of that layer, k: entry within that array.
for i=1:length(ϵ), j=1:length(ϵ[i]), k=1:length(ϵ[i][j])
        ϵ[i][j][k] = 𝜀
        ∇Jfd[i][j][k] =(𝓁(neural_net(params+ϵ,x)[1][N+1],y)-𝓁(neural_net(params-ϵ,x)[1][N+1],y))/2𝜀
        # Reset the perturbation before moving to the next entry.
        ϵ[i][j][k] = .0
end

In [25]:
# Finite-difference gradient for the first parameter array of the first layer.
∇Jfd[1][1]

4×5 Array{Float64,2}:
  4.86838e-6  -0.00021486   -0.000272776   6.18163e-5  -0.000185313
 -1.37943e-6   6.11353e-5    7.76077e-5   -1.75969e-5   5.27272e-5 
 -4.40213e-6   0.000195394   0.000247964  -5.61808e-5   0.000168519
 -5.65885e-6   0.000250325   0.000317611  -7.20352e-5   0.000215918

In [26]:
# Analytic gradient for the same parameter array, for comparison with ∇Jfd[1][1].
∇J[1][1]

4×5 Array{Float64,2}:
  4.86838e-6  -0.00021486   -0.000272776   6.18163e-5  -0.000185313
 -1.37943e-6   6.11353e-5    7.76077e-5   -1.75969e-5   5.27272e-5 
 -4.40213e-6   0.000195394   0.000247964  -5.61808e-5   0.000168519
 -5.65885e-6   0.000250325   0.000317611  -7.20352e-5   0.000215918

## MNIST MLP Example

In [28]:
# Data
using Knet
import Knet: Data
include(Knet.dir("data","mnist.jl"))
# dtrn and dtst = [ (x1,y1), (x2,y2), ... ]; the loops below iterate them as (x,y) pairs.
# Presumably each pair is one minibatch of inputs and integer labels — TODO confirm.
dtrn,dtst = mnistdata(xsize=(784,:));

#Layers
# Layer widths: 784 inputs, two hidden layers, 10 outputs.
n = [784,128,64,10]
N = length(n)-1
# Gaussian random array of the given size, scaled by 0.1.
init(sizes...) = 0.1randn(sizes...)

#Nonlinearity
# relu: positive inputs pass through, everything else becomes a zero of the same type
function h(x)
    return x > 0 ? x : zero(x)
end
# derivative of relu, decided from the output y = h(x); x only fixes the result type
function h′(x, y)
    if y > 0
        return one(x)
    else
        return zero(x)
    end
end

#Loss
# Negative log likelihood loss from Knet, averaged. x is a d×b matrix and a is an integer
# array holding the correct answers (the original note calls it "d-length"; presumably
# one answer per column of x — TODO confirm).
𝓁(x, a) = nll(x, a; average=true)

# Derivative of the loss with respect to x.
# Note!: this will be simplified if we can figure out how to integrate the derivative of
# getindex into our formulation.
function 𝓁′(x, a)
    # Array shaped like x with ones at the positions Knet reports for the answers in a.
    onehot = zero(x)
    onehot[Knet.findindices(x, a, dims=1)] .= 1
    return (softmax(x, dims=1) .- onehot) ./ length(a)
end

#Forward Function
# All layers but the last apply the nonlinearity `h`; the last layer is affine only, so
# its entry in the derivative list is an array of ones.
function neural_net(params, input; h=h, h′=h′)
    acts = [input]
    slopes = []
    for k in 1:length(params)-1
        pre = params[k][1] * acts[end] .+ params[k][2]
        push!(acts, h.(pre))
        push!(slopes, h′.(pre, acts[end]))
    end
    # Output layer: no nonlinearity.
    scores = params[end][1] * acts[end] .+ params[end][2]
    push!(acts, scores)
    push!(slopes, one.(scores))
    return acts, slopes
end

┌ Info: Recompiling stale cache file /Users/ekin/.julia/compiled/v1.1/Knet/f4vSz.ji for Knet [1902f260-5fb4-5aff-8c31-6271790ab950]
└ @ Base loading.jl:1184
ERROR: LoadError: InitError: /Users/ekin/.julia/packages/SpecialFunctions/fvheQ/deps/usr/lib/libopenspecfun.1.3.dylib cannot be opened, Please re-run Pkg.build("SpecialFunctions"), and restart Julia.
Stacktrace:
 [1] error(::String) at ./error.jl:33
 [2] check_deps() at /Users/ekin/.julia/packages/SpecialFunctions/fvheQ/deps/deps.jl:20
 [3] __init__() at /Users/ekin/.julia/packages/SpecialFunctions/fvheQ/src/SpecialFunctions.jl:12
 [4] _include_from_serialized(::String, ::Array{Any,1}) at ./loading.jl:633
 [5] _require_from_serialized(::String) at ./loading.jl:684
 [6] _require(::Base.PkgId) at ./loading.jl:967
 [7] require(::Base.PkgId) at ./loading.jl:858
 [8] require(::Module, ::Symbol) at ./loading.jl:853
 [9] include at ./boot.jl:326 [inlined]
 [10] include_relative(::Module, ::String) at ./loading.jl:1038
 [11] include(::Modu

ErrorException: Failed to precompile Knet [1902f260-5fb4-5aff-8c31-6271790ab950] to /Users/ekin/.julia/compiled/v1.1/Knet/f4vSz.ji.

In [27]:
# Random weights and zero biases for every layer.
params =[[init(n[i+1],n[i]),zeros(n[i+1])] for i=1:N] # model parameters
α = 0.5 # learning rate 
epochs=3# number of epochs to train the model 
@time for i=1:epochs # 1 epoch takes ~ 65 seconds on my macbook
    for (x,y) in dtrn
        # Forward pass, then the same operator-based gradient assembly as in the cells above.
        X,δ = neural_net(params,x;h=h, h′= h′)
        D = Diagonal([[HadMul(δ[i]) ∘ RightMul(X[i]) HadMul(δ[i])] for i=1:N])
        ImL = Bidiagonal([I for i in 1:N], -[HadMul(δ[i]) ∘ LeftMul(params[i][1]) for i=2:N] , :L)
        g = push!(Any[Zero() for i=1:N-1],𝓁′(X[N+1],y))
        ∇J = D'*array.(ImL'\g);
        # Gradient-descent step on every layer.
        for i =1:length(params)
            params[i][1] = params[i][1] - α*∇J[i][1]
            # The bias gradient is summed along its second dimension before the update.
            params[i][2] = params[i][2] - α*sum(∇J[i][2],dims=2)
        end
    end
end

UndefVarError: UndefVarError: dtrn not defined

In [None]:
# Count correct predictions (zeroone) and evaluated samples (total) over dtst.
zeroone=total=0
for (x,y) in dtst
    # Final activation of the network for this batch.
    yn        = neural_net(params,x;h=h, h′= h′)[1][end]
    # Row index of the largest entry in every column, flattened to a vector.
    answers   = vec(getindex.(argmax(yn,dims=1),1))
    global zeroone += sum(y .== answers)
    global total   += length(answers)
end
# Percentage of correct predictions.
accuracy = 100zeroone/total

## Neural Network with Orthogonal Parameters

In [38]:
# Layer widths, number of layers and number of input columns for this section.
n = [2,2,2,1]
N = length(n)-1
B = 7

# Activation function.
function h(x)
    return exp(-x)
end

# Derivative of `h` at `x`, written in terms of the already computed output `y = h(x)`.
function h′(x, y)
    return -y
end

# Loss: half the sum of squared differences between `x` and `y`.
function 𝓁(x, y)
    return sum(abs2, x - y) / 2
end

# Derivative of `𝓁` with respect to its first argument.
function 𝓁′(x, y)
    return x - y
end

# Gaussian random scalar (no arguments) or array of the given size, scaled by 0.01.
function init(sizes...)
    return 0.01 * randn(sizes...)
end

init (generic function with 1 method)

In [39]:
# 2×2 rotation matrix [cos θ  -sin θ; sin θ  cos θ], stored by its angle θ only.
struct OrthogonalTransform{T} <: AbstractMatrix{T}
    θ::T
end
# Dense 2×2 matrix for the rotation by angle θ (shared by the methods below).
_rotation(θ) = [cos(θ) -sin(θ); sin(θ) cos(θ)]

Base.Matrix{T}(R::OrthogonalTransform{T}) where T = _rotation(R.θ)
*(R::OrthogonalTransform, x::AbstractArray{T,2} where T) = _rotation(R.θ) * x
# The adjoint (transpose) of a rotation is the rotation by the opposite angle.
Base.adjoint(R::OrthogonalTransform) = OrthogonalTransform(-R.θ)
Base.size(R::OrthogonalTransform) = (2,2)
Base.getindex(R::OrthogonalTransform, inds...) = _rotation(R.θ)[inds...]
# Negating a rotation matrix flips the sign of every entry, which is the rotation by θ + π.
# (Rotating by -θ instead would give the transpose, not the negative.)
-(R::OrthogonalTransform) = OrthogonalTransform(R.θ + π)
# Operator used for an orthogonal weight Q: `f` is left-multiplication by Q and `fadj`
# maps X to (X - Q*X'*Q)/2. Only `fadj` is exercised so far (the gradient cells apply D').
Ort(Q::OrthogonalTransform) = LinearMatrixOp(X->Q*X, X->(X-Q*X'*Q)/2)

Ort (generic function with 1 method)

In [40]:
# Forward pass of a layered network: `params[i][1]` multiplies the previous activation
# from the left and `params[i][2]` is added as the bias.
# Returns the list of activations (starting with `input`) and the list of activation
# derivatives, one entry per layer.
function neural_net(params, input; h=h, h′=h′)
    acts = [input]
    slopes = []
    for k in eachindex(params)
        weight, bias = params[k][1], params[k][2]
        pre = weight * acts[k] .+ bias
        push!(acts, h.(pre))
        push!(slopes, h′.(pre, acts[k+1]))
    end
    return acts, slopes
end
# Wrap a value in a 1×1 matrix.
array(x) = fill(x, 1, 1)

array (generic function with 1 method)

In [41]:
# Example: a rotation by π divided by a randomly chosen element of [1,2,3,4,6].
OrthogonalTransform(π/rand([1,2,3,4,6])) 

2×2 OrthogonalTransform{Float64}:
 0.707107  -0.707107
 0.707107   0.707107

In [42]:
# params: `W_i` and `b_i`s: x_{i+1} <- Wi*x_i .+ b_i
# Every layer except the last uses a random rotation as W_i; the last uses a dense random matrix.
params =[[(i!=N ? OrthogonalTransform(π/rand([1,2,3,4,6])) : init(n[i+1],n[i])), init(n[i+1])] for i=1:N]
# Random input (n[1]×B) and target (1×B).
x, y = init(n[1],B), init(1,B);

In [43]:
# Analytic gradient for the network with orthogonal weights.
X,δ = neural_net(params,x)
# Same 1×2 operator rows as in the plain matrix network, except that the weight operator of
# every layer but the last is additionally composed with Ort(W_i).
D = Diagonal([[HadMul(δ[i]) ∘ RightMul(X[i]) ∘  (i!=N ? Ort(params[i][1]) : IdentMul())     HadMul(δ[i])] for i=1:N])
# Identity on the diagonal; subdiagonal entries are the negated operators built from δ[i] and W_i, i = 2..N.
ImL = Bidiagonal([I for i in 1:N], -[HadMul(δ[i]) ∘ LeftMul(params[i][1]) for i=2:N] , :L)
# Loss derivative at the final activation, symbolic zeros for the other entries.
g = push!(Any[Zero() for i=1:N-1],𝓁′(X[N+1],y))
# Solve with ImL', wrap every entry in a 1×1 matrix, then apply D'.
∇J = D'*array.(ImL'\g)

3-element Array{Array{Any,2},1}:
 [[-0.000178181 -6.77626e-21; 1.35525e-20 -0.000178181]; [0.00132289 0.00125169 … 0.00140132 0.00134872; -0.00482635 -0.00461089 … -0.00510602 -0.00491336]]
 [[-0.0151478 -0.0151478; 0.0151478 -0.0151478]; [-0.0043048 -0.00413347 … -0.00451705 -0.00438107; 0.00243219 0.00237051 … 0.00249019 0.00247303]]         
 [[-7.05508 -1.68532]; [-0.987071 -0.958283 … -1.0078 -1.00453]]                                                                                            

In [44]:
# Test: compare the first-order prediction of the loss change with the actual change
# when the angle of the first layer's rotation is increased by 𝜀.
# Change of the first weight matrix caused by the angle perturbation.
diff = Matrix(OrthogonalTransform(params[1][1].θ + 𝜀)) - Matrix(params[1][1])
# Predicted change: inner product of that change with the analytic gradient.
@show Δ = sum(diff.* ∇J[1][1])
orig    = 𝓁(neural_net(params,x)[1][N+1],y)
params[1][1] = OrthogonalTransform(params[1][1].θ + 𝜀)
updated = 𝓁(neural_net(params,x)[1][N+1],y)
# Undo the perturbation so params is left as it was.
params[1][1] = OrthogonalTransform(params[1][1].θ - 𝜀)
# Actual change of the loss.
@show Δ = updated-orig;

Δ = sum(diff .* (∇J[1])[1]) = 3.563611434665406e-8
Δ = updated - orig = 3.563559003438854e-8


In [69]:
# Matrix wrapper for a Householder reflector, stored densely in `mat`.
struct HouseHolderTransform{T} <: AbstractMatrix{T}
    mat::Matrix{T}
end
# Build the reflector I - 2*v*v' from a vector v. The result is orthogonal only when v has
# unit norm; no normalisation is done here.
HouseHolderTransform(v::Vector{T}) where T = HouseHolderTransform(I-2*v*v')
# Returns the stored matrix itself (not a copy).
Base.Matrix{T}(R::HouseHolderTransform{T}) where T =   R.mat
*(R::HouseHolderTransform, x::AbstractArray{T,2} where T) =  R.mat* x
# Conjugate transpose of the stored matrix, materialised as a dense matrix. A plain
# `permutedims` would skip the conjugation and be wrong for complex element types.
Base.adjoint(R::HouseHolderTransform) = HouseHolderTransform(Matrix(R.mat'))
Base.size(R::HouseHolderTransform) = size(R.mat)
Base.getindex(R::HouseHolderTransform, inds...) = R.mat[inds...]
# Negation returns a plain dense matrix.
-(R::HouseHolderTransform) = -R.mat
# Operator used for an orthogonal weight Q: `f` is left-multiplication by Q and `fadj`
# maps X to (X - Q*X'*Q)/2.
Ort(Q::AbstractMatrix) = LinearMatrixOp(X->Q*X, X->(X-Q*X'*Q)/2)

Ort (generic function with 2 methods)

In [70]:
# Layer widths, number of layers and number of input columns for the Householder example.
n = [4,4,4,1]
N = length(n)-1
B = 7

7

In [71]:
# params: `W_i` and `b_i`s: x_{i+1} <- Wi*x_i .+ b_i
vs = [init(n[i]) for i=1:N-1] # householder parameters
# Normalise every vector to unit length before building the reflectors.
vs = map(v-> (v ./ norm(v)), vs)
# Every layer except the last uses a Householder reflector as W_i; the last uses a dense random matrix.
params =[[(i!=N ? HouseHolderTransform(vs[i]) : init(n[i+1],n[i])), init(n[i+1])] for i=1:N]
# Random input (n[1]×B) and target (1×B).
x, y = init(n[1],B), init(1,B);

In [72]:
# Analytic gradient for the network with Householder weights.
X,δ = neural_net(params,x)
# The weight operator of every layer but the last is additionally composed with Ort(W_i).
D = Diagonal([[HadMul(δ[i]) ∘ RightMul(X[i]) ∘  (i!=N ? Ort(params[i][1]) : IdentMul())     HadMul(δ[i])] for i=1:N])
# Identity on the diagonal; subdiagonal entries are the negated operators built from δ[i] and W_i, i = 2..N.
ImL = Bidiagonal([I for i in 1:N], -[HadMul(δ[i]) ∘ LeftMul(params[i][1]) for i=2:N] , :L)
# Loss derivative at the final activation, symbolic zeros for the other entries.
g = push!(Any[Zero() for i=1:N-1],𝓁′(X[N+1],y))
# Solve with ImL', wrap every entry in a 1×1 matrix, then apply D'.
∇J = D'*array.(ImL'\g)

3-element Array{Array{Any,2},1}:
 [[-2.06631e-5 -1.56317e-6 -3.58314e-5 -6.15716e-5; 6.10719e-7 6.80924e-6 3.59571e-6 2.74547e-5; -1.61916e-5 1.08716e-5 -2.37478e-5 -2.05755e-6; -3.28906e-5 3.33844e-5 -4.448e-5 3.76017e-5]; [-0.00364452 -0.00362456 … -0.00356906 -0.00360129; 0.000655312 0.000630484 … 0.000633767 0.000627804; -0.00185489 -0.00184147 … -0.0018252 -0.00183877; -0.00291976 -0.00288985 … -0.00285274 -0.00289904]]
 [[-0.0138175 0.00587693 -0.00567307 -0.0107641; 0.00595036 0.0130423 0.00890904 0.0069044; -0.00198221 0.0111853 0.00348026 -3.75971e-5; -0.00503015 0.0104691 0.00139348 -0.002705]; [0.000836869 0.000829042 … 0.000814388 0.000834085; 0.00350536 0.0034517 … 0.00337857 0.00342527; 0.00267704 0.00264306 … 0.00263368 0.0026432; 0.00235646 0.00237045 … 0.00230256 0.00230227]]                     
 [[-1.80904 -4.15316 -2.81943 -2.42074]; [-0.991478 -0.984398 … -0.969487 -0.981775]]                                                                                          

In [73]:
# Analytic gradient for the weight matrix of the first layer.
∇J[1][1]

4×4 Array{Float64,2}:
 -2.06631e-5  -1.56317e-6  -3.58314e-5  -6.15716e-5
  6.10719e-7   6.80924e-6   3.59571e-6   2.74547e-5
 -1.61916e-5   1.08716e-5  -2.37478e-5  -2.05755e-6
 -3.28906e-5   3.33844e-5  -4.448e-5     3.76017e-5

In [74]:
# Test: compare the first-order prediction of the loss change with the actual change
# when the first Householder vector is perturbed.
# Shift every component of vs[1] by 𝜀 and renormalise to unit length.
vs1_changed = vs[1] .+ 𝜀
vs1_changed = vs1_changed ./ norm(vs1_changed)
# Disabled alternative that perturbs only the first component:
#vs1_changed[1] = vs[1][1] + 𝜀
# Change of the first weight matrix caused by the perturbation.
diff = Matrix(HouseHolderTransform(vs1_changed)) - Matrix(params[1][1])
# Predicted change: inner product of that change with the analytic gradient.
@show Δ = sum(diff.* ∇J[1][1])
orig    = 𝓁(neural_net(params,x)[1][N+1],y)
params[1][1] = HouseHolderTransform(vs1_changed)
updated = 𝓁(neural_net(params,x)[1][N+1],y)
# Restore the original reflector.
params[1][1] = HouseHolderTransform(vs[1])
# Actual change of the loss.
@show Δ = updated-orig;

Δ = sum(diff .* (∇J[1])[1]) = -4.686944645157155e-8
Δ = updated - orig = -4.687085031207516e-8


In [75]:
# Show the perturbed, renormalised Householder vector used in the test above.
vs1_changed 

4-element Array{Float64,1}:
 -0.7455803491491308
  0.4113186460752907
 -0.4465610952799094
  0.2747910161119856