
Commit

Merge 01f3132 into b90c998
DavidBegert committed Feb 19, 2018
2 parents b90c998 + 01f3132 commit 7a0891b
Showing 16 changed files with 294 additions and 201 deletions.
184 changes: 92 additions & 92 deletions examples/EResNN_CIFAR10.jl
@@ -1,92 +1,92 @@
using MAT, Meganet
# BLAS.set_num_threads(1)
n = 512;
Y_train,C_train,Y_test,C_test = getCIFAR10(n,Pkg.dir("Meganet")*"/data/CIFAR10/");

# using PyPlot
# y = Y_train[:,50]; y = y - minimum(y); y = y./maximum(y);
# y = reshape(y,32,32,3);
# y[:,:,1] = y[:,:,1]';y[:,:,2] = y[:,:,2]';y[:,:,3] = y[:,:,3]';
# figure(); imshow(y)

miniBatchSize = 64;
nImg = [32; 32]
cin = 3
nc = [16;32;64;64]
nt = 2*[1;1;1]
h = [1.;1.;1.]

TYPE = Float32;

getConvKernel = (nImg,sK) -> getConvGEMMKernel(TYPE,nImg,sK);
# getConvKernel = (nImg,sK) -> getConvFFTKernel(TYPE,nImg,sK);
#getConvKernel = (nImg,sK) -> getSparseConvKernel2D(TYPE,nImg,sK);

# opening layer
K1 = getConvKernel(nImg,[3,3,cin,nc[1]]);

nL = getBatchNormLayer(TYPE,[prod(nImg);nc[1]],isTrainable=true);
blocks = [getSingleLayer(TYPE,K1,nL)]

for k=1:length(nt)
# ResNN layers
K2 = getConvKernel(nImg,[3,3,nc[k],nc[k]])
nL = getBatchNormLayer(TYPE,[prod(nImg);nc[k]],isTrainable=true)
L2 = getDoubleSymLayer(TYPE,K2,nL)
RN = getResNN(TYPE,L2,nt[k],h[k])

if k<length(nt)
RN.outTimes *=0
end
blocks = [blocks;RN]
# change channels
Kc = getConvKernel(nImg,[1,1,nc[k],nc[k+1]]);

nL = getBatchNormLayer(TYPE,[prod(nImg);nc[k+1]],isTrainable=true)
blocks = [blocks; getSingleLayer(TYPE,Kc,nL)]

if k<length(nt)
Kp = getAverageMatrix(TYPE,nImg,nc[k+1])
blocks = [blocks; getConnector(TYPE,Kp)]
# nImg ./=2
nImg = div.(nImg,2)
end
end

# Connector block
B = kron(speye(TYPE,nc[end]),ones(TYPE, prod(nImg)))/prod(nImg);
blocks = [blocks; getConnector(TYPE,B')];
blocks[end].outTimes=1

net = getNN(blocks)
theta = initTheta(net);

display(net)

# @time Zj = apply(net,theta,Y_train[:,1:2],true)
# @time Zj = apply(net,theta,Y_train[:,1:miniBatchSize],true)


# regularizers
pRegTh = getTikhonovReg(TYPE;alpha=4e-4)
pRegW = getTikhonovReg(TYPE;alpha=4e-4)
pLoss = getSoftMaxLoss(TYPE);
objFun = dnnObjFctn(net,pLoss,pRegTh,pRegW)
opt = getSGDsolver(TYPE,learningRate=1e-2,maxEpochs=1,miniBatch=miniBatchSize,out=true, nesterov=true)

W = 0.1*vec(randn(TYPE,10,nFeatOut(net)+1));
W = min.(W,.2);
W = max.(W,-.2);
W = convert(Array{TYPE},W);

solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test)
@time solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test)

# Profile.clear()
# Profile.clear_malloc_data()
# Profile.init(n = 10^7, delay = 0.01)
# @profile solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test)

# open("/tmp/EREsNN_CIFAR10.txt", "w") do s
# Profile.print(IOContext(s, :displaysize => (24, 500)))
# end
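As a quick sanity check on the dimensions this script produces, here is some illustrative size bookkeeping derived from the parameters above; the assumption that nFeatOut(net) equals nc[end] here, and that the +1 column of W is a bias, are inferred from how the final connector and W are built, not stated by the library:

nImg0 = [32, 32]                      # CIFAR-10 resolution
nImgPooled = div.(div.(nImg0, 2), 2)  # pooled twice inside the loop -> [8, 8]
ncEnd = 64                            # nc[end]
# The final connector B' averages over the prod(nImgPooled) = 64 pixels of each of
# the 64 channels, so the network emits ncEnd features per image and the softmax
# weights W are 10 x (ncEnd + 1), the extra column presumably being a bias.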
12 changes: 7 additions & 5 deletions src/activations/identityActivation.jl
@@ -17,13 +17,15 @@ export identityActivation
A - activation
dA - derivatives
"""
-function identityActivation(Y::Array{T},doDerivative::Bool=false) where {T}
+function identityActivation(Y::Array{T},dA,doDerivative::Bool=false) where {T}

    if doDerivative
-        dA = ones(T,Y);
-    else
-        dA = zeros(T,0)
+        if isempty(dA)
+            dA = ones(T,Y);
+        else
+            dA .= ones(T,Y);
+        end
    end

-    return A,dA
+    return A,dA  # Deprecated? A isn't even declared
end
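The new signature lets callers hand in a preallocated dA buffer so repeated calls avoid reallocating it. Since the committed body never defines A (as its own comment notes), here is a minimal corrected sketch of what the identity activation presumably intends; the name and body are illustrative assumptions, not the library's code:

# Hypothetical sketch, not Meganet's implementation: identity activation with an
# optional reusable derivative buffer.
function identityActivation_sketch(Y::Array{T}, dA=zeros(T,0), doDerivative::Bool=false) where {T}
    A = Y                          # identity: the output is the input
    if doDerivative
        if isempty(dA)
            dA = ones(T, size(Y))  # allocate on the first call
        else
            dA .= one(T)           # refill a caller-supplied buffer in place
        end
    end
    return A, dA
end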
35 changes: 18 additions & 17 deletions src/activations/reluActivation.jl
@@ -1,4 +1,4 @@
-export reluActivation
+export reluActivation, reluActivation!

"""
relu activation A = relu(Y)
@@ -19,26 +19,27 @@ export reluActivation
"""
function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T}

    A = max.(Y,zero(T));

    if doDerivative
        dA = sign.(A);
    else
        dA = zeros(T,0)
    end

    return A,dA
end


-function reluActivation!(A::Array{T},dA::Array{T} = zeros(T,size(A)),doDerivative::Bool=false) where {T}
-    A .= max.(A,zero(T));
-    if doDerivative
-        dA .= sign.(A);
-    else
-        dA = zeros(T,0)
-    end
-    return A,dA
+function reluActivation!(A::Array{T},dA,doDerivative::Bool=false) where {T}
+    A .= max.(A,zero(T));
+    if doDerivative
+        if isempty(dA)
+            dA = sign.(A);
+        else
+            dA .= sign.(A);
+        end
+    end
+    return A,dA
end
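A short usage sketch of the in-place variant, assuming Meganet is loaded and exports reluActivation! as the diff above indicates; the surrounding setup is illustrative:

# A holds pre-activations, dA is a reusable derivative buffer.
A  = randn(Float32, 16, 8)
dA = similar(A)
A, dA = reluActivation!(A, dA, true)   # overwrites A with max.(A, 0) and dA with sign.(A)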
16 changes: 14 additions & 2 deletions src/activations/tanhActivation.jl
@@ -1,4 +1,4 @@
-export tanhActivation
+export tanhActivation, tanhActivation!

"""
hyperbolic tan activation A = tanh(Y)
@@ -19,11 +19,23 @@ export tanhActivation
"""
function tanhActivation(Y::Array{T,2},doDerivative::Bool=false) where {T <: Number}


    A = tanh.(Y)
    dA = zeros(A)
    if doDerivative
        dA .= one(T) .- A.^2
    end
    return A, dA
end
+
+function tanhActivation!(A::Array{T,2},dA=[],doDerivative::Bool=false) where {T <: Number}
+
+    A .= tanh.(A)
+    if doDerivative
+        if isempty(dA)
+            dA = one(T) .- A.^2
+        else
+            dA .= one(T) .- A.^2
+        end
+    end
+    return A, dA
+end
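Usage of the new in-place variant follows the same pattern, again assuming Meganet exports tanhActivation!: pass [] to let the function allocate the derivative, or a preallocated matrix to have it refilled. An illustrative call:

A  = randn(Float32, 4, 3)
dA = zeros(Float32, 4, 3)
A, dA = tanhActivation!(A, dA, true)   # A := tanh.(A), dA := 1 .- A.^2, both in place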
32 changes: 25 additions & 7 deletions src/integrators/NN.jl
@@ -63,26 +63,44 @@ end


# --------- forward problem ----------
-function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) where {T<:Number}
-    Y::Array{T,2} = copy(Y0)
+function apply(this::NN{T},theta::Array{T},Y::Array{T,2},tmp,doDerivative=true) where {T<:Number}

    nex = div(length(Y),nFeatIn(this))::Int
    nt = length(this.layers)

-    tmp = Array{Any}(nt+1,2)
+    if isempty(tmp) # TODO: will have to make sure the size of Y doesn't change
+        tmp = Array{Any}(nt+1,2)
+    end

    if doDerivative
-        tmp[1,1] = Y0
+        if isassigned(tmp,1,1)
+            # tmp[1,1] .= Y does not work here; need to hack like below :)
+            tmp11 = tmp[1,1]
+            tmp11 .= Y
+        else
+            tmp[1,1] = copy(Y)
+        end
    end

    Ydata::Array{T,2} = zeros(T,0,nex)
    cnt = 0
    for i=1:nt
        ni = nTheta(this.layers[i])::Int
-        Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,doDerivative)
+        if !isassigned(tmp,i,2)
+            tmp[i,2] = Array{Any}(0)
+        end
+        Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,tmp[i,2],doDerivative)

        if this.outTimes[i]==1
            Ydata = [Ydata; this.Q*Yd]
        end
        if doDerivative
-            tmp[i+1,1] = copy(Y)
+            if isassigned(tmp,i+1,1)
+                tmp1 = tmp[i+1,1]
+                tmp1 .= Y
+            else
+                tmp[i+1,1] = copy(Y)
+            end
        end
        cnt = cnt + ni
    end
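The net effect is that apply now threads a caller-owned cache through the forward pass: the first call allocates the per-layer tmp storage, and later calls with a batch of the same size refill those arrays in place. A hedged sketch of the calling pattern, assuming the return values mirror ResNN.apply below (Julia 0.6 syntax, as in the diff):

tmp = Array{Any}(0)                                              # empty cache on the first call
Yd, Y, tmp = apply(net, theta, Y_train[:, 1:64], tmp, true)
Yd, Y, tmp = apply(net, theta, Y_train[:, 65:128], tmp, true)    # reuses the cached buffers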
37 changes: 28 additions & 9 deletions src/integrators/ResNN.jl
@@ -48,27 +48,46 @@ function initTheta(this::ResNN{T}) where {T<:Number}
end

# ------- apply forward problems -----------
-function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},doDerivative=true) where {T<:Number}
+function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},tmp,doDerivative=true) where {T<:Number}
+    if isempty(tmp)
+        tmp = Array{Any}(this.nt+1,2)
+    end

-    nex = div(length(Y0),nFeatIn(this))
-    Y = reshape(Y0,:,nex)
-    tmp = Array{Any}(this.nt+1,2)
    if doDerivative
-        tmp[1,1] = Y0
+        if isassigned(tmp,1,1)
+            tmp11 = tmp[1,1]
+            tmp11 .= Y0
+        else
+            tmp[1,1] = copy(Y0)
+        end
    end

+    nex = div(length(Y0),nFeatIn(this))
+    Y = reshape(Y0,:,nex)

    theta = reshape(theta_in,:,this.nt)

    Ydata::Array{T,2} = zeros(T,0,nex)
    for i=1:this.nt
-        Z,dummy,tmp[i,2] = apply(this.layer,theta[:,i],Y,doDerivative)
-        Y += this.h * Z
-        if doDerivative
-            tmp[i+1,1] = Y
+        if !isassigned(tmp,i,2)
+            tmp[i,2] = Array{Any}(0)
+        end
+        Z,dummy,tmp[i,2] = apply(this.layer,theta[:,i],Y,tmp[i,2],doDerivative)
+        Y += this.h * Z
        if this.outTimes[i]==1
            Ydata = [Ydata;this.Q*Y]
        end

+        if doDerivative
+            if isassigned(tmp,i+1,1)
+                tmp1 = tmp[i+1,1]
+                tmp1 .= Y
+            else
+                tmp[i+1,1] = copy(Y)
+            end
+        end

    end
    return Ydata,Y,tmp
end
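For context, the loop above is a forward-Euler discretization of a residual network: each step evaluates Z = layer(theta_i, Y) and updates Y to Y + h*Z. A stand-alone sketch of that update, with hypothetical names and none of the caching above:

# Illustrative only: bare residual (forward Euler) stepping with a user-supplied layer function.
function resnet_forward(Y::Matrix{Float32}, thetas::Vector, h::Float32, layer::Function)
    for th in thetas
        Z = layer(th, Y)     # one nonlinear layer evaluation
        Y = Y .+ h .* Z      # explicit Euler step of the residual ODE
    end
    return Y
end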
