Commit

Merge 49747b0 into 467c053
jgranek committed Feb 22, 2018
2 parents 467c053 + 49747b0 commit ff7b9ff
Showing 28 changed files with 862 additions and 664 deletions.
3 changes: 2 additions & 1 deletion REQUIRE
@@ -3,4 +3,5 @@ LinearOperators
MAT
PkgDev
JLD
BenchmarkTools
BenchmarkTools
DistributedArrays
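
Note: DistributedArrays is the dependency added by this merge. The diff does not show how Meganet uses it, so the following is only a generic, self-contained sketch of what the package provides (worker count and array sizes are arbitrary), in the Julia 0.6 syntax the rest of the repo targets.

    addprocs(2)                          # spin up two worker processes
    @everywhere using DistributedArrays
    dY = distribute(randn(4, 8))         # split a matrix block-wise across the workers
    println(size(dY), " stored on workers ", procs(dY))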
2 changes: 1 addition & 1 deletion benchmarks/CIFAR10/cifar10_512_64.jl
@@ -67,7 +67,7 @@ pLoss = getSoftMaxLoss(TYPE);
objFun = dnnObjFctn(net,pLoss,pRegTh,pRegW)
opt = getSGDsolver(TYPE,learningRate=1e-2,maxEpochs=1,miniBatch=miniBatchSize,out=true)

W = 0.1*vec(randn(TYPE,10,nFeatOut(net)+1));
W = 0.01*vec(randn(TYPE,10,nFeatOut(net)+1));
W = min.(W,.2);
W = max.(W,-.2);
W = convert(Array{TYPE},W);
5 changes: 4 additions & 1 deletion benchmarks/micro/bm_batchnorm.jl
@@ -43,7 +43,8 @@ end

function benchmarkApply(L, theta, Y, history)
funcName = "apply"
Yout2,Yout2,tmp2 = apply(L,theta,Y,true)
Q = copy(Y)
Yout,Yout,tmp = apply(L,theta,Q,true)

@code_warntype apply(L,theta,Y,true)

@@ -52,4 +53,6 @@ function benchmarkApply(L, theta, Y, history)
Meganet.updatehistory!(history, trial, "hist")
hist = JLD.load(history, "hist")
judge(hist)

return trial
end
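
Note: updatehistory! and judge(hist) are Meganet helpers; presumably they wrap the standard BenchmarkTools regression check sketched below (the benchmarked expression is a stand-in, not the batchnorm layer itself).

    using BenchmarkTools
    old_trial = @benchmark sum(rand(1000))                   # e.g. a trial loaded from the JLD history
    new_trial = @benchmark sum(rand(1000))                   # the freshly measured trial
    verdict = judge(minimum(new_trial), minimum(old_trial))  # :improvement, :invariant or :regression
    println(verdict)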
Binary file modified benchmarks/micro/bm_batchnorm.jld
Binary file not shown.
6 changes: 3 additions & 3 deletions examples/EResNN_CIFAR10.jl
@@ -1,6 +1,6 @@
using MAT, Meganet

n = 256;
# BLAS.set_num_threads(1)
n = 512;
Y_train,C_train,Y_test,C_test = getCIFAR10(n,Pkg.dir("Meganet")*"/data/CIFAR10/");

# using PyPlot
@@ -72,7 +72,7 @@ pRegTh = getTikhonovReg(TYPE;alpha=4e-4)
pRegW = getTikhonovReg(TYPE;alpha=4e-4)
pLoss = getSoftMaxLoss(TYPE);
objFun = dnnObjFctn(net,pLoss,pRegTh,pRegW)
opt = getSGDsolver(TYPE,learningRate=1e-2,maxEpochs=1,miniBatch=miniBatchSize,out=true)
opt = getSGDsolver(TYPE,learningRate=1e-2,maxEpochs=1,miniBatch=miniBatchSize,out=true, nesterov=true)

W = 0.1*vec(randn(TYPE,10,nFeatOut(net)+1));
W = min.(W,.2);
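
Note: besides n = 512 and the commented BLAS line, the functional change in this example is nesterov=true on the SGD solver. For reference, a sketch of the classical Nesterov accelerated gradient step this flag presumably enables; g, lr and mu are placeholder names, not Meganet API.

    g(theta) = 2 .* theta                     # toy gradient (of sum(abs2, theta))
    lr, mu = 1e-2, 0.9
    theta, v = randn(5), zeros(5)
    v = mu .* v .- lr .* g(theta .+ mu .* v)  # gradient taken at the look-ahead point
    theta = theta .+ v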
1 change: 1 addition & 0 deletions src/Meganet.jl
@@ -17,6 +17,7 @@ include("kernelTypes/sparseKernel.jl")
include("kernelTypes/convFFTKernel.jl");
include("kernelTypes/convGEMMKernel.jl");
include("kernelTypes/convCircKernel.jl");
# include("kernelTypes/convDiagKernel.jl");



12 changes: 7 additions & 5 deletions src/activations/identityActivation.jl
@@ -17,13 +17,15 @@ export identityActivation
A - activation
dA - derivatives
"""
function identityActivation(Y::Array{T},doDerivative::Bool=false) where {T}
function identityActivation(Y::Array{T},dA,doDerivative::Bool=false) where {T}

if doDerivative
dA = ones(T,Y);
else
dA = zeros(T,0)
if isempty(dA)
dA = ones(T,Y);
else
dA .= ones(T,Y);
end
end

return A,dA
return A,dA # Deprecated? A isn't even declared here.
end
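
Note: as the comment above points out, A is never assigned in this function, so it cannot run as written. A minimal sketch of what the intended behavior presumably is, following the in-place buffer convention of reluActivation! below; the name and the fix are assumptions, not the committed code.

    function identityActivationFixed(Y::Array{T}, dA, doDerivative::Bool=false) where {T}
        if doDerivative
            if isempty(dA)
                dA = ones(T, size(Y))        # allocate the derivative buffer once
            else
                dA .= one(T)                 # reuse the caller's buffer in place
            end
        end
        return Y, dA                         # identity activation: the output is Y itself
    end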
76 changes: 45 additions & 31 deletions src/activations/reluActivation.jl
@@ -1,31 +1,45 @@
export reluActivation

"""
relu activation A = relu(Y)
Input:
Y - array of features
Optional Input:
doDerivative - flag for computing derivative, set via varargin
Ex: reluActivation(Y,true);
Output:
A - activation
dA - derivatives
"""
function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T}

Y = max.(Y,0);

if doDerivative
dA = sign(Y);
else
dA = zeros(T,0)
end

return A,dA
end
export reluActivation, reluActivation!

"""
relu activation A = relu(Y)
Input:
Y - array of features
Optional Input:
doDerivative - flag for computing derivative, set via varargin
Ex: reluActivation(Y,true);
Output:
A - activation
dA - derivatives
"""
function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T}

A = max.(Y,zero(T));

if doDerivative
dA = sign.(A);
else
dA = zeros(T,0)
end

return A,dA
end



function reluActivation!(A::Array{T},dA,doDerivative::Bool=false) where {T}
A .= max.(A,zero(T));
if doDerivative
if isempty(dA)
dA = sign.(A);
else
dA .= sign.(A);
end
end
return A,dA
end
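
Note: a usage sketch (assumed, not from the diff) contrasting the two entry points: reluActivation leaves Y untouched and allocates fresh outputs, while reluActivation! overwrites its first argument and can fill a caller-supplied dA buffer.

    Y = randn(Float32, 4, 2)
    A, dA = reluActivation(Y, true)               # Y unchanged; A and dA newly allocated
    B = copy(Y)
    B, dB = reluActivation!(B, similar(B), true)  # B now holds max.(Y, 0); dB written in place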
16 changes: 14 additions & 2 deletions src/activations/tanhActivation.jl
@@ -1,4 +1,4 @@
export tanhActivation
export tanhActivation, tanhActivation!

"""
hyperbolic tan activation A = tanh(Y)
@@ -19,11 +19,23 @@ export tanhActivation
"""
function tanhActivation(Y::Array{T,2},doDerivative::Bool=false) where {T <: Number}


A = tanh.(Y)
dA = zeros(A)
if doDerivative
dA .= one(T) .- A.^2
end
return A, dA
end

function tanhActivation!(A::Array{T,2},dA=[],doDerivative::Bool=false) where {T <: Number}

A .= tanh.(A)
if doDerivative
if isempty(dA)
dA = one(T) .- A.^2
else
dA .= one(T) .- A.^2
end
end
return A, dA
end
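
Note: in tanhActivation! (and the other in-place variants), the branch dA = one(T) .- A.^2 rebinds a new array locally, so the caller only gets the buffer back through the return value. A calling-pattern sketch (assumed):

    A = randn(Float32, 5, 3)
    A, dA = tanhActivation!(A, [], true)   # first call: dA is allocated inside and returned
    A, dA = tanhActivation!(A, dA, true)   # later calls: dA is refilled in place, no allocation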
33 changes: 25 additions & 8 deletions src/integrators/NN.jl
@@ -1,5 +1,6 @@
export NN,getNN,initTheta

# using TimerOutputs
# to = TimerOutput()
"""
NN Neural Network block
@@ -62,31 +63,47 @@ end


# --------- forward problem ----------
function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) where {T<:Number}
Y::Array{T,2} = copy(Y0)
function apply(this::NN{T},theta::Array{T},Y::Array{T,2},tmp,doDerivative=true) where {T<:Number}

nex = div(length(Y),nFeatIn(this))::Int
nt = length(this.layers)

tmp = Array{Any}(nt+1,2)
if isempty(tmp) #TODO Will have to make sure size of Y doesn't change
tmp = Array{Any}(nt+1,2)
end

if doDerivative
tmp[1,1] = Y0
if isassigned(tmp,1,1)
#tmp[1,1] .= Y This does not work, need to hack like below :)
tmp11 = tmp[1,1]
tmp11 .= Y
else
tmp[1,1] = copy(Y)
end
end

Ydata::Array{T,2} = zeros(T,0,nex)
cnt = 0
for i=1:nt
ni = nTheta(this.layers[i])::Int
if !isassigned(tmp,i,2)
tmp[i,2] = Array{Any}(0)
end
Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,tmp[i,2],doDerivative)

Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,doDerivative)
if this.outTimes[i]==1
Ydata = [Ydata; this.Q*Yd]
end
if doDerivative
tmp[i+1,1] = copy(Y)
if isassigned(tmp,i+1,1)
tmp1 = tmp[i+1,1]
tmp1 .= Y
else
tmp[i+1,1] = copy(Y)
end
end
cnt = cnt + ni
end

return Ydata,Y,tmp
end

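
Note: the refactor threads a caller-owned tmp through apply so repeated forward passes reuse the per-layer buffers instead of reallocating them; the tmp11 = tmp[1,1]; tmp11 .= Y lines work around broadcasting into an Any-typed cell. A standalone sketch of the same pattern, independent of Meganet (Julia 0.6 syntax):

    function forward!(Y::Matrix{Float64}, tmp)
        if isempty(tmp)
            tmp = Array{Any}(2)        # allocate the slots on the first call
        end
        if isassigned(tmp, 1)
            t1 = tmp[1]                # grab a typed binding, then overwrite in place
            t1 .= Y
        else
            tmp[1] = copy(Y)           # first call: cache a copy
        end
        return Y, tmp
    end

    Y = randn(3, 4)
    Y, tmp = forward!(Y, [])           # allocates tmp and the cached copy
    Y, tmp = forward!(Y, tmp)          # reuses both on subsequent calls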
37 changes: 28 additions & 9 deletions src/integrators/ResNN.jl
@@ -48,27 +48,46 @@ function initTheta(this::ResNN{T}) where {T<:Number}
end

# ------- apply forward problems -----------
function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},doDerivative=true) where {T<:Number}
function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},tmp,doDerivative=true) where {T<:Number}
if isempty(tmp)
tmp = Array{Any}(this.nt+1,2)
end

nex = div(length(Y0),nFeatIn(this))
Y = reshape(Y0,:,nex)
tmp = Array{Any}(this.nt+1,2)
if doDerivative
tmp[1,1] = Y0
if isassigned(tmp,1,1)
tmp11 = tmp[1,1]
tmp11 .= Y0
else
tmp[1,1] = copy(Y0)
end
end

nex = div(length(Y0),nFeatIn(this))
Y = reshape(Y0,:,nex)


theta = reshape(theta_in,:,this.nt)

Ydata::Array{T,2} = zeros(T,0,nex)
for i=1:this.nt
Z,dummy,tmp[i,2] = apply(this.layer,theta[:,i],Y,doDerivative)
Y += this.h * Z
if doDerivative
tmp[i+1,1] = Y
if !isassigned(tmp,i,2)
tmp[i,2] = Array{Any}(0)
end
Z,dummy,tmp[i,2] = apply(this.layer,theta[:,i],Y,tmp[i,2],doDerivative)
Y += this.h * Z
if this.outTimes[i]==1
Ydata = [Ydata;this.Q*Y]
end

if doDerivative
if isassigned(tmp,i+1,1)
tmp1 = tmp[i+1,1]
tmp1 .= Y
else
tmp[i+1,1] = copy(Y)
end
end

end
return Ydata,Y,tmp
end
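
Note: the loop body above is a forward Euler / residual step, Y_{i+1} = Y_i + h * F(theta_i, Y_i). A toy, self-contained illustration of that update (F, h, and the sizes are placeholders, not the ResNN layer):

    F(theta, Y) = tanh.(theta .* Y)    # stand-in for apply(this.layer, theta[:,i], Y, ...)
    h = 0.1
    theta = randn(4)
    Y = randn(4, 3)
    for i = 1:10
        Y = Y + h * F(theta, Y)        # same structure as Y += this.h * Z
    end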
22 changes: 17 additions & 5 deletions src/integrators/batchNormNN.jl
@@ -53,14 +53,21 @@ end


# --------- forward problem ----------
function apply(this::batchNormNN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) where {T<:Number}
Y::Array{T,2} = copy(Y0)
function apply(this::batchNormNN{T},theta::Array{T},Y::Array{T,2},tmp::Array,doDerivative=true) where {T<:Number}
nex = div(length(Y),nFeatIn(this))::Int
nt = length(this.layers)

tmp = Array{Any}(nt+1,2)
if isempty(tmp) #TODO Will have to make sure size of Y doesn't change
tmp = Array{Any}(nt+1,2)
end

if doDerivative
tmp[1,1] = Y0
if isassigned(tmp,1,1)
tmp11 = tmp[1,1]
tmp11 .= Y
else
tmp[1,1] = copy(Y)
end
end

Ydata::Array{T,2} = zeros(T,0,nex)
@@ -73,7 +80,12 @@ function apply(this::batchNormNN{T},theta::Array{T},Y0::Array{T,2},doDerivative=
Ydata = [Ydata; this.Q*Yd]
end
if doDerivative
tmp[i+1,1] = copy(Y)
if isassigned(tmp,i+1,1)
tmp1 = tmp[i+1,1]
tmp1 .= Y
else
tmp[i+1,1] = copy(Y)
end
end
cnt = cnt + ni
end
14 changes: 11 additions & 3 deletions src/integrators/connector.jl
@@ -17,16 +17,24 @@ function getConnector(TYPE::Type, K; b = zero(TYPE),outTimes=0,Q=I)
return Connector(K,b,outTimes,Q);
end


function apply(this::Connector{T},theta::Array{T},Y0::Array{T},doDerivative=true) where {T <: Number}
function apply(this::Connector{T},theta::Array{T},Y0::Array{T},tmp,doDerivative=true) where {T <: Number}
nex = div(length(Y0),nFeatIn(this))
Y0 = reshape(Y0,:,nex)

if doDerivative
if isempty(tmp)
tmp = copy(Y0)
else
tmp .= Y0
end
end

Y = this.K*Y0 .+ this.b
Ydata::Array{T,2} = Array{T, 2}(0, 0) # Temporary fix until we know what type Q is
if this.outTimes==1
Ydata = this.Q*Y
end
tmp = Y0;

return Ydata, Y, tmp
end

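
Note: a hypothetical use of the connector's new signature, assuming K is a plain matrix and that the layer ignores theta (as the body above suggests); the sizes are arbitrary.

    using Meganet
    TYPE = Float64
    K = randn(TYPE, 8, 4)                    # maps 4 input features to 8 output features
    con = getConnector(TYPE, K)
    Y0 = randn(TYPE, 4, 16)                  # 16 examples
    Ydata, Y, tmp = apply(con, zeros(TYPE, 0), Y0, zeros(TYPE, 0, 0), true)  # first call copies Y0 into tmp
    Ydata, Y, tmp = apply(con, zeros(TYPE, 0), Y0, tmp, true)                # later calls overwrite tmp in place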
