# Convolutional Neural Networks with Knet
### Ref: https://github.com/denizyuret/Knet.jl/blob/master/tutorial/50.cnn.ipynb

In [1]:
# Setup display width, load packages, import symbols
ENV["COLUMNS"]=72
using Base.Iterators: flatten
using IterTools: ncycle, takenth
using Statistics: mean
using MLDatasets: MNIST
using FileIO: load, save
using CUDA: CUDA, CuArray # functional
using Knet: Knet, conv4, pool, mat, nll, accuracy, progress, sgd, param, param0, dropout, relu, minibatch, Data

In [2]:
# Convolution in 1-D
w = reshape(Float64[1, 2, 3], 3, 1, 1, 1); @show w   # filter, passed first to conv4
x = reshape(collect(1.0:7.0), 7, 1, 1, 1); @show x   # input, passed second to conv4
@show y = conv4(w, x);  # size Y = X - W + 1 = 5 by default

w = [1.0; 2.0; 3.0;;;;]
x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0; 7.0;;;;]
y = conv4(w, x) = [10.0; 16.0; 22.0; 28.0; 34.0;;;;]


In [3]:
# Padding
w = reshape(Float64[1, 2, 3], 3, 1, 1, 1); @show w
x = reshape(collect(1.0:7.0), 7, 1, 1, 1); @show x
@show y2 = conv4(w, x, padding=(1,0));  # size Y = X + 2P - W + 1 = 7 with padding=1
# To preserve input size (Y=X) for a given W, what padding P should we use?

w = [1.0; 2.0; 3.0;;;;]
x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0; 7.0;;;;]
y2 = conv4(w, x, padding = (1, 0)) = [4.0; 10.0; 16.0; 22.0; 28.0; 34.0; 32.0;;;;]


In [4]:
# Stride
w = reshape(Float64[1, 2, 3], 3, 1, 1, 1); @show w
x = reshape(collect(1.0:7.0), 7, 1, 1, 1); @show x
@show y3 = conv4(w, x; padding=(1,0), stride=3);  # size Y = 1 + floor((X+2P-W)/S)

w = [1.0; 2.0; 3.0;;;;]
x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0; 7.0;;;;]
y3 = conv4(w, x; padding = (1, 0), stride = 3) = [4.0; 22.0; 32.0;;;;]


In [5]:
# Mode
w = reshape(Float64[1, 2, 3], 3, 1, 1, 1); @show w
x = reshape(collect(1.0:7.0), 7, 1, 1, 1); @show x
@show y4 = conv4(w, x, mode=0);  # Default mode (convolution) inverts w
@show y5 = conv4(w, x, mode=1);  # mode=1 (cross-correlation) does not invert w

w = [1.0; 2.0; 3.0;;;;]
x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0; 7.0;;;;]
y4 = conv4(w, x, mode = 0) = [10.0; 16.0; 22.0; 28.0; 34.0;;;;]
y5 = conv4(w, x, mode = 1) = [14.0; 20.0; 26.0; 32.0; 38.0;;;;]


In [6]:
# Convolution in more dimensions
x = reshape(collect(1.0:9.0), 3, 3, 1, 1)

3×3×1×1 Array{Float64, 4}:
[:, :, 1, 1] =
 1.0  4.0  7.0
 2.0  5.0  8.0
 3.0  6.0  9.0

In [7]:
w = reshape(collect(1.0:4.0), 2, 2, 1, 1)

2×2×1×1 Array{Float64, 4}:
[:, :, 1, 1] =
 1.0  3.0
 2.0  4.0

In [8]:
# 2-D convolution of the 3×3 input x with the 2×2 filter w;
# each spatial dimension of the output has size X - W + 1 = 2.
y = conv4(w, x)

2×2×1×1 Array{Float64, 4}:
[:, :, 1, 1] =
 23.0  53.0
 33.0  63.0

In [9]:
# Convolution with multiple channels, filters, and instances
# size X = [X1,X2,...,Xd,Cx,N] where d is the number of dimensions, Cx is channels, N is instances
x = reshape(collect(1.0:18.0), 3, 3, 2, 1)

3×3×2×1 Array{Float64, 4}:
[:, :, 1, 1] =
 1.0  4.0  7.0
 2.0  5.0  8.0
 3.0  6.0  9.0

[:, :, 2, 1] =
 10.0  13.0  16.0
 11.0  14.0  17.0
 12.0  15.0  18.0

In [11]:
# size W = [W1,W2,...,Wd,Cx,Cy] where d is the number of dimensions, Cx is input channels, Cy is output channels
w = reshape(collect(1.0:24.0), 2, 2, 2, 3)

2×2×2×3 Array{Float64, 4}:
[:, :, 1, 1] =
 1.0  3.0
 2.0  4.0

[:, :, 2, 1] =
 5.0  7.0
 6.0  8.0

[:, :, 1, 2] =
  9.0  11.0
 10.0  12.0

[:, :, 2, 2] =
 13.0  15.0
 14.0  16.0

[:, :, 1, 3] =
 17.0  19.0
 18.0  20.0

[:, :, 2, 3] =
 21.0  23.0
 22.0  24.0

In [12]:
# size Y = [Y1,Y2,...,Yd,Cy,N]  where Yi = 1 + floor((Xi+2Pi-Wi)/Si), Cy is channels, N is instances
# Here X is 3×3×2×1 and W is 2×2×2×3, so Y comes out as 2×2×3×1.
y = conv4(w,x)

2×2×3×1 Array{Float64, 4}:
[:, :, 1, 1] =
 328.0  436.0
 364.0  472.0

[:, :, 2, 1] =
 808.0  1108.0
 908.0  1208.0

[:, :, 3, 1] =
 1288.0  1780.0
 1452.0  1944.0

# Pooling

In [14]:
# 1-D pooling example
x = reshape(collect(1.0:6.0), 6, 1, 1, 1); @show x
@show pool(x; window=(2,1));

x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0;;;;]
pool(x; window = (2, 1)) = [2.0; 4.0; 6.0;;;;]


In [15]:
# Window size
x = reshape(collect(1.0:6.0), 6, 1, 1, 1); @show x
@show pool(x; window=(3,1));  # size Y = floor(X/W)

x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0;;;;]
pool(x; window = (3, 1)) = [3.0; 6.0;;;;]


In [16]:
# Padding
x = reshape(collect(1.0:6.0), 6, 1, 1, 1); @show x
@show pool(x; window=(2,1), padding=(1,0));  # size Y = floor((X+2P)/W)

x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0;;;;]
pool(x; window = (2, 1), padding = (1, 0)) = [1.0; 3.0; 5.0; 6.0;;;;]


In [17]:
# Stride
x = reshape(collect(1.0:10.0), 10, 1, 1, 1); @show x
@show pool(x; window=(2,1), stride=4);  # size Y = 1 + floor((X+2P-W)/S)

x = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0; 7.0; 8.0; 9.0; 10.0;;;;]
pool(x; window = (2, 1), stride = 4) = [2.0; 6.0; 10.0;;;;]


In [18]:
# More dimensions
x = reshape(collect(1.0:16.0), 4, 4, 1, 1)

4×4×1×1 Array{Float64, 4}:
[:, :, 1, 1] =
 1.0  5.0   9.0  13.0
 2.0  6.0  10.0  14.0
 3.0  7.0  11.0  15.0
 4.0  8.0  12.0  16.0

In [19]:
# Pooling with default settings: for the 4×4 input above, the result shown below
# is the maximum of each non-overlapping 2×2 window.
pool(x)

2×2×1×1 Array{Float64, 4}:
[:, :, 1, 1] =
 6.0  14.0
 8.0  16.0

In [20]:
# Multiple channels and instances
x = reshape(collect(1.0:32.0), 4, 4, 2, 1)

4×4×2×1 Array{Float64, 4}:
[:, :, 1, 1] =
 1.0  5.0   9.0  13.0
 2.0  6.0  10.0  14.0
 3.0  7.0  11.0  15.0
 4.0  8.0  12.0  16.0

[:, :, 2, 1] =
 17.0  21.0  25.0  29.0
 18.0  22.0  26.0  30.0
 19.0  23.0  27.0  31.0
 20.0  24.0  28.0  32.0

# Experiment

In [21]:
# Load MNIST data
xtrn, ytrn = MNIST.traindata(Float32)
xtst, ytst = MNIST.testdata(Float32)
# Relabel digit 0 as class 10 in both label vectors (in place).
replace!(ytrn, 0 => 10)
replace!(ytst, 0 => 10)
# Minibatches of 100 instances, with inputs reshaped to 28×28×1×batch.
dtrn = minibatch(xtrn, ytrn, 100; xsize=(28, 28, 1, :))
dtst = minibatch(xtst, ytst, 100; xsize=(28, 28, 1, :));

In [28]:
# Take the first test minibatch and print a one-line summary of its inputs and labels.
x, y = first(dtst)
foreach(a -> println(summary(a)), (x, y));

28×28×1×100 Array{Float32, 4}
100-element Vector{Int64}


In [32]:
# For running experiments
"""
    trainresults(file, model; o...)

Train `model` from scratch or load previously saved results, print the minimum of
each metric, and return the results matrix.

The function prompts on standard input. If the answer starts with `y`, `model` is
trained with `sgd` for 100 passes over the global `dtrn`; once per pass the tuple
`(model(dtrn), model(dtst), 1-accuracy on dtrn, 1-accuracy on dtst)` is recorded, and
the resulting 4×passes `Float32` matrix is saved to `file` under the key `"results"`.
Any other answer (including an empty line) loads `"results"` from `file`, first
downloading it from the Knet v1.4.9 release page if `file` does not exist locally.

The keyword arguments `o...` are accepted but not used.
"""
function trainresults(file, model; o...)
    print("Train from scratch? ")
    # Use `startswith` rather than `readline()[1]`: an empty answer (plain Enter or
    # EOF) must count as "no" instead of raising a BoundsError on an empty string.
    if startswith(readline(), "y")
        # Lazy generator: one 4-tuple of metrics per full pass over dtrn
        # (takenth keeps every length(dtrn)-th sgd step).
        r = ((model(dtrn), model(dtst), 1-accuracy(model,data=dtrn), 1-accuracy(model,data=dtst))
             for _ in takenth(progress(sgd(model,ncycle(dtrn,100))),length(dtrn)))
        r = reshape(collect(Float32,flatten(r)),(4,:))
        save(file,"results",r)
        GC.gc(true) # To save gpu memory
    else
        isfile(file) || download("https://github.com/denizyuret/Knet.jl/releases/download/v1.4.9/$file",file)
        r = load(file,"results")
    end
    println(minimum(r,dims=2))
    return r
end

trainresults (generic function with 1 method)

# Model

In [42]:
# Define a convolutional layer:
struct Conv
    w   # filter weights, first argument to conv4
    b   # bias, broadcast-added to the convolution output
    f   # activation, applied elementwise after pooling
    p   # dropout probability applied to the layer input
end

# Forward pass: dropout -> convolution -> bias -> pooling -> activation.
function (c::Conv)(x)
    dropped = dropout(x, c.p)
    biased = conv4(c.w, dropped) .+ c.b
    return c.f.(pool(biased))
end

# Build a layer with a w1×w2×cx×cy filter parameter and a 1×1×cy×1 bias parameter.
function Conv(w1::Int, w2::Int, cx::Int, cy::Int, f=relu; pdrop=0)
    return Conv(param(w1, w2, cx, cy), param0(1, 1, cy, 1), f, pdrop)
end

Conv

In [43]:
# Redefine dense layer (See mlp.ipynb):
struct Dense
    w   # weight matrix
    b   # bias, broadcast-added to the matrix product
    f   # activation, applied elementwise
    p   # dropout probability applied to the layer input
end

# Forward pass: dropout -> flatten -> affine map -> activation.
function (d::Dense)(x)
    flat = mat(dropout(x, d.p))  # mat reshapes 4-D tensor to 2-D matrix so we can use matmul
    return d.f.(d.w * flat .+ d.b)
end

# Build a layer mapping i inputs to o outputs: an o×i weight parameter and a length-o bias.
function Dense(i::Int, o::Int, f=relu; pdrop=0)
    return Dense(param(o, i), param0(o), f, pdrop)
end

Dense

In [44]:
# Let's define a chain of layers
struct Chain
    layers
    # Collect the positional arguments into a tuple of layers.
    Chain(layers...) = new(layers)
end

# Prediction: thread x through every layer in order.
function (c::Chain)(x)
    return foldl((h, layer) -> layer(h), c.layers; init=x)
end

# Loss: `nll` of the chain's output for x against the labels y.
function (c::Chain)(x, y)
    return nll(c(x), y)
end

# Mean loss over all (x, y) minibatches produced by the dataset d.
function (c::Chain)(d::Data)
    return mean(c(x, y) for (x, y) in d)
end

In [45]:
# Spatial size per layer for a 28×28 input: 5×5 conv gives 24, pooling gives 12,
# 5×5 conv gives 8, pooling gives 4; hence 4*4*50 = 800 inputs to the first Dense layer.
lenet = Chain(
    Conv(5, 5, 1, 20),
    Conv(5, 5, 20, 50),
    Dense(800, 500; pdrop=0.3),
    Dense(500, 10, identity; pdrop=0.3),
)
[summary(layer.w) for layer in lenet.layers]

4-element Vector{String}:
 "5×5×1×20 AutoGrad.Param{Array{Float32, 4}}"
 "5×5×20×50 AutoGrad.Param{Array{Float32, 4}}"
 "500×800 AutoGrad.Param{Matrix{Float32}}"
 "10×500 AutoGrad.Param{Matrix{Float32}}"

In [46]:
# Loss (nll) of the untrained model on the (x,y) minibatch taken from dtst above;
# for reference, uniform guessing over 10 classes would give log(10) ≈ 2.30.
lenet(x,y)

2.294636f0