# Knet CNN Example

In [11]:
# After installing and starting Julia run the following to install the required packages:
# julia> Pkg.init(); for p in ("CUDAdrv","IJulia","Knet"); Pkg.add(p); end

In [12]:
# Switch the installed Knet package to its "ilkarman" branch and rebuild it
# before loading, so the rest of the notebook runs against that branch.
Pkg.checkout("Knet","ilkarman") # make sure we have the right Knet version
Pkg.build("Knet")
using Knet
# The shared parameter file is Python source; aliasing `True` lets Julia
# evaluate a Python-style boolean literal when that file is included.
True=true # so we can read the python params
# NOTE(review): presumably this is where LR, MOMENTUM, BATCHSIZE and EPOCHS
# (used further down) get defined -- confirm against common/params.py.
include("common/params.py");

[1m[36mINFO: [39m[22m[36mChecking out Knet ilkarman...
[39m[1m[36mINFO: [39m[22m[36mPulling Knet latest ilkarman...
[39m[1m[36mINFO: [39m[22m[36mNo packages to install, update or remove
[39m[1m[36mINFO: [39m[22m[36mBuilding Knet
[39m[1m[36mINFO: [39m[22m[36mCompiling CUDA kernels.
[39m

make: `libknet8.so' is up to date.


In [13]:
# Report the software and hardware environment of this run.
println("OS: $(Sys.KERNEL)")
println("Julia: $(VERSION)")
println("Knet: $(Pkg.installed("Knet"))")
# The nvidia-smi output is printed verbatim, including its own line breaks.
println("GPU: $(readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))")

OS: Linux
Julia: 0.6.1
Knet: 0.8.5+
GPU: Tesla K80
Tesla K80



In [14]:
# define model
# Build the model parameters as a flat `Any[]` of alternating weight and bias
# arrays: four 4-D weight/bias pairs followed by two 2-D weight/bias pairs.
#
# Keywords:
#   atype - constructor applied to every initialized array (default `KnetArray`)
#   dtype - element type handed to the initializers (default `Float32`)
#   winit - weight initializer, called as `winit(dtype, dims...)` (default `xavier`)
#   binit - bias initializer, called as `binit(dtype, dims...)` (default `zeros`)
#
# Returns a 12-element `Vector{Any}` ordered w1, b1, w2, b2, ..., w6, b6.
function initmodel(; atype=KnetArray, dtype=Float32, winit=xavier, binit=zeros)
    # One (weight dims, bias dims) entry per layer, in the order `predict` indexes them.
    shapes = (
        ((3, 3, 3, 50),    (1, 1, 50, 1)),
        ((3, 3, 50, 50),   (1, 1, 50, 1)),
        ((3, 3, 50, 100),  (1, 1, 100, 1)),
        ((3, 3, 100, 100), (1, 1, 100, 1)),
        ((512, 6400),      (512, 1)),
        ((10, 512),        (10, 1))
    )
    params = Any[]
    for (wdims, bdims) in shapes
        # Weight first, then bias, so initializers run in the same order as the layout.
        push!(params, atype(winit(dtype, wdims...)))
        push!(params, atype(binit(dtype, bdims...)))
    end
    return params
end;

In [15]:
# define loss and its gradient
# Forward pass: map an input batch `x` to the output of the final fully
# connected layer, using the parameter list `w` in the order built by
# `initmodel` (w[1..8]: convolution weight/bias pairs, w[9..12]: fully
# connected weight/bias pairs).
#
# NOTE(review): `dropout` is called unconditionally here; whether it is a
# no-op outside of gradient computation depends on the Knet version -- confirm.
function predict(w, x)
    # Convolution with padding 1 plus a broadcast bias.
    conv(input, kernel, bias) = conv4(kernel, input; padding=1) .+ bias
    # Fully connected layer; `mat` reshapes the input before the matrix product.
    dense(input, weight, bias) = weight * mat(input) .+ bias

    # First convolution stage: conv -> relu -> conv -> pool -> relu -> dropout.
    h = relu.(conv(x, w[1], w[2]))
    h = dropout(relu.(pool(conv(h, w[3], w[4]))), 0.25)

    # Second convolution stage, same layout as the first.
    h = relu.(conv(h, w[5], w[6]))
    h = dropout(relu.(pool(conv(h, w[7], w[8]))), 0.25)

    # Classifier: hidden fully connected layer with dropout, then the output layer.
    h = dropout(relu.(dense(h, w[9], w[10])), 0.5)
    return dense(h, w[11], w[12])
end

# Training objective: `nll` (negative log likelihood) of the labels `y`
# given the scores `predict(w, x)`.
function loss(w, x, y)
    scores = predict(w, x)
    return nll(scores, y)
end

# Gradient function derived from `loss`; the training loop calls it with the
# same arguments and passes the result to `update!` together with the model.
lossgradient = grad(loss);

In [16]:
# load data
# Pull in the CIFAR helper script shipped with Knet, then load and time the dataset.
include(Knet.dir("data", "cifar.jl"))
@time (xtrn, ytrn, xtst, ytst, lbls) = cifar10()
# Print a one-line summary (shape and type) of each data array.
foreach(d -> println(summary(d)), (xtrn, ytrn, xtst, ytst))

  0.119320 seconds (74.03 k allocations: 3.523 MiB)
32×32×3×50000 Array{Float32,4}
50000-element Array{UInt8,1}
32×32×3×10000 Array{Float32,4}
10000-element Array{UInt8,1}


In [17]:
# prepare for training
# Fresh parameters using the `initmodel` defaults.
model = initmodel()
# Optimizer state for `model` using Momentum. LR and MOMENTUM are not defined
# in this notebook; presumably they come from common/params.py -- confirm.
optim = optimizers(model, Momentum; lr=LR, gamma=MOMENTUM);

In [18]:
# force precompile
# Take the first BATCHSIZE training images as a KnetArray; the labels stay a
# plain slice of ytrn.
x1 = KnetArray(xtrn[:,:,:,1:BATCHSIZE])
y1 = ytrn[1:BATCHSIZE]
# Run and time one gradient computation up front; presumably so compilation
# cost is paid here rather than inside the timed training loop.
@time lossgradient(model,x1,y1);

  0.044195 seconds (12.18 k allocations: 647.599 KiB)


In [19]:
info("Training...")
# The outer @time covers the whole run; the inner @time reports each epoch.
@time for epoch in 1:EPOCHS
    @time for (xbatch, ybatch) in minibatch(xtrn, ytrn, BATCHSIZE;
                                            shuffle=true, xtype=KnetArray)
        # One optimizer step: gradients for this batch, applied to `model` via `optim`.
        update!(model, lossgradient(model, xbatch, ybatch), optim)
    end
end

[1m[36mINFO: [39m[22m[36mTraining...
[39m

 15.503366 seconds (2.14 M allocations: 683.569 MiB, 0.62% gc time)
 14.629396 seconds (1.90 M allocations: 670.151 MiB, 0.55% gc time)
 14.634947 seconds (1.90 M allocations: 670.151 MiB, 0.56% gc time)
 14.585767 seconds (1.90 M allocations: 670.151 MiB, 0.55% gc time)
 14.674386 seconds (1.90 M allocations: 670.222 MiB, 0.59% gc time)
 14.705547 seconds (1.90 M allocations: 670.151 MiB, 0.57% gc time)
 14.740821 seconds (1.90 M allocations: 670.151 MiB, 0.54% gc time)
 14.775897 seconds (1.90 M allocations: 670.151 MiB, 0.53% gc time)
 14.911993 seconds (1.90 M allocations: 670.151 MiB, 0.60% gc time)
 14.818849 seconds (1.90 M allocations: 670.151 MiB, 0.58% gc time)
147.982460 seconds (19.22 M allocations: 6.558 GiB, 0.57% gc time)


In [20]:
# test accuracy
# Batch the test set with the same batch size as training (no shuffling requested).
testdata = minibatch(xtst,ytst,BATCHSIZE;xtype=KnetArray)
# NOTE(review): the accuracy recorded with this notebook, 0.7784455128205128,
# equals 7772/9984 exactly, which suggests only 9984 of the 10000 test samples
# were scored (a trailing partial batch skipped) -- confirm against BATCHSIZE
# and the minibatch documentation.
@time accuracy(testdata,model,predict)

  1.040271 seconds (104.82 k allocations: 122.664 MiB, 1.02% gc time)


0.7784455128205128