# Knet CNN Example

In [1]:
using Pkg; haskey(Pkg.installed(),"Knet") || Pkg.add("Knet")
using Knet
import Knet: train!
True=true # so we can read the python params
include("common/params.py")
gpu()

0

In [2]:
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.installed()["Knet"])
println("GPU: ", replace(read(`nvidia-smi --query-gpu=name --format=csv,noheader`,String),'\n'=>", "))

OS: Linux
Julia: 1.0.0
Knet: 1.1.1
GPU: Tesla K80, 


In [3]:
_atype = gpu() >= 0 ? KnetArray{Float32} : Array{Float64}


# define layers
mutable struct Conv
    w
    b
end


function Conv(ci::Int, co::Int, k::Int; atype=_atype)
    w = param(k, k, ci, co; atype=atype)
    b = param(1, 1, co, 1; atype=atype, init=zeros)
    return Conv(w, b)
end


function (l::Conv)(x)
    conv4(l.w, x; padding=1) .+ l.b
end


mutable struct Block
    layer1
    layer2
end


function Block(ci::Int, ct::Int, co::Int, k::Int; atype=_atype)
    layer1 = Conv(ci, ct, k; atype=atype)
    layer2 = Conv(ct, co, k; atype=atype)
    return Block(layer1, layer2)
end


function (l::Block)(x; pdrop=0.0)
    y1 = relu.(l.layer1(x))
    y2 = relu.(pool(l.layer2(y1)))
    y3 = dropout(y2, pdrop)
end


mutable struct Linear
    w
    b
end


function Linear(input_dim::Int, output_dim::Int; atype=_atype)
    w = param(output_dim, input_dim; atype=atype)
    b = param(output_dim, 1; atype=atype, init=zeros)
    return Linear(w, b)
end


(l::Linear)(x) = l.w * mat(x) .+ l.b

In [4]:
# define model and loss
mutable struct CNN
    conv1
    conv2
    fc1
    fc2
end


function CNN(; atype=_atype)
    conv1 = Block(3, 50, 50, 3; atype=atype)
    conv2 = Block(50, 100, 100, 3; atype=atype)
    fc1 = Linear(6400, 512)
    fc2 = Linear(512, 10)
    return CNN(conv1, conv2, fc1, fc2)
end


function (model::CNN)(x; pdrops=(0.0, 0.0))
    y1 = model.conv1(x; pdrop=pdrops[1])
    y2 = model.conv2(y1; pdrop=pdrops[1])
    y3 = dropout(relu.(model.fc1(y2)), pdrops[2])
    y4 = model.fc2(y3)
end


loss(model, x, y; o...) = nll(model(x; o...), y) # nll: negative log likelihood

loss (generic function with 1 method)

In [5]:
# helper to initialize optimizers
function initopt!(model, optim; options...)
    for par in params(model)
        par.opt = optim(; options...)
    end
end


# helper to update the weights
function train!(model::CNN, x, y; pdrops=(0.25, 0.5))
    J = @diff loss(model, x, y)
    for par in params(model)
        g = grad(J, par)
        update!(value(par), g, par.opt)
    end
end

train! (generic function with 2 methods)

In [6]:
# load data
include(Knet.dir("data","cifar.jl"))
@time (xtrn,ytrn,xtst,ytst,lbls)=cifar10()
dtrn = minibatch(xtrn,ytrn,BATCHSIZE,shuffle=true,xtype=KnetArray)
dtst = minibatch(xtst,ytst,BATCHSIZE,shuffle=false,xtype=KnetArray)
for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end

┌ Info: Reading cifar-10-binary.tar.gz...
└ @ Main /kuacc/users/ikesen16/.julia/packages/Knet/3lzCR/data/cifar.jl:41


  3.733461 seconds (3.06 M allocations: 1.865 GiB, 20.34% gc time)
32×32×3×50000 Array{Float32,4}
50000-element Array{UInt8,1}
32×32×3×10000 Array{Float32,4}
10000-element Array{UInt8,1}


In [7]:
# prepare for training
model = optim = nothing; Knet.gc() # Clear memory from last run
model = CNN()
initopt!(model, Momentum; lr=LR, gamma=MOMENTUM);

In [8]:
# cold start
@time for (x,y) in dtrn
    train!(model, x, y)
end

 30.661693 seconds (23.87 M allocations: 1.724 GiB, 11.90% gc time)


In [9]:
# prepare for training
model = optim = nothing; Knet.gc() # Clear memory from last run
model = CNN()
initopt!(model, Momentum; lr=LR, gamma=MOMENTUM);

In [10]:
# 159s
@info("Training...")
@time for epoch in 1:EPOCHS
    @time for (x,y) in dtrn
        train!(model, x, y)
    end
end

┌ Info: Training...
└ @ Main In[10]:2


 14.000294 seconds (2.38 M allocations: 667.945 MiB, 0.51% gc time)
 13.943929 seconds (2.38 M allocations: 667.881 MiB, 0.52% gc time)
 13.941756 seconds (2.38 M allocations: 667.881 MiB, 0.52% gc time)
 13.935508 seconds (2.38 M allocations: 667.881 MiB, 0.50% gc time)
 13.937715 seconds (2.38 M allocations: 667.881 MiB, 0.50% gc time)
 13.943693 seconds (2.38 M allocations: 667.881 MiB, 0.51% gc time)
 13.939071 seconds (2.38 M allocations: 667.879 MiB, 0.50% gc time)
 13.943764 seconds (2.38 M allocations: 667.879 MiB, 0.52% gc time)
 13.939797 seconds (2.39 M allocations: 667.973 MiB, 0.51% gc time)
 13.940277 seconds (2.38 M allocations: 667.881 MiB, 0.51% gc time)
139.470813 seconds (23.80 M allocations: 6.523 GiB, 0.51% gc time)


In [11]:
# test accuracy 77.54
@time accuracy(model,dtst)

  2.323146 seconds (1.85 M allocations: 208.511 MiB, 13.59% gc time)


0.7268629807692307