# Knet-Flux CNN benchmark based on [Flux/model-zoo](https://github.com/FluxML/model-zoo/blob/master/vision/mnist/conv.jl) conv.jl example

In [1]:
]activate ..; instantiate; st

[32m[1m  Updating[22m[39m registry at `~/.julia/registries/General`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[?25l[2K[?25h

│     — /home/gridsan/dyuret/.julia/registries/General — failed to fetch from repo
└ @ Pkg.API /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.0/Pkg/src/API.jl:157


[32m[1m    Status[22m[39m `~/Klutz.jl/Project.toml`
 [90m [3a865a2d][39m[37m CuArrays v0.8.1[39m
 [90m [587475ba][39m[37m Flux v0.6.8[39m
 [90m [1902f260][39m[37m Knet v1.1.1[39m


In [2]:
# Uncomment this to get Knet profiling info at the end:
# ENV["KNET_TIMER"] = ENV["AUTOGRAD_TIMER"] = "true"
# using Pkg; Pkg.build("AutoGrad"); Pkg.build("Knet")

In [3]:
using Flux, Flux.Data.MNIST, Statistics
using Flux: onehotbatch, onecold, crossentropy, throttle
using Base.Iterators: repeated, partition
using CuArrays
using Knet: Knet, AutoGrad, conv4, pool, KnetArray
Knet.gpu()

0

In [4]:
# Implement Chain, Conv and Dense in Knet
# Sequential container: holds a tuple of callables and applies them in order.
struct kChain
    layers
    # Inner constructor: only accepts the layers already packed into a tuple.
    kChain(ls::Tuple) = new(ls)
end

# Convenience constructor: `kChain(a, b, c)` packs its arguments into a tuple.
kChain(ls...) = kChain(ls)

# Calling the chain threads `x` through every layer, left to right;
# a chain with no layers returns `x` unchanged.
(c::kChain)(x) = foldl((acc, layer) -> layer(acc), c.layers; init = x)
# Fully connected layer: weight `w`, bias `b` and an elementwise activation `f`.
struct kDense
    w
    b
    f
end

# Build a layer taking `nx` inputs to `ny` outputs; the weight is created with
# `Knet.param(ny, nx)` and the bias with `Knet.param0(ny)`. `fn` defaults to `identity`.
function kDense(nx::Int, ny::Int, fn=identity)
    weight = Knet.param(ny, nx)
    bias = Knet.param0(ny)
    return kDense(weight, bias, fn)
end

# Forward pass: pass the input through `Knet.mat` (presumably a reshape to 2-D —
# confirm against Knet docs), apply the affine map, then broadcast the activation.
function (d::kDense)(x)
    flat = Knet.mat(x)
    return d.f.(d.w * flat .+ d.b)
end
# Convolution layer followed by pooling: kernel `w`, bias `b`, activation `f`.
struct kConv
    w
    b
    f
end

# Build a layer with a `w1`×`w2` kernel, `cx` input channels and `cy` output
# channels. The bias has shape (1, 1, cy, 1) so it broadcasts against the
# convolution output. `fn` defaults to `identity`.
function kConv(w1, w2, cx, cy, fn=identity)
    kernel = Knet.param(w1, w2, cx, cy)
    bias = Knet.param0(1, 1, cy, 1)
    return kConv(kernel, bias, fn)
end

# Forward pass: convolve, add the bias, broadcast the activation, then apply
# `pool` with its default settings.
function (layer::kConv)(x)
    activated = layer.f.(conv4(layer.w, x) .+ layer.b)
    return pool(activated)
end

## GPU tests

In [14]:
# Load MNIST and split the 60,000 training images into minibatches of 1000.
imgs = MNIST.images()
labels = onehotbatch(MNIST.labels(), 0:9)

# Flux batches: images stacked along dimension 4, paired with the matching
# one-hot label columns, then moved to the GPU.
train = map(partition(1:60_000, 1000)) do idx
    (cat(float.(imgs[idx])..., dims = 4), labels[:, idx])
end
train = gpu.(train)

# Knet batches: the same images as a Float32 KnetArray, paired with integer
# labels shifted from 0:9 to 1:10.
klabels = MNIST.labels() .+ 1
ktrain = map(partition(1:60_000, 1000)) do idx
    (KnetArray{Float32}(cat(float.(imgs[idx])..., dims = 4)), klabels[idx])
end

# Show the array types of the first batch of each data set.
summary.((train[1]..., ktrain[1]...))

("28×28×1×1000 CuArray{Float32,4}", "10×1000 Flux.OneHotMatrix{CuArray{Flux.OneHotVector,1}}", "28×28×1×1000 KnetArray{Float32,4}", "1000-element Array{Int64,1}")

In [44]:
# Run this several times to get timing for Flux.
# Output recorded from one run:
# (loss(X, Y), accuracy(X, Y)) = (2.302674f0 (tracked), 0.109)
#   9.770545 seconds (2.27 M allocations: 138.747 MiB, 26.75% gc time)
# (loss(X, Y), accuracy(X, Y)) = (0.19522423f0 (tracked), 0.942)

# Two conv+maxpool stages, flatten, then a 288 -> 10 dense layer with softmax;
# the whole model is moved to the GPU.
m = gpu(Chain(
    Conv((2, 2), 1 => 16, relu),
    x -> maxpool(x, (2, 2)),
    Conv((2, 2), 16 => 8, relu),
    x -> maxpool(x, (2, 2)),
    x -> reshape(x, :, size(x, 4)),
    Dense(288, 10),
    softmax,
))

# Cross-entropy of the model output against the one-hot targets.
function loss(x, y)
    return crossentropy(m(x), y)
end

# Fraction of samples whose predicted class matches the target class.
function accuracy(x, y)
    predicted = onecold(m(x))
    expected = onecold(y)
    return mean(predicted .== expected)
end

opt = ADAM(params(m))

# Report on the first minibatch before and after timing ten passes over `train`.
X, Y = first(train)
@show loss(X, Y), accuracy(X, Y)
@time for epoch in 1:10
    Flux.train!(loss, train, opt)
end
@show loss(X, Y), accuracy(X, Y);

(loss(X, Y), accuracy(X, Y)) = (2.302674f0 (tracked), 0.109)
  9.770545 seconds (2.27 M allocations: 138.747 MiB, 26.75% gc time)
(loss(X, Y), accuracy(X, Y)) = (0.19522423f0 (tracked), 0.942)


In [50]:
# Run this several times to get timing for Knet.
# Output recorded from one run:
# (Knet.nll(km, kX, kY), Knet.accuracy(km, kX, kY)) = (2.2925608f0, 0.145)
#   2.766763 seconds (1.58 M allocations: 58.009 MiB, 12.79% gc time)
# (Knet.nll(km, kX, kY), Knet.accuracy(km, kX, kY)) = (0.15760595f0, 0.951)

# Same architecture as the Flux model; each kConv already includes its pooling step.
km = kChain(
    kConv(2, 2, 1, 16, Knet.relu),
    kConv(2, 2, 16, 8, Knet.relu),
    kDense(288, 10),
)
kX, kY = first(ktrain)

# Callback factory: the returned closure counts down from `n` and yields `true`
# for its first `n` calls and `false` afterwards. Its argument is ignored.
# NOTE(review): presumably Knet.train! stops once the callback returns false — confirm.
function iters(n)
    return function (J)
        n -= 1
        return n >= 0
    end
end

# Report on the first minibatch before and after timing ten training calls.
@show Knet.nll(km,kX,kY), Knet.accuracy(km,kX,kY)
@time for epoch in 1:10
    Knet.train!(km, ktrain; optimizer=Knet.Adam(), callback=iters(length(ktrain)))
end
@show Knet.nll(km,kX,kY), Knet.accuracy(km,kX,kY);

(Knet.nll(km, kX, kY), Knet.accuracy(km, kX, kY)) = (2.2925608f0, 0.145)
  2.766763 seconds (1.58 M allocations: 58.009 MiB, 12.79% gc time)
(Knet.nll(km, kX, kY), Knet.accuracy(km, kX, kY)) = (0.15760595f0, 0.951)


## CPU tests

In [6]:
# Load MNIST and split the 60,000 training images into minibatches of 1000.
imgs = MNIST.images()
labels = onehotbatch(MNIST.labels(), 0:9)

# Flux batches: images stacked along dimension 4 with the matching one-hot
# label columns. Unlike the GPU test, the data stays on the CPU.
train = map(partition(1:60_000, 1000)) do idx
    (cat(float.(imgs[idx])..., dims = 4), labels[:, idx])
end
# train = gpu.(train)

# Knet batches: plain arrays paired with integer labels shifted from 0:9 to 1:10.
klabels = MNIST.labels() .+ 1
ktrain = map(partition(1:60_000, 1000)) do idx
    (cat(float.(imgs[idx])..., dims = 4), klabels[idx])
end

# Show the array types of the first batch of each data set.
summary.((train[1]..., ktrain[1]...))

("28×28×1×1000 Array{Float64,4}", "10×1000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}", "28×28×1×1000 Array{Float64,4}", "1000-element Array{Int64,1}")

In [10]:
# Run this several times to get timing for Flux:

# Same architecture as the GPU test, but the model is left on the CPU.
m = Chain(
    Conv((2, 2), 1 => 16, relu),
    x -> maxpool(x, (2, 2)),
    Conv((2, 2), 16 => 8, relu),
    x -> maxpool(x, (2, 2)),
    x -> reshape(x, :, size(x, 4)),
    Dense(288, 10),
    softmax,
)

# Snapshot of the untrained model, taken before any training mutates `m`.
m0 = deepcopy(m)

# Cross-entropy of the model output against the one-hot targets.
function loss(x, y)
    return crossentropy(m(x), y)
end

# Fraction of samples whose predicted class matches the target class.
function accuracy(x, y)
    predicted = onecold(m(x))
    expected = onecold(y)
    return mean(predicted .== expected)
end

opt = ADAM(params(m))

# Report on the first minibatch before and after timing one pass over `train`.
X, Y = first(train)
@show loss(X, Y), accuracy(X, Y)
@time Flux.train!(loss, train, opt)
@show loss(X, Y), accuracy(X, Y);

(loss(X, Y), accuracy(X, Y)) = (2.302547213644201 (tracked), 0.098)
 67.319825 seconds (180.23 M allocations: 55.193 GiB, 42.92% gc time)
(loss(X, Y), accuracy(X, Y)) = (1.7696594578438087 (tracked), 0.669)


In [30]:
# Run this several times to get timing for Knet:

# Copy a Flux array into a fresh Array and wrap it as a Knet parameter.
function f2k(a)
    return Knet.Param(Array(a))
end

# Build the Knet model from the untrained Flux snapshot `m0`, so both frameworks
# start from the same weights. Conv biases are reshaped to (1, 1, channels, 1),
# the shape kConv uses for its bias.
km = let conv1 = m0.layers[1], conv2 = m0.layers[3], dense = m0.layers[6]
    kChain(
        kConv(f2k(conv1.weight.data), f2k(reshape(conv1.bias.data, (1, 1, 16, 1))), Knet.relu),
        kConv(f2k(conv2.weight.data), f2k(reshape(conv2.bias.data, (1, 1, 8, 1))), Knet.relu),
        kDense(f2k(dense.W.data), f2k(dense.b.data), identity),
    )
end
kX, kY = first(ktrain)

# Callback factory: the returned closure counts down from `n` and yields `true`
# for its first `n` calls and `false` afterwards. Its argument is ignored.
# NOTE(review): presumably Knet.train! stops once the callback returns false — confirm.
function iters(n)
    return function (J)
        n -= 1
        return n >= 0
    end
end

# Report on the first minibatch before and after timing one training call.
@show Knet.nll(km,kX,kY), Knet.accuracy(km,kX,kY)
@time Knet.train!(km, ktrain; optimizer=Knet.Adam(), callback=iters(length(ktrain)))
@show Knet.nll(km,kX,kY), Knet.accuracy(km,kX,kY);

(Knet.nll(km, kX, kY), Knet.accuracy(km, kX, kY)) = (2.3025472136442007, 0.098)
176.934181 seconds (100.80 k allocations: 31.919 GiB, 17.78% gc time)
(Knet.nll(km, kX, kY), Knet.accuracy(km, kX, kY)) = (1.7539083972962197, 0.63)
