In [1]:
# GPU: 32*40 in 5.18s = 247/s
# CPU: 32*8 in 36.66s = 7/s

In [2]:
# using Pkg; pkg"add Knet"
using Knet
include(Knet.dir("examples","resnet","resnetlib.jl"))
using .ResNetLib: resnet50init, resnet50

In [3]:
# Report the software and hardware this benchmark ran on.
# `Pkg.dependencies` below needs Pkg in scope; the `using Pkg` in the setup
# cell is commented out, so load it here explicitly.
using Pkg
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
# Look the package up by UUID in the active environment's dependency table
# and print its resolved version under the "Knet" label.
println("Knet: ", Pkg.dependencies()[Base.UUID("1902f260-5fb4-5aff-8c31-6271790ab950")].version)
# Ask Julia for the logical CPU count instead of shelling out to
# `cat /proc/cpuinfo | grep processor | wc -l`, which only works on Linux.
println("Cores: ", Sys.CPU_THREADS)
# `print` rather than `println`: presumably nvidia-smi's output already ends
# with a newline. This line still requires `nvidia-smi` on the PATH.
print("GPUs: ", read(`nvidia-smi --query-gpu=gpu_name --format=csv,noheader`, String))

OS: Linux
Julia: 1.5.0
Knet: 1.4.0
Cores: 12
GPUs: GeForce GTX 1060 with Max-Q Design


In [4]:
# Benchmark configuration.
# Number of images per minibatch (used by `fakedata`).
const BATCH_SIZE = 32
# Not referenced by any cell shown here; presumably the width of the feature
# vector produced per image — TODO confirm or remove.
const RESNET_FEATURES = 2048
# Number of minibatches timed in the GPU section.
const BATCHES_GPU = 40
# Number of minibatches timed in the CPU benchmarking cell.
const BATCHES_CPU = 8

8

In [5]:
# Create `batches` minibatches of fake data.
# Generates uniformly random Float32 values shaped 224 x 224 x 3 x N, where
# N = BATCH_SIZE * batches, and hands them to `minibatch` in groups of
# BATCH_SIZE. `atype` is forwarded to `minibatch` as its `xtype` keyword.
function fakedata(batches; atype=KnetArray)
    nimages = BATCH_SIZE * batches
    images = rand(Float32, 224, 224, 3, nimages)
    return minibatch(images, BATCH_SIZE; xtype=atype)
end

fakedata (generic function with 1 method)

In [6]:
# Return features from classifier.
# Each minibatch in `data` is run through `resnet50` with `stage=5` and the
# result is passed through `mat`; the per-batch outputs are concatenated
# with `hcat` and the whole thing is converted to a plain `Array`.
function predictfn(weights, moments, data)
    # A comprehension lets Julia pick a concrete element type for the
    # vector, unlike an empty `[]` literal which is always `Vector{Any}`.
    feats = [mat(resnet50(weights, moments, x; stage=5)) for x in data]
    return Array(hcat(feats...))
end

predictfn (generic function with 1 method)

## 1. GPU

In [7]:
# Initialize resnet weights for the GPU run (the fake data itself is created
# in the timing cells that follow).
# Drop references left over from a previous execution of this cell and force
# a full garbage collection before loading a fresh copy of the weights.
gpuweights = gpumoments = nothing; GC.gc(true) # clear memory from previous run
# `resnet50init` returns a (weights, moments) pair; `atype=KnetArray` selects
# the array type used for this section's GPU benchmark.
gpuweights, gpumoments = resnet50init(;stage=5, trained=true, atype=KnetArray);

┌ Info: Loading pretrained weights...
└ @ Main.ResNetLib /home/deniz/.julia/dev/Knet/examples/resnet/resnetlib.jl:314
┌ Info: Loading imagenet-resnet-50-dag.mat...
└ @ Main.ResNetLib /home/deniz/.julia/dev/Knet/data/imagenet.jl:12


In [8]:
# Cold start: the first call to `predictfn` with these argument types, so the
# reported time includes compilation and is kept separate from the benchmark.
@info("Cold start")
gpudata1 = fakedata(BATCHES_GPU, atype=KnetArray)
# Trailing `;` suppresses display of the returned feature array.
@time predictfn(gpuweights, gpumoments, gpudata1);

┌ Info: Cold start
└ @ Main In[8]:1


  9.099014 seconds (9.68 M allocations: 1.215 GiB, 2.22% gc time)


In [9]:
# Benchmark: a second, already-compiled run on freshly generated fake data.
# Only the `predictfn` call is timed; data generation happens before `@time`.
@info("Benchmarking")
gpudata = fakedata(BATCHES_GPU, atype=KnetArray)
@time predictfn(gpuweights, gpumoments, gpudata);

┌ Info: Benchmarking
└ @ Main In[9]:1


  5.178209 seconds (309.50 k allocations: 757.228 MiB, 3.32% gc time)


## 2. CPU

In [10]:
# Initialize resnet weights for the CPU run: same call as in the GPU section,
# but with `atype=Array` so the (weights, moments) pair uses plain Arrays.
cpuweights, cpumoments = resnet50init(;stage=5, trained=true, atype=Array);

┌ Info: Loading pretrained weights...
└ @ Main.ResNetLib /home/deniz/.julia/dev/Knet/examples/resnet/resnetlib.jl:314


In [11]:
# Cold start on the CPU: the first call with Array arguments, so the reported
# time includes compilation. A single minibatch is enough to trigger it.
@info("Cold start")
cpudata1 = fakedata(1, atype=Array);
@time predictfn(cpuweights, cpumoments, cpudata1);

┌ Info: Cold start
└ @ Main In[11]:1
└ @ Knet.Ops20 /home/deniz/.julia/dev/Knet/src/ops20/conv.jl:174


 13.690795 seconds (24.43 M allocations: 4.981 GiB, 2.40% gc time)


In [12]:
# Benchmark: already-compiled CPU run over BATCHES_CPU minibatches of fresh
# fake data. Only the `predictfn` call is timed.
@info("Benchmarking")
cpudata = fakedata(BATCHES_CPU, atype=Array);
@time predictfn(cpuweights, cpumoments, cpudata);

┌ Info: Benchmarking
└ @ Main In[12]:1


 36.656735 seconds (291.89 k allocations: 30.161 GiB, 2.48% gc time)
