In [1]:
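# Summary (batch size * batches, wall time, samples/s). The timings recorded in the
# cell outputs below are 8.23s (GPU) and 169.2s (CPU), so these figures may be stale.
# GPU: 32*40 in 8.00s = 160/s
# CPU: 32*8 in 115.0s = 2/s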

In [2]:
# Add Knet if it is not listed among the installed packages, then load it.
using Pkg; haskey(Pkg.installed(),"Knet") || Pkg.add("Knet")
using Knet
# Load the ResNet example file located under the Knet package directory and
# import only the two functions this notebook calls.
include(Knet.dir("examples","resnet","resnetlib.jl"))
using .ResNetLib: resnet50init, resnet50
# Query Knet's GPU state; the recorded output of this cell is 0.
# NOTE(review): presumably the id of the active GPU device — confirm against Knet docs.
gpu()

0

In [3]:
# Report the software and hardware environment the benchmark runs on.
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.installed()["Knet"])
# Ask Julia for the logical CPU count instead of piping /proc/cpuinfo through
# grep and wc, so this line does not depend on Linux-only files or external tools.
println("Cores: ", Sys.CPU_THREADS)
# The nvidia-smi output is printed verbatim (it already ends with a newline in the
# recorded output), hence print rather than println.
print("GPUs:\n", read(`nvidia-smi --query-gpu=gpu_name --format=csv,noheader`,String))

OS: Linux
Julia: 1.0.0
Knet: 1.0.1+
Cores: 36
GPUs:
Tesla K80


In [4]:
# Number of samples per minibatch (used by fakedata).
const BATCH_SIZE = 32
# NOTE(review): presumably the width of the ResNet feature vector; it is not
# referenced by any other cell shown here — confirm before relying on it.
const RESNET_FEATURES = 2048
# Number of minibatches generated for the GPU runs.
const BATCHES_GPU = 40
# Number of minibatches generated for the timed CPU run (the CPU cold start uses 1).
const BATCHES_CPU = 8

8

In [5]:
# Build `batches * BATCH_SIZE` random Float32 inputs of size 224x224x3 and hand them
# to `minibatch` with batch size BATCH_SIZE; `atype` is forwarded as its `xtype`.
function fakedata(batches; atype=KnetArray)
    nsamples = BATCH_SIZE * batches
    inputs = rand(Float32, 224, 224, 3, nsamples)
    return minibatch(inputs, BATCH_SIZE; xtype=atype)
end

fakedata (generic function with 1 method)

In [6]:
# Run every minibatch in `data` through `resnet50` (called with stage=5), pass each
# result through `mat`, join the per-batch results with `hcat`, and return the
# joined result converted with `Array`.
function predictfn(weights, moments, data)
    features = []
    for batch in data
        push!(features, mat(resnet50(weights, moments, batch; stage=5)))
    end
    joined = hcat(features...)
    return Array(joined)
end

predictfn (generic function with 1 method)

## 1. GPU

In [7]:
# Drop references to weights from a previous run and call Knet.gc() before
# loading the pretrained weights again with atype=KnetArray.
gpuweights = nothing
gpumoments = nothing
Knet.gc()
gpuweights, gpumoments = resnet50init(; stage=5, trained=true, atype=KnetArray);

┌ Info: Loading pretrained weights...
└ @ Main.ResNetLib /kuacc/users/dyuret/.julia/dev/Knet/examples/resnet/resnetlib.jl:316
┌ Info: Loading imagenet-resnet-50-dag.mat...
└ @ Main.ResNetLib /kuacc/users/dyuret/.julia/dev/Knet/data/imagenet.jl:14


In [8]:
@info "Cold start"
# First predictfn call with the GPU weights; timed separately from the benchmark run.
gpudata1 = fakedata(BATCHES_GPU; atype=KnetArray)
@time predictfn(gpuweights, gpumoments, gpudata1);

┌ Info: Cold start
└ @ Main In[8]:1


 21.795631 seconds (12.07 M allocations: 1.327 GiB, 21.86% gc time)


In [9]:
@info "Benchmarking"
# Timed GPU run on a freshly generated data set of BATCHES_GPU minibatches.
gpudata = fakedata(BATCHES_GPU; atype=KnetArray)
@time predictfn(gpuweights, gpumoments, gpudata);

┌ Info: Benchmarking
└ @ Main In[9]:1


  8.230757 seconds (315.90 k allocations: 758.470 MiB, 2.82% gc time)


## 2. CPU

In [10]:
# Load the pretrained weights once more, this time with atype=Array for the CPU runs.
cpuweights, cpumoments = resnet50init(; stage=5, trained=true, atype=Array);

┌ Info: Loading pretrained weights...
└ @ Main.ResNetLib /kuacc/users/dyuret/.julia/dev/Knet/examples/resnet/resnetlib.jl:316


In [11]:
@info "Cold start"
# First predictfn call with the CPU weights, on a single minibatch.
cpudata1 = fakedata(1; atype=Array);
@time predictfn(cpuweights, cpumoments, cpudata1);

┌ Info: Cold start
└ @ Main In[11]:1


 22.241204 seconds (6.66 M allocations: 4.087 GiB, 17.71% gc time)


In [12]:
@info "Benchmarking"
# Timed CPU run on a freshly generated data set of BATCHES_CPU minibatches.
cpudata = fakedata(BATCHES_CPU; atype=Array);
@time predictfn(cpuweights, cpumoments, cpudata);

┌ Info: Benchmarking
└ @ Main In[12]:1


169.206697 seconds (275.96 k allocations: 30.160 GiB, 17.10% gc time)
