In [1]:
using Knet, Plots, Images

# Linear regression example with housing data

In [2]:
# Download the housing dataset from the UCI Machine Learning Repository.
# The loader script is located with Knet.dir, the same helper the MNIST and VGG
# cells use, instead of the deprecated Pkg.dir lookup.
include(Knet.dir("data","housing.jl"))
x,y = housing()
# Summaries of the feature matrix x and the target row y
map(summary,(x,y))

("13×506 Array{Float64,2}", "1×506 Array{Float64,2}")

In [3]:
# Linear model: w[1] holds the weights and w[2] the bias added to every prediction
function predict(w,x)
    weights, bias = w[1], w[2]
    return weights*x .+ bias
end

# Loss: mean of the squared differences between targets y and predictions
function loss(w,x,y)
    residual = y - predict(w,x)
    return mean(abs2, residual)
end
# lossgradient(w,x,y) is called with the same arguments as loss; as the output
# further down shows, its result has one entry per entry of w
lossgradient = grad(loss);

In [4]:
# Initialize model: w[1] is a 1×13 row of small random weights, w[2] is the scalar bias
srand(42)  # fixed seed so the initial weights are reproducible
w = Any[ 0.1*rand(1,13), 0.0 ]

2-element Array{Any,1}:
  [0.0533183 0.0454029 … 0.0160006 0.0422956]
 0.0                                         

In [5]:
loss(w,x,y)

594.435239388768

In [6]:
lossgradient(w,x,y)

2-element Array{Any,1}:
    [7.53003 -6.79923 … -6.369 13.8655]
 -45.0656                              

In [7]:
# Testing the gradient: Increasing w[2] by eps should decrease loss by 45.0656 * eps
# Here w[2] goes from 0.0 to 0.1 (eps = 0.1), so the loss should drop by roughly 4.5
w[2]=0.1

0.1

In [8]:
loss(w,x,y)

589.9386781239458

In [9]:
# SGD training loop: one gradient step per (x,y) pair in `data`.
#
#   w    - indexable collection of parameters; its entries are replaced in place
#   data - iterable of (x,y) pairs
#   lr   - learning rate (keyword, default .1)
#
# Returns the same `w` that was passed in. Gradients come from the global
# `lossgradient`, so this loop trains whatever model that function differentiates.
function train!(w, data; lr=.1)
    for (x,y) in data
        gradients = lossgradient(w, x, y)
        for k in eachindex(w)
            w[k] = w[k] - lr * gradients[k]
        end
    end
    return w
end

train! (generic function with 1 method)

In [10]:
# Record the weights for 10 epochs
# The data for each epoch is the single pair (x, y), i.e. one full-batch step.
# train! returns the same array it updates, so copy stores a separate snapshot per epoch.
@time weights = [ copy(train!(w, [(x, y)])) for epoch=1:10 ]

  0.146968 seconds (443.46 k allocations: 17.372 MiB, 5.56% gc time)


10-element Array{Array{Any,1},1}:
 Any[[-0.699684 0.725326 … 0.652901 -1.34426], 4.58656]
 Any[[-0.551026 0.513913 … 0.565058 -1.53221], 8.17581]
 Any[[-0.59464 0.518516 … 0.634074 -1.88846], 11.0472] 
 Any[[-0.590579 0.487521 … 0.663802 -2.1303], 13.3443] 
 Any[[-0.594767 0.473771 … 0.698189 -2.33759], 15.182] 
 Any[[-0.596189 0.463224 … 0.727585 -2.50545], 16.6522]
 Any[[-0.598128 0.457414 … 0.754282 -2.64491], 17.8283]
 Any[[-0.600288 0.454846 … 0.777929 -2.7612], 18.7692] 
 Any[[-0.602933 0.455003 … 0.798796 -2.8591], 19.5219] 
 Any[[-0.606062 0.457353 … 0.817038 -2.94219], 20.1241]

In [11]:
# Loss on the full dataset for each recorded weight snapshot (one per epoch)
losses = [ loss(w,x,y) for w in weights ]

10-element Array{Float64,1}:
 364.412 
 240.165 
 161.982 
 112.161 
  80.3404
  59.9844
  46.9397
  38.5626
  33.1684
  29.6825

In [None]:
# Learning curve of the linear regression model: loss after each epoch
plot(losses,xlabel="Epochs",ylabel="Loss") 

# Load and minibatch MNIST data

In [13]:
# Load the MNIST loader script shipped with Knet and read the train/test arrays
include(Knet.dir("data","mnist.jl"))
xtrn,ytrn,xtst,ytst = mnist()
# Array type for the inputs: KnetArray when gpu() >= 0, a regular Array otherwise
Atype = gpu() >= 0 ? KnetArray{Float32} : Array{Float32}
# dtst and dtrn behave like [ (x1,y1), (x2,y2), ... ] where xi,yi are minibatches of 100
dtst = minibatch(xtst,ytst,100;xtype=Atype);
dtrn = minibatch(xtrn,ytrn,100;xtype=Atype);

[1m[36mINFO: [39m[22m[36mLoading MNIST...
[39m

In [14]:
# dtrn and dtst are iterables of (x,y) minibatches, each minibatch contains 100 instances
length(dtrn),length(dtst)

(600, 100)

In [None]:
# Take a look at the first three test images and labels
# Note: this rebinds the global x and y to the first test minibatch
(x,y) = first(dtst)
# Convert to a regular Array before viewing (x may be a KnetArray)
ax = Array(x)
for i in 1:3
    display(mnistview(ax,i))
end
y[1:3]

# Softmax classification example with MNIST

## Define softmax loss

In [16]:
# Define the softmax model and its loss
# Same as linreg except we need mat() to convert input 4D->2D before matmul
function predict(w,x)
    scores = w[1]*mat(x) .+ w[2]
    return scores
end

# nll is negative log likelihood
function loss(w,x,ygold)
    return nll(predict(w,x),ygold)
end;

In [17]:
# Initialize model
# A 10×784 weight matrix of small Gaussian values and a 10×1 zero bias, both converted to Atype
wsoft=map(Atype, [ 0.1*randn(10,784), zeros(10,1) ]);

In [18]:
# Average loss for a single (x,y) minibatch
loss(wsoft, x, y)

2.5499191f0

In [19]:
# Average loss for the whole test set
nll(wsoft,dtst,predict)

2.4640574f0

In [20]:
# Accuracy for the whole test set
accuracy(wsoft,dtst,predict)

0.1488

## Train softmax model

In [21]:
# Train the softmax model for 60 epochs, keeping a copy of the weights after each epoch
@time softmodels = [ copy(train!(wsoft, dtrn)) for epoch=1:60 ];  # ~17 seconds

 19.008759 seconds (15.15 M allocations: 11.134 GiB, 7.24% gc time)


## Plot softmax learning curve

In [22]:
# Average loss of every saved softmax model over the training and the test minibatches
@time trnsoftloss = [ nll(w,dtrn,predict) for w in softmodels ];  # ~13 seconds
@time tstsoftloss = [ nll(w,dtst,predict) for w in softmodels ];  # ~2 seconds

 12.331779 seconds (4.96 M allocations: 10.709 GiB, 9.06% gc time)
  2.060784 seconds (833.65 k allocations: 1.785 GiB, 9.50% gc time)


In [None]:
# Softmax learning curve: training and test loss per epoch
plot([trnsoftloss tstsoftloss],
     ylim=(.2,.36),
     labels=[:trnsoftloss :tstsoftloss],
     xlabel="Epochs",
     ylabel="Loss")

## Plot softmax error rate

In [24]:
# Error rate (1 - accuracy) of every saved softmax model on the training and the test minibatches
@time trnsofterr = [ 1-accuracy(w,dtrn,predict) for w in softmodels ];  # ~12 seconds
@time tstsofterr = [ 1-accuracy(w,dtst,predict) for w in softmodels ];  # ~2 seconds

 11.313936 seconds (3.51 M allocations: 10.989 GiB, 7.73% gc time)
  1.973695 seconds (592.42 k allocations: 1.832 GiB, 7.79% gc time)


In [None]:
# Softmax error rate per epoch on the training and test sets
plot([trnsofterr tstsofterr],
     ylim=(.06,.10),
     labels=[:trnsofterr :tstsofterr],
     xlabel="Epochs",
     ylabel="Error")

In [26]:
# Cleanup: drop the references to the softmax models, then call Knet's knetgc()
# (presumably so the memory they held can be reclaimed)
softmodels = nothing
wsoft = nothing
knetgc()

# Multilayer perceptron example with MNIST

In [27]:
# We only need to change the predict function!
# Multilayer perceptron: w holds alternating weight and bias entries, one pair per
# layer. Every layer except the last is followed by max.(0,x).
function predict(w,x)
    outlayer = length(w)-1    # index of the last layer's weight entry
    for k in 1:2:length(w)
        weight, bias = w[k], w[k+1]
        x = weight*mat(x) .+ bias
        k < outlayer && (x = max.(0,x))
    end
    return x
end

predict (generic function with 1 method)

In [28]:
# MLP parameters: a 64×784 and a 10×64 weight matrix of small Gaussian values,
# each followed by a zero bias column; everything is converted to Atype
wmlp=map(Atype, [ 0.1*randn(64,784), zeros(64,1), 
                  0.1*randn(10,64),  zeros(10,1) ])
loss(wmlp, x, y)  # average loss for random model should be close to log(10)=2.3026

2.3692908f0

## Train MLP model

In [29]:
# Train the MLP for 60 epochs, keeping a copy of the weights after each epoch
@time mlpmodels = [ copy(train!(wmlp, dtrn)) for epoch=1:60 ]; # ~20 seconds

 21.118867 seconds (22.10 M allocations: 11.377 GiB, 6.54% gc time)


## Compare MLP loss with softmax loss

In [30]:
# Average loss of every saved MLP model; predict is now the MLP definition given above
@time trnmlploss = [ nll(w,dtrn,predict) for w in mlpmodels ]; # ~12 seconds
@time tstmlploss = [ nll(w,dtst,predict) for w in mlpmodels ]; # ~2 seconds

 11.290381 seconds (6.25 M allocations: 10.747 GiB, 6.85% gc time)
  1.998621 seconds (1.05 M allocations: 1.792 GiB, 7.60% gc time)


In [None]:
# Loss per epoch: softmax vs. MLP, training and test
plot([trnsoftloss tstsoftloss trnmlploss tstmlploss],
     ylim=(.0,.36),
     labels=[:trnsoftloss :tstsoftloss :trnmlploss :tstmlploss],
     xlabel="Epochs",
     ylabel="Loss")

## Compare MLP error with softmax error

In [32]:
# Error rate (1 - accuracy) of every saved MLP model on the training and the test minibatches
@time trnmlperr = [ 1-accuracy(w,dtrn,predict) for w in mlpmodels ]; # ~13 seconds
@time tstmlperr = [ 1-accuracy(w,dtst,predict) for w in mlpmodels ]; # ~2 seconds

 10.683939 seconds (4.80 M allocations: 11.027 GiB, 7.23% gc time)
  1.864670 seconds (808.95 k allocations: 1.838 GiB, 7.31% gc time)


In [None]:
# Error rate per epoch: softmax vs. MLP, training and test
plot([trnsofterr tstsofterr trnmlperr tstmlperr],
     ylim=(.0,.10),
     labels=[:trnsofterr :tstsofterr :trnmlperr :tstmlperr],
     xlabel="Epochs",
     ylabel="Error")

In [34]:
# Cleanup: drop the references to the MLP models, then call Knet's knetgc()
# (presumably so the memory they held can be reclaimed)
mlpmodels = nothing
wmlp = nothing
knetgc()

# CNN example with MNIST (The LeNet model)

In [35]:
# We only need to change the predict function!
# LeNet model. w is a list of weight/bias pairs:
#   all but the last four entries - convolution layers (conv4 + bias, relu, pool)
#   the next two entries          - hidden layer applied to mat(x), followed by relu
#   the last two entries          - linear output layer
function predict(w,x) # LeNet model
    nconv = length(w)-4
    for k in 1:2:nconv
        filters, bias = w[k], w[k+1]
        x = pool(relu.(conv4(filters,x) .+ bias))
    end
    for k in nconv+1:2:length(w)-2
        weight, bias = w[k], w[k+1]
        x = relu.(weight*mat(x) .+ bias)
    end
    return w[end-1]*x .+ w[end]
end

predict (generic function with 1 method)

In [36]:
# LeNet parameters: two convolution filter arrays (5×5×1×20 and 5×5×20×50), a 500×800
# hidden layer and a 10×500 output layer, each paired with a zero bias; all converted to Atype
wcnn=map(Atype, [ 0.1*randn(5,5,1,20),  zeros(1,1,20,1), 
                  0.1*randn(5,5,20,50), zeros(1,1,50,1),
                  0.1*randn(500,800),  zeros(500,1),
                  0.1*randn(10,500),  zeros(10,1) ])
# Loss of the untrained CNN on the first test minibatch (x, y)
loss(wcnn, x, y)

3.7416694f0

## Train CNN model

In [37]:
# Train the CNN for 60 epochs, keeping a copy of the weights after each epoch
@time cnnmodels = [ copy(train!(wcnn, dtrn)) for epoch=1:60 ]; # ~127 seconds

133.198253 seconds (52.50 M allocations: 12.672 GiB, 2.17% gc time)


## Compare CNN loss with MLP

In [38]:
# Average loss of every saved CNN model; predict is now the LeNet definition given above
@time trncnnloss = [ nll(w,dtrn,predict) for w in cnnmodels ]; # ~48 seconds
@time tstcnnloss = [ nll(w,dtst,predict) for w in cnnmodels ]; # ~8 seconds

 49.636573 seconds (15.47 M allocations: 11.124 GiB, 2.15% gc time)
  8.291805 seconds (2.59 M allocations: 1.854 GiB, 2.25% gc time)


In [None]:
# Loss per epoch: softmax vs. MLP vs. CNN, training and test
plot([trnsoftloss tstsoftloss trnmlploss tstmlploss trncnnloss tstcnnloss],
     ylim=(.0,.36),
     labels=[:trnsoftloss :tstsoftloss :trnmlploss :tstmlploss :trncnnloss :tstcnnloss],
     xlabel="Epochs",
     ylabel="Loss")

## Compare CNN error with MLP

In [40]:
# Error rate (1 - accuracy) of every saved CNN model on the training and the test minibatches
@time trncnnerr = [ 1-accuracy(w,dtrn,predict) for w in cnnmodels ]; # ~48 seconds
@time tstcnnerr = [ 1-accuracy(w,dtst,predict) for w in cnnmodels ]; # ~8 seconds

 48.654533 seconds (14.02 M allocations: 11.404 GiB, 2.32% gc time)
  8.125216 seconds (2.34 M allocations: 1.901 GiB, 2.45% gc time)


In [None]:
# Error rate per epoch: softmax vs. MLP vs. CNN, training and test
plot([trnsofterr tstsofterr trnmlperr tstmlperr trncnnerr tstcnnerr],
     ylim=(.0,.10),
     labels=[:trnsofterr :tstsofterr :trnmlperr :tstmlperr :trncnnerr :tstcnnerr],
     xlabel="Epochs",
     ylabel="Error")

In [42]:
# Cleanup: drop the references to the CNN models, then call Knet's knetgc()
# (presumably so the memory they held can be reclaimed)
cnnmodels = nothing
wcnn = nothing
knetgc()

# Shakespeare example

In [43]:
# Please see charlm.ipynb for training a character-based language model on "The Complete Works of William Shakespeare"

# VGG example

In [44]:
# Load the VGG example script (it provides the VGG module used in the next cells).
# Path components are passed separately, as in the other Knet.dir calls, so no
# path separator is hard-coded.
include(Knet.dir("examples","vgg","vgg.jl"));

In [None]:
# Fetch a sample cat picture and keep the local path that download returns
caturl = "https://github.com/BVLC/caffe/raw/master/examples/images/cat.jpg"
catfile = download(caturl)
# Load the image so the notebook displays it
load(catfile)

In [47]:
# Classify the downloaded cat image with the VGG example; the output lists the top 5 classes
VGG.main(catfile)

vgg.jl (c) Deniz Yuret, İlker Kesen, 2016. Classifying images with the VGG model from http://www.robots.ox.ac.uk/~vgg/research/very_deep.
opts=(:atype, "KnetArray{Float32}")(:top, 5)(:image, "/dev/shm/dyuret/.julia/v0.6/Knet/data/cat.jpg")(:model, "imagenet-vgg-verydeep-16")


[1m[36mINFO: [39m[22m[36mLoading imagenet-vgg-verydeep-16.mat...
[39m

5×2 Array{Any,2}:
 0.27327    "tabby, tabby cat"        
 0.253185   "Egyptian cat"            
 0.248429   "tiger cat"               
 0.060727   "kit fox, Vulpes macrotis"
 0.0561707  "red fox, Vulpes vulpes"  

  5.173264 seconds (61.56 k allocations: 3.428 MiB, 81.30% gc time)


[1m[36mINFO: [39m[22m[36mClassifying
[39m




In [48]:
# VGG.main is given an image URL here; the log output below shows the image being downloaded first
VGG.main("https://cvimg1.cardekho.com/p/237x156/in/mahindra/torro-25/mahindra-torro-25.jpg")

vgg.jl (c) Deniz Yuret, İlker Kesen, 2016. Classifying images with the VGG model from http://www.robots.ox.ac.uk/~vgg/research/very_deep.
opts=(:atype, "KnetArray{Float32}")(:top, 5)(:image, "https://cvimg1.cardekho.com/p/237x156/in/mahindra/torro-25/mahindra-torro-25.jpg")(:model, "imagenet-vgg-verydeep-16")


[1m[36mINFO: [39m[22m[36mDownloading https://cvimg1.cardekho.com/p/237x156/in/mahindra/torro-25/mahindra-torro-25.jpg
[39m  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13411  100 13411    0     0   6702      0  0:00:02  0:00:02 --:--:--  6705
[1m[36mINFO: [39m[22m[36mClassifying
[39m

5×2 Array{Any,2}:
 0.899679    "garbage truck, dustcart"                                                   
 0.0654105   "moving van"                                                                
 0.0201083   "crane"                                                                     
 0.00583589  "trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi"
 0.00515393  "snowplow, snowplough"                                                      

  0.039080 seconds (1.85 k allocations: 90.297 KiB)

