# Knet RNN example

### Training RNN (GRU) on IMDB - Natural Language Processing (Sentiment Analysis)

#### Comparison with other frameworks from https://github.com/ilkarman/DeepLearningFrameworks

| DL Library                          | Test Accuracy (%) | Training Time (s) | Using CuDNN? |
| ----------------------------------- | ----------------- | ----------------- | ------------ |
| [MXNet](https://github.com/ilkarman/DeepLearningFrameworks/blob/master/MXNet_RNN.ipynb)            | 86                | 29                | Yes          |
| [Knet(Julia)](https://github.com/ilkarman/DeepLearningFrameworks/blob/master/Knet_RNN.ipynb)       | 85                | 29                | Yes          |
| [Tensorflow](https://github.com/ilkarman/DeepLearningFrameworks/blob/master/Tensorflow_RNN.ipynb)  | 86                | 30                | Yes          |
| [Pytorch](https://github.com/ilkarman/DeepLearningFrameworks/blob/master/PyTorch_RNN.ipynb)        | 86                | 31                | Yes          |
| [CNTK](https://github.com/ilkarman/DeepLearningFrameworks/blob/master/CNTK_RNN.ipynb)              | 85                | 32                | Yes          |
| [Keras(TF)](https://github.com/ilkarman/DeepLearningFrameworks/blob/master/Keras_TF_RNN.ipynb)     | 86                | 35                | Yes          |
| [Keras(CNTK)](https://github.com/ilkarman/DeepLearningFrameworks/blob/master/Keras_CNTK_RNN.ipynb) | 86                | 86                | Not Available |


#### After installing and starting Julia, run the following to install the required packages:
```
julia> Pkg.init(); for p in ("CUDAdrv","IJulia","Knet","PyCall","JLD2"); Pkg.add(p); end
```

In [1]:
using Knet
# Training schedule
EPOCHS = 3            # number of passes made over the training set
BATCHSIZE = 64        # number of reviews per minibatch

# Model dimensions
EMBEDSIZE = 125       # first dimension of the embedding matrix; RNN input size
NUMHIDDEN = 100       # RNN hidden size
DROPOUT = 0.2         # NOTE(review): not referenced by any cell visible in this notebook

# Adam optimizer settings (passed as lr / beta1 / beta2 / eps)
LR = 1e-3
BETA_1 = 0.9
BETA_2 = 0.999
EPS = 1e-8

# Data preprocessing (passed to the IMDB loader as maxlen / maxval)
MAXLEN = 150          # maximum size of the word sequence
MAXFEATURES = 30000;  # vocabulary size

In [2]:
# Report the software and hardware this notebook was run on.
println("OS: $(Sys.KERNEL)")
println("Julia: $VERSION")
println("Knet: $(Pkg.installed("Knet"))")
# The raw `nvidia-smi` output is printed unmodified, so it may span several
# lines and carries its own trailing newline.
println("GPU: ", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))

OS: Linux
Julia: 0.6.1
Knet: 0.8.5+
GPU: Tesla K80
Tesla K80



In [3]:
# define model
"""
    initmodel()

Create a freshly initialized model and return `(rnnSpec, weights)`.

`rnnSpec` and the first element of `weights` are the two values returned by
`rnninit` for a GRU with input size `EMBEDSIZE` and hidden size `NUMHIDDEN`.
The remaining two elements of `weights` are Xavier-initialized `Float32`
`KnetArray`s: an `EMBEDSIZE x MAXFEATURES` input (embedding) matrix and a
`2 x NUMHIDDEN` output matrix.
"""
function initmodel()
    spec, gruparams = rnninit(EMBEDSIZE, NUMHIDDEN; rnnType=:gru)
    embedding = KnetArray(xavier(Float32, EMBEDSIZE, MAXFEATURES))
    classifier = KnetArray(xavier(Float32, 2, NUMHIDDEN))
    params = (gruparams, embedding, classifier)
    return spec, params
end;

In [4]:
# define loss and its gradient
"""
    predict(weights, inputs, rnnSpec)

Compute class scores for a minibatch of token-index sequences.

`weights` is the `(rnnWeights, inputMatrix, outputMatrix)` tuple produced by
`initmodel`, and `inputs` is a collection of equally long index vectors
(`hcat` requires matching lengths). Only the RNN output at the final time
step is projected through the output matrix. The shape annotations below use
X = embedding size, V = vocabulary size, H = hidden size, B = batch size and
T = sequence length.
"""
function predict(weights, inputs, rnnSpec)
    gruparams, embedding, classifier = weights          # (1,1,W), (X,V), (2,H)
    tokens = hcat(inputs...)'                           # (B,T)
    embedded = embedding[:, tokens]                     # (X,B,T)
    hidden = rnnforw(rnnSpec, gruparams, embedded)[1]   # (H,B,T)
    laststep = hidden[:, :, end]                        # (H,B)
    return classifier * laststep                        # (2,H) * (H,B) = (2,B)
end

# Loss of one minibatch: `nll` applied to the scores from `predict` and the labels `y`.
#   w: model weights, x: input sequences, y: labels, r: RNN spec
function loss(w, x, y, r)
    scores = predict(w, x, r)
    return nll(scores, y)
end

# Gradient function built from `loss`; it is called with the same arguments
# and its result is what the training loop passes to `update!`.
lossgradient = grad(loss);

In [5]:
# load data
# Pull in the IMDB loader script that ships with Knet.
include(Knet.dir("data", "imdb.jl"))
# Load the train/test splits and the vocabulary dictionary, timing the call.
@time (xtrn, ytrn, xtst, ytst, imdbdict) = imdb(maxlen=MAXLEN, maxval=MAXFEATURES)
# Print a one-line summary of each of the four data arrays.
foreach(d -> println(summary(d)), (xtrn, ytrn, xtst, ytst))

[1m[36mINFO: [39m[22m[36mLoading IMDB...
[39m

  9.007620 seconds (15.77 M allocations: 826.516 MiB, 3.70% gc time)
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}


In [6]:
# Look at some sample data
# Invert the word => index dictionary so that `vocab[index]` gives the word.
vocab = Array{String}(length(imdbdict))
for (word, index) in imdbdict
    vocab[index] = word
end
# Print the label and the last (up to) 10 words of up to 10 randomly chosen
# test reviews. The sample range is derived from the actual size of the test
# set instead of a hard-coded 25000, and both slices are clamped so that a
# small test set or a short review cannot raise a BoundsError.
for i in randperm(length(xtst))[1:min(10, end)]
    review = xtst[i]
    println((ytst[i], vocab[review[max(1, end-9):end]]...))
end

(1, "this", "movie", "more", "that", "the", "ones", "who", "couldn't", "c'mon", "spastic")
(2, "and", "shows", "about", "stupid", "doctors", "this", "is", "the", "deal", "oz")
(1, "dropped", "about", "30", "points", "from", "watching", "this", "insert", "expletive", "here")
(2, "a", "highly", "entertaining", "and", "suspenseful", "way", "it", "says", "it", "brilliantly")
(2, "old", "generations", "my", "vote", "is", "nine", "br", "br", "title", "brazil")
(2, "be", "no", "question", "that", "alan", "rickman", "is", "a", "major", "star")
(1, "i", "have", "seen", "worse", "from", "big", "studios", "with", "vast", "budgets")
(1, "beyond", "to", "mean", "anything", "really", "bad", "a", "spastic", "for", "boorman")
(1, "the", "end", "br", "br", "i'm", "off", "to", "have", "a", "salad")
(2, "without", "reservation", "as", "one", "of", "the", "finest", "films", "ever", "made")


In [7]:
# prepare for training
# Drop the reference to any weights left over from an earlier run of this
# cell before asking Knet to collect garbage.
weights = nothing
knetgc()  # Reclaim memory from previous run
# Fresh model parameters and RNN spec.
rnnSpec, weights = initmodel()
# Adam optimizer state for `weights`, configured from the constants above.
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);

In [8]:
# force precompile (optional)
info("Precompile...")
# Run one gradient computation on the first minibatch ahead of the timed
# training loop; the result is discarded.
x, y = first(minibatch(xtrn, ytrn, BATCHSIZE))
@time lossgradient(weights, x, y, rnnSpec);

[1m[36mINFO: [39m[22m[36mPrecompile...
[39m

  3.176993 seconds (1.50 M allocations: 80.635 MiB, 0.76% gc time)


In [9]:
info("Training...")
# The outer @time reports the total training time; the inner one reports
# the time of each epoch.
@time for epoch in 1:EPOCHS
    @time for (x, y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
        # Compute the gradients for this minibatch and apply them in place.
        update!(weights, lossgradient(weights, x, y, rnnSpec), optim)
    end
end

[1m[36mINFO: [39m[22m[36mTraining...
[39m

 10.149041 seconds (673.79 k allocations: 61.959 MiB, 4.16% gc time)
  9.033643 seconds (353.06 k allocations: 44.302 MiB, 4.46% gc time)
  9.141217 seconds (361.52 k allocations: 44.431 MiB, 5.48% gc time)
 28.325220 seconds (1.39 M allocations: 150.928 MiB, 4.68% gc time)


In [10]:
info("Testing...")
# Accuracy over the test minibatches; the closure supplies `rnnSpec`, which
# `accuracy` does not pass to the prediction function itself.
@time accuracy(
    weights,
    minibatch(xtst, ytst, BATCHSIZE),
    (params, batch) -> predict(params, batch, rnnSpec)
)

[1m[36mINFO: [39m[22m[36mTesting...
[39m

  3.300192 seconds (663.92 k allocations: 66.624 MiB, 3.60% gc time)


0.853125