# Knet RNN example
**TODO**: Use the new RNN interface, add dropout?

In [1]:
# using Pkg; pkg"add Knet"
using Knet; using Knet: rnnforw
True=true # so we can read the python params
include("common/params_lstm.py")

true

In [2]:
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.dependencies()[Base.UUID("1902f260-5fb4-5aff-8c31-6271790ab950")].version)
println("GPU: ", read(`nvidia-smi --query-gpu=name --format=csv,noheader`,String))

OS: Linux
Julia: 1.5.0
Knet: 1.4.0
GPU: GeForce GTX 1060 with Max-Q Design



In [3]:
# define model
function initmodel()
    rnn = RNN(EMBEDSIZE,NUMHIDDEN; rnnType=:gru)
    inputMatrix = KnetArray(xavier(Float32,EMBEDSIZE,MAXFEATURES))
    outputMatrix = KnetArray(xavier(Float32,2,NUMHIDDEN))
    return (rnn,inputMatrix,outputMatrix)
end;

In [4]:
# define loss and its gradient
function predict(model, inputs)
    rnn, inputMatrix, outputMatrix = model # (1,1,W), (X,V), (2,H)
    indices = permutedims(hcat(inputs...)) # (B,T)
    rnnInput = inputMatrix[:,indices] # (X,B,T)
    rnnOutput = rnn(rnnInput) # (H,B,T)
    return outputMatrix * rnnOutput[:,:,end] # (2,H) * (H,B) = (2,B)
end

loss(model,x,y)=nll(predict(model,x),y)

loss (generic function with 1 method)

In [5]:
# load data
include(Knet.dir("data","imdb.jl"))
@time (xtrn,ytrn,xtst,ytst,imdbdict)=imdb(maxlen=MAXLEN,maxval=MAXFEATURES)
for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end

┌ Info: Loading IMDB...
└ @ Main /home/deniz/.julia/dev/Knet/data/imdb.jl:57


  6.557454 seconds (28.96 M allocations: 1.502 GiB, 8.82% gc time)
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}


In [6]:
imdbarray = Array{String}(undef,88584)
for (k,v) in imdbdict; imdbarray[v]=k; end
imdbarray[xtrn[1]]

150-element Array{String,1}:
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 "reilly"
 ⋮
 "straight"
 "story"
 "it's"
 "hidden"
 "on"
 "lynch's"
 "website"
 "and"
 "well"
 "worth"
 "the"
 "search"

In [7]:
# prepare for training
weights = nothing; GC.gc(true); # Reclaim memory from previous run
model = initmodel()
dtrn = minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true)

390-element Knet.Train20.Data{Tuple{Array{Array{Int32,1},1},Array{Int8,1}}}

In [8]:
# cold start
@time adam!((x,y)->loss(model,x,y), dtrn, lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS)

 12.439936 seconds (34.25 M allocations: 1.791 GiB, 3.01% gc time)


In [9]:
# prepare for training
model = nothing; GC.gc(true); # Reclaim memory from previous run
model = initmodel()

(GRU(input=125,hidden=100), K32(125,30000)[-0.008025846⋯], K32(2,100)[-0.21115342⋯])

In [10]:
# 29s
@info("Training...")
@time for epoch in 1:EPOCHS
    @time adam!((x,y)->loss(model,x,y), dtrn, lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS)
end

┌ Info: Training...
└ @ Main In[10]:2


  3.162704 seconds (478.15 k allocations: 83.019 MiB, 0.84% gc time)
  2.900317 seconds (467.45 k allocations: 82.758 MiB, 0.53% gc time)
  2.899690 seconds (467.72 k allocations: 82.782 MiB, 0.54% gc time)
  8.963385 seconds (1.41 M allocations: 248.610 MiB, 0.64% gc time)


In [11]:
@info("Testing...")
@time accuracy(x->predict(model,x), data=minibatch(xtst,ytst,BATCHSIZE))

┌ Info: Testing...
└ @ Main In[11]:1


  1.948550 seconds (3.41 M allocations: 224.290 MiB, 1.86% gc time)


0.5669871794871795