# Knet RNN example
**TODO**: Use the new RNN interface, add dropout?

In [1]:
using Pkg
# Install Knet only when it is not already among the installed packages.
if !haskey(Pkg.installed(), "Knet")
    Pkg.add("Knet")
end
using Knet
# The hyperparameter file is Python source; aliasing `True` lets Julia parse it.
True = true # so we can read the python params
include("common/params_lstm.py")
# Kept as the last expression so the notebook displays its return value.
gpu()

0

In [2]:
# Print the OS, Julia version, Knet version and GPU name used for this run.
let gpuquery = `nvidia-smi --query-gpu=name --format=csv,noheader`
    println("OS: ", Sys.KERNEL)
    println("Julia: ", VERSION)
    println("Knet: ", Pkg.installed()["Knet"])
    # The command is only executed here, when its output is read as a String.
    println("GPU: ", read(gpuquery, String))
end

OS: Linux
Julia: 1.0.0
Knet: 1.0.1+
GPU: Tesla K80



In [4]:
# define model
# Build a freshly initialized model.
# Returns `(rnnSpec, weights)` where `weights` is the tuple
# `(rnnWeights, inputMatrix, outputMatrix)` that `predict` unpacks.
# Sizes come from the globals EMBEDSIZE, NUMHIDDEN and MAXFEATURES.
function initmodel()
    # rnninit returns the RNN spec followed by its weights.
    gru = rnninit(EMBEDSIZE, NUMHIDDEN; rnnType=:gru)
    # EMBEDSIZE x MAXFEATURES matrix; `predict` indexes its columns by token id.
    embedding = KnetArray(xavier(Float32, EMBEDSIZE, MAXFEATURES))
    # 2 x NUMHIDDEN matrix applied to the last RNN output in `predict`.
    projection = KnetArray(xavier(Float32, 2, NUMHIDDEN))
    return gru[1], (gru[2], embedding, projection)
end;

In [6]:
# define loss and its gradient
# Compute output scores for one minibatch.
#
# Arguments:
#   weights : tuple `(rnnWeights, inputMatrix, outputMatrix)` as built by `initmodel`
#   inputs  : collection of index vectors, one per batch element; they must all
#             have the same length T because they are concatenated with `hcat`
#   rnnSpec : RNN specification forwarded to `rnnforw`
#
# Returns a (2,B) score matrix, one column per batch element.
function predict(weights, inputs, rnnSpec)
    rnnWeights, inputMatrix, outputMatrix = weights # (1,1,W), (X,V), (2,H)
    # reduce(hcat, ...) builds the same (T,B) matrix as hcat(inputs...) without
    # splatting the whole batch into a varargs call.
    indices = permutedims(reduce(hcat, inputs)) # (B,T)
    rnnInput = inputMatrix[:,indices] # (X,B,T)
    rnnOutput = rnnforw(rnnSpec, rnnWeights, rnnInput)[1] # (H,B,T)
    # Only the last time step is used for the prediction.
    return outputMatrix * rnnOutput[:,:,end] # (2,H) * (H,B) = (2,B)
end

# Loss of a minibatch: `nll` of the scores from `predict` against the labels `y`.
# `w` are the model weights, `x` the inputs and `r` the RNN spec.
function loss(w, x, y, r)
    scores = predict(w, x, r)
    return nll(scores, y)
end
# Gradient function derived from `loss`; it is called with the same arguments
# and its result is passed to `update!` together with the weights.
lossgradient = grad(loss);

In [7]:
# load data
# The included script defines the `imdb` loader used on the next line.
include(Knet.dir("data", "imdb.jl"))
@time (xtrn, ytrn, xtst, ytst, imdbdict) = imdb(; maxlen=MAXLEN, maxval=MAXFEATURES)
# Print a one-line summary (size and type) of each split.
foreach(d -> println(summary(d)), (xtrn, ytrn, xtst, ytst))

┌ Info: Loading IMDB...
└ @ Main /kuacc/users/dyuret/.julia/dev/Knet/data/imdb.jl:57


 12.264265 seconds (30.71 M allocations: 1.557 GiB, 7.87% gc time)
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}


In [8]:
# Invert the word => index dictionary into an index => word lookup table.
# NOTE(review): 88584 is presumably the vocabulary size of imdbdict; confirm.
imdbarray = Vector{String}(undef, 88584)
for (word, index) in imdbdict
    imdbarray[index] = word
end
# Show the first training example as words (displayed as the cell result).
imdbarray[xtrn[1]]

150-element Array{String,1}:
 "have"     
 "had"      
 "it"       
 "was"      
 "an"       
 "excellent"
 "script"   
 "anyway"   
 "and"      
 "an"       
 "excellent"
 "direction"
 "a"        
 ⋮          
 "for"      
 "his"      
 "next"     
 "movie"    
 "i'm"      
 "hoping"   
 "the"      
 "best"     
 "for"      
 "all"      
 "of"       
 "them"     

In [9]:
# prepare for training
# Release the old weights before collecting, to reclaim memory from a previous run.
weights = nothing
Knet.gc()
# Fresh model plus optimizer state created from its weights.
rnnSpec, weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);

In [10]:
# cold start
# One timed pass over the shuffled training data before the measured run below.
@time for (x, y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
    update!(weights, lossgradient(weights, x, y, rnnSpec), optim)
end

 15.862389 seconds (13.36 M allocations: 748.401 MiB, 6.18% gc time)


In [11]:
# prepare for training
# Discard the weights trained during the cold start and reclaim their memory,
# so the timed run below starts from a newly initialized model.
weights = nothing
Knet.gc()
rnnSpec, weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);

In [12]:
# 29s total for all epochs in the recorded run
@info "Training..."
# Outer @time reports the whole run, inner @time reports each epoch.
@time for epoch = 1:EPOCHS
    @time for (x, y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
        # One optimizer step per shuffled minibatch.
        update!(weights, lossgradient(weights, x, y, rnnSpec), optim)
    end
end

┌ Info: Training...
└ @ Main In[12]:2


 10.262852 seconds (388.52 k allocations: 74.902 MiB, 5.52% gc time)
  9.253428 seconds (352.37 k allocations: 72.635 MiB, 6.45% gc time)
  9.297393 seconds (353.10 k allocations: 72.646 MiB, 6.71% gc time)
 28.816111 seconds (1.10 M allocations: 220.229 MiB, 6.20% gc time)


In [13]:
@info "Testing..."
# Time the test-set evaluation; the accuracy is the displayed cell result.
@time let testbatches = minibatch(xtst, ytst, BATCHSIZE)
    # The closure supplies rnnSpec so the callback has the (w, x) form passed to accuracy.
    accuracy(weights, testbatches, (w, x) -> predict(w, x, rnnSpec))
end

┌ Info: Testing...
└ @ Main In[13]:1


  3.205257 seconds (1.80 M allocations: 149.280 MiB, 1.25% gc time)


0.8431089743589744