In [None]:
using Knet
import Base: length, size, iterate, eltype, IteratorSize, IteratorEltype, haslength, @propagate_inbounds, repeat, rand, tail
import .Iterators: cycle, Cycle, take, repeat
using Plots; default(fmt=:png,ls=:auto)

STANFORD GLOVE EMBEDDINGS 

In [None]:
f = open("glove.42B.300d.txt")

In [None]:
lines = readlines(f);

In [None]:
embeddingdict = Dict()
embeddingindex = 1
for line in lines
    strword = split(line)
    strname = strword[1]
    embeddingdict[strname] = embeddingindex
    embeddingindex+=1
end

SENTIMENT TREEBANK DATA

In [None]:
txt1 = open("sentences.txt")
lines1 = readlines(txt1)
txt2 = open("labels.txt")
lines2 = readlines(txt2);

In [None]:
all_sentences = (x -> split(x)).(lines1);
all_sentences = [(x -> lowercase(x)).(s) for s in all_sentences]
        
train_sentences2 = all_sentences[1:8534]
test_sentences2 = all_sentences[8535:10744]
val_sentences2 = all_sentences[10745:11844]

train_labels = lines2[1:8534]
train_labels = (x -> parse(Float32, x)).(train_labels)
test_labels = lines2[8535:10744]
val_labels = lines2[10745:11844];

In [None]:
function mapfloat(label)
    if (0 <= label < 0.2); return 1; end;
    #if (0.4 <= label < 0.6); return 2; end;
    #if (0.6 <= label <= 1.0); return 3; end;
    if (0.2 <= label < 0.4); return 2; end;
    if (0.4 <= label < 0.6); return 3; end;
    if (0.6 <= label < 0.8); return 4; end;
    if (0.8 <= label <= 1.0); return 5; end;
end
train_labels = (x->mapfloat(x)).(train_labels);

In [None]:
function mapx(label)
    if label == "very neg" return 1; end;
    if label == "neg" return 2; end;
    if label == "neu" return 3; end;
    if label == "pos" return 4; end;
    if label == "very pos" return 5; end;
end

function map2x(i)
    if i == 1 return "very negative"; end;
    if i == 2 return "negative"; end;
    if i == 3 return "neutral"; end;
    if i == 4 return "positive"; end;
    if i == 5 return "very positive"; end;
end
                    
test_labels = (x->mapx(x)).(test_labels);

In [None]:
vocab = []
for sentence in all_sentences
    for word in sentence
        if !(word in vocab); push!(vocab, word);end
    end
end
push!(vocab, "UNK");

In [None]:
w2i = Dict()
i2w = Dict()
dictindex = 1
for word in vocab
    w2i[word] = dictindex
    i2w[dictindex] = word
    dictindex+=1
end
w2i["UNK"] = 19507
i2w[19507] = "UNK";

In [None]:
all_sentences = [(x -> w2i[x]).(s) for s in all_sentences];

In [None]:
function strings_to_indices(s)
    s = split(s)
    out = []
    for word in s
        word = lowercase(word)
        if !(word in vocab)
            word = w2i["UNK"]
        end
        push!(out, w2i[word])
    end
    hcat(out)
end

strings_to_indices("hey there")

In [None]:
train_sentences = all_sentences[1:8534]
test_sentences = all_sentences[8535:10744]
val_sentences = all_sentences[10745:11844];

In [None]:
maxlength = 56
for sentence in train_sentences
    while length(sentence) != maxlength
        pushfirst!(sentence, w2i["UNK"])
    end
end

In [None]:
maxlength = 56
for sentence in test_sentences
    while length(sentence) != maxlength
        pushfirst!(sentence,w2i["UNK"])
    end
end

CONSTRUCTING THE EMBEDDING MATRIX

In [None]:
embedmatrix = []
no_embeddings = []
count = 0
for word in vocab
    if word in keys(embeddingdict)
        wordvector = (x-> parse(Float32, x)).(split(lines[embeddingdict[word]])[2:301])
        count += 1
    else
        wordvector = xavier(Float32, 300)
        push!(no_embeddings, (word, wordvector))
    end
    push!(embedmatrix, hcat(wordvector))
end

In [None]:
embedmatrix = hcat(embedmatrix...);

In [None]:
size(embedmatrix)

In [None]:
println(count, " out of ", length(vocab), " words are in Stanford Glove Embeddings. The rest is initialized randomly.")

SET UP MODEL

In [None]:
#Hyperparameters of the Model
BATCHSIZE=5               # Number of instances in a minibatch
EMBEDSIZE=300             # Word embedding size
NUMHIDDEN=100             # Hidden layer size
MAXLEN=150                # maximum size of the word sequence, pad shorter sequences, truncate longer ones
VOCABSIZE=length(vocab)   # maximum vocabulary size, keep the most frequent 30K, map the rest to UNK token
NUMCLASS=5                # number of output classes
DROPOUT=0.5               # Dropout rate
LR=0.002                  # Learning rate
BETA_1=0.9                # Adam optimization parameter
BETA_2=0.999              # Adam optimization parameter
EPS=1e-08                 # Adam optimization parameter
MAXLENGTH = 56            # Used for padding

In [None]:
dtrn = minibatch(train_sentences,train_labels,BATCHSIZE;shuffle=true)
dtst = minibatch(test_sentences,test_labels ,BATCHSIZE)
length(dtrn), length(dtst)

In [31]:
d1 = Knet.load("lstm46.jld2")
lstm46 = d1["lstm"]
embeds46 = d1["embeds"]
output46 = d1["model"]

P(KnetArray{Float32,2}(5,100))

In [29]:
#model struct
struct LSTMN
    embeds
    lstm
    output
    pdrop
    Wh
    Wx
    Whh
    memory_tape
    hidden_tape
end

In [42]:
param(xavier(Float32,10,5))

10Ã—5 Param{Array{Float32,2}}:
 -0.296603   -0.0889929  -0.321577   0.316929    0.10842  
 -0.29412    -0.167267   -0.111278  -0.0451668  -0.242608 
 -0.105584    0.116114    0.363484   0.0230823  -0.340264 
 -0.154036    0.303533   -0.232748   0.313824   -0.22249  
 -0.305794   -0.315736    0.225787  -0.0855627   0.127125 
 -0.050081   -0.108686    0.147441  -0.207851    0.0700677
 -0.208448    0.265458    0.270552  -0.116019    0.0671313
  0.0497417  -0.345457    0.28639   -0.280132   -0.0079236
 -0.290121    0.363355   -0.326749   0.351861   -0.274391 
  0.310482    0.0208921  -0.268303  -0.306375    0.184115 

In [43]:
#model constructor
function LSTMN(input::Int, embed::Int, hidden::Int, output::Int; pdrop=0)
    #embeds = param(KnetArray(embedmatrix))
    embeds = embeds46
    #lstm = RNN(embed,hidden)
    lstm = lstm46
    #output = param(output, hidden)
    output = output46
    Wh = param(xavier(1,100))
    Wx = param(xavier(1,300))
    Whh = param(xavier(1,100))
    memory_tape = KnetArray(zeros(Float32, 100,5,52))
    hidden_tape = KnetArray(zeros(Float32, 100,5,52))
    LSTMN(embeds, lstm, output, pdrop, Wh, Wx, Whh, memory_tape, hidden_tape)
end

LSTMN

In [62]:
function (lstmn::LSTMN)(input)
    embed = lstmn.embeds[:, permutedims(hcat(input...))]
    embed = dropout(embed,lstmn.pdrop)
    hidden = lstmn.lstm(embed)
    hidden = dropout(hidden,lstmn.pdrop)
    return lstmn.output * hidden[:,:,end]   
end

#=
    embed = lstmn.embeds[:, permutedims(hcat(input...))]
    embed = dropout(embed, lstmn.pdrop)
    
    #println("embed: ", summary(embed))
    
    
    memory_tape = KnetArray(zeros(Float32,100,5,52))
    hidden_tape = KnetArray(zeros(Float32,100,5,52))
    
    #println("memory_tape", summary(memory_tape))
    #println("hidden_tape", summary(hidden_tape))

    
    lstmn.lstm.h = KnetArray(zeros(Float32,100,5))
    lstmn.lstm.c = KnetArray(zeros(Float32,100,5))
    
    hprev = zeros(100,5)
    xt = embed[:,:,1]
    
    #println("xt:", summary(xt))
    
    lstmn.lstm(xt)
    ht = lstmn.lstm.h[:,:,1]
    ct = lstmn.lstm.c[:,:,1]
    
    #println("ht:", summary(ht))
    #println("ct:", summary(ct))
    
    #println("hidden_tape:", summary(memory_tape))
    #println("memory_tape:", summary(memory_tape))
    #memory_tape[:,:,1] = ct
    #hidden_tape[:,:,1] = ht

    #println("memory_tape:", summary(memory_tape))
    #println("hidden_tape:", summary(memory_tape))

    for t in 2:52
        #println("t: ",t)
        
        h = hidden_tape[:,:,1:t-1]
        c = memory_tape[:,:,1:t-1]
        
        #println("h:", summary(h))
        #println("c:", summary(c))

        
        #println("Wh:", summary(lstmn.Wh))
        dot1 = lstmn.Wh * reshape(h, 100, 5*(t-1))
        #println("dot1: ", summary(dot1))
        dot1 = reshape(dot1, (t-1), 5)
        #println("dot1: ", summary(dot1))
        
         
        xt = embed[:,:,t]
        #println("Wx:", summary(lstmn.Wx))
        #println("xt:", summary(xt))
        dot2 = lstmn.Wx * xt
        #println("dot2: ", summary(dot2))
        
        hprev = hidden_tape[:,:,t-1]
        #println("Whh: ", summary(lstmn.Whh))
        #println("hprev: ", summary(hprev))
        dot3 = lstmn.Whh * hprev
        #println("dot3: ", summary(dot3))

        
        at = tanh.(dot1 .+ dot2 .+ dot3)        
        #println("at: ", summary(at))
        #display(at)
        
        soft = softmax(at; dims=1)
        soft = permutedims(soft)
        soft = reshape(soft, 5*(t-1))
        #println("softmax:", summary(soft))
        
        h = reshape(h, 100, 5*(t-1))
        h = permutedims(h)
        #println("h:", summary(h))
        
        c = reshape(c, 100, 5*(t-1))
        c = permutedims(c)
        #println("c:", summary(c))    
    
                
        new_h = soft .* h
        new_h = reshape(new_h, 5, (t-1), 100)
        new_h = sum(new_h; dims = 2)
        new_h = reshape(new_h, 5,100)
        #println("new_h: ", summary(new_h))
        

        new_c = soft .* c
        new_c = reshape(new_c, 5, (t-1), 100)
        new_c = sum(new_c; dims = 2)
        new_c = reshape(new_c, 5,100)
        #println("new_c: ", summary(new_c))
        
        #println("NEW h!!!!!:", summary(lstmn.lstm.h))
        #println("NEW c!!!!!:", summary(lstmn.lstm.c))
        ht = lstmn.lstm(xt)
        #println("ht:", summary(ht))
        ct = lstmn.lstm.c[:,:,1]
        #println("ct:", summary(ct))
        
        #memory_tape[:,:,t] = ct
        #println("memo done")
        
        #hidden_tape[:,:,t] = ht
        #println("hidden done")
        
    end
    
    hidden = dropout(ht,lstmn.pdrop)
    #println("hidden:", summary(hidden))
    #println("return:", summary(lstmn.output * hidden))
    return lstmn.output * hidden    
end
=#

#model(input,output)
(l::LSTMN)(input,output) = nll(l(input),output)
#model(data)
(l::LSTMN)(d::Knet.Data) = Knet.mean(l(x,y) for (x,y) in d)

In [45]:
model = LSTMN(VOCABSIZE,EMBEDSIZE,NUMHIDDEN,NUMCLASS,pdrop=DROPOUT);

In [46]:
x1, y1 = first(dtrn)
model(x1, y1)

0.9522072f0

In [48]:
accuracy(model, dtst)

0.4601809954751131

In [None]:
    embeds
    lstm
    output
    pdrop
    Wh
    Wx
    Whh

In [61]:
Knet.save("lstmn46.jld2", "embeds", model.embeds, "lstm", model.lstm, "output", model.output, "Wh", model.Wh, "Wx", model.Wx, "Whh", model.Whh)

In [57]:
function userinput(lstmn::LSTMN, sentence)
    input = strings_to_indices(sentence)
    out = lstmn(input)[:,end]
    maxi = 1
    maxout = out[1]
    for i in 1:5
        if out[i] > maxout
            maxi = i
            maxout = out[i]
        end
    end
    map2x(maxi)
end

userinput(model, "I like this film very much")

"negative"

In [None]:
models = []

In [None]:
function fasttrain!(lstmn::LSTMN, dtrn, dtst, max_iters=500)
    a = adam(lstmn, take(cycle(dtrn), max_iters+1);lr=LR,beta1=BETA_1,beta2=BETA_2,eps=EPS)
    progress!(a)
    push!(models, lstmn)
end            

In [None]:
function mytrain!(lstmn::LSTMN, dtrn, dtst,valid=10, max_iters=500)
        
    function pusher(lstmn::LSTMN,dtrn,dtst,trnloss,tstloss)
        push!(trnloss, lstmn(dtrn))
        push!(tstloss, lstmn(dtst))
    end
        
    trnloss = []
    tstloss = []
    
    takeevery(n,itr) = (x for (i,x) in enumerate(itr) if i % n == 1)            
    #progress!(adam(model,repeat(dtrn,EPOCHS);lr=LR,beta1=BETA_1,beta2=BETA_2,eps=EPS))
    #change the optimizer here: sgd, adam, ... @doc Knet.sgd to see other options :
    #a = sgd(sc, take(cycle(dtrn), max_iters+1))        
    a = adam(lstmn, take(cycle(dtrn), max_iters+1);lr=LR,beta1=BETA_1,beta2=BETA_2,eps=EPS)                   
    b = (pusher(lstmn,dtrn,dtst,trnloss,tstloss) for x in takeevery(valid, a))
    progress!(b)    
    return 0:valid:max_iters, trnloss, tstloss
end

In [None]:
function tgraph(lstmn::LSTMN, dtrn, dtst, valid=10, max_iters=500)
    #Training_Accuracy = accuracy(lstmn, dtrn)
    #Test_Accuracy = accuracy(lstmn, dtst)
    #println("Training Accuracy: ", accuracy(lstmn, dtrn))
    #println("Test Accuracy: ", accuracy(lstmn, dtst))
    
    iters, trnloss, tstloss = mytrain!(lstmn,dtrn,dtst,valid,max_iters)
    
    println("Training Accuracy: ", accuracy(lstmn, dtrn))
    println("Test Accuracy: ", accuracy(lstmn, dtst))
    
    push!(models, (model, accuracy))
    
    plot(iters, [trnloss, tstloss], labels=[:trn, :tst], xlabel="iterations", ylabel="loss")
end