In [2]:
using Knet
import Base: length, size, iterate, eltype, IteratorSize, IteratorEltype, haslength, @propagate_inbounds, repeat, rand, tail
import .Iterators: cycle, Cycle, take, repeat
using Plots; default(fmt=:png,ls=:auto)

In [3]:
# Open the pretrained GloVe embedding file; the handle `f` is read by `readlines(f)` in the next cell.
f = open("glove.42B.300d.txt")

IOStream(<file glove.42B.300d.txt>)

In [4]:
# Load every GloVe row (token followed by its vector components) into memory,
# then release the file handle, which is not needed once the rows are in `lines`.
lines = readlines(f)
close(f);

In [5]:
# Map each GloVe token to the 1-based row of `lines` that holds its vector.
# If a token occurs on several rows, the last row wins.
embeddingdict = Dict{String,Int}()
for (embeddingindex, line) in enumerate(lines)
    # Only the leading token is needed here; `limit=2` avoids splitting the
    # whole numeric tail of every row.
    strname = first(split(line; limit=2))
    embeddingdict[strname] = embeddingindex
end

PREPARE DATA

In [6]:
# Read the raw sentences and their labels (line i of one file is paired with
# line i of the other by the slicing cells below), closing each file once read.
txt1 = open("sentences.txt")
lines1 = readlines(txt1)
close(txt1)
txt2 = open("labels.txt")
lines2 = readlines(txt2)
close(txt2);

In [7]:
# Tokenise every sentence on whitespace and lowercase each token.
all_sentences = [lowercase.(split(line)) for line in lines1];

In [8]:
# Split the corpus by fixed line ranges: rows 1-8534 train, 8535-10744 test, 10745-11844 validation.
# The same ranges are applied to the label file in the next cell.
train_sentences = all_sentences[1:8534]
test_sentences = all_sentences[8535:10744]
val_sentences = all_sentences[10745:11844];

In [9]:
# Test and validation labels stay as strings ("neg", "pos", "neu", ...);
# training labels are numeric scores and are parsed to Float32.
test_labels = lines2[8535:10744]
val_labels = lines2[10745:11844]
train_labels = parse.(Float32, lines2[1:8534]);

In [10]:
"""
    mapfloat(label)

Map a numeric sentiment score to a class id: `1` for scores in `[0, 0.5)`
(negative) and `2` for scores in `(0.5, 1.0]` (positive). Return `nothing`
for exactly `0.5` and for anything outside `[0, 1]`.
"""
function mapfloat(label)
    if 0 <= label < 0.5
        return 1
    elseif 0.5 < label <= 1.0
        return 2
    else
        return nothing
    end
end
"""
    mapstring(p)

Map a textual sentiment label to a class id: `1` for `"neg"` / `"very neg"`,
`2` for `"pos"` / `"very pos"`. Return `nothing` for any other label.
"""
function mapstring(p)
    if p in ("neg", "very neg")
        return 1
    elseif p in ("pos", "very pos")
        return 2
    else
        return nothing
    end
end

mapstring (generic function with 1 method)

In [11]:
# Empty accumulators for the filtered (non-neutral) examples of each split.
train_sentences2, train_labels2 = [], []
test_sentences2, test_labels2 = [], []
val_sentences2, val_labels2 = [], [];

In [12]:
# Drop neutral examples from every split and convert the remaining labels to
# class ids (1 = negative, 2 = positive). Iterating the paired collections
# directly avoids repeating the split sizes as magic numbers.

# Training labels are numeric scores; exactly 0.5 means neutral.
for (label, sentence) in zip(train_labels, train_sentences)
    if label != 0.5
        push!(train_labels2, mapfloat(label))
        push!(train_sentences2, sentence)
    end
end

# Test labels are strings; "neu" means neutral.
for (label, sentence) in zip(test_labels, test_sentences)
    if label != "neu"
        push!(test_labels2, mapstring(label))
        push!(test_sentences2, sentence)
    end
end

# Validation labels use the same string scheme as the test labels.
for (label, sentence) in zip(val_labels, val_sentences)
    if label != "neu"
        push!(val_labels2, mapstring(label))
        push!(val_sentences2, sentence)
    end
end

In [13]:
# Sizes of the raw train / test / validation splits.
length.((train_sentences, test_sentences, val_sentences))

(8534, 2210, 1100)

In [14]:
# Sizes of the splits after the neutral examples were removed.
length.((train_sentences2, test_sentences2, val_sentences2))

(8328, 1821, 872)

In [15]:
# Fold the validation examples into the training set.
train_sentences2 = vcat(train_sentences2, val_sentences2)
# The labels were accumulated in untyped `[]` vectors. Convert them to concrete
# integer vectors explicitly instead of relying on `hcat(v)[:, 1]` narrowing the
# element type as a side effect; this also fails fast if a label was left unmapped.
train_labels2 = Vector{Int}(vcat(train_labels2, val_labels2))
test_labels2 = Vector{Int}(test_labels2);

In [16]:
# Sizes after the validation examples were merged into the training set.
length.((train_sentences2, test_sentences2, val_sentences2))

(9200, 1821, 872)

In [17]:
# Vocabulary: every distinct (lowercased) token in order of first appearance,
# followed by the special "UNK" entry. `unique` uses hashing, replacing the
# quadratic `word in vocab` scan over a growing vector.
vocab = unique(Iterators.flatten(all_sentences))
push!(vocab, "UNK");

In [18]:
# Word <-> index lookup tables; indices follow the order of `vocab`, so "UNK"
# (pushed last) gets index `length(vocab)` without hard-coding the vocabulary size.
w2i = Dict{String,Int}(word => index for (index, word) in enumerate(vocab))
i2w = Dict{Int,String}(index => word for (index, word) in enumerate(vocab));

In [19]:
# Replace every token by its vocabulary index. Note that `all_sentences` is
# overwritten with index sequences from here on.
train_sentences2 = map(tokens -> map(w -> w2i[w], tokens), train_sentences2);
test_sentences2 = map(tokens -> map(w -> w2i[w], tokens), test_sentences2);
all_sentences = map(tokens -> map(w -> w2i[w], tokens), all_sentences);

In [20]:
"""
    strings_to_indices(s)

Split `s` on whitespace, lowercase each token and return the vocabulary
indices as an `n×1` integer matrix. Tokens missing from `w2i` map to the
index of `"UNK"`.
"""
function strings_to_indices(s)
    unk = w2i["UNK"]
    # `get` falls back to the UNK index directly. The previous code replaced the
    # word by that integer and then looked the integer up as a key, which raised
    # a KeyError for every out-of-vocabulary word.
    out = Int[get(w2i, lowercase(word), unk) for word in split(s)]
    return reshape(out, :, 1)
end

# Quick check on a two-word input.
strings_to_indices("hey there")

2×1 Array{Int64,2}:
 1804
   54

In [21]:
# Left-pad every training sentence with the "UNK" index up to `maxlength` tokens.
maxlength = 56
for sentence in train_sentences2
    # Only pad when the sentence is shorter: the former `while length != maxlength`
    # loop would never terminate for a sentence longer than `maxlength`.
    shortfall = maxlength - length(sentence)
    shortfall > 0 && prepend!(sentence, fill(w2i["UNK"], shortfall))
end

In [22]:
# Left-pad every test sentence with the "UNK" index up to `maxlength` tokens.
maxlength = 56
for sentence in test_sentences2
    # Only pad when the sentence is shorter: the former `while length != maxlength`
    # loop would never terminate for a sentence longer than `maxlength`.
    shortfall = maxlength - length(sentence)
    shortfall > 0 && prepend!(sentence, fill(w2i["UNK"], shortfall))
end

In [23]:
# Show the documentation of the `xavier` initialiser used below for words without a GloVe vector.
@doc xavier

```
xavier(a...)
```

Xavier initialization returns uniform random weights in the range `±sqrt(2 / (fanin + fanout))`.  The `a` arguments are passed to `rand`.  See ([Glorot and Bengio 2010](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)) for a description. [Caffe](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1XavierFiller.html#details) implements this slightly differently. [Lasagne](http://lasagne.readthedocs.org/en/latest/modules/init.html#lasagne.init.GlorotUniform) calls it `GlorotUniform`.


In [24]:
# Build one 300×1 embedding column per vocabulary word, in vocabulary order.
# Words found in GloVe reuse the pretrained vector; the rest get a random
# Xavier-initialised vector and are recorded in `no_embeddings`.
# NOTE(review): the global `count` shadows `Base.count` for the rest of the
# session; the name is kept because a later cell prints it.
embedmatrix = Matrix{Float32}[]
no_embeddings = Tuple{String,Vector{Float32}}[]
count = 0
for word in vocab
    # A single dictionary lookup replaces `word in keys(...)` followed by indexing.
    row = get(embeddingdict, word, nothing)
    if row === nothing
        wordvector = xavier(Float32, 300)
        push!(no_embeddings, (word, wordvector))
    else
        # Field 1 of a GloVe row is the token; fields 2:301 are the 300 components.
        wordvector = parse.(Float32, split(lines[row])[2:301])
        count += 1
    end
    push!(embedmatrix, hcat(wordvector))
end

In [25]:
# Concatenate the per-word columns into a single matrix (one column per vocabulary word).
# NOTE(review): this splats one argument per vocabulary word into `hcat`.
embedmatrix = hcat(embedmatrix...);

In [26]:
# Embedding dimension × vocabulary size.
size(embedmatrix)

(300, 19507)

In [27]:
# Report how many vocabulary words were found in GloVe (`count` is set by the embedding loop above).
println(count, " out of ", length(vocab), " words are in Stanford Glove Embeddings. The rest is initialized randomly.")

18593 out of 19507 words are in Stanford Glove Embeddings. The rest is initialized randomly.


In [28]:
#Hyperparameters of the Model
BATCHSIZE=5               # Number of instances in a minibatch (the forward pass and predict() also hard-code 5)
EMBEDSIZE=300             # Word embedding size (the embedding matrix built above has 300 rows)
NUMHIDDEN=100             # Hidden layer size (the forward pass also hard-codes 100)
MAXLEN=150                # NOTE(review): not referenced elsewhere in the visible notebook; sequences are padded to 56 instead
VOCABSIZE=length(vocab)   # Vocabulary size including the trailing "UNK" entry (no frequency cut-off is applied here)
NUMCLASS=2                # number of output classes
DROPOUT=0.5               # Dropout rate
LR=0.002                  # Learning rate
BETA_1=0.9                # Adam optimization parameter
BETA_2=0.999              # Adam optimization parameter
EPS=1e-08                 # Adam optimization parameter
MAXLENGTH = 56            # Padded sequence length; NOTE(review): the padding code reads the lowercase global `maxlength`, not this constant

56

In [29]:
# Minibatch iterators: shuffled for training, in order for testing.
dtrn = minibatch(train_sentences2, train_labels2, BATCHSIZE; shuffle=true)
dtst = minibatch(test_sentences2, test_labels2, BATCHSIZE)
# Number of minibatches in each iterator.
map(length, (dtrn, dtst))

(1840, 364)

In [30]:
#model struct
# Holds all state of the LSTMN sentiment classifier. Fields are untyped and the
# struct is mutable because pretrained weights are assigned field by field later.
mutable struct LSTMN
    embeds        # embedding table parameter; indexed by word ids in the forward pass
    lstm          # Knet RNN layer created as RNN(embed, hidden)
    output        # output projection parameter applied to the final hidden state
    pdrop         # dropout probability passed to `dropout`
    Wh            # attention weight row applied to the stacked previous hidden states
    Wx            # attention weight row applied to the current input embedding
    Whh           # attention weight row applied to the previous attended hidden state
    memory_tape   # initialised to [] by the constructor; the forward pass uses a local of the same name instead
    hidden_tape   # initialised to [] by the constructor; the forward pass uses a local of the same name instead
end

In [31]:
#model constructor
# Build an LSTMN whose embedding table is initialised from the global `embedmatrix`.
#   input  - vocabulary size; accepted for interface symmetry but not read
#            (the table size comes from `embedmatrix`)
#   embed  - embedding dimension (input size of the RNN and width of Wx)
#   hidden - hidden size (RNN hidden size and width of Wh / Whh)
#   output - number of output classes
#   pdrop  - dropout probability stored on the model
# The attention weights are sized from `embed` / `hidden` rather than literal
# 300 / 100. NOTE(review): the forward pass still hard-codes 100 and 5, so only
# hidden == 100 works end to end.
function LSTMN(input::Int, embed::Int, hidden::Int, output::Int; pdrop=0)
    embeds = param(KnetArray(embedmatrix))
    lstm = RNN(embed, hidden)
    outputlayer = param(output, hidden)
    Wh = param(KnetArray(xavier(Float32, 1, hidden)))
    Wx = param(KnetArray(xavier(Float32, 1, embed)))
    Whh = param(KnetArray(xavier(Float32, 1, hidden)))
    memory_tape = []
    hidden_tape = []
    LSTMN(embeds, lstm, outputlayer, pdrop, Wh, Wx, Whh, memory_tape, hidden_tape)
end

LSTMN

In [32]:
# Forward pass: maps one minibatch of padded index sequences to class scores.
# `input` is a collection of equal-length index vectors, one per example.
# The hidden size (100), batch size (5) and number of time steps (56) are
# hard-coded below; they match NUMHIDDEN, BATCHSIZE and MAXLENGTH in this notebook,
# and the reshapes fail for any other batch size.
function (lstmn::LSTMN)(input)
    # hcat stacks the sequences as columns and permutedims swaps the two axes;
    # presumably `embed` ends up as (embedding, batch, time) — TODO confirm.
    embed = lstmn.embeds[:, permutedims(hcat(input...))]
    embed = dropout(embed,lstmn.pdrop)
    
    # Per-call history of hidden and cell states (the struct fields of the same name are not used).
    memory_tape = []
    hidden_tape = []
 
    xt = embed[:,:,1]
    
    # Reset the recurrent state to zeros for this batch.
    lstmn.lstm.h = KnetArray(zeros(Float32,100,5))
    lstmn.lstm.c = KnetArray(zeros(Float32,100,5))
    # NOTE(review): `ct` is overwritten before it is read; `ht` only serves to seed `hprev` with the zero state.
    ht = lstmn.lstm.h
    ct = lstmn.lstm.c[:,:,1]
    hprev = ht
    
    # First time step: plain LSTM step from the zero state.
    xt = embed[:,:,1]
    ht = lstmn.lstm(xt)
    ct = lstmn.lstm.c[:,:,1]
    
    push!(hidden_tape, ht)
    push!(memory_tape, ct)
    
    for t in 2:56
        xt = embed[:,:,t]
        # NOTE(review): these two values are overwritten below before being read.
        ht = lstmn.lstm.h 
        ct = lstmn.lstm.c 
        
        # Stack all previous hidden / cell states side by side (one block of columns per earlier step).
        h = hcat(hidden_tape[1:t-1]...)
        c = hcat(memory_tape[1:t-1]...)
    
        # Attention score contribution of every stored hidden state.
        # NOTE(review): `h` is built from per-step blocks, so consecutive entries of
        # `dot1` presumably run over the batch within one step; reshaping to (t-1, 5)
        # would then put entries from different examples into the same column, so the
        # softmax over dims=1 may not normalise per example — confirm the intended layout.
        dot1 = lstmn.Wh * h
        dot1 = reshape(dot1, (t-1), 5)
        
        # Score contribution of the current input.
        dot2 = lstmn.Wx * xt        

        # Score contribution of the previous attended hidden state.
        dot3 = lstmn.Whh * hprev
        
        at = tanh.(dot1 .+ dot2 .+ dot3) 
        
        soft = softmax(at; dims=1)
        # Flatten back so each weight lines up with the matching column of `h` / `c`.
        soft = reshape(soft, 5*(t-1))
        
        # Weighted sum of the previous hidden states over the step axis.
        new_h = soft .* (h')
        new_h = reshape(new_h, 5, (t-1), 100)
        new_h = sum(new_h; dims = 2)
        new_h = reshape(new_h, 5,100)
        new_h = new_h'
                
        hprev = new_h
        
        # Same weighted sum for the cell states.
        new_c = soft .* (c')
        new_c = reshape(new_c, 5, (t-1), 100)
        new_c = sum(new_c; dims = 2)
        new_c = reshape(new_c, 5,100)
        new_c = new_c'
    
        # Feed the attended states to the LSTM as its previous state for this step.
        lstmn.lstm.h = reshape(new_h, 100, 5, 1)
        lstmn.lstm.c = reshape(new_c, 100, 5, 1)
        
        ht = lstmn.lstm(xt)
        ct = lstmn.lstm.c[:,:,1]
        
        push!(hidden_tape, ht)
        push!(memory_tape, ct)
    end

    # Classify from the hidden state of the last time step.
    hidden = dropout(ht,lstmn.pdrop)
    return lstmn.output * hidden    
end

#model(input,output): negative log-likelihood of the gold labels for one minibatch
function (l::LSTMN)(input, output)
    scores = l(input)
    return nll(scores, output)
end

#model(data): mean minibatch loss over a Knet data iterator
function (l::LSTMN)(d::Knet.Data)
    return Knet.mean(l(x, y) for (x, y) in d)
end

In [33]:
# Instantiate the classifier with the hyperparameters defined above.
model = LSTMN(VOCABSIZE,EMBEDSIZE,NUMHIDDEN,NUMCLASS,pdrop=DROPOUT);

In [34]:
# Sanity check: loss of the untrained model on the first training minibatch.
x1, y1 = first(dtrn)
model(x1,y1)

0.6965291f0

In [35]:
# Differentiate the loss on that minibatch and print the gradient type of every
# model parameter, to confirm each one receives a gradient.
J1 = @diff model(x1,y1)
for p in params(model)
    @show typeof(grad(J1, p))
end

typeof(grad(J1, p)) = KnetArray{Float32,2}
typeof(grad(J1, p)) = KnetArray{Float32,3}
typeof(grad(J1, p)) = KnetArray{Float32,2}
typeof(grad(J1, p)) = KnetArray{Float32,2}
typeof(grad(J1, p)) = KnetArray{Float32,2}
typeof(grad(J1, p)) = KnetArray{Float32,2}


In [36]:
# Every call to `fasttrain!` appends the model it trained to this list.
models = []

# Run `max_iters + 1` Adam updates on `lstmn`, cycling through `dtrn` as often as
# needed and showing a progress bar. `dtst` is accepted but not used.
# Pushes `lstmn` itself (not a copy) onto `models` and returns `models`.
function fasttrain!(lstmn::LSTMN, dtrn, dtst, max_iters=500)
    batches = take(cycle(dtrn), max_iters + 1)
    optimizer = adam(lstmn, batches; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS)
    progress!(optimizer)
    push!(models, lstmn)
end            

fasttrain! (generic function with 2 methods)

In [37]:
# Train `lstmn` in short bursts until its accuracy on `dtst` reaches `until`.
# Bursts are 30 updates long while accuracy is below 0.809 and 3 updates long
# afterwards, so the target is not overshot by much. Prints the accuracy after
# every burst and returns the final accuracy.
# Accuracy is measured on the `lstmn` argument; the previous code evaluated the
# global `model`, which is only correct when the two happen to be the same object.
# NOTE(review): there is no upper bound on the number of bursts.
function trainuntil!(lstmn::LSTMN, dtrn, dtst, until=0.85)
    acc = accuracy(lstmn, dtst)
    while acc < until
        iter = acc < 0.809 ? 30 : 3
        fasttrain!(lstmn, dtrn, dtst, iter)
        acc = accuracy(lstmn, dtst)
        println(acc)
    end
    return acc
end

trainuntil! (generic function with 2 methods)

In [38]:
# Train `lstmn` for `max_iter` bursts of 10 updates, remember the parameter
# values that gave the best accuracy on `dtst`, restore them at the end and
# return `lstmn`.
# Two defects of the previous version are fixed:
#   * accuracy was measured on the global `model` instead of the `lstmn` argument;
#   * `best_model = model` only stored another reference to the model being
#     trained, so the "best" model was always just the latest state.
# The accuracy before training counts as the initial best, so a run that never
# improves leaves the weights as they were.
function besttrain!(lstmn::LSTMN, dtrn, dtst, max_iter=100)
    # Copy the current value of every trainable parameter.
    snapshot() = [copy(value(p)) for p in params(lstmn)]

    best_acc = accuracy(lstmn, dtst)
    best_weights = snapshot()
    i = 0
    while i < max_iter
        fasttrain!(lstmn, dtrn, dtst, 10)
        acc = accuracy(lstmn, dtst)
        if acc > best_acc
            best_acc = acc
            best_weights = snapshot()
        end
        println("current accuracy: ", acc, ", best accuracy: ", best_acc)
        i += 1
    end
    # Write the best parameter values back into the model in place.
    for (p, w) in zip(params(lstmn), best_weights)
        copyto!(value(p), w)
    end
    return lstmn
end

besttrain! (generic function with 2 methods)

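Make sure you download the pretrained weights from my GitHub before running the next cell, and place the file in the working directory under the name used below (`lstmn-knet.jld2?raw=true`), or adjust the path in the `Knet.load` call:
https://github.com/egeersu/LSTMN/tree/master/weights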

In [39]:
# Load the pretrained weights as a dictionary keyed by LSTMN field name.
# The file name includes the literal "?raw=true" suffix.
bestd = Knet.load("lstmn-knet.jld2?raw=true")

Dict{String,Any} with 6 entries:
  "lstm"   => LSTM(input=300,hidden=100)
  "Wh"     => P(KnetArray{Float32,2}(1,100))
  "output" => P(KnetArray{Float32,2}(2,100))
  "embeds" => P(KnetArray{Float32,2}(300,19507))
  "Whh"    => P(KnetArray{Float32,2}(1,100))
  "Wx"     => P(KnetArray{Float32,2}(1,300))

In [40]:
# Overwrite the freshly initialised model fields with the pretrained ones.
# `pdrop` and the tape fields are not part of the loaded dictionary and stay as constructed.
model.lstm = bestd["lstm"]
model.Wh = bestd["Wh"]
model.output = bestd["output"]
model.embeds = bestd["embeds"]
model.Whh = bestd["Whh"]
model.Wx = bestd["Wx"]

P(KnetArray{Float32,2}(1,300))

In [42]:
"""
    predict(input)

Classify the free-text string `input` with the global `model` and return
`"negative"` or `"positive"`.

The text is lowercased and split on whitespace; words missing from `w2i` map
to the `"UNK"` index. Inputs longer than `maxlength` words are cut to their
first `maxlength` words, and shorter ones are left-padded with the `"UNK"`
index. The previous `while length != maxlength` padding loop never terminated
for longer inputs.
"""
function predict(input)
    unk = w2i["UNK"]
    words = split(lowercase(input))
    if length(words) > maxlength
        words = words[1:maxlength]
    end
    # Dictionary lookup with a default replaces the linear `in vocab` scan per word.
    indices = Int[get(w2i, word, unk) for word in words]
    prepend!(indices, fill(unk, maxlength - length(indices)))

    # The forward pass is written for minibatches of exactly 5 sequences, so the
    # real input is followed by 4 filler rows of index 1; only the first column
    # of the scores is read.
    toybatch = [indices]
    for _ in 1:4
        push!(toybatch, fill(1, maxlength))
    end
    r = argmax(Array(model(toybatch)), dims=1)[1][1]
    return ["negative", "positive"][r]
end

predict (generic function with 1 method)

The LSTMN seems to take "but" into account

In [43]:
# Praise first, criticism after "but".
userinput = "the acting was good but the script and everything else was terrible"
predict(userinput)

"negative"

In [44]:
# Criticism first, praise after "but".
userinput = "looks bad at first but it is actually good"
predict(userinput)

"positive"

The LSTMN does not 'understand' what "not" does

In [45]:
# Baseline positive sentence for the negation comparison in the next cell.
userinput = "this is a good film"
predict(userinput)

"positive"

In [46]:
# Same sentence with "not" inserted.
userinput = "this is not a good film"
predict(userinput)

"positive"

In [47]:
# Baseline negative sentence for the negation comparison in the next cell.
userinput = "this film was a disaster"
predict(userinput)

"negative"

In [48]:
# Same sentence with "not" inserted.
userinput = "this film was not a disaster"
predict(userinput)

"negative"

Here you can try your own inputs!

In [62]:
# Read one line of free text from standard input and classify it.
userinput = readline(stdin)
predict(userinput)

stdin> David Benioff and D.B. Weiss have proven themselves to be woefully incompetent writers when they have no source material (i.e. the books) to fall back on. 


"negative"