In [1]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torch import optim
import numpy as np

import random
import os

seed = 10

random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

In [2]:
TEST_FILE = "data/test.txt"
TRAIN_FILE = "data/train.txt"
WHOLE_FILE = "data/whole.txt"
F_VOCAB_FILE = "data/vocab.f.txt"
Q_VOCAB_FILE = "data/vocab.q.txt"

In [3]:
class Options:
    def __init__(self):
        self.rnn_size = 50
        self.dropout = False
        self.init_weight = 0.08
        self.decay_rate = 0.985
        self.learning_rate = 0.01
        self.plot_every = 10
        self.print_every = 50
        self.grad_clip = 5
        self.dropout = 0
        self.dropoutrec = 0
        self.learning_rate_decay = 0.985
        self.learning_rate_decay_after = 5
        
        
opt = Options()

In [4]:
class LSTM(nn.Module):
    def __init__(self, opt):
        super(LSTM, self).__init__()
        self.opt = opt
        self.i2h = nn.Linear(opt.rnn_size, 4 * opt.rnn_size)
        self.h2h = nn.Linear(opt.rnn_size, 4 * opt.rnn_size)
        if opt.dropoutrec > 0:
            self.dropout = nn.Dropout(opt.droputrec)
            
    def forward(self, x, prev_c, prev_h):
        gates = self.i2h(x) + self.h2h(prev_h)
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
        ingate = torch.sigmoid(ingate)
        forgetgate = torch.sigmoid(forgetgate)
        cellgate = torch.tanh(cellgate)
        outgate = torch.sigmoid(outgate)
        if self.opt.dropoutrec > 0:
            cellgate = self.dropout(cellgate)
        cy = (forgetgate * prev_c) + (ingate * cellgate)
        hy = outgate * torch.tanh(cy)  # n_b x hidden_dim
        return cy, hy


In [5]:
class Encoder(nn.Module):
    def __init__(self, opt, input_size):
        super(Encoder, self).__init__()
        self.opt = opt
        self.hidden_size = opt.rnn_size
        self.embedding = nn.Embedding(input_size, self.hidden_size)
        self.lstm = LSTM(self.opt)
        if opt.dropout > 0:
            self.dropout = nn.Dropout(opt.dropout)
        self.__initParameters()

    def __initParameters(self):
        for name, param in self.named_parameters():
            if param.requires_grad:
                init.uniform_(param, -opt.init_weight, opt.init_weight)
                
    def forward(self, input_src, prev_c, prev_h):
        src_emb = self.embedding(input_src) # batch_size x src_length x emb_size
        if self.opt.dropout > 0:
            src_emb = self.dropout(src_emb)
        prev_cy, prev_hy = self.lstm(src_emb, prev_c, prev_h)
        return prev_cy, prev_hy

In [6]:
class Decoder(nn.Module):
    def __init__(self, opt, output_size):
        super(Decoder, self).__init__()
        self.opt = opt
        self.hidden_size = opt.rnn_size

        self.embedding = nn.Embedding(output_size, self.hidden_size)
        self.lstm = LSTM(self.opt)
        self.linear = nn.Linear(self.hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        if opt.dropout > 0:
            self.dropout = nn.Dropout(opt.dropout)
        self.__initParameters()

    def __initParameters(self):
        for name, param in self.named_parameters():
            if param.requires_grad:
                init.uniform_(param, -opt.init_weight, opt.init_weight)
                
    def forward(self, input, prev_c, prev_h):
        output = self.embedding(input)
        if self.opt.dropout > 0:
            output = self.dropout(output)
        next_c, next_h = self.lstm(output, prev_c, prev_h)
        if self.opt.dropout > 0:
            next_h = self.dropout(next_h)
        h2y = self.linear(next_h)
        pred = self.softmax(h2y)
        return pred, next_c, next_h

In [7]:
def read_data(fh):
    for line in fh:
        sentence, lf = line.strip().split("\t")
        sentence = sentence.split()
        lf = lf.split()
        yield sentence, lf

def read_vocab(filename):
    t2i = {"<s>": 0, "</s>":1, "UNK": 2}
    with open(filename) as target:
        for line in target:
            token = line.strip().split()[0]
            if token not in t2i:
                t2i[token] = len(t2i)
    return t2i

def is_equal(gold, predictions):
    total_correct = 0.0
    if len(gold) == len(predictions):
        equal = True
        for g, p in zip(gold, predictions):
            if g != p:
                equal = False
        return equal
    return False

In [8]:
def preprare_data(file_name):
    shuffledData = None
    with open(TRAIN_FILE, 'r') as train:
        shuffledData = list(read_data(train))
        random.shuffle(shuffledData)
    sentence_index_tensors = []
    form_index_tensors = []
    for sentence in shuffledData:
        text_tensor = torch.zeros((1, len(sentence[0]) + 2), dtype=torch.long)
        text_tensor[0][0] = w2i["<s>"]
        for idx, word in enumerate(sentence[0]):
            word_index = w2i[word] if word in w2i else w2i["UNK"]
            text_tensor[0][idx+1] = word_index
        text_tensor[0][-1] = w2i["</s>"]
        sentence_index_tensors.append(text_tensor)
        form_tensor = torch.zeros((1, len(sentence[1]) + 2), dtype=torch.long)
        form_tensor[0][0] = lf2i["<s>"]
        for idx, form in enumerate(sentence[1]):
            form_index = lf2i[form] if form in lf2i else lf2i["UNK"]
            form_tensor[0][idx+1] = form_index
        form_tensor[0][-1] = lf2i["</s>"]
        form_index_tensors.append(form_tensor)
    return shuffledData, sentence_index_tensors, form_index_tensors

In [9]:
def train(opt, criterion, encoder_optimizer, decoder_optimizer, encoder, decoder, s1, f1):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    c = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)
    h = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)
    for i in range(s1.size(1)):
        c, h = encoder(s1[:, i], c, h)

    #for dec_in in f1:
    loss = 0
    for i in range(f1.size(1)-1):
        pred, c, h = decoder(f1[:, i], c, h)
        loss += criterion(pred, f1[:, i+1])
    loss.backward()
    if opt.grad_clip != -1:
        torch.nn.utils.clip_grad_value_(encoder.parameters(),opt.grad_clip)
        torch.nn.utils.clip_grad_value_(decoder.parameters(),opt.grad_clip)
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss

In [10]:
def predict(opt, s1, lf2i, encoder, decoder):
    c = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)
    h = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)

    for i in range(s1.size(1)):
        c, h = encoder(s1[:, i], c, h)

    prev = torch.tensor([lf2i['<s>']], dtype=torch.long)
    predicted_form = []
    counter = 0
    while True:
        counter += 1
        pred, c, h = decoder(prev, c, h)
        form_id = pred.argmax().item()
        prev = torch.tensor([form_id], dtype=torch.long)
        if form_id == lf2i["</s>"] or counter >= 100:
            break
        predicted_form.append(form_id)
    return predicted_form

In [11]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker


def showPlot(points, fig_name, extra_info):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.title(extra_info) 
    plt.plot(points)
    plt.savefig("{}.png".format(fig_name))
    plt.close('all')

In [12]:
def train_and_test(epoch_num, directory):
    train_data, sentence_index_tensors_train, form_index_tensors_train = preprare_data(TRAIN_FILE)
    test_data, sentence_index_tensors_test, form_index_tensors_test = preprare_data(TEST_FILE)
    
    encoder = Encoder(opt, len(w2i))
    decoder = Decoder(opt, len(lf2i))

    optim_state = {"learningRate" : opt.learning_rate, "alpha" :  opt.decay_rate}
    encoder_optimizer = optim.RMSprop(encoder.parameters(),  lr=optim_state["learningRate"], alpha=optim_state["alpha"])
    decoder_optimizer = optim.RMSprop(decoder.parameters(),  lr=optim_state["learningRate"], alpha=optim_state["alpha"])
    criterion = nn.NLLLoss(ignore_index=0)

    losses = []
    max_acc = 0
    maxAccEpochId = 0
    accuracies = []
    for epoch in range(epoch_num):
        print("---Epoch {}---\n".format(epoch+1))
        print("Training...")
        encoder.train()
        decoder.train()
        plot_data = []
        for index, (sentence, form) in enumerate(zip(sentence_index_tensors_train, form_index_tensors_train)):
            loss = train(opt, criterion, encoder_optimizer, decoder_optimizer, encoder, decoder, sentence, form)
            if index != 0:
                if index % opt.plot_every == 0:     
                    plot_data.append(np.mean(losses[epoch*len(train_data)+index-opt.plot_every:]))
                if index % opt.print_every == 0:
                    print("Index {} Loss {}".format(index, np.mean(losses[epoch*len(train_data)+index-opt.print_every:])))
            losses.append(loss.item())
        
        if opt.learning_rate_decay < 1:
            if epoch >= opt.learning_rate_decay_after:
                decay_factor = opt.learning_rate_decay
                optim_state["learningRate"] = optim_state["learningRate"] * decay_factor #decay it
                for param_group in encoder_optimizer.param_groups:
                    param_group['lr'] = optim_state["learningRate"]
                for param_group in decoder_optimizer.param_groups:
                    param_group['lr'] = optim_state["learningRate"]
        
        print("Predicting..")
        encoder.eval()
        decoder.eval()
        correct = 0.0
        with torch.no_grad():
            for index, (sentence, form) in enumerate(zip(sentence_index_tensors_test, form_index_tensors_test)):
                prediction = predict(opt, sentence, lf2i, encoder, decoder)
                prediction = [i2lf[p] for p in prediction]
                #print(test_data[index][1])
                #print(prediction)
                same = True
                for g, p in zip(test_data[index][1], prediction):
                    if g != p:
                        same = False
                if same:
                    correct += 1
                    #print("Correct match ", prediction)
                    
        accuracy = 100*(correct/len(test_data))
        accuracies.append(accuracy)
        if accuracy > max_acc:
            max_acc = accuracy
            maxAccEpochId = epoch
            
        print("Accuracy: {} Max Accuracy {}".format(accuracy, max_acc))
        
        if not os.path.exists(directory):
            os.makedirs(directory)
            
        file_name = "{}/epoch.{}".format(directory, epoch)
        extra = "Mean Loss {0:.2f}".format(np.mean(losses))
        showPlot(plot_data, file_name, extra)
        
    file_name = "{}/{}".format(directory, "accuracies")
    extra = "Maximum Accuracy {0:.2f} at epoch {1}".format(np.max(accuracies), maxAccEpochId)
    showPlot(accuracies, file_name, extra)
    file_name = "{}/{}".format(directory, "all_losses")
    
    extra = "Mean Loss {0:.2f}".format(np.mean(losses))
    showPlot(losses, file_name, extra)

In [13]:
w2i = read_vocab(Q_VOCAB_FILE)
lf2i = read_vocab(F_VOCAB_FILE)
i2lf = {lf2i[i] : i for i in lf2i}

In [14]:
train_and_test(100, "out/WithGradientClippingAndLearningRateDecay")

---Epoch 1---

Training...
Index 50 Loss 37.2561768913269
Index 100 Loss 19.44763627052307
Index 150 Loss 18.315035662651063
Index 200 Loss 14.876395363807678
Index 250 Loss 11.175154457092285
Index 300 Loss 13.80512481212616
Index 350 Loss 13.507837109565735
Index 400 Loss 15.254737734794617
Index 450 Loss 13.086537480354309
Index 500 Loss 13.567912964820861
Index 550 Loss 10.704102435111999
Predicting..
Accuracy: 0.0 Max Accuracy 0
---Epoch 2---

Training...
Index 50 Loss 14.09244921207428
Index 100 Loss 10.579448757171631
Index 150 Loss 11.842061352729797
Index 200 Loss 10.05725989818573
Index 250 Loss 7.757682881355286
Index 300 Loss 9.947220759391785
Index 350 Loss 10.450408263206482
Index 400 Loss 12.043740105628967
Index 450 Loss 10.402203435897826
Index 500 Loss 11.001408300399781
Index 550 Loss 8.704847469329835
Predicting..
Accuracy: 14.000000000000002 Max Accuracy 14.000000000000002
---Epoch 3---

Training...
Index 50 Loss 11.739828634262086
Index 100 Loss 8.768062434196473


Index 50 Loss 4.326855003237724
Index 100 Loss 3.574752473682165
Index 150 Loss 3.507942192554474
Index 200 Loss 2.6949082659184933
Index 250 Loss 1.8945275038480758
Index 300 Loss 3.363847492039204
Index 350 Loss 3.372338815033436
Index 400 Loss 4.008630764484406
Index 450 Loss 3.795336416363716
Index 500 Loss 4.250645526796579
Index 550 Loss 3.123820932507515
Predicting..
Accuracy: 56.00000000000001 Max Accuracy 56.00000000000001
---Epoch 20---

Training...
Index 50 Loss 4.40869882568717
Index 100 Loss 3.251460372507572
Index 150 Loss 3.6407403522729873
Index 200 Loss 2.409482210576534
Index 250 Loss 1.817229561805725
Index 300 Loss 2.8694618052244185
Index 350 Loss 3.2721267706155777
Index 400 Loss 3.61155677318573
Index 450 Loss 3.5079111662507056
Index 500 Loss 4.188482012450695
Index 550 Loss 2.8789446180686356
Predicting..
Accuracy: 56.99999999999999 Max Accuracy 56.99999999999999
---Epoch 21---

Training...
Index 50 Loss 4.0348757691681385
Index 100 Loss 3.4215333861112596
Inde

Predicting..
Accuracy: 68.5 Max Accuracy 68.5
---Epoch 37---

Training...
Index 50 Loss 2.8895100378990173
Index 100 Loss 2.048700543195009
Index 150 Loss 2.3639625853300092
Index 200 Loss 1.9277950072288512
Index 250 Loss 1.3613172046840192
Index 300 Loss 1.4654687881469726
Index 350 Loss 2.0119307724013926
Index 400 Loss 2.646899258643389
Index 450 Loss 2.1706523153185846
Index 500 Loss 2.404862190634012
Index 550 Loss 1.799284826517105
Predicting..
Accuracy: 67.0 Max Accuracy 68.5
---Epoch 38---

Training...
Index 50 Loss 2.286091637760401
Index 100 Loss 1.965377458781004
Index 150 Loss 1.9935125696659088
Index 200 Loss 1.5432980056107044
Index 250 Loss 1.011393016949296
Index 300 Loss 1.7126077952980996
Index 350 Loss 2.1806417142599823
Index 400 Loss 2.505485379360616
Index 450 Loss 2.234365695398301
Index 500 Loss 2.616557237356901
Index 550 Loss 1.9664887382090093
Predicting..
Accuracy: 67.5 Max Accuracy 68.5
---Epoch 39---

Training...
Index 50 Loss 2.5537195575237273
Index 100

Index 500 Loss 1.4515743459761143
Index 550 Loss 1.2447323723882437
Predicting..
Accuracy: 75.33333333333333 Max Accuracy 76.33333333333333
---Epoch 55---

Training...
Index 50 Loss 1.631635300256312
Index 100 Loss 1.382250092625618
Index 150 Loss 1.6392260468006135
Index 200 Loss 1.0253857869654894
Index 250 Loss 0.43124590400606394
Index 300 Loss 1.140376991070807
Index 350 Loss 1.176067685186863
Index 400 Loss 1.3395071709901094
Index 450 Loss 1.3286887465417385
Index 500 Loss 1.929704193621874
Index 550 Loss 1.2995143616199494
Predicting..
Accuracy: 75.66666666666667 Max Accuracy 76.33333333333333
---Epoch 56---

Training...
Index 50 Loss 1.5402183204889297
Index 100 Loss 1.4731886114180088
Index 150 Loss 1.7782043355703354
Index 200 Loss 0.911522022113204
Index 250 Loss 0.4659833639860153
Index 300 Loss 1.067187822163105
Index 350 Loss 1.381802463978529
Index 400 Loss 1.5719930894300342
Index 450 Loss 1.707066018730402
Index 500 Loss 1.8029854693636298
Index 550 Loss 1.17559180513

Index 450 Loss 0.8058886916935444
Index 500 Loss 1.0665354489535093
Index 550 Loss 0.7910762571543455
Predicting..
Accuracy: 81.5 Max Accuracy 84.0
---Epoch 73---

Training...
Index 50 Loss 1.0412812910974025
Index 100 Loss 1.0181272365152836
Index 150 Loss 0.9273949883133173
Index 200 Loss 0.5626162414252758
Index 250 Loss 0.3338226106017828
Index 300 Loss 1.149529070854187
Index 350 Loss 0.7832854217290879
Index 400 Loss 1.1975147091597318
Index 450 Loss 0.9325569921731949
Index 500 Loss 1.0362485575675964
Index 550 Loss 0.7196508722938597
Predicting..
Accuracy: 82.0 Max Accuracy 84.0
---Epoch 74---

Training...
Index 50 Loss 0.9363411308825016
Index 100 Loss 0.8997220373339951
Index 150 Loss 1.1160902282595635
Index 200 Loss 0.6125768936797976
Index 250 Loss 0.23946116536855697
Index 300 Loss 0.8997005689889193
Index 350 Loss 1.071540941298008
Index 400 Loss 1.1174194356799125
Index 450 Loss 0.952515535056591
Index 500 Loss 1.2168913081288337
Index 550 Loss 0.6724256294965744
Predic

Index 150 Loss 0.48626781195402147
Index 200 Loss 0.5822588342428208
Index 250 Loss 0.21947176933288573
Index 300 Loss 0.3068792118877173
Index 350 Loss 0.39048449993133544
Index 400 Loss 0.6295893357694149
Index 450 Loss 0.5188748905062676
Index 500 Loss 0.6390524123609066
Index 550 Loss 0.27665542270988225
Predicting..
Accuracy: 90.16666666666666 Max Accuracy 90.16666666666666
---Epoch 91---

Training...
Index 50 Loss 0.6129041120409966
Index 100 Loss 0.5023269996792078
Index 150 Loss 0.5330218122154474
Index 200 Loss 0.36090585559606553
Index 250 Loss 0.11604600310325623
Index 300 Loss 0.4246287904307246
Index 350 Loss 0.5221255350112916
Index 400 Loss 0.7066303116083145
Index 450 Loss 0.5293475347012282
Index 500 Loss 0.6839321202039719
Index 550 Loss 0.2238583093881607
Predicting..
Accuracy: 90.5 Max Accuracy 90.5
---Epoch 92---

Training...
Index 50 Loss 0.36333993980661033
Index 100 Loss 0.7295299792289733
Index 150 Loss 0.6189747361838818
Index 200 Loss 0.3580424251407385
Index

In [15]:
"Maximum Accuracy {0:.2f} at epoch {1}".format(2.7888, 10)

'Maximum Accuracy 2.79 at epoch 10'