In [1]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torch import optim
import random

seed = 10

random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x10ea1c4d0>

In [2]:
class LSTM(nn.Module):
    def __init__(self, opt):
        super(LSTM, self).__init__()
        self.opt = opt
        self.i2h = nn.Linear(opt.rnn_size, 4 * opt.rnn_size)
        self.h2h = nn.Linear(opt.rnn_size, 4 * opt.rnn_size)

    def forward(self, x, prev_c, prev_h):
        gates = self.i2h(x) + self.h2h(prev_h)
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
        ingate = torch.sigmoid(ingate)
        forgetgate = torch.sigmoid(forgetgate)
        cellgate = torch.tanh(cellgate)
        outgate = torch.sigmoid(outgate)
        cy = (forgetgate * prev_c) + (ingate * cellgate)
        hy = outgate * torch.tanh(cy)  # n_b x hidden_dim
        return cy, hy


In [3]:
class EncoderRNN(nn.Module):
    def __init__(self, opt, input_size):
        super(EncoderRNN, self).__init__()
        self.opt = opt
        self.hidden_size = opt.rnn_size
        self.embedding = nn.Embedding(input_size, self.hidden_size)
        self.lstm = LSTM(self.opt)
    def forward(self, input_src, prev_c, prev_h):
        src_emb = self.embedding(input_src) # batch_size x src_length x emb_size
        prev_cy, prev_hy = self.lstm(src_emb, prev_c, prev_h)
        return prev_cy, prev_hy

In [4]:
class DecoderRNN(nn.Module):
    def __init__(self, opt, output_size):
        super(DecoderRNN, self).__init__()
        self.opt = opt
        self.hidden_size = opt.rnn_size

        self.embedding = nn.Embedding(output_size, self.hidden_size)
        self.lstm = LSTM(self.opt)
        self.linear = nn.Linear(self.hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, prev_c, prev_h):
        output = self.embedding(input)
        next_c, next_h = self.lstm(output, prev_c, prev_h)
        h2y = self.linear(next_h)
        pred = self.softmax(h2y)
        return pred, next_c, next_h

In [5]:
class Options:
    def __init__(self):
        self.rnn_size = 50
        self.dropout = False
        self.init_weight = 0.08
        self.decay_rate = 0.985
        self.learning_rate = 0.01
        
opt = Options()

In [6]:
def read_data(fh):
    for line in fh:
        sentence, lf = line.strip().split("\t")
        sentence = sentence.split()
        lf = lf.split()
        yield sentence, lf

def read_vocab(filename):
    t2i = {"<s>": 0, "</s>":1, "UNK": 2}
    with open(filename) as target:
        for line in target:
            token = line.strip().split()[0]
            if token not in t2i:
                t2i[token] = len(t2i)
    return t2i

def is_equal(gold, predictions):
    total_correct = 0.0
    if len(gold) == len(predictions):
        equal = True
        for g, p in zip(gold, predictions):
            if g != p:
                equal = False
        return equal
    return False

In [7]:
def preprare_data(file_name):
    shuffledData = None
    with open(TRAIN_FILE, 'r') as train:
        shuffledData = list(read_data(train))
        random.shuffle(shuffledData)
    sentence_index_tensors = []
    form_index_tensors = []
    for sentence in shuffledData:
        text_tensor = torch.zeros((1, len(sentence[0]) + 2), dtype=torch.long)
        text_tensor[0][0] = w2i["<s>"]
        for idx, word in enumerate(sentence[0]):
            word_index = w2i[word] if word in w2i else w2i["UNK"]
            text_tensor[0][idx+1] = word_index
        text_tensor[0][-1] = w2i["</s>"]
        sentence_index_tensors.append(text_tensor)
        form_tensor = torch.zeros((1, len(sentence[1]) + 2), dtype=torch.long)
        form_tensor[0][0] = lf2i["<s>"]
        for idx, form in enumerate(sentence[1]):
            form_index = lf2i[form] if form in lf2i else lf2i["UNK"]
            form_tensor[0][idx+1] = form_index
        form_tensor[0][-1] = lf2i["</s>"]
        form_index_tensors.append(form_tensor)
    return shuffledData, sentence_index_tensors, form_index_tensors

In [8]:
def train(opt, encoder_optimizer, decoder_optimizer, encoder, decoder, s1, f1):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    c = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)
    h = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)
    for i in range(s1.size(1)):
        c, h = encoder(s1[:, i], c, h)

    #for dec_in in f1:
    loss = 0
    for i in range(f1.size(1)-1):
        pred, c, h = decoder(f1[:, i], c, h)
        loss += criterion(pred, f1[:, i+1])
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss

In [9]:
def predict(opt, s1, lf2i, encoder, decoder):
    c = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)
    h = torch.zeros((1, opt.rnn_size), dtype=torch.float, requires_grad=True)

    for i in range(s1.size(1)):
        c, h = encoder(s1[:, i], c, h)

    prev = torch.tensor([lf2i['<s>']], dtype=torch.long)
    predicted_form = []
    counter = 0
    while True:
        counter += 1
        pred, c, h = decoder(prev, c, h)
        form_id = pred.argmax().item()
        prev = torch.tensor([form_id], dtype=torch.long)
        if form_id == lf2i["</s>"] or counter >= 100:
            break
        predicted_form.append(form_id)
    return predicted_form

In [10]:
TEST_FILE = "data/test.txt"
TRAIN_FILE = "data/train.txt"
WHOLE_FILE = "data/whole.txt"
F_VOCAB_FILE = "data/vocab.f.txt"
Q_VOCAB_FILE = "data/vocab.q.txt"

w2i = read_vocab(Q_VOCAB_FILE)
lf2i = read_vocab(F_VOCAB_FILE)
i2lf = {lf2i[i] : i for i in lf2i}

In [11]:
encoder = EncoderRNN(opt, len(w2i))
decoder = DecoderRNN(opt, len(lf2i))

for name, param in encoder.named_parameters():
        if param.requires_grad:
            init.uniform_(param, -opt.init_weight, opt.init_weight)
for name, param in decoder.named_parameters():
    if param.requires_grad:
        init.uniform_(param, -opt.init_weight, opt.init_weight)

optim_state = {"learningRate" : opt.learning_rate, "alpha" :  opt.decay_rate}
encoder_optimizer = optim.RMSprop(encoder.parameters(),  lr=optim_state["learningRate"], alpha=optim_state["alpha"])
decoder_optimizer = optim.RMSprop(decoder.parameters(),  lr=optim_state["learningRate"], alpha=optim_state["alpha"])
criterion = nn.NLLLoss(ignore_index=0)


In [14]:
def train_and_test():
    train_data, sentence_index_tensors_train, form_index_tensors_train = preprare_data(TRAIN_FILE)
    test_data, sentence_index_tensors_test, form_index_tensors_test = preprare_data(TEST_FILE)

    EPOCH_NUM = 30
    for epoch in range(EPOCH_NUM):
        print("---Epoch {}---\n".format(epoch+1))
        print("Training...")
        encoder.train()
        decoder.train()
        loss = 0
        for index, (sentence, form) in enumerate(zip(sentence_index_tensors_train, form_index_tensors_train)):
            loss += train(opt, encoder_optimizer, decoder_optimizer, encoder, decoder, sentence, form)
            if index % 50 == 0:
                print("Index {} Loss {}".format(index, loss/(index+1)))
                
        print("Predicting..")
        encoder.eval()
        decoder.eval()
        correct = 0.0
        for index, (sentence, form) in enumerate(zip(sentence_index_tensors_test, form_index_tensors_test)):
            prediction = predict(opt, sentence, lf2i, encoder, decoder)
            prediction = [i2lf[p] for p in prediction]
            #print(test_data[index][1])
            #print(prediction)
            same = True
            for g, p in zip(test_data[index][1], prediction):
                if g != p:
                    same = False
            if same:
                correct += 1
                #print("Correct match ", prediction)
        accuracy = 100*(correct/len(test_data))
        print("Accuracy: ", accuracy)

In [15]:
train_and_test()

---Epoch 1---

Training...
Index 0 Loss 9.44290542602539
Index 50 Loss 6.246608734130859
Index 100 Loss 6.586971282958984
Index 150 Loss 7.199075698852539
Index 200 Loss 6.89987850189209
Index 250 Loss 6.614319324493408
Index 300 Loss 6.813511371612549
Index 350 Loss 6.847531795501709
Index 400 Loss 6.925313472747803
Index 450 Loss 6.873419761657715
Index 500 Loss 6.678918838500977
Index 550 Loss 6.796503067016602
Predicting..
Accuracy:  24.5
---Epoch 2---

Training...
Index 0 Loss 11.227838516235352
Index 50 Loss 6.821059226989746
Index 100 Loss 6.8647332191467285
Index 150 Loss 7.330733776092529
Index 200 Loss 6.865607738494873
Index 250 Loss 6.5261993408203125
Index 300 Loss 6.574824333190918
Index 350 Loss 6.552728176116943
Index 400 Loss 6.615856647491455
Index 450 Loss 6.501195430755615
Index 500 Loss 6.291408538818359
Index 550 Loss 6.357675075531006
Predicting..
Accuracy:  25.333333333333336
---Epoch 3---

Training...
Index 0 Loss 9.560908317565918
Index 50 Loss 6.0691223144531

Index 500 Loss 4.308001518249512
Index 550 Loss 4.379534721374512
Predicting..
Accuracy:  49.833333333333336
---Epoch 19---

Training...
Index 0 Loss 7.376361846923828
Index 50 Loss 4.811140060424805
Index 100 Loss 4.671398639678955
Index 150 Loss 4.790704250335693
Index 200 Loss 4.548469543457031
Index 250 Loss 4.296502590179443
Index 300 Loss 4.383448600769043
Index 350 Loss 4.462375640869141
Index 400 Loss 4.527151584625244
Index 450 Loss 4.39990234375
Index 500 Loss 4.21815299987793
Index 550 Loss 4.320772171020508
Predicting..
Accuracy:  48.5
---Epoch 20---

Training...
Index 0 Loss 13.02700424194336
Index 50 Loss 5.282227993011475
Index 100 Loss 5.159090995788574
Index 150 Loss 5.240074634552002
Index 200 Loss 4.793017864227295
Index 250 Loss 4.4663166999816895
Index 300 Loss 4.456430435180664
Index 350 Loss 4.406853199005127
Index 400 Loss 4.422591686248779
Index 450 Loss 4.349965572357178
Index 500 Loss 4.120241165161133
Index 550 Loss 4.254065036773682
Predicting..
Accuracy:  