In [1]:
import sys
import random
import math
from collections import Counter
import numpy as np

In [11]:
# Read the task file. The context manager closes the handle even if reading
# raises, which the manual open()/close() pair did not guarantee.
with open('./tasksv11/en/qa1_single-supporting-fact_train.txt', 'r') as f:
    raw = f.readlines()

# For the first 1000 lines: lower-case, strip the newline, split on single
# spaces and drop the first field (presumably the per-line id -- verify
# against the data file).
tokens = [line.lower().replace("\n", "").split(" ")[1:] for line in raw[0:1000]]

In [12]:
# Peek at the first three tokenised lines.
print(tokens[:3])

[['mary', 'moved', 'to', 'the', 'bathroom.'], ['john', 'went', 'to', 'the', 'hallway.'], ['where', 'is', 'mary?', '\tbathroom\t1']]


In [14]:
# Collect every distinct token that appears in the corpus.
vocab = {word for sent in tokens for word in sent}

# Sort before indexing: iteration order of a set of strings can differ between
# interpreter runs (string hashing is randomised), which would assign different
# indices to the same words and make the seeded weight initialisation and all
# downstream results non-reproducible.
vocab = sorted(vocab)

# Map each token to its position in `vocab`.
word2index = {word: i for i, word in enumerate(vocab)}

In [15]:
def words2indices(sentence):
    """Translate a sequence of tokens into their `word2index` entries.

    Args:
        sentence: iterable of tokens; each one must be a key of `word2index`.

    Returns:
        A list with one index per token, in the same order.

    Raises:
        KeyError: if a token is not present in `word2index`.
    """
    return [word2index[token] for token in sentence]

def softmax(x):
    """Exponentiate `x` and normalise it by the sum along axis 0.

    The global maximum of `x` is subtracted before exponentiating so that
    large inputs do not overflow; this shift leaves the result unchanged.

    Args:
        x: NumPy array of scores.

    Returns:
        Array of the same shape as `x` whose entries sum to 1 along axis 0.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

In [20]:
# Seed NumPy's global RNG so the random draws below are repeatable.
np.random.seed(1)
embed_size = 10

# Deterministic parameters: zero initial hidden state, identity recurrence.
start = np.zeros(embed_size)
recurrent = np.identity(embed_size)

# Random parameters, uniform in [-0.05, 0.05). `embed` is drawn before
# `decoder`, so the order of RNG consumption determines their values.
embed = 0.1 * (np.random.rand(len(vocab), embed_size) - 0.5)
decoder = 0.1 * (np.random.rand(embed_size, len(vocab)) - 0.5)

# Row i is the one-hot vector for vocabulary index i.
one_hot = np.identity(len(vocab))

In [23]:
def predict(sent):
    """Run the recurrent network forward over a sequence of word indices.

    Reads the module-level parameters `start`, `decoder`, `recurrent` and
    `embed`. At each step the prediction for the current word is made from the
    previous hidden state, and only then is the current word's embedding
    folded into the new hidden state.

    Args:
        sent: sequence of vocabulary indices.

    Returns:
        (layers, loss) where `layers` is a list of dicts. `layers[0]` holds
        only 'hidden' (a snapshot of `start`); every later entry holds 'pred'
        (softmax over the vocabulary) and 'hidden'. `loss` is the summed
        negative log-probability assigned to the words of `sent` (0 for an
        empty sequence).
    """
    # Store a copy of `start`: callers update `start` in place, and sharing the
    # array would silently rewrite this recorded forward activation.
    layers = [{'hidden': start.copy()}]
    loss = 0

    for target in sent:
        prev_hidden = layers[-1]['hidden']
        layer = {}
        # Prediction is computed before the target word is consumed.
        layer['pred'] = softmax(prev_hidden.dot(decoder))
        loss += -np.log(layer['pred'][target])
        layer['hidden'] = prev_hidden.dot(recurrent) + embed[target]
        layers.append(layer)
    return layers, loss

In [29]:
# Learning rate; it never changes, so it is set once outside the loop.
alpha = 0.001

# `iteration` rather than `iter`, so the builtin iter() stays usable in later cells.
for iteration in range(30000):
    # Cycle through the corpus; the first token of each line is dropped here.
    sent = words2indices(tokens[iteration % len(tokens)][1:])
    if not sent:
        # Nothing to predict; continuing would index layers[1], which does not exist.
        continue
    layers, loss = predict(sent)
    sent_len = float(len(sent))

    # Compute output and hidden deltas from the last layer back to the first.
    for layer_idx in reversed(range(len(layers))):
        layer = layers[layer_idx]

        if layer_idx > 0:
            # layers[k]['pred'] was scored against sent[k-1] in predict().
            target = sent[layer_idx - 1]
            layer['output_delta'] = layer['pred'] - one_hot[target]
            new_hidden_delta = layer['output_delta'].dot(decoder.transpose())
            if layer_idx == len(layers) - 1:
                # Last layer: no later step contributes.
                layer['hidden_delta'] = new_hidden_delta
            else:
                layer['hidden_delta'] = new_hidden_delta + \
                    layers[layer_idx + 1]['hidden_delta'].dot(recurrent.transpose())
        else:
            # layers[0] has no prediction of its own; it only receives the
            # delta propagated back through the recurrent weights.
            layer['hidden_delta'] = layers[layer_idx + 1]['hidden_delta'].dot(recurrent.transpose())

    # Apply the updates, averaged over the sentence length. `layer_idx` indexes
    # the layer *before* `layer` because enumeration starts at layers[1].
    for layer_idx, layer in enumerate(layers[1:]):
        decoder -= np.outer(layers[layer_idx]['hidden'],
                            layer['output_delta']) * alpha / sent_len
        embed_idx = sent[layer_idx]
        embed[embed_idx] -= layers[layer_idx]['hidden_delta'] * \
                             alpha / sent_len
        recurrent -= np.outer(layers[layer_idx]['hidden'],
                              layer['hidden_delta']) * alpha / sent_len

    # Update `start` last: this is an in-place update, and layers[0]['hidden']
    # may be the very same array, so doing it earlier would change the
    # activation used by the decoder/recurrent gradients above.
    start -= layers[0]['hidden_delta'] * alpha / sent_len

    if iteration % 1000 == 0:
        print("perplexity: " + str(np.exp(loss/len(sent))))

perplexity: 81.9696433994313
perplexity: 81.79598723570629
perplexity: 81.52450780454359
perplexity: 80.99586521548164
perplexity: 79.86900330313478
perplexity: 77.20048304819166
perplexity: 69.07372820534131
perplexity: 40.321888708580516
perplexity: 24.203354611386363
perplexity: 19.67468473568944
perplexity: 18.476482241471874
perplexity: 17.283464549218092
perplexity: 15.544117112046044
perplexity: 12.818550057299666
perplexity: 9.495359337252145
perplexity: 7.433730869415573
perplexity: 6.359237160084933
perplexity: 5.66728613404888
perplexity: 5.256542093796923
perplexity: 4.977988762691367
perplexity: 4.794043667543352
perplexity: 4.687249891380066
perplexity: 4.628527717844853
perplexity: 4.574922316767583
perplexity: 4.505928107713292
perplexity: 4.421426749687874
perplexity: 4.327162103272879
perplexity: 4.228574421200357
perplexity: 4.1291457968397545
perplexity: 4.023536289952996


In [30]:
# Show, for one sentence, the model's most likely word at each position.
sent_index = 4
sentence = tokens[sent_index]
pred_layers = predict(words2indices(sentence))[0]
print(sentence)

# NOTE(review): predict() computes each layer's 'pred' from the previous
# layer's hidden state, i.e. before the word at that position is fed in --
# confirm the "Prev Input"/"True" labels line up with that as intended.
for i, each_layer in enumerate(pred_layers[1:-1]):
    # Descriptive names instead of `input`, which would shadow the builtin
    # for every later cell in the notebook.
    prev_word = sentence[i]
    true_word = sentence[i + 1]
    pred_word = vocab[each_layer['pred'].argmax()]
    # ljust pads with spaces to the column width and leaves longer words as-is.
    print("Prev Input:" + prev_word.ljust(12) +
          "True:" + true_word.ljust(15) + "Pred:" + pred_word)

['sandra', 'moved', 'to', 'the', 'garden.']
Prev Input:sandra      True:moved          Pred:is
Prev Input:moved       True:to             Pred:to
Prev Input:to          True:the            Pred:the
Prev Input:the         True:garden.        Pred:bedroom.
