In [18]:
import numpy as np
np.random.seed(1)

In [19]:
def softmax(x_):
    x = np.atleast_2d(x_) #turns it from 1d vector to 2d
    temp = np.exp(x)
    return temp / np.sum(temp, axis =1, keepdims = True) #axis = 1 is sum the columns

In [25]:
word_vects = {}
word_vect_template  = np.array([[0.,0.,0.]])

word_list = ['yankees','bears','braves','red','sox','lose','defeat','beat','tie']

for w in word_list:
    word_vects[w] = word_vect_template.copy()

In [26]:
sent2output = np.random.rand(3, len(word_vects)) #[3,9]
identity = np.eye(3)

In [27]:
alpha = 0.1
y = np.array([1,0,0,0,0,0,0,0,0])

for i in range(100):
    
    layer_0 = word_vects['red']
    layer_1 = layer_0.dot(identity) + word_vects['sox']
    layer_2 = layer_1.dot(identity) + word_vects['defeat']
    
    pred = softmax(layer_2.dot(sent2output))
    
    pred_delta = pred - y

    layer_2_delta = pred_delta.dot(sent2output.T)
    defeat_delta = layer_2_delta*1

    layer_1_delta = layer_2_delta.dot(identity.T)
    sox_delta = layer_1_delta*1

    layer_0_delta = layer_1_delta.dot(identity.T)

    word_vects['red'] -= layer_0_delta*alpha
    word_vects['sox'] -= sox_delta*alpha
    word_vects['defeat'] -= defeat_delta*alpha

    identity -= np.outer(layer_1_delta, layer_0) * alpha
    identity -= np.outer(layer_2_delta, layer_1) * alpha

    sent2output -= np.outer(layer_2, pred_delta) * alpha



In [28]:
print(f"Pred:{pred}")

Pred:[[9.98823603e-01 4.20314950e-04 1.93267980e-05 1.01757538e-05
  2.07021787e-04 2.67410218e-05 1.72270358e-05 1.73633972e-05
  4.58226611e-04]]


In [29]:
word_vects

{'yankees': array([[0., 0., 0.]]),
 'bears': array([[0., 0., 0.]]),
 'braves': array([[0., 0., 0.]]),
 'red': array([[ 0.26963643,  0.09524509, -0.95404756]]),
 'sox': array([[ 0.21187229,  0.07510011, -0.748406  ]]),
 'lose': array([[0., 0., 0.]]),
 'defeat': array([[ 0.17292935,  0.06149844, -0.61046878]]),
 'beat': array([[0., 0., 0.]]),
 'tie': array([[0., 0., 0.]])}

In [30]:
identity

array([[ 1.0654789 ,  0.02360239, -0.22343479],
       [ 0.02289487,  1.00825382, -0.07813104],
       [-0.24134894, -0.08691185,  1.82438203]])

In [31]:
sent2output

array([[ 0.72811039,  0.64169343, -0.01999513,  0.28553359,  0.09586716,
         0.07018994,  0.1667832 ,  0.32603584,  0.31325793],
       [ 0.65213613,  0.39069632,  0.67782664,  0.19826064,  0.85961047,
         0.01925624,  0.66330383,  0.41012379,  0.5284361 ],
       [-0.90985744,  0.46482497,  0.86804662,  1.0243646 ,  0.48545377,
         0.76653727,  0.94154838,  0.95992813,  0.36843507]])

# What is normalize

In [121]:
w01 = np.random.rand(3,4)

In [122]:
(w01 * w01) #element wise multiplication 

array([[3.95219721e-04, 6.87015833e-04, 8.01257264e-04, 6.06198898e-02],
       [7.39648073e-01, 2.90338916e-01, 3.05612140e-01, 7.09016024e-01],
       [1.54190122e-02, 7.79435266e-02, 3.43113924e-01, 9.40115915e-01]])

In [123]:
norms = np.sum(w01*w01, axis = 1) #[3,1]
norms.resize(norms.shape[0],1)

normed_weights = w01 * norms #ew multiplation/scaled by the column sum ; give you 0-1 for each column 

# RNN with dictionary

In [53]:
import sys,random,math
from collections import Counter
import numpy as np

f = open('../original/tasksv11/en/qa1_single-supporting-fact_train.txt','r')
raw = f.readlines()
f.close()


In [63]:
for line in raw[0:5]:
    print(line.lower().
          replace("\n","").
          replace("\t","").
          replace(".","").
          replace("?","").
          replace("1","").split(" ")[1:]) 

['mary', 'moved', 'to', 'the', 'bathroom']
['john', 'went', 'to', 'the', 'hallway']
['where', 'is', 'mary', 'bathroom']
['daniel', 'went', 'back', 'to', 'the', 'hallway']
['sandra', 'moved', 'to', 'the', 'garden']


In [64]:
tokens = list()
for line in raw[0:5]:
    tokens.append(line.lower().
          replace("\n","").
          replace("\t","").
          replace(".","").
          replace("?","").
          replace("1","").split(" ")[1:]
                 )
    
print(tokens)

[['mary', 'moved', 'to', 'the', 'bathroom'], ['john', 'went', 'to', 'the', 'hallway'], ['where', 'is', 'mary', 'bathroom'], ['daniel', 'went', 'back', 'to', 'the', 'hallway'], ['sandra', 'moved', 'to', 'the', 'garden']]


In [65]:
vocab = set()
wrdcnt = 0
for sent in tokens:
    for word in sent:
        vocab.add(word)
        wrdcnt += 1

vocab = list(vocab)
 
print(len(vocab))
print(wrdcnt)

14
25


In [66]:
word2index = {}
for i,word in enumerate(vocab):
    word2index[word] = i

In [67]:
vocab

['bathroom',
 'where',
 'to',
 'mary',
 'moved',
 'back',
 'went',
 'daniel',
 'is',
 'sandra',
 'hallway',
 'the',
 'garden',
 'john']

In [68]:
def words2indices(sentence):
    idx = list()
    for word in sentence:
        idx.append(word2index[word])
    return idx 

In [69]:
embed_size = 10

embed = (np.random.rand(len(vocab), embed_size) - 0.5)*0.1
recurrent = np.eye(embed_size)
start = np.zeros(embed_size)
decoder = (np.random.rand(embed_size, len(vocab)) - 0.5) * 0.1
one_hot = np.eye(len(vocab))

In [73]:
def predict(sent):
    
    layers = list()
    layer = {}
    layer['hidden'] = start
    layers.append(layer)

    loss = 0

    # forward propagate
    preds = list()
    for target_i in range(len(sent)):

        layer = {}

        # try to predict the next term
        layer['pred'] = softmax(layers[-1]['hidden'].dot(decoder)) #[10,][10,19] = [1,19]

        loss += -np.log(layer['pred'][sent[target_i]]) #will be zero if 1

        # generate the next hidden state
        layer['hidden'] = layers[-1]['hidden'].dot(recurrent) + embed[sent[target_i]] #[10,1][10,10] + [1,10]
        layers.append(layer)
        
    return layers, loss

In [75]:
# forward
for iter in range(8):
    alpha = 0.001
    sent = words2indices(tokens[iter%len(tokens)][1:]) #Sentence less the first word, with an interator 
    
    
    
    layers,loss = predict(sent) #returns multiple pred layers for each word [19,]

    # back propagate
    for layer_idx in reversed(range(len(layers))):
        print(layer_idx)
        layer = layers[layer_idx]
        target = sent[layer_idx-1] #because the sent is reduced by 1 in length and there is a start layer added when 
        #passed to predict(sent)

        if(layer_idx > 0):  # if not the first layer
            layer['output_delta'] = layer['pred'] - one_hot[target] #takes a particular row away [19,]
            new_hidden_delta = layer['output_delta'].dot(decoder.transpose()) #[19,][10,19] = [10,]
            
            # if the last layer - don't pull from a later one becasue it doesn't exist
            if(layer_idx == len(layers)-1):
                layer['hidden_delta'] = new_hidden_delta
            else:
                layer['hidden_delta'] = new_hidden_delta + layers[layer_idx+1]['hidden_delta'].dot(recurrent.transpose())
        else: # if the first layer
            layer['hidden_delta'] = layers[layer_idx+1]['hidden_delta'].dot(recurrent.transpose())

0


IndexError: index 4 is out of bounds for axis 0 with size 1