In [1]:
import numpy as np
from chp13 import *

In [2]:
# Seed NumPy's global random generator so the random weight draws in the
# following cells are repeatable from a fresh kernel.
np.random.seed(0)

In [3]:
# Four input rows with two binary features each.
data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

# One target per input row, stored as a (4, 1) column.
target = np.array([
    [0],
    [1],
    [0],
    [1],
])

In [4]:
# Randomly initialised weight matrices (uniform samples from [0, 1)):
# w_0_1 is (2, 3) and maps the two inputs to three hidden units,
# w_1_2 is (3, 1) and maps the hidden units to the single output.
# The two draws come from the global RNG, so their order matters for
# reproducing the recorded numbers.
w_0_1 = np.random.rand(2,3)
w_1_2 = np.random.rand(3,1)

In [5]:
# Train a two-layer linear network (no activation functions) with
# hand-written backpropagation: 10 full-batch gradient-descent steps.
alpha = 0.1  # learning rate (step size) shared by both weight updates

for i in range(10):
    # Forward pass: input -> hidden layer -> prediction.
    l1 = data.dot(w_0_1)
    l2 = l1.dot(w_1_2)

    # Sum of squared errors over all rows (a sum, not a mean).
    diff = l2 - target
    sqdiff = diff * diff
    loss = sqdiff.sum(0)

    # Backward pass: push the output error back through w_1_2 to obtain the
    # hidden-layer gradient. This has to read w_1_2 before it is updated.
    l1_grad = diff.dot(w_1_2.transpose())
    w_1_2_update = l1.transpose().dot(diff)
    w_0_1_update = data.transpose().dot(l1_grad)

    # In-place gradient-descent step on both weight matrices.
    w_1_2 -= w_1_2_update * alpha
    w_0_1 -= w_0_1_update * alpha
    print(loss[0])
    
    

5.066439994622395
0.4959907791902342
0.4180671892167177
0.35298133007809646
0.2972549636567377
0.2492326038163328
0.20785392075862477
0.17231260916265176
0.14193744536652986
0.11613979792168384


# My version

In [6]:
# The same inputs and targets as the NumPy example, wrapped in Tensors with
# autograd switched on.
data = Tensor(
    np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
    autograd=True,
)
target = Tensor(
    np.array([[0], [1], [0], [1]]),
    autograd=True,
)

In [7]:
# Two randomly initialised weight tensors: (2, 3) for the first layer and
# (3, 1) for the second. List elements are evaluated in order, so the random
# draws happen in the same sequence as two successive appends would give.
w = [
    Tensor(np.random.rand(2, 3), autograd=True),
    Tensor(np.random.rand(3, 1), autograd=True),
]

In [8]:
# Ten full-batch training steps using autograd for the gradients and a
# hand-written gradient-descent update for the weights.
for i in range(10):
    # Predict: two chained matrix multiplications.
    layer_1 = data.mm(w[0])
    pred = layer_1.mm(w[1])

    # Compare: squared error summed over the rows.
    loss = (
        (pred - target) * (pred - target)
    ).sum(0)

    # Learn: start backpropagation with a gradient of ones.
    loss.backward(Tensor(np.ones_like(loss.data)))

    for weight in w:
        # Step against the gradient, then zero it for the next iteration.
        weight.data -= weight.grad.data * 0.1
        weight.grad.data *= 0
    print(loss)
    

[1.19432764]
[0.69555247]
[0.41681785]
[0.26511189]
[0.16699383]
[0.10237465]
[0.06048975]
[0.03436548]
[0.0188295]
[0.00999548]


In [9]:
# Same toy problem; the weight update is now delegated to an SGD optimizer
# instead of being written out by hand.
data = Tensor(
    np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
    autograd=True,
)
target = Tensor(
    np.array([[0], [1], [0], [1]]),
    autograd=True,
)

# Fresh random weights, created in the same order as before.
w = [
    Tensor(np.random.rand(2, 3), autograd=True),
    Tensor(np.random.rand(3, 1), autograd=True),
]

optim = SGD(parameters=w, alpha=0.1)

for i in range(10):
    # Predict
    layer_1 = data.mm(w[0])
    pred = layer_1.mm(w[1])

    # Compare: squared error summed over the rows.
    loss = (
        (pred - target) * (pred - target)
    ).sum(0)

    # Learn: backpropagate, then let the optimizer apply the update
    # (presumably including the gradient reset -- confirm in chp13).
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[2.60308555]
[0.90713554]
[0.23779431]
[0.10544739]
[0.06594947]
[0.04025429]
[0.0238281]
[0.01372652]
[0.00772696]
[0.00426826]


In [10]:

# Bind NumPy to `np`, the alias every statement in this cell actually uses.
# A bare `import numpy` does not create `np`, so the cell previously ran only
# when an earlier cell had already imported it under that alias.
import numpy  # kept so any other use of the plain `numpy` name still works
import numpy as np
np.random.seed(0)  # reseed so this experiment is reproducible on its own

data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

# Two stacked Linear layers (2 -> 3 -> 1) replace the hand-managed weight list.
model = Sequential([Linear(2,3), Linear(3,1)])

optim = SGD(parameters=model.get_parameters(), alpha=0.05)

for i in range(10):
    
    # Predict
    pred = model.forward(data)
    
    # Compare: squared error summed over the rows.
    loss = ((pred - target)*(pred - target)).sum(0)
    
    # Learn: backpropagate from a gradient of ones, then update the weights.
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[2.33428272]
[0.06743796]
[0.0521849]
[0.04079507]
[0.03184365]
[0.02479336]
[0.01925443]
[0.01491699]
[0.01153118]
[0.00889602]


In [11]:
import numpy as np
# Reseed so the model starts from the same random weights on every run.
np.random.seed(0)

data = Tensor(
    np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
    autograd=True,
)
target = Tensor(
    np.array([[0], [1], [0], [1]]),
    autograd=True,
)

# Same 2 -> 3 -> 1 linear model, with the loss expressed as an MSELoss layer
# rather than spelled out inline.
model = Sequential([Linear(2, 3), Linear(3, 1)])
criterion = MSELoss()
optim = SGD(parameters=model.get_parameters(), alpha=0.05)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    # Start backpropagation with a gradient of ones shaped like the loss.
    grad_seed = Tensor(np.ones_like(loss.data))
    loss.backward(grad_seed)
    optim.step()
    print(loss)

[2.33428272]
[0.06743796]
[0.0521849]
[0.04079507]
[0.03184365]
[0.02479336]
[0.01925443]
[0.01491699]
[0.01153118]
[0.00889602]


In [12]:
import numpy as np
# Reseed so the model starts from the same random weights on every run.
np.random.seed(0)

data = Tensor(
    np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
    autograd=True,
)
target = Tensor(
    np.array([[0], [1], [0], [1]]),
    autograd=True,
)

# Add nonlinearities: Tanh after the hidden layer, Sigmoid on the output.
model = Sequential([
    Linear(2, 3),
    Tanh(),
    Linear(3, 1),
    Sigmoid(),
])
criterion = MSELoss()
optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    # Start backpropagation with a gradient of ones shaped like the loss.
    grad_seed = Tensor(np.ones_like(loss.data))
    loss.backward(grad_seed)
    optim.step()
    print(loss)


[1.06372865]
[0.75148144]
[0.57384259]
[0.39574294]
[0.2482279]
[0.15515294]
[0.10423398]
[0.07571169]
[0.05837623]
[0.04700013]


In [13]:
# Scratch check: 1000 x 5 uniform samples from [0, 1), centred by
# subtracting 0.5 and divided by 5, i.e. values in [-0.1, 0.1).
# The bare expression is the cell's displayed output.
(np.random.rand(1000, 5) -0.5) / 5

array([[ 0.03627183, -0.06432653, -0.0860282 , -0.09806237,  0.07840993],
       [-0.07330696,  0.05583839,  0.08503269,  0.04303575, -0.00183628],
       [-0.00610034,  0.07654186, -0.00231799, -0.0170865 , -0.06517761],
       ...,
       [-0.05424691,  0.03699274, -0.01992327, -0.07552934,  0.069757  ],
       [-0.03305273,  0.0928904 ,  0.06222329,  0.06001063,  0.06538392],
       [-0.01837391, -0.08355829,  0.09900976,  0.09283013, -0.00645979]])

In [14]:
# Gradient check for index_select on a 5 x 5 identity matrix.
x = Tensor(np.eye(5), autograd=True)

# Two rows of indices; 2 and 3 appear twice, 1 and 4 once, 0 never.
row_ids = Tensor([[1, 2, 3], [2, 3, 4]])
selected = x.index_select(row_ids)
selected.backward()

# In the recorded output each row's gradient equals the number of times
# that row was selected.
print(x.grad)

[[0. 0. 0. 0. 0.]
 [1. 1. 1. 1. 1.]
 [2. 2. 2. 2. 2.]
 [2. 2. 2. 2. 2.]
 [1. 1. 1. 1. 1.]]


In [15]:
# Inputs are now integer indices that are looked up in an Embedding layer.
data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target = Tensor(
    np.array([[0], [1], [0], [1]]),
    autograd=True,
)

# Embedding(5, 3) feeds Tanh -> Linear(3, 1) -> Sigmoid.
embed = Embedding(5, 3)
model = Sequential([
    embed,
    Tanh(),
    Linear(3, 1),
    Sigmoid(),
])
criterion = MSELoss()
optim = SGD(parameters=model.get_parameters(), alpha=0.5)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    # Start backpropagation with a gradient of ones shaped like the loss.
    grad_seed = Tensor(np.ones_like(loss.data))
    loss.backward(grad_seed)
    optim.step()
    print(loss)

[0.96438459]
[0.30384689]
[0.15730965]
[0.10253432]
[0.07482662]
[0.05837488]
[0.04758131]
[0.04000142]
[0.03440938]
[0.03012687]


In [16]:
import numpy as np
# Reseed so the model starts from the same random weights on every run.
np.random.seed(0)

# Input indices (looked up in the embedding) and target indices (the class
# each row should be assigned to).
data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target = Tensor(np.array([0, 1, 0, 1]), autograd=True)

# Embedding(3, 3) -> Tanh -> Linear(3, 4); the four outputs are scored
# against the target indices by the cross-entropy criterion.
model = Sequential([
    Embedding(3, 3),
    Tanh(),
    Linear(3, 4),
])
criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters(), alpha=0.1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    # Start backpropagation with a gradient of ones shaped like the loss.
    grad_seed = Tensor(np.ones_like(loss.data))
    loss.backward(grad_seed)
    optim.step()
    print(loss)

1.3885032434928422
0.9558181509266036
0.6823083585795604
0.509525996749312
0.39574491472895856
0.31752527285348264
0.2617222861964216
0.22061283923954225
0.18946427334830068
0.16527389263866668


# Now let's use them

In [17]:
import sys,random,math
from collections import Counter
import numpy as np

# Read the training text. The `with` block closes the file even if reading
# raises.
with open('data/tasksv11/en/qa1_single-supporting-fact_train.txt','r') as f:
    raw = f.readlines()

# Tokenise the first 1000 lines: lower-case, strip the newline, split on
# single spaces and drop the first token (presumably the per-line number in
# the bAbI file format -- confirm against the data file).
tokens = [
    line.lower().replace("\n","").split(" ")[1:]
    for line in raw[0:1000]
]

# Left-pad every sentence with '-' up to 6 tokens so the rows line up.
# NOTE(review): a line with more than 6 tokens is left unpadded and would make
# the final array ragged -- presumably no such line occurs in this slice.
new_tokens = [['-'] * (6 - len(line)) + line for line in tokens]
tokens = new_tokens

# Vocabulary: every distinct token, in set-iteration order. Words are added in
# the same order as a nested loop would add them.
vocab = list({word for sent in tokens for word in sent})

# Token -> position in `vocab`.
word2index = {word: i for i, word in enumerate(vocab)}


def words2indices(sentence):
    """Return the list of vocabulary indices for the tokens in `sentence`.

    Raises KeyError if a token is not present in `word2index`.
    """
    return [word2index[word] for word in sentence]


# Convert every padded sentence to indices. Comprehensions keep their loop
# variables local, so this cell does not overwrite the notebook-level name
# `w`, which earlier cells bind to the list of weight tensors.
indices = [words2indices(line) for line in tokens]

data = np.array(indices)

In [18]:
# Model pieces for next-word prediction: a 16-dimensional embedding per
# vocabulary entry feeding an RNN cell with 16 hidden units and one output
# score per vocabulary entry.
vocab_size = len(vocab)

embed = Embedding(vocab_size=vocab_size, dim=16)
model = RNNCell(n_inputs=16, n_hidden=16, n_output=vocab_size)
criterion = CrossEntropyLoss()

# Optimise the RNN cell's parameters together with the embedding weights.
params = model.get_parameters() + embed.get_parameters()
optim = SGD(parameters=params, alpha=0.05)

In [19]:
# Train the embedding + RNN cell to predict the last token of each sentence
# from the tokens before it. Only the first `batch_size` sentences are used,
# and every iteration trains on that same batch.
batch_size = 100  # loop-invariant, so set once
n_context = 5     # leading tokens fed to the RNN; column n_context is the target

# `step` and `token_ids` replace the names `iter` and `input`, which would
# rebind the builtins of the same name for the rest of the kernel session.
for step in range(1000):
    total_loss = 0

    # Fresh hidden state for each pass over the batch.
    hidden = model.init_hidden(batch_size=batch_size)

    # Feed the context tokens one time step at a time; only the final
    # `output` is scored.
    for t in range(n_context):
        token_ids = Tensor(data[0:batch_size, t], autograd=True)
        rnn_input = embed.forward(input=token_ids)
        output, hidden = model.forward(input=rnn_input, hidden=hidden)

    # The token right after the context (column n_context) is the label.
    target = Tensor(data[0:batch_size, n_context], autograd=True)
    loss = criterion.forward(output, target)
    loss.backward()
    optim.step()
    total_loss += loss.data
    if step % 200 == 0:
        p_correct = (target.data == np.argmax(output.data, axis=1)).mean()
        # NOTE(review): total_loss is reset every iteration and holds a single
        # batch's loss, yet it is divided by the number of batches in `data`;
        # "% Correct" is a fraction, not a percentage. Left unchanged so the
        # printed values stay comparable with the recorded output.
        print("Loss:",total_loss / (len(data)/batch_size),"% Correct:",p_correct)

Loss: 0.4867304084830028 % Correct: 0.0
Loss: 0.1782472256417358 % Correct: 0.21
Loss: 0.16358863350759348 % Correct: 0.3
Loss: 0.14174073412286864 % Correct: 0.36
Loss: 0.13615299451500776 % Correct: 0.37


In [20]:
# Run the trained model on the first sentence only and show its prediction
# for the final token.
batch_size = 1
n_context = 5  # leading tokens fed to the RNN; column n_context is the target

hidden = model.init_hidden(batch_size=batch_size)
for t in range(n_context):
    # Named `token_ids` rather than `input` so the builtin is not rebound.
    token_ids = Tensor(data[0:batch_size, t], autograd=True)
    rnn_input = embed.forward(input=token_ids)
    output, hidden = model.forward(input=rnn_input, hidden=hidden)

# Label and loss for this sentence; the loss is computed but not printed.
target = Tensor(data[0:batch_size, n_context], autograd=True)
loss = criterion.forward(output, target)

# Rebuild the context text from every token except the last, printing the
# partial string after each word.
ctx = ""
for idx in data[0:batch_size][0][0:-1]:
    ctx += vocab[idx] + " "
    print(ctx)
print("Context:",ctx)
print("Pred:",vocab[output.data.argmax()])

- 
- mary 
- mary moved 
- mary moved to 
- mary moved to the 
Context: - mary moved to the 
Pred: garden.
