In [1]:
# This notebook implements a neural net to approximate the XOR function using PyTorch
from __future__ import print_function
import numpy as np
import torch
from torch.autograd import Variable

In [3]:
# XOR truth table. Features are entered one per row and then transposed, so
# each of the four samples becomes a row of X and its label the matching row of Y.
X = torch.tensor(
    [
        [1.0, 0.0, 0.0, 1.0],  # first input bit of every sample
        [0.0, 0.0, 1.0, 1.0],  # second input bit of every sample
    ],
    dtype=torch.float32,
).t()  # 4x2 matrix
Y = torch.tensor([[1.0, 0.0, 1.0, 0.0]], dtype=torch.float32).t()  # 4x1 vector

print("input: ", X)
print("output: ", Y)


input:  tensor([[1., 0.],
        [0., 0.],
        [0., 1.],
        [1., 1.]])
output:  tensor([[1.],
        [0.],
        [1.],
        [0.]])


In [11]:
# Train a 2-8-1 network (ReLU hidden layer, sigmoid output) on the XOR table
# held in X (4x2) and Y (4x1), using hand-derived gradients and plain
# full-batch gradient descent.
#
# The parameters are ordinary tensors with autograd tracking off: every
# gradient below is computed by hand and backward() is never called, so
# tracking would only make each updated weight keep a reference to the whole
# history of earlier steps, growing memory with every iteration.
# NOTE: the initialisation is unseeded, so the printed losses differ from run
# to run; call torch.manual_seed(...) beforehand if reproducibility is needed.

# parameters of neural net
W1 = torch.empty(2, 8, dtype=torch.float32).uniform_(-1, 1) # 2x8 matrix
b1 = torch.zeros((1,8))                                     # 1x8 matrix
W2 = torch.empty(8, 1, dtype=torch.float32).uniform_(-1, 1) # 8x1 matrix
b2 = torch.zeros([1])                                       # scalar

learning_rate = 1e-2 # hyper parameter
num_steps = 50000    # number of gradient-descent updates
log_every = 1000     # print the loss once every this many steps

for step in range(num_steps):

  # forward pass
  Z1 = torch.mm(X,W1)    # 4x8 matrix
  Z2 = Z1 + b1           # 4x8 matrix; b1 is broadcast over the 4 samples
  Z3 = torch.relu(Z2)    # 4x8 matrix
  Z4 = torch.mm(Z3,W2)   # 4x1 vector
  Z5 = Z4 + b2           # 4x1 vector
  Yp = torch.sigmoid(Z5) # 4x1 vector; Forcing the output to be between 0 and 1

  # backward pass
  # dYp is the gradient of 0.5*sum((Yp-Y)**2); the loss printed below omits the
  # 0.5 factor, which only rescales the effective learning rate.
  dYp = Yp-Y # 4x1 vector
  dZ5 = Yp*(1.0-Yp)*dYp # 4x1 vector; sigmoid'(Z5) = Yp*(1-Yp), reusing Yp from the forward pass
  dZ4 = dZ5  # 4x1 vector; adding b2 passes the gradient through unchanged
  dZ3 = torch.mm(dZ4,torch.transpose(W2,0,1)) # 4x8 matrix
  dZ2 = torch.sign(Z3)*dZ3 # 4x8 matrix; Z3 = relu(Z2) >= 0, so sign(Z3) is 1 where the unit is active and 0 elsewhere
  dZ1 = dZ2 # 4x8 matrix; adding b1 passes the gradient through unchanged

  dW1 = torch.mm(torch.transpose(X,0,1),dZ1)  # 2x8 matrix
  db1 = torch.sum(dZ2,0,True)                 # 1x8 matrix; summed over samples, dim kept
  dW2 = torch.mm(torch.transpose(Z3,0,1),dZ4) # 8x1 matrix
  db2 = torch.sum(dZ5)                        # scalar; broadcasts against b2 in the update

  # adjust parameters by gradient descent
  W1 = W1 - learning_rate*dW1
  b1 = b1 - learning_rate*db1
  W2 = W2 - learning_rate*dW2
  b2 = b2 - learning_rate*db2

  if step%log_every == 0:
    # loss of the predictions made before this step's update
    loss = torch.sum((Yp-Y)**2)
    print("loss:",loss.item())

# predictions from the last forward pass, followed by the targets
print(Yp)
print(Y)




loss: 1.0884015560150146
loss: 0.4088684320449829
loss: 0.16048265993595123
loss: 0.0790882408618927
loss: 0.047324713319540024
loss: 0.03204113617539406
loss: 0.02351776324212551
loss: 0.018234873190522194
loss: 0.014707228168845177
loss: 0.012215147726237774
loss: 0.010378334671258926
loss: 0.008977625519037247
loss: 0.00788001250475645
loss: 0.00700012082234025
loss: 0.006281333044171333
loss: 0.005684740375727415
loss: 0.005182324908673763
loss: 0.004754583816975355
loss: 0.004386661108583212
loss: 0.0040670838207006454
loss: 0.0037872791290283203
loss: 0.003540691453963518
loss: 0.0033215077128261328
loss: 0.0031258277595043182
loss: 0.0029502147808670998
loss: 0.002791829640045762
loss: 0.002648192923516035
loss: 0.002517632907256484
loss: 0.0023983188439160585
loss: 0.002289105672389269
loss: 0.0021883829031139612
loss: 0.002095692791044712
loss: 0.002009882591664791
loss: 0.0019303483422845602
loss: 0.0018563944613561034
loss: 0.001787447021342814
loss: 0.0017231636447831988
lo