In [1]:
# Ref https://cs230-stanford.github.io/pytorch-getting-started.html

In [2]:
# Tensor and Variables

In [3]:
import torch

In [4]:
a = torch.Tensor([[1,2],[3,4]])

In [5]:
a

tensor([[ 1.,  2.],
        [ 3.,  4.]])

In [6]:
a ** 2

tensor([[  1.,   4.],
        [  9.,  16.]])

In [7]:
torch.sum(a ** 3)

tensor(100.)

In [8]:
a ** 3

tensor([[  1.,   8.],
        [ 27.,  64.]])

In [9]:
# pytorch variables allows you to wrap a tensor so you cand record operations perfomed on it
# i.e automatic differentiation

In [10]:
from torch.autograd import Variable

In [11]:
a = Variable(torch.Tensor([[1,2],[3,4]]))

In [12]:
a

tensor([[ 1.,  2.],
        [ 3.,  4.]])

In [13]:
a ** 2

tensor([[  1.,   4.],
        [  9.,  16.]])

In [14]:
torch.sum(a ** 2)

tensor(30.)

In [15]:
y = torch.sum( a ** 2)

In [16]:
y

tensor(30.)

In [17]:
# y.backward()  we will try to compute gradients , but we can not

In [18]:
a = Variable(torch.Tensor([[1,2],[3,4]]), requires_grad=True) # we can do gradient computation

In [19]:
a

tensor([[ 1.,  2.],
        [ 3.,  4.]])

In [20]:
z = torch.sum(a ** 2)

In [21]:
z

tensor(30.)

In [22]:
z.backward() # now, we can do it

In [23]:
z

tensor(30.)

In [24]:
a.grad

tensor([[ 2.,  4.],
        [ 6.,  8.]])

In [25]:
print(a.grad)

tensor([[ 2.,  4.],
        [ 6.,  8.]])


In [26]:
# Ref https://pytorch.org/tutorials/beginner/pytorch_with_examples.html

In [27]:
import torch

In [28]:
# defining some values

In [29]:
dtype = torch.float
device = torch.device("cpu")

In [30]:
N, D_in, H, D_out = 64, 1000, 100, 10

In [32]:
# create random input and output data
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

In [33]:
# randomly initialize weights
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

In [34]:
learning_rate = 1e-6

In [36]:
for t in range(10): # we only want to see so we will use 10 iterations
    # forward pass, computed the predicted y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)
    
    # compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    print(t, loss)
    
    # backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)
    
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 3.1470695830648765e-05
1 3.1092011340660974e-05
2 3.072843173868023e-05
3 3.0280922146630473e-05
4 2.9804657970089465e-05
5 2.9504826670745388e-05
6 2.9228523999336176e-05
7 2.8895696232211776e-05
8 2.8522914362838492e-05
9 2.8303453291300684e-05


In [37]:
# now, we will use automatic differentiation in neural networks

In [38]:
# the autograd package provides this functionatily

In [39]:
# remember nodes in the graph are tensor and edges are functions that produces tensors

In [40]:
# backpropagation through this graph - through edges - allows easily compute gradients

In [41]:
import torch

dtype = torch.float
device = torch.device('cpu')

In [42]:
N, D_in, H, D_out = 64, 1000, 100, 10 # batch size, input dim, hidden dim, output dim

In [43]:
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

In [44]:
# setting for weights need requires_grad=True, to compute gradients
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

In [45]:
learning_rate = 1e-6

In [47]:
for t in range(10):
    
    # forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())
    
    # backward pass with autograd
    loss.backward()
    
    # updating the weights
    # excluding for autograd for this action
    with torch.no_grad():
        
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        
        w1.grad.zero_()
        w2.grad.zero_()   

0 1695516.875
1 1165139.125
2 870647.125
3 691773.0625
4 572514.6875
5 486085.0
6 419479.5625
7 365898.96875
8 321554.40625
9 284237.03125


In [48]:
torch.__version__

'0.4.0'