In [37]:

import numpy as np
import torch

In [2]:
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')


In [3]:
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [4]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [5]:
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.7944,  0.2623,  0.1017],
        [ 0.6978,  0.4452, -0.1775]], requires_grad=True)
tensor([0.2089, 1.1996], requires_grad=True)


In [6]:
def model(x):
    return x @ w.t() + b

In [7]:
preds = model(inputs)
print(preds)

tensor([[ 80.1444,  74.3349],
        [102.0869,  92.5170],
        [110.3633, 111.2709],
        [ 96.2769,  84.9503],
        [ 87.3189,  79.6626]], grad_fn=<AddBackward0>)


In [8]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [9]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [10]:
loss = mse(preds, targets)
print(loss)

tensor(1127.4738, grad_fn=<DivBackward0>)


In [11]:
loss.backward()

  Variable._execution_engine.run_backward(


In [12]:
print(w)
print(w.grad)

tensor([[ 0.7944,  0.2623,  0.1017],
        [ 0.6978,  0.4452, -0.1775]], requires_grad=True)
tensor([[ 1884.8619,   800.9053,   707.4827],
        [  -33.8562, -1016.4574,  -519.2507]])


In [13]:
w
w.grad

tensor([[ 1884.8619,   800.9053,   707.4827],
        [  -33.8562, -1016.4574,  -519.2507]])

In [14]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5

In [15]:
loss = mse(preds, targets)
print(loss)

tensor(1127.4738, grad_fn=<DivBackward0>)


In [16]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [17]:
preds = model(inputs)
print(preds)

tensor([[ 77.9275,  75.2639],
        [ 99.2139,  93.7747],
        [107.2397, 112.9637],
        [ 93.7480,  85.6141],
        [ 84.7540,  81.0252]], grad_fn=<AddBackward0>)


In [18]:
loss = mse(preds, targets)
print(loss)

tensor(1071.8344, grad_fn=<DivBackward0>)


In [19]:
print(w)

tensor([[ 0.7755,  0.2543,  0.0946],
        [ 0.6981,  0.4554, -0.1723]], requires_grad=True)


In [20]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[1658.8710,  565.9277,  560.7870],
        [  64.3959, -904.6373, -451.5382]])
tensor([16.3766, -2.2717])


In [21]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [22]:
print(w)
print(b)

tensor([[ 0.7589,  0.2486,  0.0890],
        [ 0.6975,  0.4644, -0.1678]], requires_grad=True)
tensor([0.2085, 1.1997], requires_grad=True)


In [23]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(1030.6450, grad_fn=<DivBackward0>)


In [32]:
for i in range(20):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [33]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(119.2059, grad_fn=<DivBackward0>)


In [34]:
preds

tensor([[ 60.4901,  73.5721],
        [ 81.1427,  96.2753],
        [115.7687, 137.7118],
        [ 39.8759,  54.1523],
        [ 89.1413, 101.7276]], grad_fn=<AddBackward0>)

In [35]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])