In [1]:
import numpy as np
import torch

In [2]:
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [3]:
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [4]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [5]:
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.2295, -0.7795, -0.3196],
        [-1.9622, -0.4984, -1.5213]], requires_grad=True)
tensor([ 1.0759, -0.8477], requires_grad=True)


In [6]:
def model(x):
    return x @ w.t() + b

In [7]:
preds = model(inputs)
print(preds)

tensor([[ -48.1416, -242.8923],
        [ -67.0920, -320.6242],
        [-101.9499, -326.5725],
        [ -20.8610, -278.7069],
        [ -80.2941, -290.5710]], grad_fn=<AddBackward0>)


In [8]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [9]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [11]:
loss = mse(preds, targets)
print(loss)

tensor(87132.4141, grad_fn=<DivBackward0>)


In [12]:
loss.backward()

  Variable._execution_engine.run_backward(


In [13]:
print(w)
print(w.grad)

tensor([[ 0.2295, -0.7795, -0.3196],
        [-1.9622, -0.4984, -1.5213]], requires_grad=True)
tensor([[-11464.0947, -13811.2256,  -8237.5039],
        [-32330.8535, -34508.7305, -21488.9316]])


In [14]:
w
w.grad

tensor([[-11464.0947, -13811.2256,  -8237.5039],
        [-32330.8535, -34508.7305, -21488.9316]])

In [15]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5

In [16]:
loss = mse(preds, targets)
print(loss)

tensor(87132.4141, grad_fn=<DivBackward0>)


In [17]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [18]:
preds = model(inputs)
print(preds)

tensor([[-2.6976e+01, -1.8693e+02],
        [-3.9232e+01, -2.4708e+02],
        [-6.8690e+01, -2.3974e+02],
        [-1.7955e-01, -2.2294e+02],
        [-5.3357e+01, -2.2009e+02]], grad_fn=<AddBackward0>)


In [19]:
loss = mse(preds, targets)
print(loss)

tensor(59015.6953, grad_fn=<DivBackward0>)


In [20]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[ -9275.6768, -11450.8633,  -6782.9023],
        [-26553.8613, -28304.2500, -17659.4648]])
tensor([-113.8870, -315.3529])


In [21]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [22]:
print(w)
print(b)

tensor([[ 0.4369, -0.5269, -0.1694],
        [-1.3733,  0.1298, -1.1298]], requires_grad=True)
tensor([ 1.0784, -0.8407], requires_grad=True)


In [23]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(40064.8281, grad_fn=<DivBackward0>)
