### PyTorch教程：Linear Regression的Autograd实现

In [3]:
import torch

# Design matrix: six samples, two features (x1, x2) per row.
X = torch.tensor(
    [
        [1.0, 1.0],
        [2.0, 2.0],
        [3.0, 3.0],
        [4.0, 4.0],
        [5.0, 1.0],
        [4.0, 3.0],
    ],
    dtype=torch.float32,
)
# Targets are generated exactly by y = 2 * x1 + 3 * x2 + 5 (no noise),
# so training should drive the weights toward (2, 3) and the bias toward 5.
y = torch.tensor(
    [[10.0], [15.0], [20.0], [25.0], [18.0], [22.0]],
    dtype=torch.float32,
)

X.shape

torch.Size([6, 2])

In [4]:
# Weight column vector with one entry per input feature. requires_grad=True
# makes autograd fill in w.grad when backward() is called on the loss.
w = torch.randn(2, 1, requires_grad=True)
# Only the last expression of a cell is displayed, so the shape is shown here.
w.shape

torch.Size([2, 1])

In [5]:
# Bias term of shape (1, 1), randomly initialised and tracked by autograd.
b = torch.randn((1, 1), requires_grad=True)
b

tensor([[0.5717]], requires_grad=True)

In [6]:
# Mean squared error criterion; "mean" is the library default reduction,
# spelled out here for clarity.
loss = torch.nn.MSELoss(reduction="mean")
loss

MSELoss()

In [7]:
def forword(x):
    """Return the linear model's prediction ``x @ w + b``.

    The notebook-level tensors ``w`` and ``b`` are looked up at call time, so
    rebinding either name changes what this function computes.
    """
    return x @ w + b

In [8]:
# Hyperparameters read by the training loop.
iter_count = 10_000  # number of full-batch update steps
lr = 2e-3  # step size applied to each gradient

In [9]:
# Full-batch gradient descent written by hand on top of autograd.
# The loop variable is called `step` so that the builtin `iter` is not
# shadowed in the notebook namespace after this cell runs.
for step in range(iter_count):
    y_pred = forword(X)
    loss_value = loss(y_pred, y)
    print(f'iter: {step} loss: {loss_value}')
    # Fill w.grad and b.grad with the gradient of the loss.
    loss_value.backward()
    # The parameter update itself must not be recorded by autograd.
    with torch.no_grad():
        w -= lr * w.grad
        b -= lr * b.grad
        # Gradients accumulate across backward() calls, so clear them
        # before the next step.
        w.grad.zero_()
        b.grad.zero_()

iter: 0 loss: 368.9737854003906
iter: 1 loss: 317.5539245605469
iter: 2 loss: 273.4143371582031
iter: 3 loss: 235.52357482910156
iter: 4 loss: 202.9962158203125
iter: 5 loss: 175.0723419189453
iter: 6 loss: 151.09970092773438
iter: 7 loss: 130.5184783935547
iter: 8 loss: 112.84817504882812
iter: 9 loss: 97.67635345458984
iter: 10 loss: 84.64901733398438
iter: 11 loss: 73.46239471435547
iter: 12 loss: 63.85567092895508
iter: 13 loss: 55.60504150390625
iter: 14 loss: 48.51839065551758
iter: 15 loss: 42.430816650390625
iter: 16 loss: 37.200809478759766
iter: 17 loss: 32.70688247680664
iter: 18 loss: 28.844789505004883
iter: 19 loss: 25.52504539489746
iter: 20 loss: 22.67081642150879
iter: 21 loss: 20.216203689575195
iter: 22 loss: 18.104612350463867
iter: 23 loss: 16.287464141845703
iter: 24 loss: 14.723095893859863
iter: 25 loss: 13.375717163085938
iter: 26 loss: 12.214608192443848
iter: 27 loss: 11.213419914245605
iter: 28 loss: 10.349529266357422
iter: 29 loss: 9.603522300720215
iter: 

In [10]:
# Inspect the trained weights; the data was generated with coefficients 2 and 3.
w

tensor([[2.0069],
        [3.0071]], requires_grad=True)

In [11]:
# Inspect the trained bias; the data was generated with intercept 5.
b

tensor([[4.9569]], requires_grad=True)

### 使用Optimizer

In [12]:
# Start over from fresh random parameters. forword() looks up w and b by
# name, so rebinding them here resets the model it evaluates.
w = torch.randn((2, 1), requires_grad=True)
b = torch.randn((1, 1), requires_grad=True)

In [13]:
# Hyperparameters for the optimizer-driven training runs.
iter_count = 10_000  # number of optimizer steps
lr = 1e-3  # learning rate handed to the optimizer

In [14]:
# MSE criterion plus an SGD optimizer that owns the update rule for w and b.
loss = torch.nn.MSELoss(reduction="mean")
optimizer = torch.optim.SGD(params=[w, b], lr=lr)

In [15]:
# Same full-batch training as the manual loop, with the update delegated to
# the optimizer. The loop variable is `step` so the builtin `iter` is not
# shadowed in the notebook namespace.
for step in range(iter_count):
    y_pred = forword(X)
    loss_value = loss(y_pred, y)
    print(f'iter: {step} loss: {loss_value}')
    loss_value.backward()
    # optimizer.step() stands in for the hand-written
    # `w -= lr * w.grad; b -= lr * b.grad` under torch.no_grad().
    optimizer.step()
    # optimizer.zero_grad() stands in for zeroing w.grad and b.grad by hand.
    optimizer.zero_grad()

iter: 0 loss: 308.9229431152344
iter: 1 loss: 286.7101135253906
iter: 2 loss: 266.1009826660156
iter: 3 loss: 246.9797821044922
iter: 4 loss: 229.2390899658203
iter: 5 loss: 212.77919006347656
iter: 6 loss: 197.5076446533203
iter: 7 loss: 183.33863830566406
iter: 8 loss: 170.1925811767578
iter: 9 loss: 157.99562072753906
iter: 10 loss: 146.67919921875
iter: 11 loss: 136.17977905273438
iter: 12 loss: 126.4383544921875
iter: 13 loss: 117.40019989013672
iter: 14 loss: 109.0145492553711
iter: 15 loss: 101.2342758178711
iter: 16 loss: 94.01567840576172
iter: 17 loss: 87.31822967529297
iter: 18 loss: 81.10425567626953
iter: 19 loss: 75.33887481689453
iter: 20 loss: 69.98970031738281
iter: 21 loss: 65.02667999267578
iter: 22 loss: 60.42195129394531
iter: 23 loss: 56.14959716796875
iter: 24 loss: 52.1856689453125
iter: 25 loss: 48.50788497924805
iter: 26 loss: 45.09556198120117
iter: 27 loss: 41.929569244384766
iter: 28 loss: 38.99209213256836
iter: 29 loss: 36.26665496826172
iter: 30 loss: 33

In [16]:
# Inspect the parameters trained via the optimizer; the data was generated
# with coefficients (2, 3) and intercept 5.
w,b

(tensor([[2.0588],
         [3.0602]], requires_grad=True),
 tensor([[4.6334]], requires_grad=True))

### Module

In [17]:
class MyModel(torch.nn.Module):
    """Linear model ``x @ w + b`` with hand-declared parameters.

    Args:
        in_features: Number of input features per sample. Defaults to 2.
        out_features: Number of predicted values per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 2, out_features: int = 1):
        super().__init__()
        # Wrapping the tensors in nn.Parameter registers them on the module,
        # so model.parameters() hands them to the optimizer.
        self.w = torch.nn.Parameter(
            torch.randn(in_features, out_features, dtype=torch.float32)
        )
        # Shape (1, out_features) broadcasts over the batch dimension.
        self.b = torch.nn.Parameter(
            torch.randn(1, out_features, dtype=torch.float32)
        )

    def forward(self, x):
        """Return ``x @ w + b`` for ``x`` of shape (batch, in_features)."""
        return torch.matmul(x, self.w) + self.b
        


In [18]:
# Build the module together with its criterion and optimizer; `lr` is the
# value assigned earlier in the notebook.
model = MyModel()
loss = torch.nn.MSELoss(reduction="mean")
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

In [19]:
# Train the nn.Module version: calling model(X) runs MyModel.forward.
# The loop variable is `step` so the builtin `iter` is not shadowed in the
# notebook namespace.
for step in range(iter_count):
    y_pred = model(X)
    loss_value = loss(y_pred, y)
    print(f'iter: {step} loss: {loss_value}')
    loss_value.backward()
    # Update the parameters registered on the model, then clear their
    # gradients so they do not accumulate into the next step.
    optimizer.step()
    optimizer.zero_grad()

iter: 0 loss: 523.2731323242188
iter: 1 loss: 485.6503601074219
iter: 2 loss: 450.7437438964844
iter: 3 loss: 418.3572692871094
iter: 4 loss: 388.3090515136719
iter: 5 loss: 360.4302673339844
iter: 6 loss: 334.564208984375
iter: 7 loss: 310.5655822753906
iter: 8 loss: 288.299560546875
iter: 9 loss: 267.6410827636719
iter: 10 loss: 248.47406005859375
iter: 11 loss: 230.6907501220703
iter: 12 loss: 214.19139099121094
iter: 13 loss: 198.88316345214844
iter: 14 loss: 184.68006896972656
iter: 15 loss: 171.5023956298828
iter: 16 loss: 159.2760467529297
iter: 17 loss: 147.932373046875
iter: 18 loss: 137.40760803222656
iter: 19 loss: 127.64263916015625
iter: 20 loss: 118.5826187133789
iter: 21 loss: 110.1766357421875
iter: 22 loss: 102.37751007080078
iter: 23 loss: 95.14139556884766
iter: 24 loss: 88.42764282226562
iter: 25 loss: 82.19853210449219
iter: 26 loss: 76.41907501220703
iter: 27 loss: 71.05680084228516
iter: 28 loss: 66.08162689208984
iter: 29 loss: 61.4655647277832
iter: 30 loss: 57

In [20]:
# Inspect the module's trained weights; the data was generated with coefficients 2 and 3.
model.w

Parameter containing:
tensor([[2.0782],
        [3.0801]], requires_grad=True)

In [21]:
# Inspect the module's trained bias; the data was generated with intercept 5.
model.b

Parameter containing:
tensor([[4.5124]], requires_grad=True)

### 使用torch.nn.Linear来实现

In [22]:
class MyModel(torch.nn.Module):
    """Linear regression model backed by ``torch.nn.Linear``.

    ``nn.Linear`` creates and registers its own weight and bias, so no
    hand-declared ``nn.Parameter`` tensors are needed.

    Args:
        in_features: Number of input features per sample. Defaults to 2.
        out_features: Number of predicted values per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 2, out_features: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(in_features, out_features, bias=True)

    def forward(self, x):
        """Apply the linear layer to ``x`` of shape (batch, in_features)."""
        return self.linear(x)

In [23]:
# Instantiate the nn.Linear-based model with its criterion and optimizer;
# `lr` is the value assigned earlier in the notebook.
model = MyModel()
loss = torch.nn.MSELoss(reduction="mean")
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

In [24]:
# Train the nn.Linear-based model with the same full-batch loop.
# The loop variable is `step` so the builtin `iter` is not shadowed in the
# notebook namespace.
for step in range(iter_count):
    y_pred = model(X)
    loss_value = loss(y_pred, y)
    print(f'iter: {step} loss: {loss_value}')
    loss_value.backward()
    # Update the layer's weight and bias, then clear their gradients so they
    # do not accumulate into the next step.
    optimizer.step()
    optimizer.zero_grad()

iter: 0 loss: 498.3236389160156
iter: 1 loss: 462.682861328125
iter: 2 loss: 429.614501953125
iter: 3 loss: 398.9327392578125
iter: 4 loss: 370.46533203125
iter: 5 loss: 344.0523376464844
iter: 6 loss: 319.5455322265625
iter: 7 loss: 296.8072509765625
iter: 8 loss: 275.70977783203125
iter: 9 loss: 256.1346740722656
iter: 10 loss: 237.97203063964844
iter: 11 loss: 221.11988830566406
iter: 12 loss: 205.4835968017578
iter: 13 loss: 190.9753875732422
iter: 14 loss: 177.5138397216797
iter: 15 loss: 165.0233612060547
iter: 16 loss: 153.43385314941406
iter: 17 loss: 142.6802520751953
iter: 18 loss: 132.7022247314453
iter: 19 loss: 123.44379425048828
iter: 20 loss: 114.85298919677734
iter: 21 loss: 106.881591796875
iter: 22 loss: 99.48492431640625
iter: 23 loss: 92.6214828491211
iter: 24 loss: 86.25273895263672
iter: 25 loss: 80.34300994873047
iter: 26 loss: 74.85916137695312
iter: 27 loss: 69.77044677734375
iter: 28 loss: 65.0483169555664
iter: 29 loss: 60.66633224487305
iter: 30 loss: 56.599

In [26]:
# Inspect the trained layer. nn.Linear stores its weight as
# (out_features, in_features), so the weight is a single row of two values.
model.linear.weight, model.linear.bias

(Parameter containing:
 tensor([[2.0724, 3.0742]], requires_grad=True), Parameter containing:
 tensor([4.5485], requires_grad=True))