In [0]:
# Change directory to VSCode workspace root so that relative path loads work correctly. Turn this addition off with the DataScience.changeDirOnImportExport setting
import os
try:
	os.chdir(os.path.join(os.getcwd(), '..'))
	print(os.getcwd())
except:
	pass


In [26]:
import numpy as np
import torch
import torch.nn as nn


In [27]:
"""
requires_grad=Falseだと微分の対象にならず勾配はNoneが返る
Fie-tuningで層のパラメータを固定したいときに便利
計算グラフを構築してbackward()を実行するとグラフを構築する各変数のgradに勾配が入る
"""
x = torch.tensor(1.0, requires_grad=True)
w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)

# 計算グラフを構築
y = w * x + b
# 勾配を計算
y.backward()

# 勾配の表示
print(x.grad)
print(w.grad)
print(b.grad)


tensor(2.)
tensor(1.)
tensor(1.)


In [28]:
x = torch.tensor(2.0, requires_grad=True)
y = x ** 2
y.backward()
print(x.grad)


tensor(4.)


In [29]:
x = torch.tensor(2.0, requires_grad=True)
y = torch.exp(x)
y.backward()
print(x.grad)


tensor(7.3891)


In [30]:
x = torch.tensor(np.pi, requires_grad=True)
y = torch.sin(x)
y.backward()
print(x.grad)


tensor(-1.)


In [31]:
x = torch.tensor(0.0, requires_grad=True)
y = (x - 4) * ( x ** 2 + 6)
y.backward()
print(x.grad)


tensor(6.)


In [32]:
x = torch.tensor(2.0, requires_grad=True)
y = (torch.sqrt(x) + 1) ** 3
y.backward()
print(x.grad)


tensor(6.1820)


In [33]:
x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(2.0, requires_grad=True)
z = (x + 2*y)**2
z.backward()
print(x.grad) # dz/dx
print(y.grad) # dz/dy

# lossを微分する
# バッチサンプル数=5, 入力の特徴量の次元数=3
x = torch.randn(5, 3)
# バッチサンプル数=5, 出力の特徴量の次元数=2
y = torch.randn(5, 2)

# Liner層を作成
linear = nn.Linear(3, 2)
# Linear層のパラメータ
print('w:', linear.weight)
print('b:', linear.bias)

# lossとoptimier
criterion = nn.MSELoss()
optimzier = torch.optim.SGD(linear.parameters(), lr=0.01)

# forward
pred = linear(x)

# loss = L
loss = criterion(pred, y)
print('loss:', loss)

# backpropagation
loss.backward()

# 勾配を表示
print('dL/dw:', linear.weight.grad)
print('dL/db:', linear.bias.grad)

# 勾配を用いてパラメータを更新
print('*** by hand')
print(linear.weight.sub(0.01 * linear.weight.grad))
print(linear.bias.sub(0.01 * linear.bias.grad))

# 勾配降下法
optimzier.step()

# 1ステップ更新後のパラメータを表示
# 上の式と結果が一致することがわかる
print('*** by optimizer.step(')
print(linear.weight)
print(linear.bias)

tensor(10.)
tensor(20.)
w: Parameter containing:
tensor([[-0.2650, -0.4728,  0.4202],
        [ 0.2455, -0.2916,  0.5430]], requires_grad=True)
b: Parameter containing:
tensor([0.3882, 0.2735], requires_grad=True)
loss: tensor(1.2829, grad_fn=<MseLossBackward>)
dL/dw: tensor([[-0.0124,  0.2090,  0.1433],
        [ 0.3311,  0.3485,  0.4343]])
dL/db: tensor([0.4206, 0.1741])
*** by hand
tensor([[-0.2649, -0.4749,  0.4188],
        [ 0.2422, -0.2951,  0.5387]], grad_fn=<SubBackward0>)
tensor([0.3840, 0.2717], grad_fn=<SubBackward0>)
*** by optimizer.step(
Parameter containing:
tensor([[-0.2649, -0.4749,  0.4188],
        [ 0.2422, -0.2951,  0.5387]], requires_grad=True)
Parameter containing:
tensor([0.3840, 0.2717], requires_grad=True)
