In [1]:
import torch

## TorchのTensorになれる
https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py
を見ても問題ないです。

In [2]:
x = torch.empty(5, 3)
print(x)

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [2.8026e-45, 0.0000e+00, 1.1210e-44],
        [0.0000e+00, 1.4013e-45, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 4.7924e-43]])


In [3]:
x = torch.rand(5, 3)
print(x)

tensor([[0.0654, 0.9549, 0.6570],
        [0.8909, 0.2726, 0.5984],
        [0.7001, 0.5908, 0.8317],
        [0.5684, 0.3025, 0.2815],
        [0.4277, 0.0446, 0.9130]])


In [4]:
y = torch.zeros(5, 3, dtype=torch.double)
print(y)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)


In [5]:
y + x

tensor([[0.0654, 0.9549, 0.6570],
        [0.8909, 0.2726, 0.5984],
        [0.7001, 0.5908, 0.8317],
        [0.5684, 0.3025, 0.2815],
        [0.4277, 0.0446, 0.9130]], dtype=torch.float64)

In [6]:
x = torch.tensor([5.5, 3])
print(x)
x = x.new_ones(5, 3, dtype=torch.double)      # 5×3の1を作成,引数の指定がない場合,dtype,deviceを引き継ぐ
print(x)
x = torch.randn_like(x, dtype=torch.float)    # override dtype!
print(x)                        

tensor([5.5000, 3.0000])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[ 0.6208,  2.3227,  0.1859],
        [ 1.5298,  0.2337,  1.1288],
        [-1.3912, -1.3513, -0.5153],
        [-1.7013,  0.8664,  0.7836],
        [-0.1695, -0.6868,  0.3619]])


###  足し算/掛け算

In [31]:
y = torch.rand(5, 3)
print(x + y)

result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)
x * 7

tensor([[1.8528, 2.1026, 3.4816],
        [1.6767, 2.8259, 3.3977],
        [1.6185, 2.0488, 3.4558],
        [1.1889, 2.1277, 3.0367],
        [1.0141, 2.3540, 3.6196]])
tensor([[1.8528, 2.1026, 3.4816],
        [1.6767, 2.8259, 3.3977],
        [1.6185, 2.0488, 3.4558],
        [1.1889, 2.1277, 3.0367],
        [1.0141, 2.3540, 3.6196]])


tensor([ 7., 14., 21.])

### 足し算(inplace)

In [51]:
y.add_(x)
print(y)

tensor([[ 0.5759,  1.5743, -0.2412],
        [ 0.0179, -0.5719,  1.1309],
        [ 0.2939,  1.7642,  0.8788],
        [-1.3670,  1.2322, -0.5794],
        [ 0.1912, -0.2151, -0.5160]])


In [53]:
# 型の変更
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [54]:
# Pytorch TensorからPythonの数値型の要素を取得
x = torch.randn(1)
print(x)
print(x.item())

tensor([0.5627])
0.5626869201660156


In [12]:

a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


## 微分の計算

In [59]:
X1 = torch.tensor(3., requires_grad=True)
X2 = torch.tensor(2., requires_grad=True)

Y1 = 3*X1 + 2*X2
Y2 = 4*X1 + X2

In [60]:
print(torch.autograd.grad(Y1, [X1, X2], retain_graph=True))
print(torch.autograd.grad(Y2, [X1, X2], retain_graph=True))

(tensor(3.), tensor(2.))
(tensor(4.), tensor(1.))


In [61]:
X1 = torch.tensor([3., 2.] , requires_grad=True)
X2 = torch.tensor(2., requires_grad=True)

Y1 = 3*X1 + 2*X2
Z = Y1.sum()

In [62]:
Z.grad_fn

<SumBackward0 at 0x13074b208>

In [64]:
Z.grad_fn.next_functions[0]

(<AddBackward0 at 0x13074b5f8>, 0)

In [37]:
print(torch.autograd.grad(Z, X1, retain_graph=True, allow_unused=True))

(tensor([3., 3.]),)


In [38]:
print(torch.autograd.grad(Z, [X1, X2], retain_graph=True, allow_unused=True))

(tensor([3., 3.]), tensor(4.))


In [39]:
print(torch.autograd.grad(Y1, [X1, X2], retain_graph=True))
print(torch.autograd.grad(Y2, [X1, X2], retain_graph=True))

RuntimeError: grad can be implicitly created only for scalar outputs

## backwardする方法も

In [57]:
Y1.backward()

In [59]:
X1.grad

tensor(3.)

## ざっくりした機械学習の動作

In [60]:
# 定数の準備
dtype = torch.float
device = torch.device("cpu")
N, D_in, H, D_out = 64, 1000, 100, 10

# データの作成
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# パラメータの初期化
# requires_grad = Trueで勾配を覚える設定を追加
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

In [16]:
# 損失を最小になるように変更する

In [61]:
learning_rate = 1e-6
for t in range(500):
    # 1. 行列w1ををかける
    # 2. 活性化関数ReLUを合成
    # 3. もう一度行列w2をかける
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # 二乗の誤差を評価
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        # 進捗を表示
        print(t, loss.item())

    # lossに対する偏微分を実行
    # requires_grad = Trueにしたものに偏微分が設定される
    loss.backward()

    # パラメータの更新
    # no_gradにするのはこの操作に対する勾配が必要ないので
    # 実際に実装する時はSGDで行う事が多い
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()

99 360.54083251953125
199 0.96071457862854
299 0.003686616662889719
399 0.00010556292545516044
499 2.3295864593819715e-05


In [18]:
x.grad

# NNのテンプレ(一旦こんな感じというのさえわかっていただければ良いです)

## NNの作成
- Dataset/DataLoaderでデータを作成
- nn.Moduleでネットワーク作成
- 最適化の設定
- 学習
- 評価


In [1]:
# パッケージimport
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## データ読み込み

In [20]:
batch_size = 32
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)

## Networkの作成

In [21]:
class Net(nn.Module):
    def __init__(self, input_size, output_size):
        super(Net, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.linear = nn.Linear(input_size, output_size)
    
    def forward(self, x):
        x = x.view(-1, self.input_size) # reshape
        return self.linear(x)

## 最適化の設定

In [22]:
input_size = 28 * 28
output_size = 10
model = Net(input_size, output_size)
criterion = nn.CrossEntropyLoss() # 損失の定義
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # (確率的)勾配降下法

## 学習

In [23]:
model.train() # 学習用のモード
for epoch, (data, target) in enumerate(train_loader): # 入力と正解
     optimizer.zero_grad() #Weightの初期化
     output = model(data) # 仮説で値代入
     loss = criterion(output, target) # 損失
     loss.backward() # 微分の計算
     optimizer.step() # パラメータの更新
     if epoch % 600 == 0:
         print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
             epoch, epoch * len(data), len(train_loader.dataset),
             100. * epoch / len(train_loader), loss.item()))



## 評価

In [24]:
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: -0.1025, Accuracy: 1068/10000 (11%)



今回はデータをすべて一度しか使っておらず、学習は収束していない。 そのため、何回か学習を繰り返すことで評価データの精度も向上する。

## 演習1

In [25]:
print(torch.cuda.is_available())

False


## 演習2

In [26]:
print(torch.autograd.grad(torch.exp(X1), X1))
print(torch.autograd.grad(torch.cos(X1), X1))
print(torch.autograd.grad(torch.sin(X1), X1))
print(torch.autograd.grad(X1 ** 2, X1))
print(torch.autograd.grad(torch.exp(3 *X1)/(torch.exp(3*X1) + torch.exp(2*X1)), X1))

(tensor(20.0855),)
(tensor(-0.1411),)
(tensor(-0.9900),)
(tensor(6.),)
(tensor(0.0452),)


## 演習3

In [27]:
y = torch.exp(3 *X1)/(torch.exp(3*X1) + torch.exp(2*X1))
z1 = torch.exp(y)
z2 = torch.cos(y)
z3 = torch.sin(y)
z4 = y * y
z5 = torch.exp(3 *y)/(torch.exp(3*y) + torch.exp(2*y))

In [28]:
print(torch.autograd.grad(z1, X1,retain_graph=True))
print(torch.autograd.grad(z2, X1,retain_graph=True))
print(torch.autograd.grad(z3, X1,retain_graph=True))
print(torch.autograd.grad(z4, X1,retain_graph=True))
print(torch.autograd.grad(z5, X1,retain_graph=True))

(tensor(0.1171),)
(tensor(-0.0368),)
(tensor(0.0262),)
(tensor(0.0861),)
(tensor(0.0091),)


## 調査

In [26]:
class Net(nn.Module):
    def __init__(self, input_size, output_size):
        # type: Net, object selfを明記した形
        super(Net, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.linear = nn.Linear(input_size, output_size)
    
    def forward(self, x):
        x = x.view(-1, self.input_size) # reshape
        return self.linear(x)

In [27]:
net = Net(3, 5)

In [30]:
type("s")

str

In [29]:
type(net)

__main__.Net

In [5]:
import torch

In [21]:
x = torch.tensor([1, 2,3]).float()

In [22]:
y = net(x)

In [11]:
criterion = nn.CrossEntropyLoss()

In [14]:
criterion(y, torch.tensor([1]))

tensor(1.8976, grad_fn=<NllLossBackward>)

## Tensorについてさらに詳しく

In [78]:
x = torch.tensor([1.,2.,3.], requires_grad=True, device="cpu")
print(x)

tensor([1., 2., 3.], requires_grad=True)


In [80]:
print(x.data)
print(x.dtype)
print(x.device)
print(x.grad)
print(x.grad_fn)

tensor([1., 2., 3.])
torch.float32
cpu
None
None


## Autogradの計算

In [65]:
# w=1.0, b=0.0で初期化
param_w = torch.tensor([1.0], requires_grad=True)
param_b = torch.tensor([0.0], requires_grad=True)
print(param_w.is_leaf, param_b.is_leaf)  # True True

# 1次関数を仮定して計算 (Step 1)
y_p = param_w * x + param_b
print(y_p, y_p.is_leaf)
# tensor([-1.5000, -1.0000, -0.1000,  0.9000,  1.8000,  2.2000,  3.1000],
#        grad_fn=<AddBackward0>) False

# 平均2乗誤差を計算 (Step 2)
loss = torch.mean((y_p - y)**2)
print(loss)
# tensor(137.6195, grad_fn=<MeanBackward0>)

print(y_p.grad_fn)
# <AddBackward0 object at 0x7f33db45f588>
print(loss.grad_fn.next_functions[0][0].next_functions[0][0].next_functions[0][0])
# <AddBackward0 object at 0x7f33db45fac8>

print("before:", param_w.grad, param_b.grad)
# before: None None

# 誤差を伝播 (Step 3)
loss.backward()

print("after:", param_w.grad, param_b.grad)
# after: tensor([-33.8909]) tensor([-20.4400])

True True
tensor([1., 2., 3.], grad_fn=<AddBackward0>) False
tensor(3.3958, grad_fn=<MeanBackward0>)
<AddBackward0 object at 0x13074b780>
<AddBackward0 object at 0x1315601d0>
before: None None
after: tensor([7.8343]) tensor([3.2265])


In [66]:
loss.grad_fn.next_functions[0][0]

<PowBackward0 at 0x13074be10>

In [67]:
loss.grad_fn.next_functions[0][0].next_functions[0][0]

<SubBackward0 at 0x13074bcf8>

In [73]:
loss.grad_fn.next_functions[0][0].next_functions[0][0].next_functions

((<AddBackward0 at 0x13074b9b0>, 0), (None, 0))