In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()

In [2]:
import torch
from torch import nn
import torch.utils.data

In [3]:
import numpy as np

## 実装
- クロスエントロピー
- 仮説

In [4]:
criterion = nn.CrossEntropyLoss() # define the loss function (cross entropy)

In [5]:
preds = torch.tensor([[0.2, 0.8]]) # binary classification: one sample, two class scores
labels = torch.tensor([1]) # ground-truth class index

loss = criterion(preds, labels)
print(loss)

tensor(0.4375)


## 演習
- 3×3のモデル
- クロスエントロピー

In [6]:
# Toy batch: 4 samples with 3 features each (values 0..11), cast to float.
X = torch.tensor(np.arange(12).reshape(4, 3)).float()
# One class index per sample.
Y = torch.tensor([1, 2, 0, 1])
# Linear model mapping 3 input features to 3 output scores.
model = nn.Linear(3, 3)

In [7]:
criterion(model(X), Y)

tensor(5.0542, grad_fn=<NllLossBackward>)

## Pytorchで
- 勾配降下
- ニュートン法

で ルート2を求める

- 勾配降下の場合
  - $f(x) = x^3 - 6x$で計算する($f'(x) = 3x^2 - 6 = 0$ より、極小点が $x = \sqrt{2}$ になる)

In [8]:
def F(x):
    """Evaluate the cubic x**3 - 6*x used as the gradient-descent objective.

    Works for anything supporting ``**``, ``*`` and ``-`` (numbers, tensors).
    """
    cubic_term = x ** 3
    linear_term = 6 * x
    return cubic_term - linear_term

In [9]:
x = torch.tensor([0.01], requires_grad=True)
y = F(x)

In [10]:
learning_rate = 0.1

In [11]:
# Ten steps of plain gradient descent on F, starting from the current x.
for i in range(10):
    y = F(x)
    # autograd.grad returns a tuple with one gradient per requested input
    (dy_dx,) = torch.autograd.grad(y, x)
    x = x - learning_rate * dy_dx
    print(i, "loop", x)

0 loop tensor([0.6100], grad_fn=<SubBackward0>)
1 loop tensor([1.0984], grad_fn=<SubBackward0>)
2 loop tensor([1.3364], grad_fn=<SubBackward0>)
3 loop tensor([1.4006], grad_fn=<SubBackward0>)
4 loop tensor([1.4121], grad_fn=<SubBackward0>)
5 loop tensor([1.4139], grad_fn=<SubBackward0>)
6 loop tensor([1.4142], grad_fn=<SubBackward0>)
7 loop tensor([1.4142], grad_fn=<SubBackward0>)
8 loop tensor([1.4142], grad_fn=<SubBackward0>)
9 loop tensor([1.4142], grad_fn=<SubBackward0>)


- ニュートン法の場合
  - $f(x) = x^2 - 2$の零点を、更新式 $x \leftarrow x - f(x)/f'(x)$ で求める

In [13]:
def G(x):
    """Evaluate x**2 - 2, whose positive root is the square root of two."""
    squared = x ** 2
    return squared - 2

In [25]:
x = torch.tensor([0.1], requires_grad=True)
y = G(x)

In [26]:
y

tensor([-1.9900], grad_fn=<SubBackward0>)

In [27]:
# Ten Newton iterations x <- x - G(x) / G'(x), starting from the current leaf tensor x.
for i in range(10):
    y = G(x)
    y.backward()  # fills x.grad with G'(x)
    # Update x in place with gradient tracking disabled. Rebinding x to the result
    # of an arithmetic expression would turn it into a non-leaf tensor, and backward()
    # would then stop populating x.grad. The in-place update under no_grad keeps x
    # a leaf without writing through the `.data` attribute.
    with torch.no_grad():
        x -= y / x.grad
    print(i, "lootp", x)
    # backward() accumulates into x.grad, so clear it before the next iteration
    x.grad.zero_()

0 lootp tensor([10.0500], requires_grad=True)
1 lootp tensor([5.1245], requires_grad=True)
2 lootp tensor([2.7574], requires_grad=True)
3 lootp tensor([1.7414], requires_grad=True)
4 lootp tensor([1.4449], requires_grad=True)
5 lootp tensor([1.4145], requires_grad=True)
6 lootp tensor([1.4142], requires_grad=True)
7 lootp tensor([1.4142], requires_grad=True)
8 lootp tensor([1.4142], requires_grad=True)
9 lootp tensor([1.4142], requires_grad=True)


## Pytorchで実装するべきこと
0. データ準備
1. モデル設計
    - パラメータ/仮説
    - 損失関数
2. 学習
    - 勾配を計算
    - 勾配降下法によってパラメータ更新
  

## データの準備

In [60]:
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [65]:
print(iris["feature_names"])

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


## Datasetとは
- データの集まり
- サイズ
- 特定のインデクスでデータを取れる
- 前処理を上から加えられる

## Datasetの抽象クラス
- 実際こういう定義のよう
  - `__len__` すら定義されていない

In [64]:
class Dataset(object):
    """Minimal sketch of an abstract dataset base class.

    Subclasses must override ``__getitem__``. No default ``__len__`` is
    provided (see the note at the end of the class body).
    """

    def __getitem__(self, index):
        """Return the sample at ``index``; subclasses must implement this."""
        raise NotImplementedError

    def __add__(self, other):
        """Return the concatenation of this dataset and ``other``.

        Both operands need ``__len__`` and ``__getitem__`` for the
        concatenated dataset to be usable.
        """
        # Use the ConcatDataset that ships with torch.utils.data instead of
        # an undefined local name, so that `a + b` actually works.
        return torch.utils.data.ConcatDataset([self, other])
    # No `def __len__(self)` default?
    # See NOTE [ Lack of Default `__len__` in Python Abstract Base Classes ]
    # in pytorch/torch/utils/data/sampler.py
    # https://github.com/pytorch/pytorch/blob/master/torch/utils/data/sampler.py#L24
    # A sensible default rule for len is hard to pick (what should it be when
    # the length of the underlying data is not the right answer?)
    # Some code relies on len ending up as `raise NotImplementedError()`
    # So, for now, no default len is provided

In [65]:
class IrisDataset(Dataset):
    """Index-addressable pairing of feature rows with their labels.

    Args:
        data: indexable collection of feature rows.
        labels: indexable collection of labels; its length defines the
            dataset size.
        transform: optional callable ``(data, label) -> (data, label)``
            applied to each sample when it is fetched.
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        """Number of samples, taken from the labels."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the ``(data, label)`` pair at ``index``, transformed if requested."""
        sample = (self.data[index], self.labels[index])
        if self.transform is None:
            return sample
        data, label = self.transform(*sample)
        return data, label
        

In [66]:
def iris_transform(data, label):
    """Convert one sample to tensors.

    Returns:
        A ``(features, target)`` tuple where ``features`` is ``data`` as a
        float tensor and ``target`` is ``label`` as a tensor.
    """
    features = torch.tensor(data).float()
    target = torch.tensor(label)
    return features, target

In [67]:
# Reload iris and hold out 20% of the samples for validation.
iris = load_iris()
X_train, X_valid, y_train, y_valid = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
)

# Wrap each split in the custom dataset; the transform runs when a sample is fetched.
iris_train_dataset = IrisDataset(data=X_train, labels=y_train, transform=iris_transform)
iris_valid_dataset = IrisDataset(data=X_valid, labels=y_valid, transform=iris_transform)

## DataLoader

```python
torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, num_workers=0, collate_fn=None, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None, multiprocessing_context=None)
```

- dataset: Datasetなら何でもOK(map-styleの場合は`__getitem__`と`__len__`が必要)
- `batch_size`: 1バッチあたりのデータ数
- `shuffle, sampler, batch_sampler`: データの選び方
- `num_workers`: プロセスの数
- `collate_fn`: `DataLoader`が取り出したサンプルのリストを1つのミニバッチにまとめる処理
- `pin_memory`: `CUDA`用の設定。ホスト側(CPU)のメモリをページロック(pinned)して確保し、GPUへの転送を速くする
- `drop_last`: `batch_size`に満たない最後の半端なバッチを捨てるかどうか
- `timeout, worker_init_fn, multiprocessing_context`: 今回は省略(最初は使わないはず)

基本的にはデータを連結してiteratorとしてくれるもの

In [9]:
# Iterate the dataset directly and show only the first item.
for x in iris_train_dataset:
    print(x)
    break
    # not batched: each item is a single (data, label) pair

(tensor([5.1000, 3.7000, 1.5000, 0.4000]), tensor(0))


In [71]:
batch_size  = 24 # number of samples per mini-batch
# Wrap both custom datasets in DataLoaders that yield shuffled mini-batches.
iris_train_dataloader = torch.utils.data.DataLoader(iris_train_dataset, batch_size=batch_size, shuffle=True)
iris_valid_dataloader = torch.utils.data.DataLoader(iris_valid_dataset, batch_size=batch_size, shuffle=True)

## 自作datasetを使わない実装

In [17]:
# Same pipeline without the hand-written dataset class: convert the splits to
# tensors up front and let TensorDataset pair features with labels.
iris = load_iris()
X_train, X_valid, y_train, y_valid = train_test_split(iris.data, iris.target, test_size=0.2)

X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train)
X_valid = torch.tensor(X_valid).float()
y_valid = torch.tensor(y_valid)

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
valid_dataset = torch.utils.data.TensorDataset(X_valid, y_valid)

train_loader = torch.utils.data.DataLoader(train_dataset,
                   batch_size=batch_size, shuffle=True)
# The validation loader must read the held-out split, not the training data.
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                   batch_size=batch_size, shuffle=True)

## Softmax回帰の実装
- irisに対し,softmax回帰で実装する

In [18]:
# Softmax regression: a single linear layer from 4 input features to 3 output scores.
model = nn.Linear(4, 3)

# NOTE(review): DataLoaders built in earlier cells received their batch_size at
# construction time, so rebinding the name here does not change them.
batch_size  = 120 # number of samples per mini-batch
max_epoch = 100 # base epoch count (the training loop further down runs max_epoch * 10 epochs)

criterion = nn.CrossEntropyLoss() # define the loss function
optimizer = torch.optim.SGD(model.parameters(), lr=0.01) # (stochastic) gradient descent

In [19]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

In [91]:
optimizer.param_groups

[{'params': [Parameter containing:
   tensor([[ 1.4900,  2.3964, -3.5998, -1.9053],
           [ 0.8754,  0.2335, -0.4544, -1.2168],
           [-1.7652, -2.8074,  3.5882,  3.6563]], requires_grad=True),
   Parameter containing:
   tensor([ 0.2773,  0.8967, -1.8454], requires_grad=True)],
  'lr': 0.01,
  'momentum': 0,
  'dampening': 0,
  'weight_decay': 0,
  'nesterov': False}]

## 学習のサンプル

In [72]:
# One optimisation step on a single mini-batch (the loop exits after the first batch).
model.train()
for data, target in iris_train_dataloader: # inputs and ground-truth labels
    optimizer.zero_grad() # reset the gradients (the weights themselves are not touched)
    output = model(data) # forward pass: evaluate the hypothesis on the batch
    loss = criterion(output, target) # loss
    loss.backward() # compute the gradients
    optimizer.step() # update the parameters
    break

In [73]:
pred = output.argmax(axis=1).cpu().detach()
ans = target.cpu()
print("accuracy", accuracy_score(ans, pred))

accuracy 1.0


## Optimizer

In [31]:
optimizer.state_dict()

{'state': {},
 'param_groups': [{'lr': 0.01,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'params': [5331858128, 5331857840]}]}

In [28]:
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)


## モデルのweight

In [83]:
print(model.weight)
print(model.weight.grad)
old_weight = model.weight.detach().clone()

Parameter containing:
tensor([[ 1.4900,  2.3964, -3.5997, -1.9053],
        [ 0.8753,  0.2335, -0.4544, -1.2167],
        [-1.7652, -2.8074,  3.5881,  3.6562]], requires_grad=True)
tensor([[-0.0010, -0.0028,  0.0037,  0.0018],
        [ 0.1136,  0.0479,  0.0916,  0.0365],
        [-0.1126, -0.0452, -0.0953, -0.0383]])


In [84]:
# One optimisation step on a single mini-batch (the loop exits after the first batch).
model.train()
for data, target in train_loader: # inputs and ground-truth labels
    optimizer.zero_grad() # reset the gradients (the weights themselves are not touched)
    output = model(data) # forward pass: evaluate the hypothesis on the batch
    loss = criterion(output, target) # loss
    loss.backward() # compute the gradients
    optimizer.step() # update the parameters
    break

In [85]:
# after
print(model.weight)
print(model.weight.grad)

Parameter containing:
tensor([[ 1.4900,  2.3964, -3.5998, -1.9053],
        [ 0.8754,  0.2335, -0.4544, -1.2168],
        [-1.7652, -2.8074,  3.5882,  3.6563]], requires_grad=True)
tensor([[-0.0013, -0.0033,  0.0048,  0.0022],
        [-0.0027, -0.0005, -0.0008,  0.0020],
        [ 0.0040,  0.0039, -0.0040, -0.0042]])


In [86]:
old_weight

tensor([[ 1.4900,  2.3964, -3.5997, -1.9053],
        [ 0.8753,  0.2335, -0.4544, -1.2167],
        [-1.7652, -2.8074,  3.5881,  3.6562]])

In [87]:
model.weight + optimizer.state_dict()['param_groups'][0]["lr"] * model.weight.grad

tensor([[ 1.4900,  2.3964, -3.5997, -1.9053],
        [ 0.8753,  0.2335, -0.4544, -1.2167],
        [-1.7652, -2.8074,  3.5881,  3.6562]], grad_fn=<AddBackward0>)

In [89]:
optimizer.zero_grad()

In [90]:
# モデルの勾配が消える
model.weight.grad

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [41]:
output = model(data) # 仮説で値代入
loss = criterion(output, target) # 損失

In [42]:
loss.backward() # 微分の計算

In [43]:
model.weight.grad

tensor([[ 3.4398,  1.0046,  4.0171,  1.5467],
        [-3.3215, -1.5426, -2.4041, -0.7414],
        [-0.1183,  0.5379, -1.6130, -0.8053]])

In [44]:
model.weight

Parameter containing:
tensor([[ 0.0349,  0.2804,  0.2858,  0.1865],
        [-0.4398,  0.2456,  0.1448,  0.4426],
        [ 0.4975,  0.1777, -0.3319, -0.3443]], requires_grad=True)

In [45]:
optimizer.step() # パラメータの更新

In [49]:
model.weight

Parameter containing:
tensor([[-0.0339,  0.2603,  0.2054,  0.1555],
        [-0.3733,  0.2764,  0.1929,  0.4575],
        [ 0.4999,  0.1670, -0.2996, -0.3282]], requires_grad=True)

## Accuracyの計算

In [76]:
# 二次元配列
# データの個数 ×出力の次元
output.shape

torch.Size([24, 3])

In [78]:
# argmaxを計算する
pred = output.argmax(axis=1)
# pytorchっぽい情報を落とす
# detachは計算グラフの情報を落とす
pred = pred.cpu().detach()

Accuracyの計算は以下を使えばよい
- `sklearn.metrics.accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None)`

In [79]:
accuracy_score(ans, pred)

1.0

In [81]:
precision_score([0, 1], [0, 1])

1.0

In [82]:
precision_score([0, 0], [0, 0])

0.0

## 学習・評価

In [57]:
def train_epoch(model, data_loader, epoch):
    """Run one optimisation pass over ``data_loader`` and log every 10th epoch.

    Uses the notebook-level ``optimizer`` and ``criterion`` objects.

    Args:
        model: module to train; it is switched to training mode.
        data_loader: iterable yielding ``(data, target)`` mini-batches. It must
            expose ``.dataset`` with a length, which the progress log uses.
        epoch: index of the current epoch. A log line is printed when it is a
            multiple of 10.
    """
    model.train()
    seen = 0      # number of samples processed so far in this epoch
    loss = None   # stays None if the loader yields no batches
    for data, target in data_loader:  # inputs and ground-truth labels
        optimizer.zero_grad()  # reset the gradients left over from the previous step
        output = model(data)  # forward pass
        loss = criterion(output, target)
        loss.backward()  # compute the gradients
        optimizer.step()  # update the parameters
        seen += len(data)
    # Skip logging for an empty loader, where there is no loss to report.
    if loss is not None and epoch % 10 == 0:
        total = len(data_loader.dataset)
        # Report the epoch number (not the last batch index) and the true number of
        # processed samples; the loss shown is that of the final mini-batch.
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, total, 100. * seen / total, loss.item()))

def valid_epoch(model, data_loader, epoch):
    """Evaluate ``model`` on every batch of ``data_loader`` and log every 10th epoch.

    Uses the notebook-level ``criterion`` and ``accuracy_score``. The reported
    loss is the sample-weighted mean over all batches, and the accuracy is
    computed over all samples rather than over the last batch only.

    Args:
        model: module to evaluate; it is switched to evaluation mode.
        data_loader: iterable yielding ``(data, target)`` mini-batches. It must
            expose ``.dataset`` with a length, which the progress log uses.
        epoch: index of the current epoch. Results are printed when it is a
            multiple of 10.
    """
    model.eval()
    loss_sum = 0.0   # sum of per-batch mean losses, weighted by batch size
    seen = 0         # number of samples evaluated
    preds, answers = [], []
    # No gradients are needed for evaluation, so the optimizer is not involved here.
    with torch.no_grad():
        for data, target in data_loader:  # inputs and ground-truth labels
            output = model(data)  # forward pass
            batch_len = len(data)
            loss_sum += criterion(output, target).item() * batch_len
            seen += batch_len
            preds.append(output.argmax(dim=1).cpu())
            answers.append(target.cpu())

    # Skip logging for an empty loader, where there is nothing to report.
    if seen and epoch % 10 == 0:
        total = len(data_loader.dataset)
        print('Test Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, total, 100. * seen / total, loss_sum / seen))

        pred = torch.cat(preds).numpy()
        ans = torch.cat(answers).numpy()
        print("accuracy", accuracy_score(ans, pred))

In [70]:
# Alternate one training pass and one validation pass per epoch, for max_epoch * 10 epochs.
# Both helpers only print when epoch % 10 == 0.
for epoch in range(max_epoch * 10):
    train_epoch(model, iris_train_dataloader, epoch)
    valid_epoch(model, iris_valid_dataloader, epoch)

accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 0.8333333333333334
accuracy 0.8333333333333334
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 0.8333333333333334
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
accuracy 1.0
acc

## 全然関係ないもの

## AdaptivePooling
毎回挙動が怪しくなるので確認

In [94]:
# Adaptive max pooling with a target output size of 3.
# NOTE(review): the values displayed for `output` further down (1.0, 3.5, 6.0, ...) are
# window averages, not maxima, so that output was presumably produced with an
# average-pooling layer before this cell was last edited. Re-run to confirm.
m = nn.AdaptiveMaxPool2d(3)

In [89]:
x = torch.tensor(np.arange(24).reshape(1, 3, 8).astype(float))
output = m(x)

In [90]:
# Shape of the tensor fed to the pooling layer. The tensor is named `x`;
# `input` is the Python builtin function and has no `.shape`.
x.shape

torch.Size([1, 3, 8])

In [91]:
output.shape

torch.Size([1, 3, 3])

In [92]:
x

tensor([[[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11., 12., 13., 14., 15.],
         [16., 17., 18., 19., 20., 21., 22., 23.]]], dtype=torch.float64)

In [93]:
output

tensor([[[ 1.0000,  3.5000,  6.0000],
         [ 9.0000, 11.5000, 14.0000],
         [17.0000, 19.5000, 22.0000]]], dtype=torch.float64)