In [21]:
# モデルパラメータの最適化

import os

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [22]:
# Device selection: prefer Apple's Metal backend (MPS) when available, otherwise use the CPU
if torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f'using {device} device')

using mps device


In [23]:
# Neural network model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then a 784 -> 512 -> 512 -> 10 stack.

    The two hidden ``Linear`` layers are each followed by ``ReLU``; the final
    ``Linear`` layer has no activation, so ``forward`` returns raw logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are created in the same order as they appear in the stack so
        # that parameter names (linear_relu_stack.0, .2, .4) stay stable.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` per sample and return the 10 output logits."""
        return self.linear_relu_stack(self.flatten(x))

In [24]:
# Load FashionMNIST, downloading it only if it is not already on disk.
# download=True can be passed unconditionally: torchvision checks whether the
# dataset files already exist under `root` and skips the download when they do.
# This replaces a manual os.path.exists() test on one .gz file, which passed
# download=False (and then failed to find the dataset) after a partial download.
training_data = datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=transforms.ToTensor())

test_data = datasets.FashionMNIST(
    root='data',
    train=False,
    download=True,
    transform=transforms.ToTensor())

# Mini-batches of 64 samples; no shuffling is requested, so order is fixed.
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

In [25]:
# Build the network, then move its parameters to the selected device
model = NeuralNetwork()
model = model.to(device)

In [26]:
learning_rate = 1e-3
# Number of samples used for a single optimizer step (mini-batch size),
# so the parameters are updated about data_size / batch_size times per epoch.
# NOTE(review): the DataLoader cell passes a literal batch_size=64 rather than this variable — keep them in sync.
batch_size = 64
# Number of full passes over the dataset
epochs = 10

In [27]:
# Loss function: cross-entropy computed on the logits returned by the model
loss_fn = nn.CrossEntropyLoss()

# Optimizer: SGD over all model parameters, configured only with the learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [28]:
# Training loop
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``dataloader.dataset`` must
            support ``len()`` (used only for the progress message).
        model: ``nn.Module`` to train. It must have at least one parameter.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that manages ``model``'s parameters.

    Prints the current batch loss every 100 batches. Returns ``None``.
    """
    size = len(dataloader.dataset)
    # Send batches to wherever the model lives instead of relying on a
    # notebook-level ``device`` global being defined and in sync.
    device = next(model.parameters()).device
    # Make sure train-mode behaviour is active even if the model was
    # previously switched to eval mode.
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            # The closing bracket was missing from the original message.
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")

In [29]:
# テストループ
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred, y.to(device)).item()
            correct += (pred.argmax(1) == y.to(device)).type(torch.float).sum().item()
            
        test_loss /= num_batches
        correct /= size
        print(f"test errer: \n accuracy: {(100*correct):>0.1f}%, avg loss: {test_loss:>8f} \n")

In [30]:
# Run training: each epoch is one training pass followed by one evaluation pass
for t in range(epochs):
    epoch_header = f"epoch {t + 1}\n----------------"
    print(epoch_header)
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)

print('done')

epoch 1
----------------
loss: 2.297617 [    0/60000
loss: 2.285514 [ 6400/60000
loss: 2.265848 [12800/60000
loss: 2.266934 [19200/60000
loss: 2.237666 [25600/60000
loss: 2.211973 [32000/60000
loss: 2.218372 [38400/60000
loss: 2.185524 [44800/60000
loss: 2.180389 [51200/60000
loss: 2.147156 [57600/60000
test errer: 
 accuracy: 47.5%, avg loss: 2.144106 

epoch 2
----------------
loss: 2.151291 [    0/60000
loss: 2.146365 [ 6400/60000
loss: 2.083490 [12800/60000
loss: 2.110091 [19200/60000
loss: 2.050111 [25600/60000
loss: 1.978322 [32000/60000
loss: 2.011634 [38400/60000
loss: 1.930329 [44800/60000
loss: 1.932594 [51200/60000
loss: 1.864717 [57600/60000
test errer: 
 accuracy: 55.5%, avg loss: 1.863238 

epoch 3
----------------
loss: 1.888216 [    0/60000
loss: 1.867606 [ 6400/60000
loss: 1.743552 [12800/60000
loss: 1.803104 [19200/60000
loss: 1.672205 [25600/60000
loss: 1.621556 [32000/60000
loss: 1.653956 [38400/60000
loss: 1.555304 [44800/60000
loss: 1.575045 [51200/60000
loss: 1.4

In [31]:
# Save the model: show the parameter names, then persist only the state_dict
state = model.state_dict()
print(state.keys())
torch.save(state, 'model_weights.pth')

odict_keys(['linear_relu_stack.0.weight', 'linear_relu_stack.0.bias', 'linear_relu_stack.2.weight', 'linear_relu_stack.2.bias', 'linear_relu_stack.4.weight', 'linear_relu_stack.4.bias'])


In [32]:
# Load the model
# A state_dict holds only the parameter tensors, not the network structure, so a
# model of the matching class has to be constructed first and the weights loaded into it.
# To save the whole model instead, use torch.save(model, 'model.pth')
# and restore it with model = torch.load('model.pth').
# Which to use: presumably saving only the state avoids errors caused by
# differences between environments (the original author's guess).
loaded_model = NeuralNetwork().to(device)
# Load into the freshly built instance (the weights previously went into `model`,
# leaving `loaded_model` randomly initialised). map_location places the stored
# tensors on the currently selected device.
loaded_model.load_state_dict(torch.load('model_weights.pth', map_location=device))
loaded_model.eval()

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [33]:
# Sanity check: print predicted classes next to the true labels for each test batch

with torch.no_grad():
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        # Print inside the loop: with the prints after the loop, every batch but
        # the last was computed and thrown away, which does not match the
        # per-batch output recorded for this cell.
        print(pred.argmax(1))
        print(y)

tensor([9, 2, 1, 1, 6, 1, 6, 4, 7, 7, 4, 5, 7, 3, 4, 1, 2, 2, 8, 0, 6, 7, 7, 7,
        1, 2, 4, 3, 9, 3, 8, 8, 3, 3, 8, 0, 7, 7, 7, 9, 0, 1, 3, 9, 4, 9, 2, 1,
        4, 2, 2, 2, 7, 6, 4, 2, 8, 4, 8, 0, 7, 7, 8, 5], device='mps:0')
tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5], device='mps:0')
tensor([1, 1, 3, 3, 7, 8, 7, 0, 6, 0, 4, 3, 1, 2, 8, 2, 1, 8, 5, 9, 5, 0, 3, 4,
        0, 6, 5, 3, 4, 7, 1, 8, 0, 1, 2, 2, 3, 4, 7, 4, 7, 8, 7, 9, 9, 4, 2, 7,
        7, 0, 5, 2, 8, 4, 7, 8, 0, 0, 9, 9, 3, 0, 8, 4], device='mps:0')
tensor([1, 1, 2, 3, 9, 8, 7, 0, 2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2,
        0, 6, 5, 3, 6, 7, 1, 8, 0, 1, 4, 2, 3, 6, 7, 2, 7, 8, 5, 9, 9, 4, 2, 5,
        7, 0, 5, 2, 8, 6, 7, 8, 0, 0, 9, 9, 3, 0, 8, 4], device='mps:0')
tensor([1, 5, 4, 1, 9, 1, 8, 4, 2, 1, 4, 7, 1, 0, 0, 0, 1, 6, 1, 3, 