In [1]:
# モデルパラメータの最適化

import os

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Device selection: use Apple's Metal backend (MPS) when PyTorch reports it
# as available, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f'using {device} device')

using mps device


In [3]:
# Neural network class
class NeuralNetwork(nn.Module):
    """Fully connected network: flattens each input and maps 28*28 features to 10 logits.

    The layers live in ``linear_relu_stack`` (Linear -> ReLU -> Linear -> ReLU
    -> Linear), so the state-dict keys are ``linear_relu_stack.<index>.*``.
    """

    def __init__(self):
        super().__init__()
        in_features, hidden, n_outputs = 28 * 28, 512, 10
        self.flatten = nn.Flatten()
        layers = [
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_outputs),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the logits produced by the linear/ReLU stack."""
        return self.linear_relu_stack(self.flatten(x))

In [6]:
# Load FashionMNIST, downloading it only when it is not already on disk.
# `download=True` can be passed unconditionally: torchvision skips the download
# when the dataset is already present under `root`. The previous manual check
# for the raw `.gz` archive would pass `download=False` whenever the archive
# existed, even if the files the dataset actually reads were missing.
training_data = datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=transforms.ToTensor())

test_data = datasets.FashionMNIST(
    root='data',
    train=False,
    download=True,
    transform=transforms.ToTensor())

# Batches of 64 samples; keep this literal in sync with the `batch_size`
# hyperparameter defined further down in the notebook.
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

In [7]:
# Instantiate the network and move its parameters to the selected device.
model = NeuralNetwork().to(device)

In [31]:
# Step size handed to the optimizer.
learning_rate = 1e-3
# Number of samples used for a single parameter update,
# so one epoch performs roughly data_size / batch_size updates.
# NOTE(review): the DataLoaders in this notebook are built with a literal 64,
# not with this variable -- keep the two values in sync.
batch_size = 64
# Number of full passes over the dataset
epochs = 10

In [32]:
# Optimizer: stochastic gradient descent over every parameter of the model
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Loss function: cross-entropy computed from the model's logits
loss_fn = nn.CrossEntropyLoss()

In [33]:
# Training loop
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that exposes ``.dataset``
            (its length is used for the progress display).
        model: ``nn.Module`` to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. Every 100th batch prints the current loss and the number of
        samples processed before that batch.
    """
    size = len(dataloader.dataset)

    # Send each batch to wherever the model's parameters live, so the function
    # does not depend on a notebook-global device variable.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    # Training mode matters for layers such as dropout / batch norm.
    model.train()

    seen = 0  # samples processed before the current batch
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(target_device), y.to(target_device)
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()  # reset gradients accumulated by the previous step
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f} [{seen:>5d}/{size:>5d}]")
        seen += len(X)

In [34]:
# テストループ
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred, y.to(device)).item()
            correct += (pred.argmax(1) == y.to(device)).type(torch.float).sum().item()
            
        test_loss /= num_batches
        correct /= size
        print(f"test errer: \n accuracy: {(100*correct):>0.1f}%, avg loss: {test_loss:>8f} \n")

In [36]:
# Run the experiment: each epoch is one training pass followed by one evaluation pass
for epoch_number in range(1, epochs + 1):
    print(f"epoch {epoch_number}\n----------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)

print('done')

epoch 1
----------------
loss: 0.616057 [    0/60000
loss: 0.717811 [ 6400/60000
loss: 0.496503 [12800/60000
loss: 0.733524 [19200/60000
loss: 0.651932 [25600/60000
loss: 0.627056 [32000/60000
loss: 0.691469 [38400/60000
loss: 0.724219 [44800/60000
loss: 0.713780 [51200/60000
loss: 0.650887 [57600/60000
test errer: 
 accuracy: 77.0%, avg loss: 0.654379 

epoch 2
----------------
loss: 0.597769 [    0/60000
loss: 0.700455 [ 6400/60000
loss: 0.481417 [12800/60000
loss: 0.721592 [19200/60000
loss: 0.641839 [25600/60000
loss: 0.617039 [32000/60000
loss: 0.675729 [38400/60000
loss: 0.715144 [44800/60000
loss: 0.703622 [51200/60000
loss: 0.638787 [57600/60000
test errer: 
 accuracy: 77.6%, avg loss: 0.641870 

epoch 3
----------------
loss: 0.581329 [    0/60000
loss: 0.684406 [ 6400/60000
loss: 0.467848 [12800/60000
loss: 0.710500 [19200/60000
loss: 0.632703 [25600/60000
loss: 0.608112 [32000/60000
loss: 0.661012 [38400/60000
loss: 0.707094 [44800/60000
loss: 0.694742 [51200/60000
loss: 0.6

In [39]:
# Save the model: only the state dict (parameter tensors) is written to disk
state = model.state_dict()
print(state.keys())
torch.save(state, 'model_weights.pth')

odict_keys(['linear_relu_stack.0.weight', 'linear_relu_stack.0.bias', 'linear_relu_stack.2.weight', 'linear_relu_stack.2.bias', 'linear_relu_stack.4.weight', 'linear_relu_stack.4.bias'])


In [42]:
# Load the model
# A state dict holds only parameter values, not the architecture, so a model of
# the matching class has to be constructed first and then filled with the weights.
# To save the whole model instead, use torch.save(model, 'model.pth')
# and restore it with model = torch.load('model.pth').
# Presumably saving only the state is preferred because it avoids errors caused
# by differences between environments.
loaded_model = NeuralNetwork().to(device)
# Load into the freshly built model (not the already-trained `model`), mapping
# the stored tensors onto the device in use.
loaded_model.load_state_dict(torch.load('model_weights.pth', map_location=device))
loaded_model.eval()

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)