# 1.Setup

In [30]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [31]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose

import torch.optim as optim

import matplotlib.pyplot as plt

print(torch.cuda.is_available())
print(torch.__version__)

False
1.8.0


# 2.数据、模型全局变量

In [32]:
# 超参数
learning_rate = 1e-3
batch_size = 64
epochs = 5

# get cpu or gpu device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


# 3.数据下载、加载

## 3.1 数据下载

In [33]:
training_data = torchvision.datasets.FashionMNIST(
    root = "data",
    train = True,
    # download = True,
    download = False,
    transform = ToTensor(),
)
test_data = torchvision.datasets.FashionMNIST(
    root = "data",
    train = False,
    # download = True,
    download = False,
    transform = ToTensor(),
)

## 3.2 数据加载--创建 data loader

In [34]:
train_dataloader = DataLoader(training_data, batch_size = batch_size, shuffle = True)#, num_workers = args.nThreads)
test_dataloader = DataLoader(testing_data, batch_size = batch_size, shuffle = False)#, num_workers = args.nThreads)

In [35]:
for X_train, y_train in train_dataloader:
    print("Shape of X_train [N, C, H, W]: ", X_train.shape)
    print("Shape of y_train: ", y_train.shape, y_train.dtype)
    break

Shape of X_train [N, C, H, W]:  torch.Size([64, 1, 28, 28])
Shape of y_train:  torch.Size([64]) torch.int64


# 4.模型构建

## 4.1 模型构建

In [13]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = torch.nn.Flatten()
        self.linear_relu_stack = torch.nn.Sequential(
            torch.nn.Linear(28 * 28, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 10),
            torch.nn.ReLU(),
        )
    
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        
        return logits

In [14]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


## 4.2 模型训练

In [19]:
# 损失函数
loss_fn = nn.CrossEntropyLoss()


# 优化算法
optimizer =  optim.SGD(model.parameters(), lr = learning_rate)


def train(dataloader, model, loss_fn, optimizer):
    """
    In a single training loop, the model makes predictions 
    on the training dataset (fed to it in batches), 
    and backpropagates the prediction error to adjust 
    the model’s parameters.
    """
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # 计算预测值和误差
        pred = model(X)
        loss = loss_fn(pred, y)
        # 后向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn):
    """
    check the model’s performance against 
    the test dataset to ensure it is learning
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [21]:
for t in range(epochs):
    print(f"Epoch {t + 1}\n---------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
---------------------------
loss: 2.211925 [    0/60000]
loss: 2.184119 [ 6400/60000]
loss: 2.182308 [12800/60000]
loss: 2.171432 [19200/60000]
loss: 2.152758 [25600/60000]
loss: 2.155768 [32000/60000]
loss: 2.118394 [38400/60000]
loss: 2.127769 [44800/60000]
loss: 2.057956 [51200/60000]
loss: 2.058423 [57600/60000]
Test Error: 
 Accuracy: 43.3%, Avg loss: 2.075775 

Epoch 2
---------------------------
loss: 2.074895 [    0/60000]
loss: 2.034454 [ 6400/60000]
loss: 2.028780 [12800/60000]
loss: 2.035985 [19200/60000]
loss: 2.035927 [25600/60000]
loss: 1.982557 [32000/60000]
loss: 1.888451 [38400/60000]
loss: 1.964618 [44800/60000]
loss: 1.861885 [51200/60000]
loss: 1.770835 [57600/60000]
Test Error: 
 Accuracy: 46.2%, Avg loss: 1.885451 

Epoch 3
---------------------------
loss: 2.016435 [    0/60000]
loss: 1.831326 [ 6400/60000]
loss: 1.742608 [12800/60000]
loss: 1.884317 [19200/60000]
loss: 1.781443 [25600/60000]
loss: 1.690614 [32000/60000]
loss: 1.736117 [38400/60000]
loss:

## 4.3 模型保存

In [24]:
torch.save(model.state_dict(), "./model/model.pth")
print("Saved PyTorch Model State to model.pth.")

Saved PyTorch Model State to model.pth.


# 5.模型重载、预测

## 5.1 模型重载

In [25]:
model = NeuralNetwork()
model.load_state_dict(torch.load("./model/model.pth"))
model.eval()

<All keys matched successfully>

## 5.2 模型预测

In [28]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f"Predicted: '{predicted}', Actual: '{actual}'")

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)

Predicted: 'Ankle boot', Actual: 'Ankle boot'
