In [10]:
import torch
from torch import Tensor
from torch import nn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import time

# NOTE(review): this explicitly selects Matplotlib's TkAgg (Tk GUI) backend.
# Inside a notebook that presumably means figures open in separate Tk windows
# rather than rendering inline, and it needs a working Tk/display setup --
# confirm this is intended before running on a headless or remote kernel.
matplotlib.use('TkAgg')

In [11]:
# Fetch the MNIST dataset: root='data' with download=True, images converted by ToTensor()
training_data = torchvision.datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
test_data = torchvision.datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [12]:
# Data loaders
batch_size = 64

# Reshuffle the training set at every epoch so SGD does not replay the exact
# same batch sequence each time. Seed torch beforehand (torch.manual_seed) if
# a reproducible batch order is needed.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
# Evaluation only accumulates totals over the whole set, so the test loader
# keeps its sequential order.
test_dataloader = DataLoader(test_data, batch_size=batch_size)


In [14]:
# Raw tensor shapes stored on the datasets themselves
print('Training data', training_data.data.shape)
print('test data', test_data.data.shape)

# Peek at the first batch from the test loader (if there is one) to see the
# shapes and label dtype the model will actually receive.
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print(f"Shape of X [N, C, H, W]:{X.shape}")
    print(f"Shape of y:{y.shape} {y.dtype}")

Training data torch.Size([60000, 28, 28])
test data torch.Size([10000, 28, 28])
Shape of X [N, C, H, W]:torch.Size([64, 1, 28, 28])
Shape of y:torch.Size([64]) torch.int64


In [15]:
# Compute device: use CUDA when torch reports it as available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [22]:
# Define the model using the built-in convolution layers
class CNN(nn.Module):
    """Small convolutional classifier: two 3x3 convolutions, one 2x2 max-pool, two linear layers.

    The first linear layer expects 64 * 12 * 12 flattened features, which is
    what a 1x28x28 input yields here (28 -> 26 -> 24 through the two unpadded
    3x3 convolutions, then 12 after the 2x2 pooling). The last linear layer
    produces 10 values per sample.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: conv -> ReLU -> conv -> batch norm -> ReLU -> max-pool.
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Classifier head applied to the flattened feature maps.
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(in_features=64 * 12 * 12, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=10),
        )

    def forward(self, x):
        """Run ``x`` through the feature extractor, flatten it, and return the head's output."""
        features = self.layer1(x)
        return self.fc(self.flatten(features))

In [23]:
# Instance 1: build the network, then move it to the selected device
model1 = CNN()
model1 = model1.to(device)
print(model1)

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=9216, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=10, bias=True)
  )
)


In [24]:
# Training and evaluation functions
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Each batch is moved to the module-level ``device``, pushed through the
    model, scored with ``loss_fn``, and used for one ``optimizer`` step.
    Every 100th batch (starting with batch 0) the current loss is printed
    together with the number of samples consumed before that batch.
    The model is switched to train mode and left that way. Returns nothing.
    """
    size = len(dataloader.dataset)
    model.train()

    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss for this batch
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)

        # Backward pass: clear old gradients, backpropagate, apply the update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Only report progress on every 100th batch
        if step % 100 != 0:
            continue
        loss = batch_loss.item()
        current = step * len(inputs)
        print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

In [25]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and average loss.

    The model is put in eval mode (and left there) and gradient tracking is
    disabled while iterating. Each batch is moved to the module-level
    ``device``. The reported loss is the sum of the per-batch losses divided
    by the number of batches; the accuracy is the number of correct
    predictions divided by ``len(dataloader.dataset)``. Returns nothing.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()

    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            loss_sum += loss_fn(logits, targets).item()
            # argmax over dim 1 picks the highest-scoring class for each sample
            matches = logits.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()

    test_loss = loss_sum / num_batches
    correct = hits / size
    print(f"Test Error: \n Accuracy:{(100 * correct):>0.1f}%, Avg loss:{test_loss:>8f}\n")

In [26]:
# Train the first model
# Loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model1.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-4,
)

epochs = 5
print("Start")
t1 = time.time()  # wall-clock start, used for the total-time report below
for t in range(epochs):
    print(f"Epoch {t + 1}\n-------------------------------")
    # One training pass over the training set, then an evaluation on the test set
    train(train_dataloader, model1, loss_fn, optimizer)
    test(test_dataloader, model1, loss_fn)
print("Done!  Time cost: ", time.time() - t1)

Start
Epoch 1
-------------------------------
loss: 2.311503 [    0/60000]
loss: 0.124321 [ 6400/60000]
loss: 0.164660 [12800/60000]
loss: 0.048784 [19200/60000]
loss: 0.036829 [25600/60000]
loss: 0.165229 [32000/60000]
loss: 0.113340 [38400/60000]
loss: 0.073978 [44800/60000]
loss: 0.166705 [51200/60000]
loss: 0.061555 [57600/60000]
Test Error: 
 Accuracy:98.3%, Avg loss:0.049882

Epoch 2
-------------------------------
loss: 0.016103 [    0/60000]
loss: 0.108024 [ 6400/60000]
loss: 0.016330 [12800/60000]
loss: 0.028433 [19200/60000]
loss: 0.009245 [25600/60000]
loss: 0.059181 [32000/60000]
loss: 0.070592 [38400/60000]
loss: 0.048360 [44800/60000]
loss: 0.078897 [51200/60000]
loss: 0.051312 [57600/60000]
Test Error: 
 Accuracy:98.7%, Avg loss:0.038269

Epoch 3
-------------------------------
loss: 0.007250 [    0/60000]
loss: 0.049616 [ 6400/60000]
loss: 0.010427 [12800/60000]
loss: 0.040619 [19200/60000]
loss: 0.020660 [25600/60000]
loss: 0.009932 [32000/60000]
loss: 0.064644 [38400/

In [27]:
# Bare expression: the notebook displays the training dataset's repr as the cell output.
training_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()