In [13]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from torch import Tensor
import time

# NOTE(review): this forces matplotlib's Tk GUI backend. Inside a notebook
# kernel (especially a headless or remote one) that may fail or bypass inline
# figure rendering -- confirm it is actually required before keeping it.
matplotlib.use('TkAgg')

In [2]:
# Load the MNIST dataset.
# Both splits are stored under the local "data" directory (download=True asks
# torchvision to fetch the files when needed) and every image goes through the
# ToTensor() transform.
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [3]:
# Data loaders: wrap both splits so they yield mini-batches of `batch_size`
# samples.
# NOTE(review): the training loader does not pass shuffle=True, so batches are
# visited in the same dataset order every epoch -- confirm this is intended.
batch_size = 64

train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [4]:
# Sanity check: print the raw tensor shapes of both splits, then the shape of
# the first test batch and the shape/dtype of its labels (only one batch is
# inspected, hence the `break`).
print("Training data: ", training_data.data.shape)
print("Test data: ", test_data.data.shape)
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Training data:  torch.Size([60000, 28, 28])
Test data:  torch.Size([10000, 28, 28])
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [5]:
# Training device: use CUDA when it is available, otherwise fall back to CPU.
# `device` is read as a module-level name by the train/test functions below.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [6]:
# Custom convolution layer

class MyConv2dFunc(torch.autograd.Function):
    """Hand-written 2-D convolution with an explicit backward pass.

    The operation is a "valid" cross-correlation: stride 1, no padding, no
    dilation and no bias. Inputs and kernels may be rectangular; the height
    and width of each are handled independently.
    """

    @staticmethod
    def conv2d(input: Tensor, kernel: Tensor) -> Tensor:
        """Convolve ``input`` with ``kernel`` (Output = Input * Kernel).

        :param input: Tensor[B, Cin, H, W]
        :param kernel: Tensor[Cout, Cin, KH, KW]
        :return: Tensor[B, Cout, H - KH + 1, W - KW + 1]
        """
        B, Cin, H, W = input.shape
        Cout, KH, KW = kernel.shape[0], kernel.shape[2], kernel.shape[3]
        OH = H - KH + 1
        OW = W - KW + 1

        # Unfold extracts every KH x KW patch as a column:
        # [B, Cin * KH * KW, OH * OW]. Split that back into explicit axes so
        # the contraction below can address channel and patch offset separately.
        input_unf = nn.Unfold(kernel_size=(KH, KW))(input)
        input_unf = input_unf.reshape(B, Cin, KH * KW, OH, OW)
        # reshape (not view): the kernel passed in from backward() is a
        # transposed, possibly non-contiguous tensor.
        kernel_view = kernel.reshape(Cout, Cin, KH * KW)

        # Contract over input channel (c) and patch offset (k).
        return torch.einsum("bckhw,ock->bohw", input_unf, kernel_view)

    @staticmethod
    def forward(ctx, input, weight):
        """Compute the convolution and stash both operands for backward()."""
        ctx.save_for_backward(input, weight)
        return MyConv2dFunc.conv2d(input, weight)

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(grad_input, grad_weight)`` for the upstream ``grad_output``.

        A gradient is only computed for operands flagged in
        ``ctx.needs_input_grad``; the other entry is returned as ``None``.
        """
        input, weight = ctx.saved_tensors
        grad_input = grad_weight = None
        if grad_output is None:
            return None, None
        if ctx.needs_input_grad[0]:
            # Transposed convolution: zero-pad the upstream gradient by
            # (kernel - 1) on each side, then convolve it with the kernel
            # rotated by 180 degrees and with its in/out channel axes swapped.
            KH, KW = weight.shape[2], weight.shape[3]
            # ZeroPad2d takes (left, right, top, bottom): width first, then height.
            gop = nn.ZeroPad2d((KW - 1, KW - 1, KH - 1, KH - 1))(grad_output)
            kk = torch.rot90(weight, 2, (2, 3))  # rotate by 180 degrees
            kk = torch.transpose(kk, 0, 1)
            grad_input = MyConv2dFunc.conv2d(gop, kk)
        if ctx.needs_input_grad[1]:
            # Weight gradient: treat the batch axis as the channel axis and
            # use the upstream gradient as the kernel sliding over the input.
            input_ = torch.transpose(input, 0, 1)
            grad_output_ = torch.transpose(grad_output, 0, 1)
            grad_weight = MyConv2dFunc.conv2d(input_, grad_output_).transpose(0, 1)
        return grad_input, grad_weight


class MyConv2d(nn.Module):
    """Convolution layer backed by ``MyConv2dFunc`` (weight only, no bias).

    :param in_channels: number of channels of the input feature map
    :param out_channels: number of kernels, i.e. channels of the output
    :param kernel_size: ``(height, width)`` of each kernel; a single int ``k``
        is accepted as shorthand for ``(k, k)``
    """

    def __init__(self, in_channels, out_channels, kernel_size: "int | tuple"):
        super(MyConv2d, self).__init__()
        # Accept an int the same way nn.Conv2d does; previously this raised
        # TypeError when indexing kernel_size[0].
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # Parameters: one [in_channels, kh, kw] kernel per output channel.
        self.weight = nn.Parameter(torch.empty(out_channels, in_channels, kernel_size[0], kernel_size[1]))

        # Initialise the kernels uniformly in [-0.1, 0.1].
        nn.init.uniform_(self.weight, -0.1, 0.1)

    def forward(self, x):
        """Apply the custom convolution to ``x`` using this layer's weight."""
        return MyConv2dFunc.apply(x, self.weight)

    def extra_repr(self):
        """Text shown inside the parentheses when the module is printed."""
        return 'MyConv2d: in_channels={}, out_channels={}, kernel_size={}'.format(
            self.in_channels, self.out_channels, self.kernel_size
        )

In [7]:
# Model defined with the stock convolution layers
class CNN(nn.Module):
    """Baseline classifier assembled entirely from built-in ``torch.nn`` layers.

    ``layer1`` is a two-convolution feature extractor (3x3 kernels, stride 1)
    followed by batch norm, ReLU and 2x2 max pooling. Its output is flattened
    and fed to ``fc``, a two-layer MLP that emits 10 scores per sample. The
    first linear layer expects 64 * 12 * 12 flattened features.
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.layer1 = nn.Sequential(*feature_layers)

        self.flatten = nn.Flatten()

        head_layers = [
            nn.Linear(in_features=64 * 12 * 12, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=10),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run features -> flatten -> classifier head and return the scores."""
        return self.fc(self.flatten(self.layer1(x)))

In [10]:
# Model defined with the custom convolution layer

class CNN_new(nn.Module):
    """Same topology as ``CNN`` but with ``MyConv2d`` as the first convolution.

    Only the first layer is swapped for the hand-written convolution; the
    second convolution, batch norm, pooling and the classifier head still use
    the built-in ``torch.nn`` modules.
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            MyConv2d(1, 32, (3, 3)),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3)),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.layer1 = nn.Sequential(*feature_layers)

        self.flatten = nn.Flatten()

        head_layers = [
            nn.Linear(in_features=64 * 12 * 12, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=10),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run features -> flatten -> classifier head and return the scores."""
        return self.fc(self.flatten(self.layer1(x)))

In [16]:
# Instance 1: the baseline CNN, moved to the selected device.
model = CNN().to(device)
print(model)

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=9216, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=10, bias=True)
  )
)


In [17]:
# Instance 2: the CNN that uses the custom convolution layer, moved to the
# selected device.
model_new = CNN_new().to(device)
print(model_new)

CNN_new(
  (layer1): Sequential(
    (0): MyConv2d(MyConv2d: in_channels=1, out_channels=32, kernel_size=(3, 3))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=9216, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=10, bias=True)
  )
)


In [20]:
# Training and testing functions
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Every batch is moved to the module-level ``device``, pushed through
    ``model``, and used for a single ``optimizer`` step on ``loss_fn``.
    The running loss is printed on every 100th batch.
    """
    size = len(dataloader.dataset)
    model.train()
    for batch, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and prediction error
        batch_loss = loss_fn(model(inputs), targets)

        # Backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Progress report only on every 100th batch
        if batch % 100 != 0:
            continue
        loss = batch_loss.item()
        current = batch * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Runs in eval mode with gradients disabled. The loss is averaged over the
    number of batches; accuracy is the fraction of samples whose arg-max
    prediction equals the label. Batches are moved to the module-level
    ``device``.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()

    loss_total = 0
    hits = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            scores = model(inputs)
            loss_total += loss_fn(scores, targets).item()
            matches = scores.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()

    test_loss = loss_total / num_batches
    correct = hits / size
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [23]:
######## Train the first model ###########
# NOTE(review): this cell trains whatever `model` currently holds in the
# kernel; re-running it continues from the existing weights rather than
# starting fresh. Re-create `model` first for a reproducible run.

# Loss function and optimizer (these names are reassigned by the cell that
# trains `model_new`).
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)

# Training: one train pass followed by one evaluation per epoch, with the
# total wall-clock time printed at the end.
epochs = 5
print("start!")
t1 = time.time()
for t in range(epochs):
    print(f"Epoch {t + 1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!  Time cost: ", time.time() - t1)

start!
Epoch 1
-------------------------------
loss: 0.001021  [    0/60000]
loss: 0.002670  [ 6400/60000]
loss: 0.000270  [12800/60000]
loss: 0.000347  [19200/60000]
loss: 0.000355  [25600/60000]
loss: 0.000165  [32000/60000]
loss: 0.000091  [38400/60000]
loss: 0.002655  [44800/60000]
loss: 0.004224  [51200/60000]
loss: 0.000110  [57600/60000]
Test Error: 
 Accuracy: 99.2%, Avg loss: 0.035867 

Epoch 2
-------------------------------
loss: 0.000370  [    0/60000]
loss: 0.000260  [ 6400/60000]
loss: 0.000396  [12800/60000]
loss: 0.000471  [19200/60000]
loss: 0.000312  [25600/60000]
loss: 0.000129  [32000/60000]
loss: 0.000069  [38400/60000]
loss: 0.000587  [44800/60000]
loss: 0.001002  [51200/60000]
loss: 0.000298  [57600/60000]
Test Error: 
 Accuracy: 99.2%, Avg loss: 0.033667 

Epoch 3
-------------------------------
loss: 0.000909  [    0/60000]
loss: 0.000534  [ 6400/60000]
loss: 0.000492  [12800/60000]
loss: 0.000298  [19200/60000]
loss: 0.000330  [25600/60000]
loss: 0.000103  [32

In [22]:
######## Train the second model (new) ###########
# NOTE(review): this cell trains whatever `model_new` currently holds in the
# kernel; re-running it continues from the existing weights rather than
# starting fresh. Re-create `model_new` first for a reproducible run.

# Loss function and optimizer (these names are also assigned by the cell that
# trains `model`).
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_new.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)

# Training: one train pass followed by one evaluation per epoch, with the
# total wall-clock time printed at the end.
epochs = 5
print("start!")
t1 = time.time()
for t in range(epochs):
    print(f"Epoch {t + 1}\n-------------------------------")
    train(train_dataloader, model_new, loss_fn, optimizer)
    test(test_dataloader, model_new, loss_fn)
print("Done!  Time cost: ", time.time() - t1)

start!
Epoch 1
-------------------------------
loss: 2.334300  [    0/60000]
loss: 0.147311  [ 6400/60000]
loss: 0.157568  [12800/60000]
loss: 0.047446  [19200/60000]
loss: 0.047818  [25600/60000]
loss: 0.186212  [32000/60000]
loss: 0.122187  [38400/60000]
loss: 0.094942  [44800/60000]
loss: 0.159324  [51200/60000]
loss: 0.076851  [57600/60000]
Test Error: 
 Accuracy: 97.9%, Avg loss: 0.060762 

Epoch 2
-------------------------------
loss: 0.031384  [    0/60000]
loss: 0.149726  [ 6400/60000]
loss: 0.034975  [12800/60000]
loss: 0.013555  [19200/60000]
loss: 0.002692  [25600/60000]
loss: 0.057804  [32000/60000]
loss: 0.057752  [38400/60000]
loss: 0.036334  [44800/60000]
loss: 0.096239  [51200/60000]
loss: 0.037418  [57600/60000]
Test Error: 
 Accuracy: 98.2%, Avg loss: 0.046696 

Epoch 3
-------------------------------
loss: 0.011701  [    0/60000]
loss: 0.094357  [ 6400/60000]
loss: 0.026394  [12800/60000]
loss: 0.048856  [19200/60000]
loss: 0.019533  [25600/60000]
loss: 0.067598  [32