# 手写数字识别

## 模型构建

- 使用双层MLP模型

In [None]:
import torch
from torch import nn

# Layer widths: 28 * 28 flattened inputs, 256 hidden units, 10 outputs.
num_input, num_hidden, num_output = 28 * 28, 256, 10

# Two-layer perceptron. The layers are listed first and unpacked afterwards,
# so the submodule indices (and with them the state_dict keys) stay 0..3.
_mlp_layers = [
    nn.Flatten(),
    nn.Linear(num_input, num_hidden),
    nn.ReLU(),
    nn.Linear(num_hidden, num_output),
]
mlp = nn.Sequential(*_mlp_layers)

- 使用CNN模型

In [None]:
# Small single-channel CNN: two (3x3 conv -> ReLU -> 2x2 max-pool) stages
# followed by a linear layer. The layers are listed first and unpacked
# afterwards, so the submodule indices (state_dict keys) stay 0..7.
_cnn_layers = [
    nn.Conv2d(1, 1, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(1, 1, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    # 49 = 1 channel * 7 * 7: the padded convolutions keep the spatial size
    # and each pooling halves it (assumes 28x28 inputs: 28 -> 14 -> 7).
    nn.Linear(49, 10),
]
cnn = nn.Sequential(*_cnn_layers)

## 数据集

In [None]:
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

# Both splits live under the same root directory, are downloaded on demand
# and are converted to tensors with ToTensor.
_mnist_kwargs = dict(root='./data', transform=transforms.ToTensor(), download=True)
mnist_train = MNIST(train=True, **_mnist_kwargs)
mnist_test = MNIST(train=False, **_mnist_kwargs)

# Mini-batches of 100 samples; only the training loader is shuffled.
_batch_size = 100
train_loader = DataLoader(mnist_train, shuffle=True, batch_size=_batch_size)
test_loader = DataLoader(mnist_test, shuffle=False, batch_size=_batch_size)

## 训练

In [None]:
from d2l import torch as d2l
def ecvaluate_accuracy(data_iter, net, device=torch.device('cpu')):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    The (misspelled) function name is kept as-is because callers refer to it.

    Args:
        data_iter: Iterable yielding ``(X, y)`` tensor pairs.
        net: Callable mapping a batch ``X`` to predictions. An ``nn.Module``
            is switched to evaluation mode first.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    correct, total = 0, 0
    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            # Multi-column outputs are per-class scores: take the best class.
            # Anything else is treated as already-predicted labels.
            if y_hat.dim() > 1 and y_hat.shape[1] > 1:
                y_hat = y_hat.argmax(dim=1)
            correct += (y_hat.type(y.dtype) == y).sum().item()
            total += y.numel()
    return correct / total

def train(model, train_loader, test_loader, loss_fn, optimizer, num_epochs=10, device=torch.device('cpu')):
    """Train ``model`` and plot per-epoch train loss, train accuracy and test accuracy.

    Args:
        model: Network to optimise; it is moved to ``device`` in place.
        train_loader: Iterable of ``(X, y)`` training batches.
        test_loader: Iterable of ``(X, y)`` batches passed to
            ``ecvaluate_accuracy`` after every epoch.
        loss_fn: Callable ``loss_fn(y_hat, y)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device used for the model and every batch.

    Returns:
        None. Metrics are only drawn through ``d2l.Animator``.
    """
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], legend=['train loss', 'train acc', 'test acc'])
    model.to(device)
    # A mean-reduced loss is already averaged over the batch, so it has to be
    # weighted by the batch size before dividing by the sample count below;
    # otherwise the plotted loss is too small by about the batch size.
    # A sum-reduced loss is accumulated unchanged. Plain functions without a
    # ``reduction`` attribute are assumed to be mean-reduced.
    mean_reduced = getattr(loss_fn, 'reduction', 'mean') == 'mean'
    for epoch in range(num_epochs):
        model.train()  # the per-epoch evaluation switches the model to eval mode
        train_loss, train_correct, n = 0.0, 0, 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = model(X)
            loss = loss_fn(y_hat, y)
            loss.backward()
            optimizer.step()
            batch_size = y.shape[0]
            batch_loss = loss.item()
            train_loss += batch_loss * batch_size if mean_reduced else batch_loss
            train_correct += (y_hat.argmax(axis=1) == y).sum().item()
            n += batch_size
        # Per-sample averages for this epoch, plus accuracy on the held-out set.
        animator.add(epoch + 1, (train_loss / n, train_correct / n, ecvaluate_accuracy(test_loader, model, device)))

In [None]:
# Use the GPU when one is available; both models are trained on the same device.
_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the MLP first, then the CNN, each with a fresh loss object and a
# fresh SGD optimizer (lr=0.1) for 10 epochs.
for _model in (mlp, cnn):
    train(_model, train_loader, test_loader, nn.CrossEntropyLoss(),
          torch.optim.SGD(_model.parameters(), lr=0.1), 10, _device)

# Save the trained parameters of each model.
for _model, _path in ((mlp, 'mlp.params'), (cnn, 'cnn.params')):
    torch.save(_model.state_dict(), _path)

## 预测

加载模型

In [8]:
import torch
from torch import nn

# Rebuild the exact architectures that were trained, so the saved parameter
# files match layer for layer (submodule indices are the state_dict keys).
num_input, num_hidden, num_output = 28 * 28, 256, 10

# Two-layer perceptron.
_mlp_layers = [
    nn.Flatten(),
    nn.Linear(num_input, num_hidden),
    nn.ReLU(),
    nn.Linear(num_hidden, num_output),
]
mlp = nn.Sequential(*_mlp_layers)

# Single-channel CNN: two (3x3 conv -> ReLU -> 2x2 max-pool) stages, then a
# linear layer over the 49 flattened features.
_cnn_layers = [
    nn.Conv2d(1, 1, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(1, 1, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(49, 10),
]
cnn = nn.Sequential(*_cnn_layers)

# Load the trained parameters. map_location='cpu' remaps tensors that were
# saved from a CUDA device, so the checkpoints also load on a machine without
# a GPU (the models above are created on the CPU).
mlp.load_state_dict(torch.load('mlp.params', map_location='cpu'))
cnn.load_state_dict(torch.load('cnn.params', map_location='cpu'))

<All keys matched successfully>

定义预测函数

In [9]:
from torchvision import transforms

def predict(img, model):
    """Classify a single digit image and return the predicted class index.

    Args:
        img: PIL image of any size or mode; it is converted to one grayscale
            channel and resized to 28x28.
        model: Callable mapping a ``(1, 1, 28, 28)`` float tensor to class
            scores. An ``nn.Module`` is put in evaluation mode, and the input
            is moved to the device of its parameters.

    Returns:
        The index of the highest-scoring class as a Python ``int``.
    """
    preprocess = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
    ])
    X = preprocess(img).unsqueeze(0)  # add the batch dimension
    # A mean above 0.5 means the image is mostly bright, i.e. a dark digit on
    # a light background. Invert it so the digit is light on dark, matching
    # MNIST's convention.
    if X.mean() > 0.5:
        X = 1 - X
    if isinstance(model, torch.nn.Module):
        model.eval()
        # Run on whatever device the model lives on (e.g. right after GPU training).
        first_param = next(model.parameters(), None)
        if first_param is not None:
            X = X.to(first_param.device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        return model(X).argmax(axis=1).item()

测试

In [10]:
# 读取图片
from PIL import Image
# Image.open keeps the file open until the image is closed; the context
# manager releases it as soon as the prediction has been printed.
with Image.open('./image.png') as img:
    print(predict(img, cnn))

3


绘制一个GUI界面，用于手写数字输入

In [12]:
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageDraw

class DigitCanvas:
    """Tk drawing pad that feeds a hand-drawn digit to a classifier.

    Everything drawn on the visible canvas is mirrored into an off-screen PIL
    image, because that image (not the widget) is what gets classified.
    Left mouse button draws, <Return> predicts, <Delete> clears.
    """

    # Last pointer position of the stroke in progress; None between strokes.
    _last_point = None

    def __init__(self, SIZE, master, model, bold=10):
        """Create the canvas inside ``master`` and install the event bindings.

        Args:
            SIZE: ``(width, height)`` of the drawing area in pixels.
            master: Tk container the canvas is packed into; the key bindings
                are installed on it.
            model: Model handed to the module-level ``predict`` function.
            bold: Brush radius in pixels.
        """
        self.master = master
        self.model = model
        self.size = SIZE
        self.bold = bold
        self._last_point = None
        self.canvas = tk.Canvas(self.master, width=SIZE[0], height=SIZE[1], bg='white')
        self.canvas.pack()
        self.image = Image.new('RGB', SIZE, 'white')
        self.draw = ImageDraw.Draw(self.image)
        # <Button-1> makes a plain click leave a dot; <B1-Motion> alone only
        # fires once the pointer moves with the button held down.
        self.canvas.bind("<Button-1>", self.draw_digit)
        self.canvas.bind("<B1-Motion>", self.draw_digit)
        self.canvas.bind("<ButtonRelease-1>", self._end_stroke)
        self.master.bind('<Return>', self.predict)  # Enter key runs the prediction
        self.master.bind('<Delete>', self.clear)  # Delete key clears the canvas

    def draw_digit(self, event):
        """Paint the brush at the pointer position of ``event``.

        While a stroke is in progress the previous position is joined to the
        new one with a brush-wide line, so fast pointer movement does not
        leave gaps between the stamped discs.
        """
        x, y = event.x, event.y
        radius = self.bold
        if self._last_point is not None:
            px, py = self._last_point
            self.canvas.create_line(px, py, x, y, fill='black',
                                    width=2 * radius, capstyle='round')
            self.draw.line([px, py, x, y], fill='black', width=2 * radius)
        x1, y1 = x - radius, y - radius
        x2, y2 = x + radius, y + radius
        self.canvas.create_oval(x1, y1, x2, y2, fill='black')
        self.draw.ellipse([x1, y1, x2, y2], fill='black')
        self._last_point = (x, y)

    def _end_stroke(self, event=None):
        """Forget the last point so the next press starts a separate stroke."""
        self._last_point = None

    def get_image(self):
        """Return the off-screen PIL image holding the current drawing."""
        return self.image

    def predict(self, event=None):  # event is supplied by Tk when used as a binding
        """Save the drawing to ``image.png``, show the predicted digit, then clear."""
        img = self.get_image()
        img.save('image.png')
        result = predict(img, self.model)  # module-level predict(), not this method
        messagebox.showinfo("Prediction Result", str(result))  # show the result in a dialog
        self.clear()  # start with a blank canvas after each prediction

    def clear(self, event=None):  # event is supplied by Tk when used as a binding
        """Erase the visible canvas and replace the off-screen image with a blank one."""
        self.canvas.delete("all")
        self.image = Image.new('RGB', self.size, 'white')
        self.draw = ImageDraw.Draw(self.image)
        self._last_point = None

def main():
    """Open the drawing window and run the Tk event loop until it is closed."""
    window = tk.Tk()
    # 400x400 drawing area, brush radius 10, predictions made by the MLP.
    pad = DigitCanvas(SIZE=(400, 400), master=window, model=mlp, bold=10)
    window.mainloop()


# Launch the GUI only when this file is executed directly.
if __name__ == "__main__":
    main()