# 数字识别比赛调试环境

### 数据集上传

1、在实验平台中下载实验数据文件`data.zip`

2、在jupyter环境中上传数据文件

3、运行`! unzip data.zip`

4、将数据集放置在根目录下，与当前文档同级

In [2]:
# 初始化工作
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset
import os
from PIL import Image
import torch.nn.functional as F

print('torch: ', torch.__version__, sep='')
print('torchvision: ', torchvision.__version__, sep='')

torch: 1.0.0
torchvision: 0.2.1


In [None]:
# Custom dataset type
class MnistDataset(Dataset):
    """Dataset of ``bmp`` digit images stored flat in a single directory.

    The label of an image is the last character of its file name stem
    (the part before the final extension), parsed as an integer.

    Args:
        root: Directory that is scanned (non-recursively) for image files.
        transform: Optional callable applied to the opened PIL image. When
            omitted, ``transforms.ToTensor()`` is applied instead.
    """

    def __init__(self, root, transform=None):
        self.root = root
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> file mapping reproducible across runs and machines.
        self.images = sorted(
            os.path.join(self.root, name)
            for name in os.listdir(self.root)
            if name.endswith('bmp')
        )
        self.transform = transform

    def __len__(self):
        """Return the number of image files found under ``root``."""
        return len(self.images)

    def __getitem__(self, item):
        """Return ``(image, label)`` for the image at index ``item``.

        Raises:
            ValueError: If the last character of the file name stem is not a digit.
        """
        image_path = self.images[item]  # path of the single image selected by the index
        image = Image.open(image_path)
        # The label is encoded as the last character before the extension.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        label = int(stem[-1])
        if self.transform is not None:
            image = self.transform(image)
        else:
            image = transforms.ToTensor()(image)
        return image, label

In [None]:
# 网络定义（3层全连接网络，已注释停用；当前使用下方的卷积网络）
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.fc1 = nn.Linear(784, 512)
#         self.fc2 = nn.Linear(512, 256)
#         self.fc3 = nn.Linear(256, 10)

#     def forward(self, x):
#         x = x.view(-1, 784)
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x
class Net(nn.Module):
    """Small convolutional classifier: two conv layers followed by two linear layers.

    ``forward`` expects single-channel images (``conv1`` has one input
    channel), flattens the convolutional features to 320 values per sample
    (which corresponds to 28x28 inputs) and returns log-probabilities over
    10 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities with shape ``(batch, 10)``."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Pass dim explicitly: the implicit-dimension form is deprecated and
        # emits a warning on every call.
        return F.log_softmax(x, dim=1)

In [None]:
# Load the datasets
# Each image is converted to a tensor and then normalized with
# mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

train_dataset = MnistDataset(root='./train', transform=transform)
test_dataset = MnistDataset(root='./test', transform=transform)

# If an "out of memory" error is reported, reduce batch_size or resize the images
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=1000,
    shuffle=False,
)

In [None]:
# Initialize the model and its optimizer (SGD with momentum)
model = Net()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.5,
)

### 训练

In [None]:
# Train the model
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` via ``optimizer``.

    The current loss is printed every 10 batches.

    Args:
        epoch: Epoch number; it is only used in the progress message.
    """
    model.train()
    sample_count = len(train_loader.dataset)
    batch_count = len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        loss = nn.functional.cross_entropy(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Only report progress on every 10th batch.
        if step % 10 != 0:
            continue
        seen = step * len(inputs)
        percent = 100. * step / batch_count
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, sample_count, percent, loss.item()))

In [None]:
# Evaluate the model
def test():
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Runs in eval mode with gradient tracking disabled; the loss is summed
    over all samples and then divided by the dataset size.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            scores = model(inputs)
            batch_loss = nn.functional.cross_entropy(scores, labels, reduction='sum')
            loss_sum += batch_loss.item()
            # Index of the highest score is the predicted class.
            predicted = scores.argmax(dim=1, keepdim=True)
            hits += predicted.eq(labels.view_as(predicted)).sum().item()

    sample_count = len(test_loader.dataset)
    mean_loss = loss_sum / sample_count
    percent = 100. * hits / sample_count
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        mean_loss, hits, sample_count, percent))

In [None]:
# Train for 5 epochs
NUM_EPOCHS = 5
# range() excludes its upper bound, so NUM_EPOCHS + 1 is required to really
# run epochs 1..NUM_EPOCHS (range(1, 5) would only run 4 epochs).
for epoch in range(1, NUM_EPOCHS + 1):
    train(epoch)
    test()
# Persist the trained model object so that it can be loaded by main.py
torch.save(model, './model.pkl')

### 保存模型

In [None]:
# Save the model object to the file model.pkl
torch.save(obj=model, f='./model.pkl')

## 上传比赛系统

1、上传文件应为一个zip压缩包，压缩包解压后应直接是文件而不是文件夹（通过直接选中文件进行zip压缩，而不是压缩文件夹）；

2、压缩包中至少包含训练得到的模型文件（model.pkl）和主程序（main.py）；

3、比赛系统会自动解压缩，而后自动运行main.py文件，运行后生成包含识别结果的result.csv文件，系统通过此文件与标准文件对比得出识别率；

4、主程序（main.py）中应最少载入模型文件（model.pkl），并通过模型对test文件夹中的图像文件进行识别，而后生成表格文件result.csv，表格中第一列为文件名（不含后缀），第二列为识别结果（0~9）。

上传main.py文件示例

In [None]:
# Used for submission to the competition system
# main.py
import os

import torch
import torch.nn.functional as F
import torchvision
from PIL import Image
from torchvision import transforms


# Print the current library versions (the versions in the training
# environment should match those of the competition environment)
print('torch: ', torch.__version__, sep='')
print('torchvision: ', torchvision.__version__, sep='')


# Network definition; it must be identical to the one the model was trained
# with in this notebook, because torch.load() restores the pickled object
# against the class named ``Net`` that is defined here.
class Net(torch.nn.Module):
    """Convolutional classifier: two conv layers followed by two linear layers."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = torch.nn.Dropout2d()
        self.fc1 = torch.nn.Linear(320, 50)
        self.fc2 = torch.nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities with shape ``(batch, 10)``."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


# Same preprocessing as in the training environment
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the trained model.
# NOTE: torch.load() unpickles the file, so only load model files you trust.
model = torch.load('./model.pkl')
# Switch to inference mode; otherwise the dropout layers stay active and the
# predictions become random and less accurate.
model.eval()

# Directory holding the images to recognize
path = './test'
# Collected (file number, predicted digit) pairs
answer = []
# Loop over the image files, recognize each one and store the result in answer
with torch.no_grad():  # no gradients are needed for inference
    for file in os.listdir(path):
        if not file.endswith('.bmp'):
            continue
        img = Image.open(os.path.join(path, file))
        # Drop the extension properly: str.strip('.bmp') removes a set of
        # characters from both ends, not the '.bmp' suffix.
        stem = os.path.splitext(file)[0]
        image = transform(img).unsqueeze(0)
        y = model(image)
        ret = torch.argmax(y, dim=1)
        # Print every single recognition result
        # print(int(stem), int(ret))
        answer.append((int(stem), int(ret)))
# Sort by file number
answer = sorted(answer, key=lambda a: a[0])
# Write the result file
with open('result.csv', 'w') as f:
    for k, v in answer:
        print("%d,%s" % (k, v), file=f)

In [None]:
# Based on the result-generation code, write your own main.py; the file must be able to produce the result file result.csv
# Known pitfall: main.py must include the definition of the model class

# Test that main.py generates result.csv
!python main.py
# After it has been generated, verify the result yourself