In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

In [10]:
# Seed every RNG the notebook relies on so that the dataset split, shuffling
# and weight initialisation are repeatable across kernel restarts.
torch.manual_seed(0)
torch.cuda.manual_seed(0)
np.random.seed(0)
# Ask cuDNN for deterministic kernels and keep its auto-tuner off, since the
# auto-tuner may pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Device type strings are case-sensitive: the CPU fallback must be "cpu"
# ("CPU" is rejected by torch.device on machines without CUDA).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [22]:
import torchvision
import torchvision.transforms as transforms

# Preprocessing applied to every image.
transform = transforms.Compose([
    transforms.ToTensor(),  # PIL.Image / numpy.ndarray -> torch.FloatTensor of shape (C x H x W), scaled to [0.0, 1.0]
    transforms.Normalize((0.1307,), (0.3081,))  # normalisation constants used for the MNIST dataset
])

# Download the training set and apply the transform.
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transform,
                                           download=True)

# 80/20 train/validation split. The validation size is computed as the
# remainder so the two lengths always sum to the dataset size; truncating
# both fractions independently only works when the size divides evenly.
num_train = int(len(train_dataset) * 0.8)
num_val = len(train_dataset) - num_train
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [num_train, num_val])

# Load the test split with the same transform.
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transform)

# DataLoader for the training subset (reshuffled every epoch).
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64,
                                           shuffle=True,
                                           num_workers=1)
# DataLoader for the validation subset.
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=512,
                                         shuffle=True,
                                         num_workers=1)

# DataLoader for the test set (fixed order).
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1000,
                                          shuffle=False,
                                          num_workers=1)



In [23]:
# Pull a single mini-batch (images and labels) from the training loader for inspection.
data, tags = next(iter(train_loader))

In [24]:
# Shape of the image batch fetched from train_loader.
data.shape

torch.Size([64, 1, 28, 28])

In [25]:
# Shape of the label batch fetched from train_loader.
tags.shape

torch.Size([64])

In [39]:

class Net(nn.Module):
    """Small CNN classifier: three 3x3 convolutions followed by one linear layer.

    Spatial sizes noted in ``forward`` are for a 28x28 single-channel input;
    the linear layer expects 3 * 3 * 32 = 288 flattened features and emits
    10 raw class scores (no softmax is applied).
    """

    def __init__(self):
        super().__init__()

        # Size bookkeeping for the flattened feature vector fed to the classifier.
        self.last_conv_filt = 32
        self.dens_size_sub = 3
        self.dens_size = self.last_conv_filt * self.dens_size_sub * self.dens_size_sub

        # Convolutional feature extractor (unpadded 3x3 kernels).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=self.last_conv_filt, kernel_size=3)
        # Fully connected classification head.
        self.dens = nn.Linear(in_features=self.dens_size, out_features=10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        # Stage 1: conv -> ReLU -> 2x2 max-pool   (28 -> 26 -> 13)
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), 2)
        # Stage 2: conv -> ReLU -> 2x2 max-pool   (13 -> 11 -> 5)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)
        # Stage 3: conv -> ReLU, no pooling       (5 -> 3)
        stage3 = F.relu(self.conv3(stage2))
        # Collapse everything except the batch dimension before the linear layer.
        flat = stage3.view(stage3.size(0), -1)
        return self.dens(flat)

# Instantiate the network once and print its layer summary as a sanity check.
net = Net()
print(net)


Net(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (dens): Linear(in_features=288, out_features=10, bias=True)
)


In [32]:
import torch.nn as nn
class CNN(nn.Module):
    """Two-stage CNN: (5x5 conv -> ReLU -> 2x2 max-pool) twice, then a linear layer.

    The convolutions use padding 2, so only the pooling steps shrink the
    feature map; the linear layer expects 32 * 7 * 7 flattened features and
    emits 10 class scores.
    """

    def __init__(self):
        super().__init__()
        # First stage: 1 -> 16 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Second stage: 16 -> 32 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected layer producing the 10 class scores.
        self.out = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        features = self.conv2(self.conv1(x))
        # Flatten to (batch_size, 32 * 7 * 7) for the linear layer.
        flattened = features.view(features.size(0), -1)
        return self.out(flattened)

In [40]:
torch.cuda.empty_cache()
# Build the model and move it to the selected device (GPU when available).
model = Net()
model.to(device)


# # Optional He/Kaiming normal initialisation (disabled). It targets `model`,
# # the instance that is actually trained below.
# torch.nn.init.kaiming_normal_(model.conv1.weight, mode='fan_in', nonlinearity='relu')
# torch.nn.init.kaiming_normal_(model.conv2.weight, mode='fan_in', nonlinearity='relu')
# torch.nn.init.kaiming_normal_(model.conv3.weight, mode='fan_in', nonlinearity='relu')

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Number of passes over the training subset.
num_epochs = 50

for epoch in range(num_epochs):
    model.train()  # training mode

    # loss_acc accumulates the *summed* per-sample loss and train_num the
    # number of samples seen, so their ratio is the true per-sample mean even
    # when the final batch is smaller than the others.
    loss_acc = 0.
    train_num = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()  # clear gradients from the previous step
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; scale back to a batch sum.
        loss_acc += loss.item() * batch_size
        train_num += batch_size

    model.eval()  # evaluation mode for the validation pass
    val_loss_acc = 0.
    val_num = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = labels.size(0)
            # Forward pass only
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            val_loss_acc += val_loss.item() * batch_size
            val_num += batch_size

    print("Epoch [{}/{}], Loss: {:.2f}, Val_loss: {:.2f}".format(epoch+1, num_epochs, loss_acc/train_num, val_loss_acc/val_num))


Epoch [1/50], Loss: 0.80, Val_loss: 0.32
Epoch [2/50], Loss: 0.25, Val_loss: 0.20
Epoch [3/50], Loss: 0.17, Val_loss: 0.14
Epoch [4/50], Loss: 0.13, Val_loss: 0.12
Epoch [5/50], Loss: 0.11, Val_loss: 0.10
Epoch [6/50], Loss: 0.09, Val_loss: 0.09
Epoch [7/50], Loss: 0.08, Val_loss: 0.08
Epoch [8/50], Loss: 0.08, Val_loss: 0.08
Epoch [9/50], Loss: 0.07, Val_loss: 0.07
Epoch [10/50], Loss: 0.06, Val_loss: 0.06
Epoch [11/50], Loss: 0.06, Val_loss: 0.06
Epoch [12/50], Loss: 0.06, Val_loss: 0.06


KeyboardInterrupt: 

In [41]:
# Measure classification accuracy of the trained `model` on the held-out
# test set served by `test_loader`.
model.eval()  # evaluation mode
total_correct = 0
total_samples = 0

with torch.no_grad():  # no gradients are needed for evaluation
    # Loop variables are named `images`/`targets` so they do not overwrite
    # the `data` batch inspected in earlier cells.
    for images, targets in test_loader:
        images, targets = images.to(device), targets.to(device)  # move batch to the model's device
        outputs = model(images)
        # The predicted class is the index of the largest logit; softmax is
        # monotonic, so applying it first would not change the result.
        predicted = outputs.argmax(dim=1)
        total_samples += targets.size(0)
        total_correct += (predicted == targets).sum().item()

accuracy = total_correct / total_samples
print(f'Accuracy of the model on the test data: {accuracy * 100:.2f}%')


Accuracy of the model on the validation data: 98.52%
