In [3]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
import time
import numpy as np

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [None]:
!/opt/bin/nvidia-smi

# 超参数设置

In [5]:
EPOCH = 10 # 遍历数据集次数
BATCH_SIZE = 64  # 批处理尺寸(batch_size)
LR = 0.01  # 学习率

# 获取数据

In [6]:
transform = transforms.ToTensor()
trainset = torchvision.datasets.MNIST(root='./dataset',train=True,download=False,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,shuffle=True,num_workers=0)

testset = torchvision.datasets.MNIST(root='./dataset',train=False,download=False,transform=transform)
testloader = torch.utils.data.DataLoader(testset,batch_size=BATCH_SIZE,shuffle=False,num_workers=0)


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# 定义网络

In [8]:
class AlexNet(nn.Module):
    def __init__(self, width_mult=1):
        super(AlexNet, self).__init__()
        self.layer1 = nn.Sequential( # 输入1*28*28
            nn.Conv2d(1, 32, kernel_size=3, padding=1), # 32*28*28
            nn.MaxPool2d(kernel_size=2, stride=2), # 32*14*14
            nn.ReLU(inplace=True),
            )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1), # 64*14*14
            nn.MaxPool2d(kernel_size=2, stride=2), # 64*7*7
            nn.ReLU(inplace=True),
            )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1), # 128*7*7
            )
        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1), # 256*7*7
            )
 
        self.layer5 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, padding=1), # 256*7*7
            nn.MaxPool2d(kernel_size=3, stride=2), # 256*3*3
            nn.ReLU(inplace=True),
            )
        self.fc1 = nn.Linear(256*3*3, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
 
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = x.view(-1, 256*3*3)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

# 定义网络损失函数优化器

In [17]:
epsilon = 2

# This analysis has a total privacy cost of epsilon = 1, even though we release many results!
f = lambda x: x + np.random.laplace(loc=0, scale=1/epsilon)

def mysgd(params, lr, batch_size):  
    """小批量随机梯度下降"""
    # print(params)
    with torch.no_grad():
        for param in params:
            # print(param)
            # param -= (lr * param.grad / batch_size).apply(f)
            # param -= (lr * (param.grad+ torch.tensor(np.random.laplace(loc=0, scale=1/epsilon))) ) 
            param -= (lr * param.grad) + torch.tensor(np.random.laplace(loc=0, scale=5/epsilon))

            # param -= lr * param.grad / batch_size
            param.grad.zero_()
            # print(param)


In [None]:
net = AlexNet().to(device)
criterion = nn.CrossEntropyLoss()  # 交叉熵损失函数，通常用于多分类问题上
# optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9)

# 训练

In [16]:
def train():
 
    for epoch in range(EPOCH):
        sum_loss = 0.0
        # 数据读取
        for i, data in enumerate(trainloader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
 
            # 梯度清零
            # optimizer.zero_grad()
 
            # forward + backward
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            # optimizer.step()
            mysgd(net.parameters(), lr=LR, batch_size=BATCH_SIZE)
 
            # 每训练100个batch打印一次平均loss
            sum_loss += loss.item()
            if i % 100 == 99:
                print('[%d, %d] loss: %.03f'
                      % (epoch + 1, i + 1, sum_loss / 100))
                sum_loss = 0.0
        # 每跑完一次epoch测试一下准确率
        with torch.no_grad():
            correct = 0
            total = 0
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                # 取得分最高的那个类
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            print('第%d个epoch的识别准确率为：%d%%' % (epoch + 1, (100 * correct / total)))
        # 保存模型参数
        # torch.save(net.state_dict(), './params.pth')

In [17]:
train()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[1, 100] loss: 2.293
[1, 200] loss: 1.483
[1, 300] loss: 0.297
[1, 400] loss: 0.179
[1, 500] loss: 0.137
[1, 600] loss: 0.112
第1个epoch的识别准确率为：97%
[2, 100] loss: 0.087
[2, 200] loss: 0.078
[2, 300] loss: 0.084
[2, 400] loss: 0.066
[2, 500] loss: 0.071
[2, 600] loss: 0.063
第2个epoch的识别准确率为：98%
[3, 100] loss: 0.057
[3, 200] loss: 0.050
[3, 300] loss: 0.054
[3, 400] loss: 0.046
[3, 500] loss: 0.053
[3, 600] loss: 0.048
第3个epoch的识别准确率为：98%
[4, 100] loss: 0.043
[4, 200] loss: 0.043
[4, 300] loss: 0.038
[4, 400] loss: 0.034
[4, 500] loss: 0.036
[4, 600] loss: 0.038
第4个epoch的识别准确率为：98%
[5, 100] loss: 0.033
[5, 200] loss: 0.030
[5, 300] loss: 0.027
[5, 400] loss: 0.035
[5, 500] loss: 0.026
[5, 600] loss: 0.036
第5个epoch的识别准确率为：99%
[6, 100] loss: 0.029
[6, 200] loss: 0.024
[6, 300] loss: 0.031
[6, 400] loss: 0.023
[6, 500] loss: 0.025
[6, 600] loss: 0.025
第6个epoch的识别准确率为：98%
[7, 100] loss: 0.021
[7, 200] loss: 0.018
[7, 300] loss: 0.023
[7, 400] loss: 0.022
[7, 500] loss: 0.027
[7, 600] loss: 0.02