In [2]:
import torch

In [3]:
import gzip, struct

In [4]:
import numpy as np

In [5]:
import torch

In [6]:
from torch import nn

In [7]:
from torch.autograd import Variable

In [8]:
from torch.nn import functional as F

In [9]:
import torch.optim as optim

In [10]:
from torch.utils.data import TensorDataset, DataLoader

In [11]:
from torchvision import transforms

In [12]:
import torchvision.datasets as datasets

In [13]:
import math
import os
import random

In [14]:
# Preprocessing shared by every split: resize so the shorter image side is
# 32 pixels, convert to a tensor, then normalize the single channel with the
# given mean / std.
data_trains = transforms.Compose(
    [
        transforms.Resize(size=32),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3082,)),
    ]
)

In [15]:
# MNIST train / test splits stored under ./data (downloaded when missing);
# both go through the same preprocessing pipeline.
train_data = datasets.MNIST(root='data', train=True, download=True, transform=data_trains)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=data_trains)

In [16]:
# Keep 90% of the training set for training; the remainder is held out for
# validation.
n_train = int(0.9 * len(train_data))
n_validation = len(train_data) - n_train

In [17]:
# Number of samples per mini-batch.
batch_size=64

In [18]:
# Split the loaded training set into train / validation subsets.  A seeded
# generator makes the split identical on every run, so validation metrics and
# the checkpoint selected from them are reproducible.
# NOTE: this rebinds `train_data` to the training subset, so this cell must
# not be re-run without reloading the dataset first.
train_data, valid_data = torch.utils.data.random_split(
    train_data,
    [n_train, n_validation],
    generator=torch.Generator().manual_seed(42),
)

In [19]:
# Build the mini-batch iterators; only the training loader shuffles.
train_iterator = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_iterator = DataLoader(valid_data, batch_size=batch_size)
test_iterator = DataLoader(test_data, batch_size=batch_size)

In [21]:
class Block(nn.Module):
    """Grouped-convolution bottleneck block with a residual shortcut.

    The main path is 1x1 conv -> 3x3 grouped conv -> 1x1 conv, each followed
    by batch norm, and produces ``expansion * cardinality * bottleneck_width``
    channels.  The shortcut is the identity unless the stride or the channel
    count changes, in which case it is a strided 1x1 conv plus batch norm.

    Args:
        in_planes: Number of input channels.
        cardinality: Number of groups in the 3x3 convolution.
        bottleneck_width: Number of channels per group.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
    """

    # Ratio between the block's output channels and its grouped-conv width.
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super().__init__()
        mid_planes = cardinality * bottleneck_width
        out_planes = self.expansion * mid_planes

        # Main path (created in the same order as the attribute names).
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.conv2 = nn.Conv2d(
            mid_planes,
            mid_planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=cardinality,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # A projection is only needed when the input cannot be added to the
        # main-path output as-is (different resolution or channel count).
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply the bottleneck path, add the shortcut and apply ReLU."""
        out = self.conv1(x)
        out = F.relu(self.bn1(out))
        out = self.conv2(out)
        out = F.relu(self.bn2(out))
        out = self.conv3(out)
        out = self.bn3(out)
        out += self.shortcut(x)
        return F.relu(out)

class ResNeXt(nn.Module):
    """ResNeXt-style classifier made of three stages of ``Block`` modules.

    Args:
        num_blocks: Sequence giving the number of blocks per stage; only the
            first three entries are used.
        cardinality: Number of groups in every block's 3x3 convolution.
        bottleneck_width: Channels per group in the first stage; it is doubled
            after each stage.
        num_classes: Size of the output logits vector.
        in_channels: Number of channels of the input images.  Defaults to 3
            (the previously hard-coded value); pass 1 for single-channel
            (grayscale) inputs.
    """

    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10, in_channels=3):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        # Stem: 1x1 convolution lifting the input to 64 channels.
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # After the last stage self.in_planes holds that stage's output
        # channel count (cardinality * bottleneck_width * 8 for three stages),
        # which is exactly the classifier's input width.
        self.linear = nn.Linear(self.in_planes, num_classes)

    def _make_layer(self, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``.

        Side effects: updates ``self.in_planes`` to the stage's output channel
        count and doubles ``self.bottleneck_width`` for the next stage.
        """
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for block_stride in strides:
            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, block_stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling to 1x1.  For 32x32 inputs this matches the
        # previous fixed 8x8 window, and it also accepts other input sizes.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNeXt29_2x64d(in_channels=1):
    """Build the 2x64d configuration: three stages of three blocks each.

    Args:
        in_channels: Number of input image channels.  Defaults to 1 because
            this notebook feeds single-channel images (its normalization uses
            a one-element mean / std).
    """
    return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64, in_channels=in_channels)

In [22]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [23]:
model=ResNeXt29_2x64d().to(device)# Build the network and move it to the selected device

In [28]:
# Directory that holds the saved checkpoint.
model_dir='models'
# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate isdir() test.
os.makedirs(model_dir, exist_ok=True)

# Full path of the checkpoint file written during training.
model_path=os.path.join(model_dir, 'resnet_mnist.pt')
print(model_path)

models\resnet_mnist.pt


In [29]:
# Adam over all model parameters, with the library's default hyperparameters.
optimizer = optim.Adam(model.parameters())

In [30]:
# Classification loss computed from the model's raw outputs and integer labels.
lossfunc = nn.CrossEntropyLoss()

In [31]:
def accu(fx, y):
    """Return the fraction of rows in ``fx`` whose arg-max equals the label.

    Args:
        fx: Tensor of per-class scores, one row per sample.
        y: Tensor of integer class labels, one per sample.

    Returns:
        A zero-dimensional float tensor in ``[0, 1]``.
    """
    # max() over the class dimension yields (values, indices); the indices
    # are the predicted classes.
    _, predicted = fx.max(dim=1)
    n_correct = (predicted == y.view_as(predicted)).sum()
    return n_correct.float() / predicted.shape[0]

In [32]:
def train(model, device, iterator, optimizer, lossfunc):
    """Train ``model`` for one epoch and return its average loss and accuracy.

    Both averages are weighted by batch size, so a smaller final batch does
    not count as much as a full one.  This assumes ``lossfunc`` returns the
    mean loss over the batch (the default reduction of ``nn.CrossEntropyLoss``).

    Args:
        model: The network to optimize; it is switched to training mode.
        device: Device the input and label tensors are moved to.
        iterator: Iterable of ``(inputs, labels)`` mini-batches.
        optimizer: Optimizer updating ``model``'s parameters.
        lossfunc: Callable ``lossfunc(outputs, labels)`` returning a scalar loss.

    Returns:
        ``(mean_loss, mean_accuracy)`` as Python floats.

    Raises:
        ZeroDivisionError: If ``iterator`` yields no samples.
    """
    loss_sum = 0.0
    correct_sum = 0.0
    n_samples = 0
    model.train()                  # training mode
    for x, y in iterator:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()      # clear gradients left from the previous step
        fx = model(x)
        loss = lossfunc(fx, y)
        loss.backward()            # accumulate gradients for every parameter
        optimizer.step()           # apply the parameter update

        # Per-batch means are converted back to per-sample sums so that the
        # epoch average is exact even when batches differ in size.
        batch_n = y.size(0)
        loss_sum += loss.item() * batch_n
        correct_sum += accu(fx.detach(), y).item() * batch_n
        n_samples += batch_n

    return loss_sum / n_samples, correct_sum / n_samples

In [33]:
# Full pass over a validation / test set, returning its average loss and accuracy.
def evaluate(model, device, iterator, lossfunc):
    """Evaluate ``model`` on every batch of ``iterator`` without updating it.

    Both averages are weighted by batch size, so the reported metrics are the
    true per-sample means even when the last batch is smaller.  This assumes
    ``lossfunc`` returns the mean loss over the batch (the default reduction
    of ``nn.CrossEntropyLoss``).

    Args:
        model: The network to score; it is switched to evaluation mode.
        device: Device the input and label tensors are moved to.
        iterator: Iterable of ``(inputs, labels)`` mini-batches.
        lossfunc: Callable ``lossfunc(outputs, labels)`` returning a scalar loss.

    Returns:
        ``(mean_loss, mean_accuracy)`` as Python floats.

    Raises:
        ZeroDivisionError: If ``iterator`` yields no samples.
    """
    loss_sum = 0.0
    correct_sum = 0.0
    n_samples = 0
    model.eval()           # evaluation mode for the model's layers
    with torch.no_grad():  # gradient tracking is disabled here, not by eval()
        for x, y in iterator:
            x = x.to(device)
            y = y.to(device)
            fx = model(x)
            batch_n = y.size(0)
            loss_sum += lossfunc(fx, y).item() * batch_n
            correct_sum += accu(fx, y).item() * batch_n
            n_samples += batch_n
    return loss_sum / n_samples, correct_sum / n_samples

In [34]:
# Number of passes over the training set.
epochs = 1
# Lowest validation loss seen so far; starts at +inf so any finite loss is lower.
best_valid_loss=float('inf')
# Bare expression: shows the model's module structure as the cell output.
model

ResNet18(
  (feature_block): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (rb1): ResNetLayer(
    (blocks): Sequential(
      (0): ResNetBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (downsample): Sequential()
      )
      (1): ResNetBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 

In [35]:
for epoch in range(epochs):
    # One pass of gradient updates over the whole training set; yields the
    # epoch's average training loss and accuracy.
    train_loss, train_acc = train(model, device, train_iterator, optimizer, lossfunc)
    # Score the freshly updated model on the validation set.
    valid_loss, valid_acc = evaluate(model, device, valid_iterator, lossfunc)
    # Checkpoint only when the validation loss beats the best seen so far.
    if best_valid_loss > valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), model_path)
    epoch_report = 'Epoch:{0}|Train Loss:{1}|Train Acc:{2}|Val Loss:{3}|Val Acc:{4}'.format(
        epoch+1, train_loss, train_acc, valid_loss, valid_acc
    )
    print(epoch_report)
    

AttributeError: 'ResNetBlock' object has no attribute 'conv'

In [None]:
# Restore the best checkpoint and report its metrics on the test set.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load(model_path, map_location=device))
test_loss, test_acc = evaluate(model, device, test_iterator, lossfunc)
print('Test Loss:{0}|Test Acc:{1}'.format(test_loss,test_acc))