In [2]:
# ResNet：深度残差网络
# http://zh.gluon.ai/chapter_convolutional-neural-networks/resnet-gluon.html

from mxnet.gluon import nn
from mxnet import nd

class _Residual(nn.Block):
    def __init__(self, channels, same_shape=True, **kwargs):
        super(_Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        strides = 1 if same_shape else 2
        self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1,
                              strides=strides)
        self.bn1 = nn.BatchNorm()
        self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1,
                              strides=strides)
        self.bn2 = nn.BatchNorm()
        if not same_shape:
            self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                  strides=strides)
            
class ResidualIdentity(_Residual):
    def __init__(self, channels, same_shape=True, **kwargs):
        super(ResidualIdentity, self).__init__(channels, same_shape, **kwargs)
    
    def forward(self, x):
#         print('x.shape:', x.shape)
        
        out = self.conv1(nd.relu(self.bn1(x)))
#         print('out.shape:', out.shape)
        
        out = self.conv2(nd.relu(self.bn2(x)))
#         print('out.shape:', out.shape)
        
        if not self.same_shape:
            x = self.conv3(x)
#             print('x.shape:', x.shape)
            
        return out + x

In [3]:
# 输入输出通道相同
blk = ResidualIdentity(3)
blk.initialize()

x = nd.random.uniform(shape=(4,3,96,96))
y = blk(x)
print('y.shape:', y.shape)
print(blk)

y.shape: (4, 3, 96, 96)
ResidualIdentity(
  (conv1): Conv2D(3 -> 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=3)
  (conv2): Conv2D(3 -> 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=3)
)


In [4]:
# 输入输出通道不同
blk2 = ResidualIdentity(8, same_shape=False)
blk2.initialize()
# print(blk2)
y2 = blk2(x)
print('y2.shape:',y2.shape)
print(blk2)

y2.shape: (4, 8, 48, 48)
ResidualIdentity(
  (conv1): Conv2D(3 -> 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=3)
  (conv2): Conv2D(3 -> 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bn2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=3)
  (conv3): Conv2D(3 -> 8, kernel_size=(1, 1), stride=(2, 2))
)


In [5]:
class _ResidualBottleneck(nn.Block):
    def __init__(self, channels_in, channels_out, same_shape=True, **kwargs):
        super(_ResidualBottleneck, self).__init__(**kwargs)
        self.same_shape = same_shape
        strides = 1 if same_shape else 2
        self.conv1 = nn.Conv2D(channels_in, kernel_size=1,
                              strides=strides)
        self.bn1 = nn.BatchNorm()
        self.conv2 = nn.Conv2D(channels_in, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm()
        self.conv3 = nn.Conv2D(channels_out, kernel_size=1,
                              strides=1)
        self.bn3 = nn.BatchNorm()
        if not same_shape:
            self.conv4 = nn.Conv2D(channels_out, kernel_size=1,
                                  strides=strides)
            
#     def forward(self, x):
# #         print('same in / out shape:', self.same_shape)
# #         print('x.shape:', x.shape)
#         out = nd.relu(self.bn1(self.conv1(x)))
# #         print(out.shape)
#         out = nd.relu(self.bn2(self.conv2(out)))
# #         print(out.shape)
#         out = self.bn3(self.conv3(out))
# #         print(out.shape)
#         if not self.same_shape:
#             x = self.conv4(x)
# #             print('x.shape:',x.shape)
#         return nd.relu(out + x)
class ResidualIdentityBottleneck(_ResidualBottleneck):
    def __init__(self, channels_in, channels_out, same_shape=True, **kwargs):
        super(ResidualIdentityBottleneck, self).__init__(channels_in, channels_out, same_shape, **kwargs)
    
        def forward(self, x):
#             print('x.shape:', x.shape)

            out = self.conv1(nd.relu(self.bn1(x)))
#             print('out.shape:', out.shape)

            out = self.conv2(nd.relu(self.bn2(x)))
#             print('out.shape:', out.shape)

            out = self.conv3(nd.relu(self.bn3(x)))
#             print('out.shape:', out.shape)

            if not self.same_shape:
                x = self.conv3(x)
#                 print('x.shape:', x.shape)

            return out + x

In [6]:
# 构建ResNet
class ResNet18(nn.Block):
    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet18, self).__init__(**kwargs)
        self.verbose = verbose
        # add name_scope on the outermost Sequential
        with self.name_scope():
            # block 1
            b1 = nn.Conv2D(64, kernel_size=7, strides=2)
            # block 2
            b2 = nn.Sequential()
            b2.add(
                nn.MaxPool2D(pool_size=3, strides=2),
                ResidualIdentity(64),
                ResidualIdentity(64)
            )
            # block 3
            b3 = nn.Sequential()
            b3.add(
                ResidualIdentity(128, same_shape=False),
                ResidualIdentity(128)
            )
            # block 4
            b4 = nn.Sequential()
            b4.add(
                ResidualIdentity(256, same_shape=False),
                ResidualIdentity(256)
            )
            # block 5
            b5 = nn.Sequential()
            b5.add(
                ResidualIdentity(512, same_shape=False),
                ResidualIdentity(512)
            )
            # block 6
            b6 = nn.Sequential()
            b6.add(
                nn.AvgPool2D(pool_size=3),
                nn.Dense(num_classes)
            )
            # chain all blocks together
            self.net = nn.Sequential()
            self.net.add(b1, b2, b3, b4, b5, b6)
    
    def forward(self, x):
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print('Block %d output: %s' % (i+1, out.shape))
        return out

In [7]:
net = ResNet18(10, verbose=True)
net.initialize()

x = nd.random.uniform(shape=(4,3,96, 96))
y = net(x)
print(net)

Block 1 output: (4, 64, 45, 45)
Block 2 output: (4, 64, 22, 22)
Block 3 output: (4, 128, 11, 11)
Block 4 output: (4, 256, 6, 6)
Block 5 output: (4, 512, 3, 3)
Block 6 output: (4, 10)
ResNet18(
  (net): Sequential(
    (0): Conv2D(3 -> 64, kernel_size=(7, 7), stride=(2, 2))
    (1): Sequential(
      (0): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
      (1): ResidualIdentity(
        (conv1): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=64)
        (conv2): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, use_global_stats=False, in_channels=64)
      )
      (2): ResidualIdentity(
        (conv1): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=Fals

In [8]:
# 读取数据
import sys
sys.path.append('..')
import utils
from mxnet import autograd
from mxnet import gluon
from mxnet import nd
from mxnet import image
from mxnet import init

batch_size = 128
resize=224

ctx = utils.try_gpu(1)
net = ResNet18(10, verbose=False)
net.initialize(ctx=ctx, init=init.Xavier())

def transform(data, label):
#     print(data.shape)   # (28, 28, 1)
#     print(label.shape)    # (1,)
# change data from batch x height x width x channel
# to batch x channel x height x width
#     return nd.transpose(data.astype('float32'), (0,3,1,2))/255, label.astype('float32')
    data = image.imresize(data, resize, resize)
#     data = new_data
            
    return nd.transpose(data.astype('float32'), (2,0,1))/255, label.astype('float32')
#     return data.astype('float32') / 255, label.astype('float32')
mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

In [9]:
ctx = utils.try_gpu(1)
ctx

gpu(1)

In [None]:
# 训练
import time
from mxnet import gluon

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

trainable_params = net.collect_params()
print(trainable_params)

trainer = gluon.Trainer(trainable_params, 'sgd', {
    'learning_rate': 0.05
})

for epoch in range(10):
    time_start = time.time()
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data.as_in_context(ctx))
            loss = softmax_cross_entropy(output, label)
        
        b1_params = net.b1.params
        print(b1_params)
        
        loss.backward()
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_data, net, ctx)
    print("Epoch %d. Loss: %.4f, Train acc %.4f, Test acc %.4f, Time %.0f sec" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc, time.time() - time_start))