In [1]:
# 更深的卷积神经网络：GoogLeNet
# http://zh.gluon.ai/chapter_convolutional-neural-networks/googlenet-gluon.html

from mxnet.gluon import nn
from mxnet import nd

class Inception(nn.Block):
    def __init__(self, n1_1, n2_1, n2_3, n3_1, n3_5, n4_1, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # path 1
        self.p1_conv_1 = nn.Conv2D(n1_1, kernel_size=1,
                                  activation='relu')
        # path 2
        self.p2_conv_1 = nn.Conv2D(n2_1, kernel_size=1,
                                  activation='relu')
        self.p2_conv_3 = nn.Conv2D(n2_3, kernel_size=3, padding=1,
                                  activation='relu')
        # path 3
        self.p3_conv_1 = nn.Conv2D(n3_1, kernel_size=1,
                                  activation='relu')
        self.p3_conv_5 = nn.Conv2D(n3_5, kernel_size=5, padding=2,
                                  activation='relu')
        # path 4
        self.p4_pool_3 = nn.MaxPool2D(pool_size=3, padding=1,
                                      strides=1)
        self.p4_conv_1 = nn.Conv2D(n4_1, kernel_size=1,
                                  activation='relu')
    
    def forward(self, x):
        p1 = self.p1_conv_1(x)
        p2 = self.p2_conv_3(self.p2_conv_1(x))
        p3 = self.p3_conv_5(self.p3_conv_1(x))
        p4 = self.p4_conv_1(self.p4_pool_3(x))
        return nd.concat(p1, p2, p3, p4, dim=1)

  from ._conv import register_converters as _register_converters


In [2]:
incp = Inception(64, 96, 128, 16, 32, 32)
incp.initialize()

x = nd.random.uniform(shape=(32, 3, 64, 64))
y = incp(x)
print(y.shape)
print(incp)

(32, 256, 64, 64)
Inception(
  (p1_conv_1): Conv2D(3 -> 64, kernel_size=(1, 1), stride=(1, 1))
  (p2_conv_1): Conv2D(3 -> 96, kernel_size=(1, 1), stride=(1, 1))
  (p2_conv_3): Conv2D(96 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (p3_conv_1): Conv2D(3 -> 16, kernel_size=(1, 1), stride=(1, 1))
  (p3_conv_5): Conv2D(16 -> 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (p4_pool_3): MaxPool2D(size=(3, 3), stride=(1, 1), padding=(1, 1), ceil_mode=False)
  (p4_conv_1): Conv2D(3 -> 32, kernel_size=(1, 1), stride=(1, 1))
)


In [3]:
# 定义GoogLeNet
class GoogLeNet(nn.Block):
    def __init__(self, num_classes, verbose=False, **kwargs):
        super(GoogLeNet, self).__init__(**kwargs)
        self.verbose = verbose
        # add name_scope on the outer most Sequential
        with self.name_scope():
            # block 1
            b1 = nn.Sequential()
            b1.add(
                nn.Conv2D(64, kernel_size=7, strides=2,
                         padding=3, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2)
            )
            # block 2
            b2 = nn.Sequential()
            b2.add(
                nn.Conv2D(64, kernel_size=1),
                nn.Conv2D(192, kernel_size=3, padding=1),
                nn.MaxPool2D(pool_size=3, strides=2)
            )
            
            # block 3
            b3 = nn.Sequential()
            b3.add(
                Inception(64, 96, 128, 16, 32, 23),
                Inception(128, 128, 192, 32, 96, 64),
                nn.MaxPool2D(pool_size=3, strides=2)
            )
            
            # block 4
            b4 = nn.Sequential()
            b4.add(
                Inception(192, 96, 208, 16, 48, 64),
                Inception(160, 112, 224, 24, 64, 64),
                Inception(128, 128, 256, 24, 64, 64),
                Inception(112, 144, 288, 32, 64, 64),
                Inception(256, 160, 320, 32, 128, 128),
                nn.MaxPool2D(pool_size=3, strides=2)
            )
            
            # block 5
            b5 = nn.Sequential()
            b5.add(
                Inception(256, 160, 320, 32, 128, 128),
                Inception(384, 192, 384, 48, 128, 128),
                nn.AvgPool2D(pool_size=2)
            )
            
            # block 6
            b6 = nn.Sequential()
            b6.add(
                nn.Flatten(),
                nn.Dense(num_classes)
            )
            
            # chain blocks together
            self.net = nn.Sequential()
            self.net.add(b1, b2, b3, b4, b5, b6)
        
    def forward(self, x):
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print('Block %d output: %s' % (i+1, out.shape))
        return out

In [4]:
net = GoogLeNet(10, verbose=True)
net.initialize()

x = nd.random.uniform(shape=(4, 3, 96, 96))
y = net(x)

In [5]:
# 读取数据
import sys
sys.path.append('..')
import utils
from mxnet import autograd
from mxnet import gluon
from mxnet import nd
from mxnet import image
from mxnet import init

batch_size = 64
resize=224

ctx = utils.try_gpu()
net = GoogLeNet(10, verbose=False)

def transform(data, label):
#     print(data.shape)   # (28, 28, 1)
#     print(label.shape)    # (1,)
# change data from batch x height x width x channel
# to batch x channel x height x width
#     return nd.transpose(data.astype('float32'), (0,3,1,2))/255, label.astype('float32')
    data = image.imresize(data, resize, resize)
#     data = new_data
            
    return nd.transpose(data.astype('float32'), (2,0,1))/255, label.astype('float32')
#     return data.astype('float32') / 255, label.astype('float32')
mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

gpu(0)
GoogLeNet(
  (net): Sequential(
    (0): Sequential(
      (0): Conv2D(1 -> 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (1): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
    )
    (1): Sequential(
      (0): Conv2D(64 -> 64, kernel_size=(1, 1), stride=(1, 1))
      (1): Conv2D(64 -> 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (2): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
    )
    (2): Sequential(
      (0): Inception(
        (p1_conv_1): Conv2D(192 -> 64, kernel_size=(1, 1), stride=(1, 1))
        (p2_conv_1): Conv2D(192 -> 96, kernel_size=(1, 1), stride=(1, 1))
        (p2_conv_3): Conv2D(96 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (p3_conv_1): Conv2D(192 -> 16, kernel_size=(1, 1), stride=(1, 1))
        (p3_conv_5): Conv2D(16 -> 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (p4_pool_3): MaxPool2D(size=(3, 3), stride=(1, 1), padding=(1, 1), cei

In [6]:
# 训练
import time
from mxnet import gluon

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.1
})

for epoch in range(10):
    time_start = time.time()
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data.as_in_context(ctx))
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_data, net, ctx)
    print("Epoch %d. Loss: %.4f, Train acc %.4f, Test acc %.4f, Time %.0f sec" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc, time.time() - time_start))

Epoch 0. Loss: 1.1790, Train acc 0.5678, Test acc 0.8438, Time 190 sec
Epoch 1. Loss: 0.3865, Train acc 0.8589, Test acc 0.8769, Time 181 sec
Epoch 2. Loss: 0.3119, Train acc 0.8851, Test acc 0.8955, Time 185 sec
Epoch 3. Loss: 0.2748, Train acc 0.9000, Test acc 0.9007, Time 184 sec
Epoch 4. Loss: 0.2474, Train acc 0.9094, Test acc 0.9023, Time 181 sec
Epoch 5. Loss: 0.2282, Train acc 0.9154, Test acc 0.9162, Time 181 sec
Epoch 6. Loss: 0.2101, Train acc 0.9228, Test acc 0.9153, Time 182 sec
Epoch 7. Loss: 0.1954, Train acc 0.9267, Test acc 0.9184, Time 181 sec
Epoch 8. Loss: 0.1793, Train acc 0.9349, Test acc 0.9210, Time 184 sec
Epoch 9. Loss: 0.1662, Train acc 0.9387, Test acc 0.9244, Time 189 sec
