In [1]:

import sys
sys.path.insert(0, '..')
import gluonbook as gb
from mxnet import nd, init, gluon
from mxnet.gluon import nn

class Inception(nn.Block):
    # c1 - c4 为每条线路里的层的输出通道数。
    def __init__(self, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # 线路 1，单 1 x 1 卷积层。
        self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
        # 线路 2，1 x 1 卷积层后接 3 x 3 卷积层。
        self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
        self.p2_2 = nn.Conv2D(c2[1], kernel_size=3, padding=1,
                              activation='relu')
        # 线路 3，1 x 1 卷积层后接 5 x 5 卷积层。
        self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
        self.p3_2 = nn.Conv2D(c3[1], kernel_size=5, padding=2,
                              activation='relu')
        # 线路 4，3 x 3 最大池化层后接 1 x 1 卷积层。
        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
        self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')

    def forward(self, x):
        p1 = self.p1_1(x)
        p2 = self.p2_2(self.p2_1(x))
        p3 = self.p3_2(self.p3_1(x))
        p4 = self.p4_2(self.p4_1(x))
        # 在通道维上合并输出
        return nd.concat(p1, p2, p3, p4, dim=1)

  from ._conv import register_converters as _register_converters


In [2]:
b1 = nn.Sequential()
b1.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3, activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)

In [3]:

b2 = nn.Sequential()
b2.add(
    nn.Conv2D(64, kernel_size=1),
    nn.Conv2D(192, kernel_size=3, padding=1),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)

In [4]:
b3 = nn.Sequential()
b3.add(
    Inception(64, (96, 128), (16, 32), 32),
    Inception(128, (128, 192), (32, 96), 64),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)



In [5]:
b4 = nn.Sequential()
b4.add(
    Inception(192, (96, 208), (16, 48), 64),
    Inception(160, (112, 224), (24, 64), 64),
    Inception(128, (128, 256), (24, 64), 64),
    Inception(112, (144, 288), (32, 64), 64),
    Inception(256, (160, 320), (32, 128), 128),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)

In [6]:

b5 = nn.Sequential()
b5.add(
    Inception(256, (160, 320), (32, 128), 128),
    Inception(384, (192, 384), (48, 128), 128),
    nn.GlobalAvgPool2D()
)

net = nn.Sequential()
net.add(b1, b2, b3, b4, b5, nn.Dense(10))


In [7]:
# ctx = gb.try_gpu()
# X = nd.random.uniform(shape=(1,1,96,96), ctx=ctx)
# net.initialize( ctx=ctx, init=init.Xavier())
# for layer in net:
#     X = layer(X)
#     print(layer.name, 'output shape:\t', X.shape)

In [8]:

lr = 0.1
ctx = gb.try_gpu()
net.initialize( ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_data, test_data = gb.load_data_fashion_mnist(batch_size=170, resize=224)

loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=150)

training on gpu(0)
epoch 1, loss 2.0780, train acc 0.231, test acc 0.568, time 423.1 sec
epoch 2, loss 1.0638, train acc 0.589, test acc 0.742, time 414.0 sec
epoch 3, loss 0.6144, train acc 0.768, test acc 0.825, time 414.6 sec
epoch 4, loss 0.6369, train acc 0.769, test acc 0.699, time 411.1 sec
epoch 5, loss 0.4945, train acc 0.816, test acc 0.800, time 400.7 sec
epoch 6, loss 0.3922, train acc 0.853, test acc 0.871, time 413.9 sec
epoch 7, loss 0.3508, train acc 0.869, test acc 0.874, time 414.2 sec
epoch 8, loss 0.3257, train acc 0.878, test acc 0.871, time 414.3 sec
epoch 9, loss 0.4024, train acc 0.856, test acc 0.852, time 414.6 sec
epoch 10, loss 0.3160, train acc 0.881, test acc 0.896, time 414.7 sec
epoch 11, loss 0.2793, train acc 0.897, test acc 0.899, time 414.9 sec
epoch 12, loss 0.2634, train acc 0.902, test acc 0.906, time 414.3 sec
epoch 13, loss 0.2483, train acc 0.908, test acc 0.903, time 414.6 sec
epoch 14, loss 0.2378, train acc 0.911, test acc 0.898, time 414.3 

epoch 116, loss 2.3029, train acc 0.099, test acc 0.101, time 410.7 sec
epoch 117, loss 2.3029, train acc 0.099, test acc 0.100, time 410.3 sec
epoch 118, loss 2.3028, train acc 0.100, test acc 0.099, time 410.7 sec
epoch 119, loss 2.3029, train acc 0.099, test acc 0.100, time 410.7 sec
epoch 120, loss 2.3029, train acc 0.099, test acc 0.101, time 410.9 sec
epoch 121, loss 2.3029, train acc 0.098, test acc 0.099, time 410.8 sec
epoch 122, loss 2.3029, train acc 0.098, test acc 0.100, time 410.7 sec
epoch 123, loss 2.3029, train acc 0.100, test acc 0.099, time 411.0 sec
epoch 124, loss 2.3029, train acc 0.099, test acc 0.100, time 411.1 sec
epoch 125, loss 2.3029, train acc 0.099, test acc 0.100, time 410.5 sec
epoch 126, loss 2.3029, train acc 0.096, test acc 0.100, time 410.8 sec
epoch 127, loss 2.3029, train acc 0.099, test acc 0.100, time 411.1 sec
epoch 128, loss 2.3028, train acc 0.100, test acc 0.100, time 410.9 sec
epoch 129, loss 2.3028, train acc 0.099, test acc 0.100, time 41

KeyboardInterrupt: 

In [21]:
for X, y in test_data:
    break
    

In [82]:
import collections
x  = X[1:2].as_in_context(ctx)
print(x.shape)
# for layer in net:
#     try:
#         for sublayer in layer:
#             print(sublayer.collect_params(),"!!!!!")
#     except TypeError :
#         print(sublayer)
net.collect_params
#     print(layer.name,".................", ":\t\t", layer.collect_params(".*weight").get("conv15_weight"))
# layer = net[1]
for sub in layer:
    print(sub.weight.data())
    nd.where

(1, 1, 224, 224)

[[[[nan]]

  [[nan]]

  [[nan]]

  ...

  [[nan]]

  [[nan]]

  [[nan]]]


 [[[nan]]

  [[nan]]

  [[nan]]

  ...

  [[nan]]

  [[nan]]

  [[nan]]]


 [[[nan]]

  [[nan]]

  [[nan]]

  ...

  [[nan]]

  [[nan]]

  [[nan]]]


 ...


 [[[nan]]

  [[nan]]

  [[nan]]

  ...

  [[nan]]

  [[nan]]

  [[nan]]]


 [[[nan]]

  [[nan]]

  [[nan]]

  ...

  [[nan]]

  [[nan]]

  [[nan]]]


 [[[nan]]

  [[nan]]

  [[nan]]

  ...

  [[nan]]

  [[nan]]

  [[nan]]]]
<NDArray 64x64x1x1 @gpu(0)>

[[[[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  [[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  [[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  ...

  [[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  [[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  [[nan nan nan]
   [nan nan nan]
   [nan nan nan]]]


 [[[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  [[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  [[nan nan nan]
   [nan nan nan]
   [nan nan nan]]

  ...

  [[

AttributeError: 'MaxPool2D' object has no attribute 'weight'

In [28]:

lr = 0.1
ctx = gb.try_gpu()
net.initialize( ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_data, test_data = gb.load_data_fashion_mnist(batch_size=128, resize=96)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=150)

  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set for

  data = np.fromstring(fin.read(), dtype=np.uint8)


training on  gpu(0)
epoch 1, loss 0.5049, train acc 0.819, test acc 0.865, time 59.2 sec
epoch 2, loss 0.3455, train acc 0.869, test acc 0.873, time 59.2 sec
epoch 3, loss 0.3119, train acc 0.881, test acc 0.874, time 59.4 sec
epoch 4, loss 0.2891, train acc 0.890, test acc 0.878, time 59.5 sec
epoch 5, loss nan, train acc 0.477, test acc 0.100, time 58.8 sec
epoch 6, loss nan, train acc 0.100, test acc 0.100, time 58.7 sec
epoch 7, loss nan, train acc 0.100, test acc 0.100, time 58.7 sec
epoch 8, loss nan, train acc 0.100, test acc 0.100, time 58.8 sec
epoch 9, loss nan, train acc 0.100, test acc 0.100, time 58.6 sec
epoch 10, loss nan, train acc 0.100, test acc 0.100, time 58.6 sec
epoch 11, loss nan, train acc 0.100, test acc 0.100, time 58.8 sec
epoch 12, loss nan, train acc 0.100, test acc 0.100, time 58.8 sec
epoch 13, loss nan, train acc 0.100, test acc 0.100, time 58.7 sec


KeyboardInterrupt: 

In [7]:

lr = 0.05
ctx = gb.try_gpu()
net.initialize( ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_data, test_data = gb.load_data_fashion_mnist(batch_size=128, resize=96)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=150)

  label = np.fromstring(fin.read(), dtype=np.uint8).astype(np.int32)
  data = np.fromstring(fin.read(), dtype=np.uint8)


training on  gpu(0)
epoch 1, loss 1.9175, train acc 0.327, test acc 0.674, time 65.0 sec
epoch 2, loss 0.6988, train acc 0.737, test acc 0.756, time 59.5 sec
epoch 3, loss 0.4983, train acc 0.812, test acc 0.845, time 59.7 sec
epoch 4, loss 0.4189, train acc 0.842, test acc 0.857, time 59.8 sec
epoch 5, loss 0.3732, train acc 0.858, test acc 0.867, time 59.8 sec
epoch 6, loss 0.3453, train acc 0.869, test acc 0.881, time 59.9 sec
epoch 7, loss 0.3228, train acc 0.877, test acc 0.878, time 60.0 sec
epoch 8, loss 0.3034, train acc 0.885, test acc 0.881, time 60.0 sec
epoch 9, loss 0.2886, train acc 0.890, test acc 0.893, time 59.8 sec
epoch 10, loss 0.2745, train acc 0.895, test acc 0.894, time 59.9 sec
epoch 11, loss 0.2630, train acc 0.900, test acc 0.890, time 60.0 sec
epoch 12, loss 0.2503, train acc 0.905, test acc 0.895, time 59.9 sec
epoch 13, loss 0.2415, train acc 0.908, test acc 0.898, time 59.9 sec
epoch 14, loss 0.2310, train acc 0.913, test acc 0.903, time 59.9 sec
epoch 15,

KeyboardInterrupt: 