# 卷积神经网络(CNN)

In [1]:
import mxnet as mx
from mxnet import autograd,init,nd,gluon
import d2lzh as d2l
import sys
import os
import time
from mxnet.gluon import nn,data as gdata,loss as gloss

In [2]:
# Network definition: two convolution + max-pooling stages followed by
# three dense layers, the last of which has 10 units.
net = nn.Sequential()

# Convolution / pooling stages.
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))

# Dense layers.
net.add(nn.Dense(120, activation='sigmoid'))
net.add(nn.Dense(84, activation='sigmoid'))
net.add(nn.Dense(10))

In [3]:
# Push one random input of shape (1, 1, 28, 28) through the network one layer
# at a time and print the output shape after every layer.
X = nd.random.uniform(shape=(1,1,28,28))
# force_reinit=True re-initializes the parameters even if they already exist.
net.initialize(force_reinit=True)
for layer in net:
    # The output of the previous layer becomes the input of the next one.
    X = layer(X)
    print(layer.name,"output shape:\t",X.shape)

conv0 output shape:	 (1, 6, 24, 24)
pool0 output shape:	 (1, 6, 12, 12)
conv1 output shape:	 (1, 16, 8, 8)
pool1 output shape:	 (1, 16, 4, 4)
dense0 output shape:	 (1, 120)
dense1 output shape:	 (1, 84)
dense2 output shape:	 (1, 10)


## 获取数据和训练模型

In [4]:
# Hyperparameters for the training run.
lr = 0.9
num_epochs = 5
batch_size = 256

# Device chosen by the d2l helper and the Fashion-MNIST data iterators.
ctx = d2l.try_gpu()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Re-initialize the parameters on ctx with Xavier initialization, build an
# SGD trainer over all parameters and run the training loop.
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs)

training on gpu(0)
epoch 1, loss 2.3201, train acc 0.100, test acc 0.100, time 6.3 sec
epoch 2, loss 1.9582, train acc 0.248, test acc 0.559, time 6.8 sec
epoch 3, loss 0.9691, train acc 0.617, test acc 0.682, time 9.0 sec
epoch 4, loss 0.7613, train acc 0.704, test acc 0.732, time 6.6 sec
epoch 5, loss 0.6685, train acc 0.735, test acc 0.756, time 7.6 sec


In [5]:
def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num):
    """Train ``net`` for ``num`` epochs and print statistics after each epoch.

    Args:
        net: Model; called as ``net(X)`` to produce per-class scores.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable handed to ``d2l.evaluate_accuracy`` after every
            epoch to measure test accuracy.
        batch_size: Value passed to ``trainer.step``. The loss is summed over
            the batch before ``backward``, so the trainer is given the batch
            size to rescale the gradient (see the Gluon ``Trainer.step``
            documentation).
        trainer: Object whose ``step`` method applies the parameter update.
        ctx: Context every batch is copied to before the forward pass.
        num: Number of epochs to run.
    """
    print("training on ",ctx)
    loss = gloss.SoftmaxCELoss()
    # Use the ``num`` argument; the previous version ignored it and silently
    # read the notebook-global ``num_epochs`` instead.
    for epoch in range(num):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            # Cast the labels so they can be compared with the float result
            # of argmax.
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        # ``evaluate_accuracy`` is not defined in this file; it is provided
        # by the imported d2l module.
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train_acc %.3f, test acc %.3f, time %.1f sec'
                 % (epoch+1,train_l_sum/n,train_acc_sum/n,test_acc,time.time()-start))

In [None]:
# Reset the hyperparameters, re-initialize the network on ctx with Xavier
# initialization and create a fresh SGD trainer.
lr = 0.9
num_epochs = 5
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)


In [23]:
def show():
    """Plot the first nine images of one test batch with their labels.

    Each title shows the true label on the first line and the predicted
    label on the second. Reads the notebook globals ``test_iter``, ``net``
    and ``ctx``.
    """
    # Only the first batch of the test iterator is needed.
    X, y = next(iter(test_iter))
    true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
    # The network is initialized on ``ctx``, so the batch must be copied to
    # the same context before the forward pass; feeding it from another
    # device fails.
    y_hat = net(X.as_in_context(ctx))
    pred_labels = d2l.get_fashion_mnist_labels(y_hat.argmax(axis=1).asnumpy())
    titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
    d2l.show_fashion_mnist(X[0:9], titles[0:9])

### 使用GPU训练

In [21]:
# Run training through the d2l module's train_ch5 (not the train_ch5 defined
# in this file), using the net, data iterators, trainer and ctx in scope.
d2l.train_ch5(net,train_iter,test_iter,batch_size,trainer,ctx,num_epochs)

training on gpu(0)
epoch 1, loss 1.1577, train acc 0.546, test acc 0.687, time 7.1 sec
epoch 2, loss 0.7997, train acc 0.691, test acc 0.731, time 7.2 sec
epoch 3, loss 0.6730, train acc 0.735, test acc 0.748, time 7.2 sec
epoch 4, loss 0.6215, train acc 0.756, test acc 0.779, time 7.9 sec
epoch 5, loss 0.5778, train acc 0.774, test acc 0.790, time 7.5 sec
