实验MLP和LeNet在集中数据集训练下的模型收敛情况

In [1]:
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import autograd as ag
from mxnet import ndarray as nd
from Algorithm.CNN import CNN_Model
from Algorithm.MLP import MLP
from Tools import utils

In [2]:
# Build the two networks being compared: a LeNet-style CNN and an MLP.
LeNet = CNN_Model('LeNet')
mlp = MLP()
# Devices used below for parameter initialization and for splitting batches.
# NOTE(review): presumably a list of GPU contexts (it is indexed as ctx[0]
# and passed as ctx_list later) -- confirm against Tools.utils.
ctx = utils.try_all_gpus()

In [3]:
# Xavier-initialize both networks on the selected devices.
LeNet.initialize(init=mx.initializer.Xavier(), ctx=ctx)
mlp.initialize(init=mx.initializer.Xavier(), ctx=ctx)

# Fetch MNIST and wrap the train/test splits in batch iterators.
mnist = mx.test_utils.get_mnist()
batch_size = 100
train_data = mx.io.NDArrayIter(
    data=mnist['train_data'],
    label=mnist['train_label'],
    batch_size=batch_size,
    shuffle=True,
)
val_data = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=batch_size,
)

In [4]:
def validation(val_data, net):
    """Evaluate ``net`` on ``val_data`` and report classification accuracy.

    The iterator is reset, every batch is split across the devices in the
    module-level ``ctx`` list, and predictions are accumulated into an
    ``mx.metric.Accuracy`` metric.

    Parameters
    ----------
    val_data : resettable batch iterator whose batches expose ``data[0]``
        and ``label[0]``.
    net : callable network applied to each per-device slice.

    Returns
    -------
    The accuracy value reported by the metric (also printed).
    """
    accuracy = mx.metric.Accuracy()
    val_data.reset()
    for batch in val_data:
        inputs = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
        targets = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
        # One forward pass per device slice.
        predictions = [net(x) for x in inputs]
        accuracy.update(targets, predictions)
    name, acc = accuracy.get()
    print('validation acc: %s=%f'%(name,acc))
    return acc

def train(net, data, epoch, stop_acc=None):
    """Train ``net`` with SGD for ``epoch`` passes over ``data``.

    After each epoch the accumulated training accuracy is printed and the
    metric is reset.

    Parameters
    ----------
    net : network whose parameters are already initialized on ``ctx``.
    data : resettable batch iterator whose batches expose ``data[0]`` and
        ``label[0]`` (e.g. ``mx.io.NDArrayIter``).
    epoch : int, number of passes over ``data``.
    stop_acc : unused. Kept (now optional) so that both three-argument and
        four-argument calls work.
    """
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.02})
    metric = mx.metric.Accuracy()
    softmax_cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
    for i in range(epoch):
        # Iterate the iterator that was passed in, not a module-level global.
        data.reset()
        for batch in data:
            # Distinct names so the ``data`` argument is not shadowed.
            inputs = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            labels = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            with ag.record():
                for x, y in zip(inputs, labels):
                    z = net(x)
                    loss = softmax_cross_entropy_loss(z, y)
                    loss.backward()
                    outputs.append(z)
            metric.update(labels, outputs)
            # Normalize the gradient by the full (pre-split) batch size.
            trainer.step(batch.data[0].shape[0])
        name, acc = metric.get()
        metric.reset()
        print('training acc at epoch %d: %s=%f'%(i, name, acc))

def train_till_acc(net, stop_acc, lr, batch_size, max_epochs=None):
    """Train ``net`` with SGD until validation accuracy reaches ``stop_acc``.

    Fresh MNIST train/test iterators are built from the module-level
    ``mnist`` dict using ``batch_size``. After every epoch the training
    accuracy is printed and ``validation`` is run on the test split;
    training stops as soon as the validation accuracy is at least
    ``stop_acc``.

    Parameters
    ----------
    net : network whose parameters are already initialized on ``ctx``.
    stop_acc : float, validation accuracy at which to stop.
    lr : float, SGD learning rate.
    batch_size : int, batch size for both iterators.
    max_epochs : int or None, optional upper bound on the number of epochs.
        ``None`` (the default) keeps the original unbounded behaviour.
    """
    train_data = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
    val_data = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    softmax_cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
    # The original referenced an undefined ``metric``; create it locally.
    metric = mx.metric.Accuracy()
    i = 0
    while max_epochs is None or i < max_epochs:
        train_data.reset()
        metric.reset()
        for batch in train_data:
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            with ag.record():
                for x, y in zip(data, label):
                    z = net(x)
                    loss = softmax_cross_entropy_loss(z, y)
                    loss.backward()
                    outputs.append(z)
            metric.update(label, outputs)
            # Normalize the gradient by the full (pre-split) batch size.
            trainer.step(batch.data[0].shape[0])
        i += 1
        # Report the real training accuracy; validation() prints its own line.
        name, train_acc = metric.get()
        print('training acc at epoch %d: %s=%f'%(i, name, train_acc))
        acc = validation(val_data, net)
        if acc >= stop_acc:
            break
    else:
        # Only reached when the epoch cap ran out before hitting stop_acc.
        print('stopped after %d epochs without reaching acc=%f'%(i, stop_acc))

In [5]:
# Train the MLP for a single epoch on the MNIST training iterator.
train(mlp, train_data, 1)

training acc at epoch 0: accuracy=0.794767


In [6]:
# Evaluate the MLP on the MNIST test iterator.
validation(val_data, mlp)

validation acc: accuracy=0.897000


0.897

In [7]:
# Train LeNet for a single epoch on the MNIST training iterator.
train(LeNet,train_data,1)

training acc at epoch 0: accuracy=0.737350


In [8]:
# Evaluate LeNet on the MNIST test iterator.
validation(val_data, LeNet)

validation acc: accuracy=0.919700


0.9197

In [10]:
# Inspect the weight matrix of the MLP's first layer.
mlp[0].weight.data()


[[ 0.0079186   0.01506139  0.03490832 ...  0.04762334 -0.05183759
  -0.02541927]
 [ 0.06057825 -0.07930771  0.07600733 ... -0.03930367  0.02262817
  -0.05705202]
 [-0.04488811 -0.02812008  0.00375759 ... -0.03395101 -0.00274882
  -0.0018217 ]
 ...
 [ 0.02491552 -0.04777068 -0.06245412 ... -0.05506866  0.00573815
  -0.04120179]
 [ 0.01849365  0.06660184  0.06971296 ... -0.01355646 -0.01250703
   0.03720766]
 [ 0.05493402  0.01987972  0.0336106  ...  0.02365118 -0.05372665
  -0.0321993 ]]
<NDArray 128x784 @gpu(0)>

In [13]:
# Zero tensor with the same shape as the first MLP layer's weights, placed on the first device.
zero = nd.zeros(shape=mlp[0].weight.data().shape,ctx=ctx[0])

In [14]:
# Overwrite the first MLP layer's weights in place via slice assignment.
mlp[0].weight.data()[:] = zero[:]

In [15]:
# Display the first MLP layer's weights again to check the in-place overwrite.
mlp[0].weight.data()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 128x784 @gpu(0)>

In [16]:
# Zero every weight and bias in LeNet, in place.
# The original used ``self.__ctx`` (undefined at notebook scope) inside a bare
# ``except``, so the NameError was swallowed and nothing was ever zeroed.
for layer in LeNet:
    for param_name in ('weight', 'bias'):
        # Layers without this parameter are skipped explicitly instead of
        # relying on a catch-all exception handler.
        param = getattr(layer, param_name, None)
        if param is None:
            continue
        param_data = param.data()
        param_data[:] = nd.zeros(shape=param_data.shape, ctx=ctx[0])

In [20]:
# Shape of the weight tensor of LeNet's first layer.
LeNet[0].weight.data().shape

(6, 1, 5, 5)

In [22]:
# Zero tensor with the same shape as LeNet's first-layer weights, placed on the first device.
z = nd.zeros(shape=LeNet[0].weight.data().shape,ctx=ctx[0])

In [23]:
# Overwrite LeNet's first-layer weights in place via slice assignment.
LeNet[0].weight.data()[:] = z