测试梯度更新算法可行性
以及for循环遍历神经网络层可行性

In [1]:
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import autograd as ag
from mxnet import ndarray as nd
from mxnet.gluon import loss 
import copy

def LeNet_(activation='relu'):
    # 获取一个结构定义完整的LeNet卷积神经网络
    # 激活函数可自选 默认为sigmoid
    net = nn.Sequential()
    net.add(nn.Conv2D(channels=6, kernel_size=(5,5), activation=activation),
            nn.MaxPool2D(pool_size=(2,2), strides=(2,2)),
            nn.Conv2D(channels=16, kernel_size=(5,5), activation=activation),
            nn.MaxPool2D(pool_size=(2,2), strides=(2,2)),
            # Dense会默认将(批量大小， 通道， 高， 宽)形状的输入转换成
            # (批量大小， 通道 * 高 * 宽)形状的输入
            nn.Dense(120, activation=activation),
            nn.Dense(84, activation=activation),
            nn.Dense(10))
    return net

In [2]:
import mxnet as mx
net = LeNet_()
input_shape = (1,1,28,28)
ctx = [mx.gpu()]
mx.random.seed(42)
net.load_parameters("recv_model.params",ctx=ctx)
"""
net.initialize(mx.init.Xavier(magnitude=2.24),ctx=ctx)
_ = net(nd.random.uniform(shape=input_shape,ctx=ctx[0]))
"""

'\nnet.initialize(mx.init.Xavier(magnitude=2.24),ctx=ctx)\n_ = net(nd.random.uniform(shape=input_shape,ctx=ctx[0]))\n'

In [3]:
#验证
#val_x,val_y = val_data_set[0],val_data_set[1]
#val_data = mx.io.NDArrayIter(val_x,val_y,batch_size=100)
mnist = mx.test_utils.get_mnist()
val_data = mx.io.NDArrayIter(mnist['test_data'],mnist['test_label'],batch_size=100)    
for batch in val_data:
    data = gluon.utils.split_and_load(batch.data[0],ctx_list=ctx,batch_axis=0)
    label = gluon.utils.split_and_load(batch.label[0],ctx_list=ctx,batch_axis=0)
    outputs = []
    metric = mx.metric.Accuracy()
    for x in data:
        outputs.append(net(x))
    metric.update(label,outputs)
print('验证集准确率 validation acc:%s=%f'%metric.get())

验证集准确率 validation acc:accuracy=0.120000


In [4]:

def init_gradient(net,local_gradient=[]):
    local_gradient['weight'].clear()
    local_gradient['bias'].clear()
    for layer in net:
        try:
            shape_w = layer.weight.data().shape
            shape_b = layer.bias.data().shape
        except:
            continue
        local_gradient['weight'].append(nd.zeros(shape=shape_w,ctx=ctx[0]))
        local_gradient['bias'].append(nd.zeros(shape=shape_b,ctx=ctx[0]))
    
def collect_gradient(net, local_gradient,batch_size):
    idx = 0
    for layer in net:
        try:
            grad_w = layer.weight.data().grad
            grad_b = layer.bias.data().grad
        except:
            continue
        local_gradient['weight'][idx] = local_gradient['weight'][idx] + grad_w.as_in_context(local_gradient['weight'][idx].context)/batch_size
        local_gradient['bias'][idx] = local_gradient['bias'][idx] + grad_b.as_in_context(local_gradient['bias'][idx].context)/batch_size
        idx+=1

def updata_gradient(net,gradient_info,learning_rate):
    # 由Client回传的梯度信息 更新Server模型
    idx = 0
    grad_w = gradient_info['weight']
    grad_b = gradient_info['bias']
    update_flag = False
    for layer in net:
        try:
            layer.weight.data()[:] = layer.weight.data()[:] - learning_rate*grad_w[idx]
            #layer.weight.set_data(layer.weight.data()[:] - learning_rate*gradient_info[idx])
            layer.bias.data()[:] = layer.bias.data()[:] - learning_rate*grad_b[idx]
        except:
            continue
        idx += 1
    """
    if update_flag:
        print("-gradient successfully updated-")
    else:
        print("-gradient failure-")
    """


In [5]:
#训练
train_data = mx.io.NDArrayIter(mnist['train_data'],mnist['train_label'],batch_size=100) 
epoch = 10
metric = mx.metric.Accuracy()
smc_loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.02})
#初始化梯度
gradient_info = {'weight':[],'bias':[]}


In [6]:
# 正常训练
epoch = 10
origin_net = copy.deepcopy(net)
init_gradient(origin_net,gradient_info)
for i in range(epoch):
    train_data.reset()
    for batch in train_data:
        #init_gradient(origin_net,gradient_info)
        data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
        outputs = []
        with ag.record():
            for x,y in zip(data,label):
                z = net(x)
                loss = smc_loss(z, y)
                loss.backward()
                outputs.append(z)
        #收集梯度
        collect_gradient(net,gradient_info,batch_size=batch.data[0].shape[0])
        #updata_gradient(origin_net,gradient_info,learning_rate=0.02)
        metric.update(label,outputs)
        trainer.step(batch.data[0].shape[0])
    name,acc = metric.get()
    metric.reset()
    print('training acc at epoch %d, %s=%f'%(i,name,acc))
    # YA DA ZE
#updata_gradient(origin_net, gradient_info, learning_rate=0.02)

training acc at epoch 0, accuracy=0.727650
training acc at epoch 1, accuracy=0.941783
training acc at epoch 2, accuracy=0.959367
training acc at epoch 3, accuracy=0.967683
training acc at epoch 4, accuracy=0.972750
training acc at epoch 5, accuracy=0.976033
training acc at epoch 6, accuracy=0.978650
training acc at epoch 7, accuracy=0.980533
training acc at epoch 8, accuracy=0.981917
training acc at epoch 9, accuracy=0.983383


In [7]:
grad_w = gradient_info["weight"]

In [8]:
grad_w[0]


[[[[-3.18599200e+00 -9.34846783e+00 -1.43530703e+01 -1.30739603e+01
    -6.91845226e+00]
   [-6.50202084e+00 -1.23138132e+01 -1.50071907e+01 -1.15951147e+01
    -5.85215616e+00]
   [-3.97536135e+00 -6.17571688e+00 -5.38680601e+00 -2.99157929e+00
    -2.00498986e+00]
   [ 1.79946810e-01 -1.55205786e+00  2.14468345e-01  3.80625248e+00
     3.97456408e+00]
   [ 2.21792817e+00  3.17987680e-01  1.92008662e+00  5.72566271e+00
     5.53352594e+00]]]


 [[[ 5.01327562e+00  7.39579439e+00  3.82502556e+00 -2.99571371e+00
    -5.14984655e+00]
   [ 5.84246397e+00  4.46708059e+00 -3.57500005e+00 -1.22951460e+01
    -1.16523972e+01]
   [-3.40581346e+00 -1.05188780e+01 -2.17105751e+01 -2.86208515e+01
    -1.83223839e+01]
   [-1.05427876e+01 -2.17182331e+01 -3.11569157e+01 -3.06445999e+01
    -1.52490826e+01]
   [-8.62746525e+00 -1.48311291e+01 -1.79878521e+01 -1.57289419e+01
    -7.99687719e+00]]]


 [[[-2.41066265e+00 -4.00162554e+00 -2.72957897e+00 -1.44467628e+00
    -2.50280118e+00]
   [ 1.26913

In [9]:
(origin_net[0].weight.data()[:] - net[0].weight.data()[:])/0.02


[[[[-3.1859987e+00 -9.3484316e+00 -1.4353036e+01 -1.3073931e+01
    -6.9184380e+00]
   [-6.5020003e+00 -1.2313809e+01 -1.5007193e+01 -1.1595118e+01
    -5.8521600e+00]
   [-3.9753647e+00 -6.1757288e+00 -5.3867888e+00 -2.9915776e+00
    -2.0050013e+00]
   [ 1.7995983e-01 -1.5520632e+00  2.1447539e-01  3.8062532e+00
     3.9745746e+00]
   [ 2.2179322e+00  3.1797850e-01  1.9200860e+00  5.7256589e+00
     5.5335474e+00]]]


 [[[ 5.0132852e+00  7.3957839e+00  3.8250208e+00 -2.9956918e+00
    -5.1498432e+00]
   [ 5.8424864e+00  4.4670734e+00 -3.5749912e+00 -1.2295164e+01
    -1.1652373e+01]
   [-3.4058161e+00 -1.0518911e+01 -2.1710587e+01 -2.8620882e+01
    -1.8322351e+01]
   [-1.0542776e+01 -2.1718225e+01 -3.1156988e+01 -3.0644632e+01
    -1.5249116e+01]
   [-8.6274700e+00 -1.4831153e+01 -1.7987886e+01 -1.5728939e+01
    -7.9968963e+00]]]


 [[[-2.4106576e+00 -4.0016222e+00 -2.7295852e+00 -1.4446833e+00
    -2.5027902e+00]
   [ 1.2687966e-02 -5.5968779e-01 -2.9167235e-01 -6.4668840e-01
   

In [7]:
val_data.reset()
for batch in val_data:
    data = gluon.utils.split_and_load(batch.data[0],ctx_list=ctx,batch_axis=0)
    label = gluon.utils.split_and_load(batch.label[0],ctx_list=ctx,batch_axis=0)
    outputs = []
    metric = mx.metric.Accuracy()
    for x in data:
        outputs.append(net(x))
    metric.update(label,outputs)
print('验证集准确率 validation acc:%s=%f'%metric.get())

验证集准确率 validation acc:accuracy=0.990000
