In [1]:
# mini-batch SGD训练下
# 梯度传输

from mxnet import gluon
from mxnet.gluon import nn
from mxnet import autograd as ag
from mxnet import ndarray as nd
from mxnet.gluon import loss 
import copy

def LeNet_(activation='relu'):
    # 获取一个结构定义完整的LeNet卷积神经网络
    # 激活函数可自选 默认为sigmoid
    net = nn.Sequential()
    net.add(nn.Conv2D(channels=6, kernel_size=(5,5), activation=activation),
            nn.MaxPool2D(pool_size=(2,2), strides=(2,2)),
            nn.Conv2D(channels=16, kernel_size=(5,5), activation=activation),
            nn.MaxPool2D(pool_size=(2,2), strides=(2,2)),
            # Dense会默认将(批量大小， 通道， 高， 宽)形状的输入转换成
            # (批量大小， 通道 * 高 * 宽)形状的输入
            nn.Dense(120, activation=activation),
            nn.Dense(84, activation=activation),
            nn.Dense(10))
    return net

In [2]:
import mxnet as mx
net = LeNet_()
input_shape = (1,1,28,28)
ctx = [mx.gpu()]
mx.random.seed(42)
net.initialize(mx.init.Xavier(magnitude=2.24),ctx=ctx)
_ = net(nd.random.uniform(shape=input_shape,ctx=ctx[0]))

In [3]:
#验证
mnist = mx.test_utils.get_mnist()
val_data = mx.io.NDArrayIter(mnist['test_data'],mnist['test_label'],batch_size=100)    
for batch in val_data:
    data = gluon.utils.split_and_load(batch.data[0],ctx_list=ctx,batch_axis=0)
    label = gluon.utils.split_and_load(batch.label[0],ctx_list=ctx,batch_axis=0)
    outputs = []
    metric = mx.metric.Accuracy()
    for x in data:
        outputs.append(net(x))
    metric.update(label,outputs)
print('初始模型验证准确率 validation acc:%s=%f'%metric.get())

初始模型验证准确率 validation acc:accuracy=0.100000


In [4]:

def grad_dict():
    grad_dict = {}
    grad_dict['weight'] = []
    grad_dict['bias'] = []
    """
    for layer in net:
        try:
            shape_w = layer.weight.data().shape
            shape_b = layer.bias.data().shape
        except:
            continue
        grad_dict['weight'].append(nd.zeros(shape=shape_w,ctx=ctx[0]))
        grad_dict['bias'].append(nd.zeros(shape=shape_b,ctx=ctx[0]))
    """
    return grad_dict

def collect_gradient(net,grad_dict_list,batch_size):
    #idx = 0
    grad_dic = grad_dict()
    for layer in net:
        try:
            grad_w = layer.weight.data().grad
            grad_b = layer.bias.data().grad
        except:
            continue
        grad_dic["weight"].append(grad_w/batch_size)
        grad_dic["bias"].append(grad_b/batch_size)
        #idx+=1
    grad_dict_list.append(grad_dic)

def updata_gradient(net,grad_dict_list,learning_rate):
    # 由Client回传的梯度信息 更新Server模型
    idx = 0
    for grad_dic in grad_dict_list:
        grad_w = grad_dict['weight']
        grad_b = grad_dict['bias']
        update_flag = False
        for layer in net:
            try:
                layer.weight.data()[:] = layer.weight.data()[:] - learning_rate*grad_w[idx]
                #layer.weight.set_data(layer.weight.data()[:] - learning_rate*gradient_info[idx])
                layer.bias.data()[:] = layer.bias.data()[:] - learning_rate*grad_b[idx]
            except:
                continue
            idx += 1
    """
    if update_flag:
        print("-gradient successfully updated-")
    else:
        print("-gradient failure-")
    """


In [5]:
#训练
train_data = mx.io.NDArrayIter(mnist['train_data'],mnist['train_label'],batch_size=100) 
epoch = 10
metric = mx.metric.Accuracy()
smc_loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.02})

In [6]:
# 正常训练
origin_net = copy.deepcopy(net)
grad_dict_list = []
for i in range(epoch):
    train_data.reset()
    for batch in train_data:
        data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
        outputs = []
        with ag.record():
            for x,y in zip(data,label):
                z = net(x)
                loss = smc_loss(z, y)
                loss.backward()
                outputs.append(z)
        #收集梯度
        collect_gradient(net,grad_dict_list,batch_size=batch.data[0].shape[0])
        #updata_gradient(origin_net,gradient_info,learning_rate=0.02)
        metric.update(label,outputs)
        trainer.step(batch.data[0].shape[0])
    name,acc = metric.get()
    metric.reset()
    print('training acc at epoch %d, %s=%f'%(i,name,acc))
    # YA DA ZE
updata_gradient(origin_net, grad_dict_list, learning_rate=0.02)

UnboundLocalError: local variable 'grad_dict' referenced before assignment