In [1]:
#-*-coding:utf-8-*- # 如果使用py文件，需声明此文件为 UTF-8 格式，这样可以使用中文注释。如果使用ipynb文件，则无需此行

import logging
import math
import random
import mxnet as mx # 导入 MXNet 库
import numpy as np # 导入 NumPy 库，这是 Python 常用的科学计算库

In [2]:
# Enable debug-level output: lower the root logger's threshold to DEBUG.
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)

In [3]:
# Hyperparameters

# Number of data points used for training.
n_sample = 10000
# Mini-batch size.
batch_size = 10
# Learning rate passed to the optimizer.
learning_rate = 0.1
# Number of training epochs.
n_epoch = 10

In [4]:
# Training data and the iterator that feeds it to the model.

# Every data point is a pair of random numbers drawn with random.uniform(0, 1).
train_in = [[random.uniform(0, 1) for _ in range(2)] for _ in range(n_sample)]

# The expected output of every data point is the larger of its two inputs.
train_out = [max(pair[0], pair[1]) for pair in train_in]

# Wrap inputs and targets in a shuffling mini-batch iterator; the targets are
# registered under the label name 'reg_label'.
train_iter = mx.io.NDArrayIter(
    data=np.array(train_in),
    label={'reg_label': np.array(train_out)},
    batch_size=batch_size,
    shuffle=True,
)

In [8]:
# Network definition: two hidden fully-connected layers of `n_hidden` units each,
# every one followed by a ReLU non-linearity, then a single linear output unit
# trained as a regression.
# The hidden width is named once so that this description and the code cannot
# drift apart (the earlier comment claimed 10 units while the code built 3).
# NOTE: a saved parameter printout showing 10-unit layers stems from an earlier
# run of the notebook, not from this definition.
n_hidden = 3

src = mx.sym.Variable('data')  # input layer
fc1 = mx.sym.FullyConnected(data=src, num_hidden=n_hidden, name='fc1')  # fully-connected layer
act1 = mx.sym.Activation(data=fc1, act_type="relu", name='act1')  # ReLU layer
fc2 = mx.sym.FullyConnected(data=act1, num_hidden=n_hidden, name='fc2')  # fully-connected layer
act2 = mx.sym.Activation(data=fc2, act_type="relu", name='act2')  # ReLU layer
fc3 = mx.sym.FullyConnected(data=act2, num_hidden=1, name='fc3')  # fully-connected layer, scalar output
net = mx.sym.LinearRegressionOutput(data=fc3, name='reg')  # output layer

# 'reg_label' is the same label name under which the training iterator
# provides the expected outputs.
module = mx.mod.Module(symbol=net, label_names=['reg_label'])

In [9]:
# Train the model.

# Report the mean squared error (MSE) while training.
mse_metric = mx.metric.create('mse')
# Uniform initializer with scale 0.5 (described in the original notes as
# drawing values uniformly from [-0.5, 0.5]).
uniform_init = mx.initializer.Uniform(0.5)

module.fit(
    train_iter,                 # iterator over the training data
    eval_data=None,             # training only, no evaluation data
    eval_metric=mse_metric,
    initializer=uniform_init,
    optimizer='sgd',            # plain stochastic gradient descent
    optimizer_params={'learning_rate': learning_rate},
    num_epoch=n_epoch,          # number of training epochs
    batch_end_callback=None,    # keep the log output short
    epoch_end_callback=None,    # keep the log output short
)

INFO:root:Epoch[0] Train-mse=0.023414
INFO:root:Epoch[0] Time cost=0.716
INFO:root:Epoch[1] Train-mse=0.005282
INFO:root:Epoch[1] Time cost=0.698
INFO:root:Epoch[2] Train-mse=0.000843
INFO:root:Epoch[2] Time cost=0.664
INFO:root:Epoch[3] Train-mse=0.000063
INFO:root:Epoch[3] Time cost=0.668
INFO:root:Epoch[4] Train-mse=0.000010
INFO:root:Epoch[4] Time cost=0.658
INFO:root:Epoch[5] Train-mse=0.000005
INFO:root:Epoch[5] Time cost=0.647
INFO:root:Epoch[6] Train-mse=0.000003
INFO:root:Epoch[6] Time cost=0.658
INFO:root:Epoch[7] Train-mse=0.000002
INFO:root:Epoch[7] Time cost=0.652
INFO:root:Epoch[8] Train-mse=0.000002
INFO:root:Epoch[8] Time cost=0.711
INFO:root:Epoch[9] Train-mse=0.000001
INFO:root:Epoch[9] Time cost=0.665


In [7]:
# get_params() hands back a sequence of parameter dictionaries (name -> NDArray);
# presumably (arg_params, aux_params) - confirm against the MXNet Module docs.
# Each dictionary is printed as a whole, not one parameter at a time.
param_dicts = module.get_params()
for param_dict in param_dicts:
    print(param_dict)

{'fc1_weight': 
[[-0.01184775  0.16463038]
 [ 0.27909833  0.48233786]
 [-0.00098594  0.47086403]
 [ 0.02674723  0.44647846]
 [-0.08945666  0.05756824]
 [ 0.25086388 -0.13035302]
 [-0.0624128  -0.20246539]
 [ 0.67430329 -0.64340603]
 [ 0.25227246 -0.29551166]
 [-0.11655849 -0.02233487]]
<NDArray 10x2 @cpu(0)>, 'fc1_bias': 
[ 0.00535045  0.04567979  0.00130494  0.01639411  0.10619283  0.12758821
  0.         -0.01135134 -0.21885937  0.        ]
<NDArray 10 @cpu(0)>, 'fc2_weight': 
[[ 0.31813049  0.37764475  0.08855581  0.03124433  0.07548539 -0.11491889
   0.42559665  0.41615865 -0.39416015 -0.16260383]
 [-0.40435246  0.17725928 -0.46859649 -0.11560615  0.3497932   0.4769921
   0.27815676 -0.42607126  0.36124319  0.37008727]
 [ 0.50743699  0.04484019  0.36397478  0.35658357 -0.03057637  0.01213695
   0.2805292   0.26668847 -0.34359515  0.22063267]
 [ 0.13464035  0.06703536 -0.38952747  0.00824052  0.44123992  0.28398368
   0.02184832 -0.4565787  -0.09770405 -0.02639958]
 [-0.21842466 -0.