### 使用Gluon来实现线性回归

#### 创建数据集

In [1]:
import mxnet as mx
from mxnet import autograd
from mxnet import gluon
from mxnet import ndarray as nd

# Seed MXNet's global random number generator so the random draws below are
# repeatable across kernel restarts.
SEED = 42
mx.random.seed(SEED)

num_inputs = 2        # number of feature columns per example
num_examples = 1000   # number of rows in the synthetic dataset

# Ground-truth parameters that the trained model is expected to recover.
true_w = [2, -3.4]
true_b = 4.2
# One weight per feature column; fail fast if the two constants drift apart.
assert len(true_w) == num_inputs, "true_w must contain one weight per input feature"

# Feature matrix of shape (num_examples, num_inputs), drawn with
# nd.random_normal using its default distribution parameters.
X = nd.random_normal(shape=(num_examples, num_inputs))
# Labels: weighted sum of the feature columns plus the bias. The terms are
# accumulated left to right (w0*x0 + w1*x1 + ...) and the bias is added last.
y = sum(w * X[:, i] for i, w in enumerate(true_w)) + true_b
# Perturb the labels with small random noise (scaled by 0.01).
y += .01 * nd.random_normal(shape=y.shape)

#### 数据读取

In [2]:
# Number of examples served per mini-batch.
batch_size = 10
# Pair every feature row in X with its label in y.
dataset = gluon.data.ArrayDataset(X, y)
# Iterate over the dataset in shuffled mini-batches of `batch_size` examples.
data_iter = gluon.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [3]:
# Pull a single mini-batch from the loader and print it as a sanity check.
data, label = next(iter(data_iter))
print(data, label)


[[ 1.43091547 -0.01514311]
 [ 0.18769865 -0.52407396]
 [-0.08445202 -0.53799033]
 [ 2.00276661  0.04691195]
 [ 0.32510808 -1.30023408]
 [-0.01416099  0.27084762]
 [ 0.21322168 -2.43881702]
 [ 1.97054493  1.07400227]
 [ 0.71944642 -0.12684734]
 [-0.18353732  0.58039618]]
<NDArray 10x2 @cpu(0)> 
[  7.1033082    6.34757566   5.85545254   8.04833603   9.26425552
   3.24432778  12.92499447   4.48208475   6.0792551    1.84583116]
<NDArray 10 @cpu(0)>


#### 定义模型

In [4]:
net = gluon.nn.Sequential() # Define an empty model; layers are added to it afterwards

In [5]:
# Add the network layer.
# In Gluon a linear model is expressed as a Dense layer. The only argument Dense
# requires is the number of output units, which is 1 for this linear model.
net.add(gluon.nn.Dense(1)) # single output unit

In [6]:
# Initialize the model's weights.
net.initialize() # use the default random initialization method

In [7]:
# Loss function.
square_loss = gluon.loss.L2Loss() # Gluon's built-in squared-error loss

In [8]:
# Optimizer: SGD with a learning rate of 0.1, applied to every parameter
# collected from `net`.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

#### 训练

In [10]:
# Number of full passes over the dataset.
epochs = 5
for e in range(epochs):
    # Running sum of the per-example losses seen during this epoch.
    total_loss = 0
    for data, label in data_iter:
        # Record the forward pass so gradients can be computed afterwards.
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        # Trainer.step divides the gradient by the value passed in. Use the
        # number of rows actually present in this batch rather than a separately
        # declared constant, so the scaling stays correct if the loader's batch
        # size changes or the final batch is shorter than the others.
        trainer.step(data.shape[0])
        total_loss += nd.sum(loss).asscalar()
    # Report the mean per-example loss for the epoch (epoch index is 0-based).
    print('Epoch %d, average loss: %f' %(e, total_loss / num_examples))

Epoch 0, average loss: 0.906724
Epoch 1, average loss: 0.000051
Epoch 2, average loss: 0.000051
Epoch 3, average loss: 0.000051
Epoch 4, average loss: 0.000051


In [13]:
# Take the first layer of `net` (the Dense layer added earlier) so its learned
# parameters can be inspected.
dense = net[0]

In [14]:
# Display the ground-truth weights next to the learned weights for comparison.
true_w, dense.weight.data()

([2, -3.4], 
 [[ 1.99963272 -3.39924645]]
 <NDArray 1x2 @cpu(0)>)

In [15]:
# Display the ground-truth bias next to the learned bias for comparison.
true_b, dense.bias.data()

(4.2, 
 [ 4.19937277]
 <NDArray 1 @cpu(0)>)

In [16]:
# Print the built-in documentation of Trainer.step (its batch_size and
# ignore_stale_grad parameters).
help(trainer.step)

Help on method step in module mxnet.gluon.trainer:

step(batch_size, ignore_stale_grad=False) method of mxnet.gluon.trainer.Trainer instance
    Makes one step of parameter update. Should be called after
    `autograd.compute_gradient` and outside of `record()` scope.
    
    Parameters
    ----------
    batch_size : int
        Batch size of data processed. Gradient will be normalized by `1/batch_size`.
        Set this to 1 if you normalized loss manually with `loss = mean(loss)`.
    ignore_stale_grad : bool, optional, default=False
        If true, ignores Parameters with stale gradient (gradient that has not
        been updated by `backward` after last step) and skip update.



In [17]:
# Print the built-in documentation of the Parameter object that holds the
# Dense layer's weight.
help(dense.weight)

Help on Parameter in module mxnet.gluon.parameter object:

class Parameter(builtins.object)
 |  A Container holding parameters (weights) of Blocks.
 |  
 |  :py:class:`Parameter` holds a copy of the parameter on each :py:class:`Context` after
 |  it is initialized with ``Parameter.initialize(...)``. If :py:attr:`grad_req` is
 |  not ``'null'``, it will also hold a gradient array on each :py:class:`Context`::
 |  
 |      ctx = mx.gpu(0)
 |      x = mx.nd.zeros((16, 100), ctx=ctx)
 |      w = mx.gluon.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
 |      b = mx.gluon.Parameter('fc_bias', shape=(64,), init=mx.init.Zero())
 |      w.initialize(ctx=ctx)
 |      b.initialize(ctx=ctx)
 |      out = mx.nd.FullyConnected(x, w.data(ctx), b.data(ctx), num_hidden=64)
 |  
 |  Parameters
 |  ----------
 |  name : str
 |      Name of this parameter.
 |  grad_req : {'write', 'add', 'null'}, default 'write'
 |      Specifies how to update gradient to grad arrays.
 |  
 |      - ``'wr