In [18]:
%matplotlib inline
from matplotlib_inline import backend_inline
from mxnet import autograd, np, npx
import matplotlib.pyplot as plt
import math
import random


npx.set_np()

x = np.arange(4.0)

# 通过调用attach_grad来为一个张量的梯度分配内存
x.attach_grad()
# 在计算关于x的梯度后，将能够通过'grad'属性访问它，它的值被初始化为0
x.grad
# 把代码放到autograd.record内，以建立计算图
with autograd.record():
    y = 2 * np.dot(x, x)
 
y.backward()
print(y)
print(x.grad)
'''
automatic differentiation
computational graph
backpropagate
'''
w = np.random.normal(0, 0.01, (2, 1))

b = np.zeros(1)
w.attach_grad()
b.attach_grad()

batch_size = 10

def synthetic_data(w, b, num_examples):  #@save
    """生成y=Xw+b+噪声"""
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.dot(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

def set_figsize(figsize=(3.5, 2.5)):  #@save
    """设置matplotlib的图表大小"""
    use_svg_display()
    plt.rcParams['figure.figsize'] = figsize
    
def use_svg_display():  #@save
    """使用svg格式在Jupyter中显示绘图"""
    backend_inline.set_matplotlib_formats('svg')
    
    
true_w = np.array([2, -3.4])
true_b = 4.2

features, labels = synthetic_data(true_w, true_b, 10)

def linreg(X, w, b):  #@save
    """线性回归模型"""
    return np.dot(X, w) + b


def squared_loss(y_hat, y):  #@save
    """均方损失"""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 276

def sgd(params, lr, batch_size):  #@save
    """小批量随机梯度下降"""
    for param in params:
        param[:] = param - lr * param.grad / batch_size

def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # 这些样本是随机读取的，没有特定的顺序
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = np.array(
            indices[i: min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
        
lr = 0.03
num_epochs = 13
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        with autograd.record():
            l = loss(net(X, w, b), y)  # X和y的小批量损失
        # 计算l关于[w,b]的梯度
        l.backward()
        sgd([w, b], lr, batch_size)  # 使用参数的梯度更新参数
    train_l = loss(net(features, w, b), labels)
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
    
print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')
print(f'b的估计误差: {true_b - b}')    
    

28.0
[ 0.  4.  8. 12.]
epoch 1, loss 0.068386
epoch 2, loss 0.068366
epoch 3, loss 0.068347
epoch 4, loss 0.068327
epoch 5, loss 0.068307
epoch 6, loss 0.068287
epoch 7, loss 0.068268
epoch 8, loss 0.068248
epoch 9, loss 0.068228
epoch 10, loss 0.068209
epoch 11, loss 0.068189
epoch 12, loss 0.068169
epoch 13, loss 0.068150
w的估计误差: [ 2.0226386 -3.3875885]
b的估计误差: [4.190946]
