## 用numpy实现了d2l-08

In [3]:
import numpy as np

In [21]:
# Generate synthetic linear-regression data.
def synthetic_data(w, num_examples, noise_std=0.01):
    """Sample standard-normal features and noisy linear labels.

    Args:
        w: True weights; anything ``np.asarray`` accepts, either 1-D of
            length ``d`` or a ``(d, 1)`` column vector.
        num_examples: Number of rows to generate.
        noise_std: Standard deviation of the zero-mean Gaussian noise added
            to the labels. Defaults to 0.01, the value that used to be
            hard-coded.

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(num_examples, d)`` and ``y``
        is ``X.dot(w)`` plus noise (same shape as ``X.dot(w)``).
    """
    w = np.asarray(w)
    X = np.random.randn(num_examples, len(w))
    y = X.dot(w)
    # Perturb the labels slightly so the data is not perfectly linear.
    y += np.random.normal(0, noise_std, y.shape)
    return X, y


# Ground-truth weights, stored as a (2, 1) column vector.
true_w = np.array([[8.0], [-6.2]])
# Draw 1000 synthetic examples from the model defined by true_w.
features, labels = synthetic_data(true_w, num_examples=1000)

# Mini-batch generator: every next(g) yields up to batch_size examples.
def data_iter(batch_size, features, labels):
    """Yield shuffled ``(features, labels)`` mini-batches, covering the data once.

    Args:
        batch_size: Maximum number of examples per batch.
        features: Array indexed along its first axis by example.
        labels: Array aligned with ``features`` along the first axis.

    Yields:
        Tuples ``(features[idx], labels[idx])`` where ``idx`` is the next
        slice of a random permutation of the example indices.
    """
    total = len(features)
    # Shuffle the example indices so batches are drawn in random order.
    shuffled = np.random.permutation(np.arange(total))
    for offset in range(0, total, batch_size):
        # Slicing clamps at the end of the array, so the last batch may be
        # shorter than batch_size.
        batch_idx = shuffled[offset:offset + batch_size]
        yield features[batch_idx], labels[batch_idx]

In [20]:
# Network parameters and hyper-parameters, kept as plain module-level
# variables instead of being wrapped in a class.

# Weights: a (2, 1) column vector drawn from N(0, 0.01^2).
n_w = np.random.normal(loc=0, scale=0.01, size=(2, 1))
# Learning rate.
lr = 0.1
# Number of examples per mini-batch.
batch_size = 100
# Number of passes over the training data.
num_epochs = 5


# Squared-error loss.
def square_loss(y, t):
    """Return the element-wise squared error ``(y - t)**2 / 2``.

    Args:
        y: Predictions.
        t: Targets, broadcastable against ``y``.
    """
    residual = y - t
    return residual ** 2 / 2


# Forward pass. Because the loss is the squared error, the output layer is
# simply the identity function.
def predict(X, w=None):
    """Return the linear model output ``X.dot(w)``.

    Args:
        X: Feature ndarray whose last axis matches the first axis of the
            weights.
        w: Optional weights to use. When omitted, the module-level ``n_w``
            is used, which is the original behavior.

    Returns:
        The result of ``X.dot(weights)``.
    """
    # n_w is only looked up when no explicit weights are supplied.
    weights = n_w if w is None else w
    return X.dot(weights)

# Run the network on a batch and compute its loss. X and t hold a whole
# batch, so the per-example losses are averaged into a single number.
def loss_net(X, t):
    """Return the mean squared-error loss of the model on ``(X, t)``.

    Args:
        X: Batch of features passed to ``predict``.
        t: Targets for the batch.
    """
    per_example = square_loss(predict(X), t)
    return per_example.mean()

# Generic numerical gradient: visits every element of an ndarray.
def numerical_gradient_common(f, x, h=1e-4):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Each element of ``x`` is nudged to ``value + h`` and ``value - h``
    *in place* and ``f(x)`` is evaluated both times. Because the
    perturbation happens on ``x`` itself, ``f`` may either use its argument
    or read the same array through a closure or global.

    Args:
        f: Callable that is passed ``x`` and returns a scalar.
        x: Floating-point (or complex) ndarray. It is modified temporarily
            and every element is restored before returning, including when
            ``f`` raises.
        h: Finite-difference step size. Defaults to 1e-4, the value that
            used to be hard-coded.

    Returns:
        An ndarray shaped like ``x`` holding
        ``(f(x + h*e_i) - f(x - h*e_i)) / (2 * h)`` for every element ``i``.

    Raises:
        TypeError: If ``x`` is not an ndarray with an inexact dtype. Writing
            ``value + h`` into an integer array would truncate the
            perturbation and silently yield wrong gradients.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.inexact):
        raise TypeError("x must be a floating-point numpy.ndarray")

    grad = np.zeros_like(x)
    # np.ndindex yields every multi-index of x, which lets us both read and
    # overwrite individual elements regardless of the array's rank.
    for idx in np.ndindex(x.shape):
        saved = x[idx]
        try:
            x[idx] = saved + h
            f_plus = f(x)
            x[idx] = saved - h
            f_minus = f(x)
        finally:
            # Always undo the perturbation so x is unchanged for the caller.
            x[idx] = saved
        grad[idx] = (f_plus - f_minus) / (2 * h)
    return grad


In [19]:

# Re-draw the initial weights before training.
n_w = np.random.normal(0, 0.01, (2, 1))


# Loss of the current mini-batch. The argument is ignored: the gradient
# routine is handed n_w itself, and X and t are looked up at call time, so
# each call sees the batch currently being processed.
def loss_grad(_):
    return loss_net(X, t)


# Train for num_epochs passes over the data.
for i in range(num_epochs):

    for X, t in data_iter(batch_size, features, labels):
        # Numerical gradient of the batch loss with respect to n_w.
        grad = numerical_gradient_common(loss_grad, n_w)
        # Gradient-descent step, applied to n_w in place.
        n_w -= lr * grad

    # End of the epoch: report the loss over the whole data set.
    l = loss_net(features, labels)
    print(l)

# Finally, print the difference between the learned and the true weights.
print(n_w - true_w)

6.541955377757869
0.8745662051507856
0.11697001743576206
0.015657134667008667
0.0021433043742800323
[[-0.05504555]
 [ 0.03719021]]
