In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

def array2matrix(x):
    if len(x.shape)==1:
        x = x.reshape(x.shape[0],1)
    return x
def show_vector(x):
    #plt.clf()
    plt.figure(figsize=(8, 4))
    plt.plot(np.array(range(len(x))),x,linewidth = 0.3)
    plt.scatter(np.array(range(len(x))),x,s=3)
    plt.grid()
    #plt.pause(0.001)
    #if is_ipython:
    #    display.clear_output(wait=True)
def show_matrix(x):
    #plt.clf()
    plt.figure(figsize=(8, 4))
    for i in range(x.shape[1]):
        plt.plot(np.array(range(len(x[:,i]))),x[:,i],linewidth = 0.3)
        plt.scatter(np.array(range(len(x[:,i]))),x[:,i],s=3)
    plt.grid()
    #plt.pause(0.001)
    #if is_ipython:
    #   display.clear_output(wait=True)
def show(x):
    show_matrix(x) if len(x.shape)>1 else show_vector(x)

In [2]:
"""
    使用numpy实现Boston房价预测
    Step1 数据加载，来源sklearn中的load_boston
    Step2 数据规范化，将X 采用正态分布规范化
    Step3 初始化网络
    Step4 定义激活函数，损失函数，学习率 epoch
    Step5 循环执行：前向传播，计算损失函数，反向传播，参数更新
    Step6 输出训练好的model参数，即w1, w2, b1, b2
""" 
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample

# 数据加载
# X_为506*13矩阵,即506个样本,每个样本13个特征值
# Y为506个数的数组,即506个值
data = load_boston()
X_ = data['data']
Y = data['target']
# 将y转化为矩阵的形式
# 由于Y是向量,因此要转成506*1矩阵
y = Y.reshape(Y.shape[0],1)
# 数据规范化
# min-max方法:
#x = (X_ - np.min(X_,axis=0))/(np.max(X_,axis=0)-np.min(X_,axis=0))
# z-score方法:
x = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)
"""
    初始化网络参数
    定义隐藏层维度，w1,b1,w2,b2
""" 
# 获取x的特征值个数,即13
n_features = x.shape[1]
# 设置训练特征值个数
n_hidden = 10
# 初始化w1,w2,b1,b2
# w1用正态分布初始化,此处返回13*10矩阵
w1 = np.random.randn(n_features,n_hidden)
# b1初始化为全0,此处返回10个0值向量
b1 = np.zeros(n_hidden)
# w2用正态分布初始化,值在[0,1)之间,此处是10*1矩阵
w2 = np.random.rand(n_hidden, 1)
# b2初始化为0
b2 = np.zeros(1)
# 设置学习率,梯度下降步长
learning_rate = 1e-6

In [3]:
# relu函数,激活函数,使学习过程非线性化
def Relu(x):
    """ 这里写你的代码 """
    pass
    result = np.where(x<0,0,x)
    return result

In [4]:
# 定义损失函数,此处用MSE函数,即mean_square_error,均方差
def MSE_loss(y, y_hat):
    """ 这里写你的代码 """
    pass
    return np.mean(np.square(y_hat-y))

In [5]:
# 定义线性回归函数
def Linear(x, w, b):
    """ 这里写你的代码 """
    pass
    return x.dot(w)+b

In [39]:
# 5000次迭代
#plt.ion()
loss = 0
for t in range(5000):
    # 前向传播，计算预测值y (Linear->Relu->Linear)
    """ 这里写你的代码 """
    y1 = Linear(x,w1,b1)
    relu_y1 = Relu(y1)
    y_pre = Linear(relu_y1,w2,b2)
    # 计算损失函数, 并输出每次epoch的loss
    """ 这里写你的代码 """
    loss = MSE_loss(y,y_pre)
    #print('times:{},loss:{}'.format(t,loss))
    # 反向传播，基于loss 计算w1,w2,b1,b2的梯度
    """ 这里写你的代码 """
    # 首先列出一下各参数纬度:
    # x:506*13
    # y_pre:506*1
    # y:506*1
    # y1:506*10
    # relu_y1:506*10
    # L:1
    # w1:13*10
    # w2:10*1
    # b1:10*1
    # b2:1
    # 此处的loss函数为MSE,即:L = (y_pre-y)^2 = y^2 - 2y*y_pre + y_pre^2 = (y.T)(Y)-2(y.T)(y_pre)+(y_pre.T)·(y_pre)
    # 所以对y_pre求导:dL/dy_pre=2(y_pre-y)
    grad_y_pre = 2*(y_pre-y)
    # grad_y_pre:506*1
    # 对w2求导:dL/dw2 = (dL/dy_pre)*(dy_pre/dw2)=relu_y1.T·grad_y_pre
    # 因此grad_w2与w2维度相同:10*1
    grad_w2 = relu_y1.T.dot(grad_y_pre)
    # 对relu_y1求导:dL/drelu_y1=(dL/dy_pre)*(dy_pre/drelu_y1)=grad_y_pre·w2.T
    # grad_relu_y1:506*10
    grad_relu_y1 = grad_y_pre.dot(w2.T)
    grad_relu_y1[y1<0]=0
    grad_w1 = x.T.dot(grad_relu_y1)
    grad_b2 = sum(grad_y_pre)
    grad_b1 = grad_relu_y1.T.dot(grad_y_pre).squeeze()
    # 更新权重, 对w1, w2, b1, b2进行更新
    """ 这里写你的代码 """
    w1 -= learning_rate*grad_w1
    w2 -= learning_rate*grad_w2
    b1 -= learning_rate*grad_b1
    b2 -= learning_rate*grad_b2
print(loss)
#plt.ioff()

84.22118423518296


In [40]:
# 得到最终的w1, w2
print('w1={} \n w2={}'.format(w1, w2))

w1=[[ 7.47766905e-01 -1.94645902e+00  1.82150124e-01 -1.61544407e-01
  -1.76890087e+00 -3.23586127e+00 -1.70714175e+00 -1.89407137e+00
  -4.69713359e-01 -3.76206922e-01]
 [ 2.63598879e+00 -1.51054109e+00  1.56358115e+00  1.58697742e-01
  -2.00420487e+00 -7.59391165e-02  6.47383974e-01 -4.22705433e-01
   1.46255914e+00  4.33050005e+00]
 [ 5.33128508e-02  1.17787764e+00 -4.12605841e-01 -1.14262004e+00
  -1.35906966e+00  8.26312321e-01 -3.27999749e-01 -1.09064364e+00
   1.50189780e+00  4.67473048e-01]
 [-1.60470354e+00 -3.93449514e-04  2.25956569e+00 -1.84835659e-01
  -1.13833647e+00  7.75464894e-01 -8.26183673e-01 -2.72944317e+00
   4.29823314e-01 -5.25804244e-01]
 [ 9.71387027e-02 -1.43273877e+00 -2.48654006e+00 -1.00844614e+00
  -8.05879196e-01 -1.53799288e+00  1.04077006e+00  5.24333120e-01
   4.88681475e-01 -1.25423556e+00]
 [ 3.74570194e+00 -2.18242902e+00 -9.40094793e-01  7.52809828e-01
  -5.87814398e-01  2.25216141e+00 -4.38200470e-01  1.04159191e+00
   6.15367775e-01  1.19734187e