### 梯度的调试（如何知道自己的梯度公式是否正确）

 <img src="../img/8.png" width="500" align="left"/>

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's global random generator so the synthetic data is reproducible.
np.random.seed(666)
# Feature matrix: 1000 samples x 10 features, drawn uniformly from [0, 1).
X = np.random.random(size=(1000,10))

In [3]:
# Ground-truth weights 1.0 .. 11.0 (11 values) used below to generate y.
true_W = np.arange(1,12,dtype=float)

In [8]:
# Prepend a column of ones to X so the first entry of true_W acts as the intercept.
X_b = np.hstack([np.ones((len(X),1)), X])

# Targets: the linear model X_b . true_W plus one np.random.normal sample per row
# (NumPy's defaults for that call are mean 0 and standard deviation 1).
y = X_b.dot(true_W) + np.random.normal(size=1000)

In [9]:
# Inspect the shapes of the feature matrix and of the target vector.
X.shape
y.shape

(1000, 10)

(1000,)

In [10]:
# Display the ground-truth weights.
true_W

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [11]:
def J(W,X_b,y):
    """Return the mean squared error of the linear prediction ``X_b.dot(W)``.

    Args:
        W: Weight vector, one entry per column of ``X_b``.
        X_b: Design matrix; any object exposing ``dot(W)`` works.
        y: Target values, compatible in shape with ``X_b.dot(W)``.

    Returns:
        The mean of the squared residuals ``(y - X_b.dot(W)) ** 2``, or
        ``float('inf')`` when the computation raises an arithmetic error.
    """
    try:
        return np.mean((y-X_b.dot(W))**2)
    except ArithmeticError:
        # Only numeric failures (OverflowError, FloatingPointError, ...) are
        # treated as "infinitely bad" cost. Anything else -- shape mismatches,
        # KeyboardInterrupt -- must propagate instead of being masked as inf.
        # NumPy raises these only when its error state is set to 'raise';
        # by default an overflow already evaluates to inf with a warning.
        return float('inf')

In [12]:
# Compute the gradient analytically
def dJ_math(W,X_b,y):
    """Return the closed-form gradient of the mean squared error w.r.t. ``W``.

    Evaluates ``X_b.T.dot(X_b.dot(W) - y) * 2 / len(y)``.

    Args:
        W: Weight vector, one entry per column of ``X_b``.
        X_b: Design matrix with one row per sample.
        y: Target values, one per row of ``X_b``.

    Returns:
        The gradient, one entry per weight.
    """
    residual = X_b.dot(W) - y
    unscaled = X_b.T.dot(residual)
    return unscaled * 2. / len(y)

In [13]:
def dJ_debug(W,X_b,y,epsilon=0.01):
    """Approximate the gradient of ``J`` at ``W`` with central differences.

    Each component is estimated as
    ``(J(W + epsilon * e_i) - J(W - epsilon * e_i)) / (2 * epsilon)``,
    where ``e_i`` perturbs only the i-th weight. The result can be compared
    against an analytic gradient to check that the formula is correct.

    Args:
        W: Weight vector at which to evaluate the gradient.
        X_b: Design matrix passed through to ``J``.
        y: Target values passed through to ``J``.
        epsilon: Half-width of the finite-difference step.

    Returns:
        A float array with one gradient estimate per entry of ``W``.
    """
    # Work on a float array: assigning W[i] + epsilon into an integer array
    # would truncate the perturbation away and yield an all-zero gradient.
    W = np.asarray(W, dtype=float)
    res = np.empty(len(W))
    for i in range(len(W)):
        # Fresh copies each time so only the i-th weight is perturbed.
        W_plus = W.copy()
        W_plus[i] += epsilon
        W_minus = W.copy()
        W_minus[i] -= epsilon

        res[i] = (J(W_plus,X_b,y)-J(W_minus,X_b,y)) / (2 * epsilon)
    return res

In [15]:
# Gradient descent
def gradient_descent_vector(dJ,init_w,X,y,eta=0.01,epsilon=1e-2,n_iters=1e4,init_b=0.):
    """Minimise ``J`` by batch gradient descent and return the fitted weights.

    Args:
        dJ: Gradient function called as ``dJ(W, X_b, y)``.
        init_w: Initial feature weights, one per column of ``X``.
        X: Feature matrix without the intercept column.
        y: Target values, one per row of ``X``.
        eta: Learning rate (step size).
        epsilon: Stop once the absolute change in ``J`` between two
            consecutive iterates drops below this value.
        n_iters: Upper bound on the number of iterations.
        init_b: Initial intercept, placed in front of ``init_w``.

    Returns:
        The weight vector, intercept first, followed by the feature weights.
    """
    # Column of ones in front of X so the first weight is the intercept.
    design = np.hstack([np.ones((len(X), 1)), X])
    # Intercept goes first, matching the column of ones.
    W = np.hstack([init_b, init_w])

    step = 0
    while step < n_iters:
        previous = W
        W = previous - eta * dJ(previous, design, y)
        cost_change = J(W, design, y) - J(previous, design, y)
        if abs(cost_change) < epsilon:
            break
        step += 1
    return W

In [18]:
# Time gradient descent driven by the finite-difference gradient (dJ_debug), starting from all-zero feature weights.
%time gradient_descent_vector(dJ_debug,np.zeros(X.shape[1]),X,y)

CPU times: user 340 ms, sys: 30.1 ms, total: 370 ms
Wall time: 196 ms


array([8.26200061, 3.41547499, 3.50931419, 3.8359876 , 4.53870159,
       4.88207109, 5.10522801, 5.7360655 , 6.11828751, 6.60422379,
       7.06073814])

In [19]:
# Time gradient descent driven by the analytic gradient (dJ_math), starting from all-zero feature weights.
%time gradient_descent_vector(dJ_math,np.zeros(X.shape[1]),X,y)

CPU times: user 46.8 ms, sys: 6.01 ms, total: 52.8 ms
Wall time: 30.6 ms


array([8.26200061, 3.41547499, 3.50931419, 3.8359876 , 4.53870159,
       4.88207109, 5.10522801, 5.7360655 , 6.11828751, 6.60422379,
       7.06073814])