# 如何进行梯度调试

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's global RNG so the synthetic data generated below is reproducible.
np.random.seed(666)
X=np.random.random(size=(1000,10))# 1000 samples, 10 features

In [3]:
true_theta=np.arange(1,12,dtype=float)# 11 coefficients theta_i in total, because there is also an intercept

In [4]:
# Prepend a column of ones to X so the first coefficient acts as the intercept.
X_b = np.hstack([np.ones((X.shape[0],1)),X])
# Add one standard-normal noise term per sample. The count is taken from X_b
# instead of a hard-coded 1000 so it stays in sync if the size of X changes.
y = X_b.dot(true_theta) + np.random.normal(size=X_b.shape[0])

In [5]:
# Sanity check: shape of the feature matrix (rows are samples, columns are features).
X.shape

(1000, 10)

In [6]:
# Sanity check: y should hold one target value per sample.
y.shape

(1000,)

In [7]:
# Display the ground-truth coefficients that gradient descent should approximately recover.
true_theta

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [8]:
def J(theta,X_b,y):
    """Mean squared error of the linear model ``X_b.dot(theta)`` against ``y``.

    Parameters
    ----------
    theta : coefficient vector, one entry per column of ``X_b``.
    X_b : design matrix; the squared-error sum is divided by ``X_b.shape[0]``.
    y : target values, one per row of ``X_b``.

    Returns
    -------
    The mean squared error, or ``float('inf')`` if evaluating it raised an
    ordinary exception, so the caller sees an unusable cost instead of a crash.
    """
    try:
        return np.sum((y - X_b.dot(theta)) ** 2)/X_b.shape[0]
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must propagate so that a long-running descent can still be interrupted.
        return float('inf')

## 数学解求梯度

In [9]:
def dJ_math(theta,X_b,y):
    """Analytic gradient of the mean-squared-error loss with respect to theta.

    Computes ``X_b.T.dot(X_b.dot(theta) - y) * 2 / y.shape[0]``.
    """
    residual = X_b.dot(theta) - y  # prediction minus target, one entry per sample
    n_samples = y.shape[0]
    scaled = X_b.T.dot(residual) * 2.
    return scaled / n_samples

## 近似梯度求法，结果正确，但速度较慢，可以用来验证数学解

In [10]:
def dJ_debug(theta,X_b,y,epsilon=0.001):
    """Approximate the gradient of ``J`` at ``theta`` by central differences.

    Slower than an analytic gradient (two evaluations of ``J`` per
    coordinate), but it only relies on ``J`` itself, so it can be used to
    validate a hand-derived gradient.

    Parameters
    ----------
    theta : point at which to differentiate; converted to a float array.
    X_b, y : passed through unchanged to ``J``.
    epsilon : half-width of the finite-difference step.

    Returns
    -------
    numpy.ndarray with one partial derivative per entry of ``theta``.
    """
    # Force a float array: on an integer array ``theta_plus[i] += epsilon``
    # would be truncated back to an integer and corrupt the estimate.
    theta = np.asarray(theta, dtype=float)
    res = np.empty(len(theta))
    for i in range(len(theta)):  # one partial derivative per coordinate
        theta_plus = theta.copy()
        theta_plus[i] += epsilon
        theta_minus = theta.copy()
        theta_minus[i] -= epsilon
        # Central difference along coordinate i.
        res[i] = (J(theta_plus,X_b,y)-J(theta_minus,X_b,y))/(2*epsilon)
    return res

In [11]:
def gradient_descent(dJ,X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
    """Minimise ``J`` by batch gradient descent.

    Parameters
    ----------
    dJ : callable ``dJ(theta, X_b, y)`` returning the gradient at ``theta``.
    X_b, y : passed through unchanged to ``dJ`` and ``J``.
    initial_theta : starting point; it is not modified.
    eta : learning rate (step size).
    n_iters : upper bound on the number of update steps, so the loop cannot
        run forever.
    epsilon : stop once the absolute change in ``J`` between two consecutive
        steps falls below this value.

    Returns
    -------
    The last ``theta`` that was computed.
    """
    theta = initial_theta
    # Cost at the current theta. Carrying it from step to step avoids
    # re-evaluating J at the previous point on every iteration.
    cost = J(theta, X_b, y)
    i_iter = 0
    while i_iter < n_iters:
        gradient = dJ(theta, X_b, y)  # gradient at the current theta
        theta = theta - eta * gradient  # step against the gradient
        new_cost = J(theta, X_b, y)

        if abs(new_cost - cost) < epsilon:  # the loss barely changed: converged
            break
        cost = new_cost
        i_iter += 1
    return theta

In [12]:
# Rebuild the design matrix: a column of ones followed by the columns of X.
X_b=np.hstack([np.ones((X.shape[0],1)),X])
# NOTE: the name is misspelled ("initital"); it is kept because the next cell reuses it.
initital_theta=np.zeros(X_b.shape[1])# one theta_i per column of X_b
eta=0.01
%time theta=gradient_descent(dJ_debug,X_b,y,initital_theta,eta)# compute with dJ_debug (numerical gradient)
theta

Wall time: 9.99 s


array([ 1.1251597 ,  2.05312521,  2.91522497,  4.11895968,  5.05002117,
        5.90494046,  6.97383745,  8.00088367,  8.86213468,  9.98608331,
       10.90529198])

In [13]:
%time theta=gradient_descent(dJ_math,X_b,y,initital_theta,eta)# compute with dJ_math (analytic gradient)
theta

Wall time: 978 ms


array([ 1.1251597 ,  2.05312521,  2.91522497,  4.11895968,  5.05002117,
        5.90494046,  6.97383745,  8.00088367,  8.86213468,  9.98608331,
       10.90529198])