### 在偏导数不好求的时候 我们可以通过数学方法 （导数的定义） 近似求出导数来训练模型

In [1]:
import numpy as np

In [2]:
np.random.seed(666)
X = np.random.random(size=(1000, 10))

true_theta = np.arange(1, 12, dtype=float)
X_b = np.hstack([np.ones((len(X), 1)), X])
y = X_b.dot(true_theta) + np.random.normal(size=1000)

In [3]:
def J(theta, X_b, y):
    try:
        return np.sum((y - X_b.dot(theta))**2) / len(X_b)
    except:
        return float('inf')

In [4]:
def dJ_math(theta, X_b, y):
    return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)

In [19]:
def dJ_debug(theta, X_b, y, epsilon=0.01):
    #向量化后的近似算法  速度惊人！！！（这也说明了 矩阵乘法虽然比多次循环快 但是时间复杂度依然没有加法低）
    res = np.empty(len(theta))
    theta_1 = theta.copy() + epsilon
    theta_2 = theta.copy() - epsilon

    res = (J(theta_1, X_b, y) - J(theta_2, X_b, y)) / (2 * epsilon)
    return res

In [21]:
def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters = 1e4, epsilon=1e-8):
    #函数可以作为参数传进来的 ：神奇的py
    theta = initial_theta
    cur_iter = 0

    while cur_iter < n_iters:
        gradient = dJ(theta, X_b, y)
        last_theta = theta
        theta = theta - eta * gradient
        if(abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):
            break
            
        cur_iter += 1

    return theta

In [15]:
X_b = np.hstack([np.ones((len(X), 1)), X])
initial_theta = np.zeros(X_b.shape[1])
eta = 0.01

682 µs ± 14.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


array([  1.07293573,   2.05354247,   2.93055223,   4.13101404,
         5.05988882,   5.91570444,   6.98527461,   8.00946189,
         8.87375842,   9.99684667,  10.91656348])

In [17]:
%timeit theta = gradient_descent(dJ_debug, X_b, y, initial_theta, eta)
theta

674 µs ± 7.46 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


array([  1.07293573,   2.05354247,   2.93055223,   4.13101404,
         5.05988882,   5.91570444,   6.98527461,   8.00946189,
         8.87375842,   9.99684667,  10.91656348])

In [18]:
%timeit theta = gradient_descent(dJ_math, X_b, y, initial_theta, eta)
theta

519 ms ± 5.62 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


array([  1.07293573,   2.05354247,   2.93055223,   4.13101404,
         5.05988882,   5.91570444,   6.98527461,   8.00946189,
         8.87375842,   9.99684667,  10.91656348])

In [22]:
def mini_batch_gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters = 5, k=20,epsilon=1e-8):
    #函数可以作为参数传进来的 ：神奇的py
    theta = initial_theta
    cur_iter = 0

    while cur_iter < n_iters:
        for i in range(len(y)/k)
            gradient = dJ(theta, X_b, y)
            last_theta = theta
            theta = theta - eta * gradient
            if(abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):
                break
            
        cur_iter += 1

    return theta

SyntaxError: invalid syntax (<ipython-input-22-087bbe66f4eb>, line 7)

In [69]:
def stochastic_gradient_descent(X_b, y_train,theta, n_iters,k=20):
    iters_of_number =int(len(X_b) / k)
    print(iters_of_number)
    for n in range(n_iters):
        # shuffle X和y  （注意X_y之间的顺序有关系！）
        X_y = np.hstack([X_b, y_train.reshape((-1, 1))])
        np.random.shuffle(X_y)
        X_b_new = X_y[:, 0:-1]
        y_b_new = X_y[:, -1]

        # 主要算法 因为X y 是随机序列 所以顺序取出来X y做随机梯度下降就可以
        for i in range(iters_of_number):
            grad_mini = 0
            # 计算mini batch梯度
            #print("计算mini batch----->",i)
            for mini_i in range(k):
                #print("下标：",i*k+mini_i)
                x_i = X_b_new[i*k+mini_i]
                y_i = y_b_new[i*k+mini_i]
                grad_mini += dJ_theta_stochastic(x_i, y_i, theta)
            #print("k个梯度的和:",grad_mini)
            # 更新theta
            theta = theta - learning_rate(i*50) * grad_mini
    return theta


In [139]:
def dJ_theta_stochastic(x_i, y_i, theta):
    return x_i.T.dot(x_i.dot(theta) - y_i) * 2.

def learning_rate(t):
    return 1 / (t+1)

In [162]:
X_b = np.hstack([np.ones((len(X), 1)), X])
initial_theta = np.zeros(X_b.shape[1])
stochastic_gradient_descent(X_b,y,initial_theta,n_iters=10,k=20)

50


array([ -218093.43176111,  -660476.33458645,  1033401.67658999,
         572962.7880886 , -1025444.04806688,  1270850.3055133 ,
       -1180121.05781863, -1677138.62263202,    -8884.49128022,
         899946.33069137,  1228640.2861648 ])