### 当偏导数不容易直接求出时，可以利用导数的定义，用数值方法近似求出梯度来训练模型（也可以用来调试解析梯度）

In [1]:
import numpy as np

In [2]:
# Fix the seed so the synthetic data set is reproducible.
np.random.seed(666)
# 1000 samples with 10 features each, drawn uniformly from [0, 1).
X = np.random.random((1000, 10))

# Ground-truth parameters 1.0, 2.0, ..., 11.0 (intercept first, then one per feature).
true_theta = np.arange(1.0, 12.0)
# Prepend a column of ones so the intercept is handled like any other weight.
X_b = np.column_stack((np.ones(X.shape[0]), X))
# Targets: exact linear response plus standard-normal noise.
noise = np.random.normal(size=X.shape[0])
y = np.dot(X_b, true_theta) + noise

In [3]:
def J(theta, X_b, y):
    """Return the mean squared error of the linear model ``X_b.dot(theta)``.

    Args:
        theta: Parameter vector, one entry per column of ``X_b``.
        X_b: Design matrix, one row per sample.
        y: Target values, one per row of ``X_b``.

    Returns:
        ``sum((y - X_b.dot(theta)) ** 2) / len(X_b)``, or ``inf`` when the
        computation overflows and raises.

    Raises:
        ValueError: If the shapes of the arguments are incompatible. This used
            to be masked as an ``inf`` cost by a bare ``except``.
    """
    try:
        return np.sum((y - X_b.dot(theta))**2) / len(X_b)
    except (OverflowError, FloatingPointError):
        # Only numeric blow-ups are mapped to an infinite cost. Anything else
        # (wrong shapes, KeyboardInterrupt, ...) must reach the caller.
        return float('inf')

In [4]:
def dJ_math(theta, X_b, y):
    """Closed-form gradient of the mean squared error with respect to theta.

    Computes ``2 / len(y) * X_b.T.dot(X_b.dot(theta) - y)``.
    """
    residual = X_b.dot(theta) - y
    projected = X_b.T.dot(residual)
    return projected * 2. / len(y)

In [129]:
def dJ_debug(theta, X_b, y, epsilon=0.01):
    """Approximate the gradient of ``J`` at ``theta`` with central differences.

    Component ``i`` of the result is
    ``(J(theta + epsilon * e_i) - J(theta - epsilon * e_i)) / (2 * epsilon)``,
    where ``e_i`` perturbs only entry ``i`` of ``theta``. This costs two
    evaluations of ``J`` per parameter, so it is meant for checking an
    analytic gradient rather than for fast training.

    Args:
        theta: Point at which to estimate the gradient. Not modified.
        X_b: Design matrix forwarded to ``J``.
        y: Targets forwarded to ``J``.
        epsilon: Half-width of the finite-difference step.

    Returns:
        A float array with ``len(theta)`` entries.
    """
    # Work on a float copy: with an integer-dtype theta, writing
    # `value +/- epsilon` back into the array would truncate to the original
    # value and the estimated gradient would silently come out as all zeros.
    base = np.array(theta, dtype=float)
    res = np.empty(len(base))
    for i in range(len(base)):
        stepped_up = base.copy()
        stepped_up[i] += epsilon
        stepped_down = base.copy()
        stepped_down[i] -= epsilon
        res[i] = (J(stepped_up, X_b, y) - J(stepped_down, X_b, y)) / (2 * epsilon)
    return res

In [130]:
# Numerical gradient at the starting point. `initial_theta` is created in a
# separate cell (In [110]), which has to be executed before this one.
dJ_debug(initial_theta, X_b, y)

array([-67.00674934, -34.88852187, -34.12158402, -34.12599755,
       -34.25458534, -34.37153116, -33.57002731, -34.93556353,
       -35.45595111, -35.18706586, -35.82788229])

In [109]:
def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
    """Minimise ``J`` by batch gradient descent.

    Args:
        dJ: Gradient function called as ``dJ(theta, X_b, y)``. Passing it in
            lets the same loop run with an analytic or a numerical gradient.
        X_b: Design matrix forwarded to ``dJ`` and ``J``.
        y: Targets forwarded to ``dJ`` and ``J``.
        initial_theta: Starting parameters. Not modified.
        eta: Learning rate (step size).
        n_iters: Upper bound on the number of updates.
        epsilon: Stop as soon as one update changes the cost by less than this.

    Returns:
        The parameter vector after the last update that was performed.
    """
    theta = initial_theta
    # The cost of the current theta is carried over between iterations
    # instead of being re-evaluated as "the previous cost" on the next pass.
    cost = J(theta, X_b, y)
    cur_iter = 0

    while cur_iter < n_iters:
        gradient = dJ(theta, X_b, y)
        theta = theta - eta * gradient
        previous_cost, cost = cost, J(theta, X_b, y)
        if abs(cost - previous_cost) < epsilon:
            break

        cur_iter += 1

    return theta

In [110]:
# Design matrix: X with a leading column of ones.
X_b = np.hstack((np.ones((X.shape[0], 1)), X))
# Start the search from the all-zero vector, one entry per column of X_b.
initial_theta = np.zeros(X_b.shape[-1])
# Learning rate handed to gradient_descent in the cells below.
eta = 0.01

In [111]:
%%time 
# Fit using the finite-difference gradient; the gradient function itself is
# passed to gradient_descent as an argument.
# NOTE(review): the output recorded below ends with theta around 1e15 and
# contains "res:shape" / "theta:shape" prints that the dJ_debug shown in this
# file does not produce, so it presumably comes from an earlier version of
# that function. Re-run the cell to confirm.
theta = gradient_descent(dJ_debug, X_b, y, initial_theta, eta)

res:shape, (11, 1)
theta:shape, (11, 1)
res:shape, (11, 1)
theta:shape, (11, 1)
res:shape, (11, 1)
theta:shape, (11, 1)
res:shape, (11, 1)
theta:shape, (11, 1)
res:shape, (11, 1)
theta:shape, (11, 1)
res:shape, (11, 1)
theta:shape, (11, 1)
Wall time: 116 ms


In [112]:
# Display the parameters returned by the dJ_debug run.
theta

array([  1.16826695e+15,   1.16826695e+15,   1.16826695e+15,
         1.16826695e+15,   1.16826695e+15,   1.16826695e+15,
         1.16826695e+15,   1.16826695e+15,   1.16826695e+15,
         1.16826695e+15,   1.16826695e+15])

In [113]:
%%time 
# Same optimisation loop, this time driven by the closed-form gradient dJ_math.
theta = gradient_descent(dJ_math, X_b, y, initial_theta, eta)

Wall time: 743 ms


In [114]:
# Show theta as a single column; the data was generated with
# true_theta = 1, 2, ..., 11, so the entries should be close to those values.
theta.reshape((-1,1))

array([[  1.1251597 ],
       [  2.05312521],
       [  2.91522497],
       [  4.11895968],
       [  5.05002117],
       [  5.90494046],
       [  6.97383745],
       [  8.00088367],
       [  8.86213468],
       [  9.98608331],
       [ 10.90529198]])

res:shape, (11, 1)
theta:shape, (11, 1)


-402311.64992356207

In [107]:
# Analytic gradient at the starting point, for comparison with the value
# produced by dJ_debug(initial_theta, X_b, y).
dJ_math(initial_theta,X_b,y)

array([-67.00674934, -34.88852187, -34.12158402, -34.12599755,
       -34.25458534, -34.37153116, -33.57002731, -34.93556353,
       -35.45595111, -35.18706586, -35.82788229])

### mini batch梯度下降法（未能成功实现：原因不清楚！）

In [36]:
def stochastic_gradient_descent(X_b, y_train, theta, n_iters, k=20, eta=1e-2,
                                shuffle=True):
    """Fit linear-regression parameters with mini-batch gradient descent.

    Each epoch visits every sample once, in batches of ``k`` rows. For every
    batch, ``theta`` is moved against the gradient of that batch's mean
    squared error.

    Args:
        X_b: 2-D design matrix, one row per sample.
        y_train: Targets, one per row of ``X_b``.
        theta: Initial parameters, one per column of ``X_b``. Not modified.
        n_iters: Number of epochs (full passes over the data).
        k: Batch size. When ``len(X_b)`` is not a multiple of ``k`` the last
            batch of an epoch is simply shorter; no sample is skipped.
        eta: Learning rate applied to the batch-mean gradient. Because the
            gradient is averaged, the step size no longer grows with ``k``.
        shuffle: Draw a fresh random sample order (``np.random``) for every
            epoch. Pass ``False`` to visit the rows in their given order.

    Returns:
        The parameter vector after the last update, as a float array.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``X_b`` and ``y_train``
            have a different number of rows.
    """
    X_b = np.asarray(X_b, dtype=float)
    y_train = np.asarray(y_train, dtype=float).reshape(-1)
    n_samples = len(X_b)
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))
    if n_samples != len(y_train):
        raise ValueError("X_b and y_train must have the same number of rows")

    theta = np.array(theta, dtype=float)
    for _ in range(n_iters):
        # Shuffling an index array keeps every X row paired with its target.
        if shuffle:
            order = np.random.permutation(n_samples)
        else:
            order = np.arange(n_samples)

        for start in range(0, n_samples, k):
            rows = order[start:start + k]
            X_batch = X_b[rows]
            residual = X_batch.dot(theta) - y_train[rows]
            # Mean, not sum, over the batch: summing made the effective
            # learning rate proportional to k and unstable for large batches.
            gradient = X_batch.T.dot(residual) * 2. / len(rows)
            theta = theta - eta * gradient
    return theta


In [37]:
def dJ_theta_stochastic(x_i, y_i, theta):
    """Gradient of the squared error ``(x_i.dot(theta) - y_i) ** 2`` w.r.t. theta.

    Evaluated for the sample ``x_i`` with target ``y_i``.
    """
    residual = x_i.dot(theta) - y_i
    return x_i.T.dot(residual) * 2.

def learning_rate(t):
    """Return the decaying step size ``1 / (t + 1)`` for step index ``t``."""
    denominator = t + 1
    return 1 / denominator

In [None]:
# Design matrix: X with a leading column of ones.
X_b = np.hstack((np.ones((X.shape[0], 1)), X))
# Start from the all-zero parameter vector, one entry per column of X_b.
initial_theta = np.zeros(X_b.shape[-1])
# A single pass over the data in batches of 20 rows.
stochastic_gradient_descent(X_b, y, initial_theta, n_iters=1, k=20)