In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NOTE(review): load_boston is deprecated and has been removed from recent
# scikit-learn releases — confirm the pinned version still ships it.
boston = load_boston()
X, y = boston.data, boston.target

# A fixed seed makes the split, and every number printed further down,
# reproducible under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Keep the fitted scaler: `lr` is trained on standardized features, so the
# held-out split must go through the *same* transform (statistics learned from
# the training split only) before it can be passed to `lr`.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [3]:
# Mean squared error (MSE)
def mse_of_predictions(y, y_predict):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y : array-like
        Ground-truth target values.
    y_predict : array-like
        Predicted values; must be broadcastable against ``y``.

    Returns
    -------
    numpy.float64
        Mean of the element-wise squared differences ``(y_predict - y) ** 2``.
    """
    y = np.asarray(y, dtype=float)
    y_predict = np.asarray(y_predict, dtype=float)
    # Average the squared residuals; without the mean this would only be the
    # per-sample squared error, not the MSE.
    return np.mean((y_predict - y) ** 2)


def mse(X, theta, y):
    """Return the MSE of the linear model ``X.dot(theta)`` against ``y``.

    Parameters
    ----------
    X : array-like
        Design matrix; one row per sample, one column per entry of ``theta``.
    theta : array-like
        Parameter vector of the linear model.
    y : array-like
        Ground-truth target values, one per row of ``X``.

    Returns
    -------
    numpy.float64
        Mean squared error of the predictions ``X.dot(theta.T)``.
    """
    # Delegate to a differently named helper: two functions sharing the name
    # ``mse`` would shadow each other and this one would end up calling itself
    # with the wrong number of arguments.
    predictions = np.asarray(X).dot(np.asarray(theta).T)
    return mse_of_predictions(y, predictions)

In [4]:
# Least squares method (closed-form linear regression)
def lsm(X, y):
    """Fit a linear model with an intercept by ordinary least squares.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.

    Returns
    -------
    tuple
        ``(w, b)`` where ``w`` holds the feature weights and ``b`` is the
        intercept.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    # Append a column of ones so the intercept is learned as the last weight.
    X_f = np.c_[X, np.ones(X.shape[0])]
    # Solve the least-squares problem directly instead of forming and
    # inverting X_f.T @ X_f: the explicit inverse squares the condition number
    # and breaks down when features are collinear, whereas lstsq returns the
    # minimum-norm solution in that case.
    wb = np.linalg.lstsq(X_f, y, rcond=None)[0]
    return (wb[:-1], wb[-1])

def test_lsm():
    """Print scikit-learn's fitted parameters, then the ``lsm`` solution.

    Each printed line is a ``(weights, intercept)`` tuple obtained from the
    training data, so the two fits can be compared by eye.
    """
    sklearn_params = (lr.coef_, lr.intercept_)
    print(sklearn_params)
    closed_form_params = lsm(X_train, y_train)
    print(closed_form_params)


test_lsm()

(array([-0.40828183,  1.03088803, -0.08295058,  0.76656535, -1.35953932,
        2.70549125, -0.37938392, -3.19405883,  2.06659491, -1.70945457,
       -1.7419433 ,  1.08846428, -4.30922475]), 22.462532981530376)
(array([-0.40828183,  1.03088803, -0.08295058,  0.76656535, -1.35953932,
        2.70549125, -0.37938392, -3.19405883,  2.06659491, -1.70945457,
       -1.7419433 ,  1.08846428, -4.30922475]), 22.462532981530376)


In [10]:
# Gradient vector of the mean squared error
def gradient(X, y, theta):
    """Return the gradient of the mean squared error with respect to ``theta``.

    Computes ``2 / len(y) * X.T.dot(X.dot(theta) - y)`` for the linear model
    ``X.dot(theta)``.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    theta : numpy.ndarray
        Current parameter vector.

    Returns
    -------
    numpy.ndarray
        Gradient, with one entry per column of ``X``.
    """
    residual = X.dot(theta.T) - y
    sample_count = len(y)
    return X.T.dot(residual) * 2. / sample_count

# Gradient descent
def gradient_descent(
    X,
    y,
    theta=None,
    alpha=1e-3,
    epsilon=1e-8,
    max_iter=1e5,
):
    """Fit a linear model with an intercept by batch gradient descent.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample (without the intercept column).
    y : array-like
        Target values, one per row of ``X``.
    theta : array-like, optional
        Starting parameters: the feature weights followed by the intercept,
        i.e. one entry more than ``X`` has columns. Defaults to all zeros.
    alpha : float
        Learning rate (step size).
    epsilon : float
        Stop once every parameter changes by less than this in one step.
    max_iter : int or float
        Upper bound on the number of update steps.

    Returns
    -------
    tuple
        ``(w, b)`` where ``w`` holds the feature weights and ``b`` is the
        intercept. The last iterate is returned even if ``max_iter`` was hit
        before the ``epsilon`` criterion was met.

    Raises
    ------
    ValueError
        If ``theta`` is given with a shape other than ``(n_features + 1,)``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    # Append a column of ones so the intercept is learned as the last weight.
    X_ = np.c_[X, np.ones(X.shape[0])]

    if theta is None:
        # Size the start vector from the data actually passed in. A default
        # written in the signature would be evaluated once, at definition
        # time, against whatever global ``X`` happened to exist then.
        theta = np.zeros(X_.shape[1])
    else:
        theta = np.asarray(theta, dtype=float)
        if theta.shape != (X_.shape[1],):
            raise ValueError(
                "theta must have shape (%d,), got %s"
                % (X_.shape[1], theta.shape)
            )

    iters = 0
    while iters < max_iter:
        old_theta = theta
        # Rebinding (not in-place update) keeps the caller's array untouched.
        theta = theta - alpha * gradient(X_, y, theta)
        if np.all(np.absolute(theta - old_theta) < epsilon):
            break
        iters += 1
    return (theta[:-1], theta[-1])

# theta = (w, b): the weights and intercept returned by gradient descent
theta  = gradient_descent(X_train,y_train)
# This result used to diverge; after normalizing the features it no longer diverges.
# Print the gradient-descent fit, then scikit-learn's fit, for comparison.
print(theta)
print((lr.coef_,lr.intercept_))

(array([-0.40827346,  1.03087718, -0.08297828,  0.76656919, -1.35953003,
        2.70549677, -0.3793879 , -3.19405724,  2.06652227, -1.7093752 ,
       -1.74194016,  1.08846455, -4.30922467]), 22.462532981529492)
(array([-0.40828183,  1.03088803, -0.08295058,  0.76656535, -1.35953932,
        2.70549125, -0.37938392, -3.19405883,  2.06659491, -1.70945457,
       -1.7419433 ,  1.08846428, -4.30922475]), 22.462532981530376)
