# 梯度下降法向量化(Boston房价)

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Load the Boston housing data as a feature matrix X and a target vector y.
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, where
# accessing it raises ImportError; in that case fall back to reading the
# original data file, as scikit-learn's removal notice recommends.
try:
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
except (ImportError, AttributeError):
    import pandas as pd  # only needed on the fallback path

    # Each record spans two physical lines in the source file: 11 values on
    # the first line, then 2 more features and the target on the second.
    raw_df = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
                         sep=r"\s+", skiprows=22, header=None)
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]

# Keep only the samples whose target is below 50.0. The mask is computed once,
# from the unfiltered y, and applied to both arrays so they stay aligned.
keep = y < 50.0
X = X[keep]
y = y[keep]

In [3]:
# Split the filtered data into training and test sets with the project's own
# helper. seed=666 is passed explicitly (presumably it fixes the shuffle so the
# split is reproducible — confirm in playML.model_selection).
from playML.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

In [4]:
from playML.LinearRegression import LinearRegression

# Baseline: fit with fit_normal (presumably the closed-form normal-equation
# solver — confirm in playML), timing the fit, then display the score on the
# held-out test set.
lin_reg1 = LinearRegression()
%time lin_reg1.fit_normal(X_train, y_train)
lin_reg1.score(X_test, y_test)

CPU times: user 564 µs, sys: 244 µs, total: 808 µs
Wall time: 486 µs


0.8129794056212925

## 真实数据集，利用梯度下降

各个特征（维度）的数值范围相差太大！！不在同一个量级上

使用默认学习率时，梯度下降会发散：不会抛出异常，但计算过程中会出现运行时警告，最终得到的系数和评分都是 nan



In [5]:
from playML.LinearRegression import LinearRegression

# Gradient descent on the raw (unscaled) features with fit_gd's default
# hyperparameters. In the recorded run this diverged: warnings were emitted
# during the fit and the resulting score is nan.
lin_reg2 = LinearRegression()
%time lin_reg2.fit_gd(X_train, y_train)
lin_reg2.score(X_test, y_test)

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
  if (abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):


CPU times: user 384 ms, sys: 4.98 ms, total: 389 ms
Wall time: 424 ms


nan

In [6]:
# Display the coefficients of the model fitted with the default learning rate;
# in the recorded run every entry is nan.
lin_reg2.coef_

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

In [7]:
# Retry on the unscaled data with a much smaller learning rate (eta=1e-6);
# every other fit_gd argument is left at its default. The recorded score is
# finite this time but low.
lin_reg3 = LinearRegression()
%time lin_reg3.fit_gd(X_train, y_train, eta=0.000001)
lin_reg3.score(X_test, y_test)

CPU times: user 361 ms, sys: 3.74 ms, total: 364 ms
Wall time: 365 ms


0.27586818724477224

## 默认步长太大会发散；把步长调小后，循环的次数又太少（次数太多又很耗时）！

最主要的原因，还是各个特征的数据规模不一致：

同一个 eta，对某些维度来说太大，对另一些维度来说又太小



In [8]:
# Re-fit the same lin_reg3 instance with the same small eta but n_iters raised
# to 1e6 (presumably the maximum number of iterations — confirm in playML).
# NOTE(review): 1e6 is a float literal; the recorded run completed, so fit_gd
# evidently accepts it, but confirm it is never used where an int is required.
%time lin_reg3.fit_gd(X_train, y_train, eta=0.000001, n_iters=1e6)
lin_reg3.score(X_test, y_test)

CPU times: user 34.8 s, sys: 160 ms, total: 34.9 s
Wall time: 35.2 s


0.7542932581943915

## 数据归一化

使用`梯度下降法`搜索损失函数的最小值之前，需要先对数据进行归一化处理

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
# Fit the scaler on the training features only (fit returns the scaler itself,
# so it can be chained onto construction), then scale the training features.
standardScaler = StandardScaler().fit(X_train)
X_train_standard = standardScaler.transform(X_train)

In [11]:
# Gradient descent on the standardized training features, using fit_gd's
# default hyperparameters (no eta or n_iters override), with the fit timed.
lin_reg4 = LinearRegression()
%time lin_reg4.fit_gd(X_train_standard, y_train)

CPU times: user 222 ms, sys: 4.37 ms, total: 227 ms
Wall time: 257 ms


LinearRegression()

In [12]:
# Scale the test features with the scaler that was fitted on the training set
# (it is not re-fitted here), then display the model's score on the scaled
# test data.
X_test_standard = standardScaler.transform(X_test)
lin_reg4.score(X_test_standard, y_test)

0.8129873310487505

<br><br>

## 使用梯度下降法的优势

当特征数量很多时，速度快：下面 1000 个样本、5000 个特征的例子中，`fit_normal` 的墙钟时间为 7.64 s，而 `fit_gd` 只需 1.93 s

In [13]:
# Synthetic regression problem with more features than samples, used below to
# compare the running time of fit_normal and fit_gd.
m = 1000  # number of samples
n = 5000  # number of features

# A dedicated, seeded generator makes the data identical on every run without
# touching NumPy's global random state.
rng = np.random.default_rng(666)

big_X = rng.normal(size=(m, n))
# true_theta[0] is the intercept; true_theta[1:] are the feature coefficients.
true_theta = rng.uniform(0.0, 100.0, size=n+1)
# The target must not be perfectly linear, so Gaussian noise (std 10) is added.
big_y = big_X.dot(true_theta[1:]) + true_theta[0] + rng.normal(0., 10., size=m)

In [14]:
# Time fit_normal on the synthetic problem (m=1000 samples, n=5000 features).
big_reg1 = LinearRegression()
%time big_reg1.fit_normal(big_X, big_y)

CPU times: user 12.5 s, sys: 591 ms, total: 13.1 s
Wall time: 7.64 s


LinearRegression()

In [15]:
# Time fit_gd, with its default hyperparameters, on the same synthetic problem
# for comparison with the fit_normal timing.
big_reg2 = LinearRegression()
%time big_reg2.fit_gd(big_X, big_y)

CPU times: user 3.73 s, sys: 33.4 ms, total: 3.77 s
Wall time: 1.93 s


LinearRegression()