## 随机梯度下降法

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
m = 100000

x = np.random.normal(size = m)
X = x.reshape(-1,1)

y = 4.* x + 3. + np.random.normal(0,3,size=m) 

In [3]:
def J(theta, X_b, y):
    
    try:
        return (y - X_b.dot(theta))**2/len(X_b)
    except:
        return float('inf')
def dJ(theta,X_b,y):
    return X_b.T.dot(X_b.dot(theta)-y)* 2. / len(y)
def gradient_descent(X_b,y,initial_theta, eta, n_iters = 1e4,epsilon = 1e-8):
    theta = initial_theta
    i_iters = 0
    while i_iters < n_iters:
        gradient = dJ(theta,X_b,y)
        last_theta = theta
        theta = theta - eta * gradient
    
        if(abs(J(last_theta, X_b, y)-J(theta, X_b, y)).all() < epsilon):
            break
        i_iters += 1
    return theta

In [4]:
%%time

X_b = np.hstack([np.ones((len(X),1)),X])
initial_theta = np.zeros(X_b.shape[1])
eta = 0.01
theta = gradient_descent(X_b,y,initial_theta,eta)


Wall time: 3.05 s


In [5]:
theta

array([2.99532858, 4.00183365])

### 随机梯度下降法

In [6]:
def dJ_sgd(theta,X_b_i,y_i):
    return X_b_i.T.dot(X_b_i.dot(theta)-y_i)* 2. 

In [7]:
def sgd(X_b, y, initial_theta, n_iters):
    t0 = 5
    t1 = 50
    
    def learning_rate(t):
        return t0/(t+t1)
    theta = initial_theta
    for cur_iter in range(n_iters):
        rand_i = np.random.randint(len(X_b))
        gradient = dJ_sgd(theta, X_b[rand_i], y[rand_i])
        theta = theta - gradient * learning_rate(cur_iter)
        
        
    return theta

In [8]:
%%time
X_b = np.hstack([np.ones((len(X),1)),X])
initial_theta = np.zeros(X_b.shape[1])
theta = sgd(X_b,y,initial_theta,n_iters=len(X)//3)

Wall time: 267 ms


In [9]:
theta

array([3.01723881, 3.99622698])

### 使用我们自己的随机梯度下降法GSD

In [10]:
from PlayML.LinearRegression import LinearRegression

In [11]:
lin_reg = LinearRegression()
lin_reg.fit_sgd(X,y,n_iters = 2)

LinearRegression()

In [12]:
lin_reg.coef_

array([4.00064768])

In [13]:
lin_reg.interception_

3.0177223433038143

### 真实数据

In [14]:
from sklearn import datasets

boston = datasets.load_boston()

X = boston.data
y = boston.target

X = X[y<50.0]
y = y[y<50.0]

In [15]:
from PlayML.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,seed = 666)

In [16]:
from sklearn.preprocessing import StandardScaler

standardscaler = StandardScaler()
standardscaler.fit(X_train)
X_train_standard = standardscaler.transform(X_train)
X_test_standard = standardscaler.transform(X_test)

In [17]:
lin_reg2 = LinearRegression()
%time lin_reg2.fit_sgd(X_train_standard,y_train,n_iters = 2)
lin_reg2.score(X_test_standard,y_test)

Wall time: 10 ms


0.7458223881691162

In [18]:
%time lin_reg2.fit_sgd(X_train_standard,y_train,n_iters = 50)
lin_reg2.score(X_test_standard,y_test)

Wall time: 152 ms


0.811685157094956

In [19]:
%time lin_reg2.fit_sgd(X_train_standard,y_train,n_iters = 100)
lin_reg2.score(X_test_standard,y_test)

Wall time: 305 ms


0.8127442569222184

## sklearn 中的SGD

In [20]:
from sklearn.linear_model import SGDRegressor

In [21]:
sgd_regressor = SGDRegressor()
%time sgd_regressor.fit(X_train_standard,y_train)
sgd_regressor.score(X_test_standard,y_test)

Wall time: 1.5 ms


0.8119542627324808