In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [2]:
class SGD_simple:
    """Linear regression trained with single-sample stochastic gradient descent.

    Each step draws one random training row, evaluates the squared-error
    gradient on that row alone and moves the weights against it.  Training
    ends once ``n_iterations`` steps have been taken, or as soon as the
    Euclidean norm of the latest weight change is no longer greater than
    ``epsilon``.

    NOTE(review): the stopping test looks at a single-sample step, so it can
    fire before the weights have settled.

    Attributes populated by ``fit``:
        fit_theta: weight column of shape (n_features + 1, 1), bias first.
        coef_: feature weights, shape (n_features,).
        intercept_: bias term, shape (1,).
        loop: number of update steps that were executed.
    """

    def __init__(self, eta=0.01, n_iterations=50000, epsilon=1e-5):
        """Store the hyper-parameters and reset the fitted state.

        Args:
            eta: learning rate applied to every gradient step.
            n_iterations: upper bound on the number of update steps.
            epsilon: step-size threshold used by the stopping test.
        """
        # Hyper-parameters.
        self.eta = eta
        self.n_iterations = n_iterations
        self.epsilon = epsilon
        # Fitted state, filled in by fit().
        self.fit_theta = None
        self.coef_ = None
        self.intercept_ = None
        self.loop = 0

    def fit(self, X, y):
        """Estimate the weights from ``X`` and ``y``.

        Args:
            X: 2-D NumPy array of shape (n_samples, n_features).
            y: NumPy array holding n_samples targets; it is reshaped to a
                column internally.
        """
        targets = y.reshape((len(y), 1))
        # Prepend a column of ones so the intercept is learned as one more weight.
        design = np.c_[np.ones((X.shape[0], 1)), X]
        n_rows = design.shape[0]
        # Random starting weights, uniform in [0, 1).
        weights = np.random.rand(design.shape[1], 1)

        steps_done = 0
        step_norm = 1
        while steps_done < self.n_iterations and step_norm > self.epsilon:
            previous = weights
            steps_done += 1
            # Pick one training row at random (slices keep everything 2-D).
            pick = np.random.randint(n_rows)
            row = slice(pick, pick + 1)
            x_row = design[row]
            residual = np.dot(x_row, weights) - targets[row]
            # Gradient of the squared error on this single row.
            grad = 2 * np.dot(x_row.T, residual)
            weights = weights - self.eta * grad
            # Size of the move just made; drives the stopping test.
            step_norm = np.linalg.norm(weights - previous)

        self.fit_theta = weights
        self.coef_ = np.reshape(weights[1:], X.shape[1])
        self.intercept_ = np.reshape(weights[:1], 1)
        self.loop = steps_done

    def predict(self, X_i):
        """Return predictions for ``X_i`` as a column of shape (n_samples, 1)."""
        bias_column = np.ones((X_i.shape[0], 1))
        design = np.c_[bias_column, X_i]
        return np.dot(design, self.fit_theta)

In [3]:
class BGD_simple:
    """Linear regression trained with full-batch gradient descent.

    Every step uses the mean squared-error gradient over the whole training
    set.  Training ends once ``n_iterations`` steps have been taken, or as
    soon as the Euclidean norm of the latest weight change is no longer
    greater than ``epsilon``.

    Attributes populated by ``fit``:
        fit_theta: weight column of shape (n_features + 1, 1), bias first.
        coef_: feature weights, shape (n_features,).
        intercept_: bias term, shape (1,).
        loop: number of update steps that were executed.
    """

    def __init__(self, eta=0.01, n_iterations=10000, epsilon=1e-5):
        """Store the hyper-parameters and reset the fitted state.

        Args:
            eta: learning rate applied to every gradient step.
            n_iterations: upper bound on the number of update steps.
            epsilon: step-size threshold used by the stopping test.
        """
        # Hyper-parameters.
        self.eta = eta
        self.n_iterations = n_iterations
        self.epsilon = epsilon
        # Fitted state, filled in by fit().
        self.fit_theta = None
        self.coef_ = None
        self.intercept_ = None
        self.loop = 0

    def fit(self, X, y):
        """Estimate the weights from ``X`` and ``y``.

        Args:
            X: 2-D NumPy array of shape (n_samples, n_features).
            y: NumPy array holding n_samples targets; it is reshaped to a
                column internally.
        """
        targets = y.reshape((len(y), 1))
        # Prepend a column of ones so the intercept is learned as one more weight.
        design = np.c_[np.ones((X.shape[0], 1)), X]
        # Random starting weights, uniform in [0, 1).
        weights = np.random.rand(design.shape[1], 1)
        n_rows = design.shape[0]
        # Constant factor of the mean squared-error gradient.
        scale = 2 / n_rows

        steps_done = 0
        step_norm = 1
        while steps_done < self.n_iterations and step_norm > self.epsilon:
            previous = weights
            steps_done += 1
            residuals = np.dot(design, weights) - targets
            grad = scale * np.dot(design.T, residuals)
            weights = weights - self.eta * grad
            # Size of the move just made; drives the stopping test.
            step_norm = np.linalg.norm(weights - previous)

        self.fit_theta = weights
        self.coef_ = np.reshape(weights[1:], X.shape[1])
        self.intercept_ = np.reshape(weights[:1], 1)
        self.loop = steps_done

    def predict(self, X_i):
        """Return predictions for ``X_i`` as a column of shape (n_samples, 1)."""
        bias_column = np.ones((X_i.shape[0], 1))
        design = np.c_[bias_column, X_i]
        return np.dot(design, self.fit_theta)

In [4]:
class MBGD_simple():
    """Linear regression trained with mini-batch gradient descent.

    Every iteration draws ``batch_size`` distinct training rows at random,
    averages the squared-error gradient over that batch and takes ONE step
    against it.  (The earlier version performed ``batch_size`` consecutive
    single-sample updates per iteration, which is plain SGD with a coarser
    stopping check rather than mini-batch descent.)

    Training ends once ``n_iterations`` steps have been taken, or as soon as
    the Euclidean norm of the latest weight change is no longer greater than
    ``epsilon``.

    Attributes populated by ``fit``:
        fit_theta: weight column of shape (n_features + 1, 1), bias first.
        coef_: feature weights, shape (n_features,).
        intercept_: bias term, shape (1,).
        loop: number of update steps that were executed.
    """

    def __init__(self, eta=0.01, n_iterations=10000, epsilon=1e-5, batch_size=2):
        """Store the hyper-parameters and reset the fitted state.

        Args:
            eta: learning rate applied to every gradient step.
            n_iterations: upper bound on the number of update steps.
            epsilon: step-size threshold used by the stopping test.
            batch_size: number of rows per mini-batch; values larger than the
                training set are clamped to the training-set size in ``fit``.
        """
        self.eta = eta
        self.n_iterations = n_iterations
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.fit_theta = None
        self.coef_ = None
        self.intercept_ = None
        self.loop = 0

    def fit(self, X, y):
        """Estimate the weights from ``X`` and ``y``.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like holding n_samples targets.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1 or ``X`` has no rows.
        """
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        # Prepend a column of ones so the intercept is learned as one more weight.
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        sample = X_b.shape[0]
        if sample == 0:
            raise ValueError("cannot fit on an empty training set")
        # A batch can never hold more distinct rows than the data provides.
        batch = min(int(self.batch_size), sample)

        theta = np.random.rand(X_b.shape[1], 1)  # random starting weights
        iteration = 0
        loss = 1
        while iteration < self.n_iterations and loss > self.epsilon:
            last_theta = theta
            iteration += 1
            # Draw the mini-batch without replacement so no row counts twice.
            batch_index = np.random.choice(sample, size=batch, replace=False)
            X_batch = X_b[batch_index]
            y_batch = y[batch_index]
            # Mean squared-error gradient over the batch: a single averaged step.
            gradients = 2 / batch * X_batch.T.dot(X_batch.dot(theta) - y_batch)
            theta = theta - self.eta * gradients
            # Size of the move just made; drives the stopping test.
            loss = np.linalg.norm(theta - last_theta)

        self.fit_theta = theta
        self.coef_ = theta[1:].reshape(X.shape[1])
        self.intercept_ = theta[:1].reshape(1)
        self.loop = iteration

    def predict(self, X_i):
        """Return predictions for ``X_i`` as a column of shape (n_samples, 1)."""
        X_i = np.asarray(X_i, dtype=float)
        X_b = np.c_[np.ones((X_i.shape[0], 1)), X_i]
        return X_b.dot(self.fit_theta)

In [5]:
class LinearRegression_simple():
    """Ordinary least-squares linear regression solved in closed form.

    Attributes populated by ``fit``:
        fit_theta: weight column of shape (n_features + 1, 1), bias first.
        coef_: feature weights, shape (n_features,).
        intercept_: bias term, shape (1,).
    """

    def __init__(self):
        """Create an unfitted estimator."""
        self.coef_ = None
        self.intercept_ = None
        self.fit_theta = None

    def fit(self, X, y):
        """Compute the least-squares weights for ``X`` and ``y``.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like holding n_samples targets.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        # Prepend a column of ones so the intercept is solved as one more weight.
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        # lstsq minimises ||X_b @ theta - y|| directly.  Unlike inverting
        # X_b.T @ X_b it still returns a (minimum-norm) solution when features
        # are collinear, and it avoids squaring the condition number.
        self.fit_theta = np.linalg.lstsq(X_b, y, rcond=None)[0]
        self.coef_ = self.fit_theta[1:].reshape(X.shape[1])
        self.intercept_ = self.fit_theta[0].reshape(1)

    def predict(self, X_i):
        """Return predictions for ``X_i`` as a column of shape (n_samples, 1)."""
        X_i = np.asarray(X_i, dtype=float)
        # The shape tuple must be (n_samples, 1); the previous code closed the
        # parenthesis too early and passed 1 as the dtype, raising TypeError.
        X_b = np.c_[np.ones((X_i.shape[0], 1)), X_i]
        return X_b.dot(self.fit_theta)

In [6]:
# Load the Boston housing data and standardize every feature column with
# StandardScaler before fitting the regressors in the following cells.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins an older release.
boston = load_boston()
X = boston['data']
y = boston['target']
scaler = StandardScaler()
# X is rebound from the raw features to their standardized version.
X = scaler.fit_transform(X)

In [7]:
# Fit the single-sample SGD implementation with its default settings and
# print the intercept, the coefficients and the number of steps taken.
estimator = SGD_simple()
estimator.fit(X, y)
print(estimator.intercept_, estimator.coef_, estimator.loop)

[22.06454847] [-1.22834773  1.32106937 -0.30760342  0.84767901 -1.858552    1.30289628
 -0.94157253 -3.47282298  2.86632204 -1.76793421 -3.10390136  0.58553835
 -3.1115816 ] 6680


In [8]:
# Fit the full-batch gradient-descent implementation with its default
# settings and print the intercept, the coefficients and the step count.
estimator = BGD_simple()
estimator.fit(X, y)
print(estimator.intercept_, estimator.coef_, estimator.loop)

[22.53280632] [-0.92778511  1.08093304  0.13892795  0.68202186 -2.05637533  2.67458811
  0.01916323 -3.10418784  2.65724203 -2.07112486 -2.06042282  0.84923338
 -3.74343522] 4817


In [9]:
# Fit MBGD_simple with its default settings (batch_size=2) and print the
# intercept, the coefficients and the number of outer iterations.
estimator = MBGD_simple()
estimator.fit(X, y)
print(estimator.intercept_, estimator.coef_, estimator.loop)

[21.93305455] [-1.26915505  1.22502814  0.85800182 -0.05622181 -2.24101636  3.1469996
  0.55161056 -3.61495283  2.6051025  -1.78839978 -1.89578878  1.25159043
 -3.28913484] 10000


In [10]:
# Fit scikit-learn's SGDRegressor with default parameters on the same data
# for comparison; it exposes no `loop` attribute, so only the weights print.
estimator = SGDRegressor()
estimator.fit(X, y)
print(estimator.intercept_, estimator.coef_)

[22.55735009] [-0.83912539  0.96991    -0.09526012  0.71921819 -1.88419786  2.70849453
 -0.01848842 -3.05871429  2.03702462 -1.34393621 -1.99501806  0.81312472
 -3.64932993]


In [11]:
# Fit the closed-form least-squares solution on the same data; it serves as
# the reference the gradient-descent estimates above can be compared against.
estimator = LinearRegression_simple()
estimator.fit(X, y)
print(estimator.intercept_, estimator.coef_)

[22.53280632] [-0.92814606  1.08156863  0.1409      0.68173972 -2.05671827  2.67423017
  0.01946607 -3.10404426  2.66221764 -2.07678168 -2.06060666  0.84926842
 -3.74362713]
