In [39]:
import numpy as np

class GradientDescentRegressor:
    def __init__(self, learning_rate = .01, epochs = 1000, type = "batch", batch_size = 20, penalty = None, alpha = .1, l1_ratio = .5, random_state= None):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.type = type
        self.batch_size = batch_size
        self.penalty = penalty
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.weights = None
        np.random.seed(random_state)
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features + 1)
        bias = np.ones(n_samples)
        X = np.c_[bias, X]
        for epoch in range(self.epochs):
            if self.type == "batch":
                gradient = self._compute_gradient(X, y)
            elif self.type == "mini batch":
                indices = np.random.choice(n_samples, self.batch_size, replace=False)
                gradient = self._compute_gradient(X[indices], y[indices])
            elif self.type == "stochastic":
                index = np.random.choice(n_samples)
                gradient = self._compute_gradient(X[[index]], y[[index]])
            else:
                raise TypeError("only batch, mini batch and stochastic are supported")

            self.weights -= self.learning_rate * 1 / (2*n_samples) * gradient
        self.y_mean = np.mean(y)
            
            
    def _compute_gradient(self, X, y):
        gradient = -2 * X.T.dot(y) + 2 * X.T.dot(X).dot(self.weights)
        if self.penalty is not None:
            if self.penalty =="l1":
                penalty = self.alpha * np.sign(self.weights)
            elif self.penalty == "l2":
                penalty = 2 * self.alpha * self.weights
            elif self.penalty == "elastic net":
                l1_penalty = self.l1_ratio * self.alpha * np.sign(self.weights)
                l2_penalty = (1 - self.l1_ratio) * self.alpha * self.weights
                penalty = self.alpha * (l1_penalty + l2_penalty)
            else:
                raise ValueError("penalty can be None, l1, l2 or elastic net")
            
            gradient[1:] += penalty[1:]
        return gradient
    
    
    def predict(self, X):
        bias = np.ones(X.shape[0])
        X = np.c_[bias, X]
        return X.dot(self.weights)
    
    def score(self, X, y):
        y_pred = self.predict(X)
        sst = np.sum((y - self.y_mean)**2)
        sse = np.sum((y - y_pred)**2) 
        r_square = 1 - (sse / sst)
        return r_square       
                

In [40]:
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
X, y = make_regression(n_samples=1000, n_features=4, noise=50, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=0)
poly = PolynomialFeatures(degree=5)
X_train = poly.fit_transform(X_train)
X_test = poly.transform(X_test)

In [41]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train,y_train)
print(model.score(X_train,y_train))
print(model.score(X_test, y_test))

0.901975828548115
0.7847836703653526


In [44]:
model = GradientDescentRegressor(learning_rate=.01,epochs=10000,type = 'stochastic', batch_size= 20,penalty=None, random_state=0)
model.fit(X_train,y_train)
print(model.score(X_train,y_train))
print(model.score(X_test, y_test))

-981227260.2702419
-161337893.0906584


In [43]:
from sklearn.linear_model import SGDRegressor
model = SGDRegressor(eta0=.0001,max_iter=50000)
model.fit(X_train,y_train)
print(model.score(X_train,y_train))
print(model.score(X_test, y_test))

-77262.00945481856
-34554.609258156495
