In [4]:
import numpy as np
import pandas as pd
import random
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=1000, n_features=14, n_informative=10, noise=15, random_state=42)
X = pd.DataFrame(X)
y = pd.Series(y)
X.columns = [f'col_{col}' for col in X.columns]

In [5]:
class MyLineReg():
    def __init__(self, n_iter, learning_rate=0.1, metric=None, reg=None, l1_coef=None, l2_coef=None, sgd_sample=None, random_state=42):
        """ 
        metric: r2, mse, mae, mape, rmse
        """
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None
        self.metric = metric
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.sgd_sample = sgd_sample
        self.random_state = random_state

    def predict(self, X):
        X = X.copy()
        X.insert(0, "bias", 1)
        return X @ self.weights
    
    def get_loss_metric(self, y_pred, y):
        error = y_pred - y
        if (self.metric == None) | (self.metric == 'mse'):
            return np.sum(error ** 2) / len(y_pred)
        elif self.metric == 'rmse':
            return np.sqrt(np.sum(error ** 2) / len(y_pred))
        elif self.metric == 'mae':
            return np.sum(abs(error)) / len(y_pred)
        elif self.metric == 'mape':
            return 100 / len(y_pred) * np.sum(abs((error) / y))
        elif self.metric == 'r2':
            return 1 - np.sum((error) ** 2) / np.sum((y - np.mean(y)) ** 2)
        raise ValueError('Invalid metric. Supported metrics: mae, mse, rmse, mape, r2')
    
    def get_loss_regularization(self):
        if self.reg is None:
            return 0
        elif self.reg == 'l1':
            return self.l1_coef * np.sum(abs(self.weights))
        elif self.reg == 'l2':
            return self.l2_coef * np.sum(self.weights ** 2)
        elif self.reg == 'elasticnet':
            return self.l1_coef * np.sum(abs(self.weights)) + self.l2_coef * np.sum(self.weights ** 2)
        raise ValueError('Invalid regularization. Supported methods: l1, l2, elasticnet')

    def gr_mse(self, X, y_pred, y):
        error = y_pred - y
        if self.reg is None:
            return (2 / len(y_pred)) * X.T @ error
        if self.reg == 'l1':
            return (2 / len(y_pred)) * X.T @ error + self.l1_coef * (self.weights / abs(self.weights))
        elif self.reg == 'l2':
            return (2 / len(y_pred)) * X.T @ error + self.l2_coef * 2 * self.weights
        elif self.reg == 'elasticnet':
            return (2 / len(y_pred)) * X.T @ error + self.l1_coef * (self.weights / abs(self.weights)) + self.l2_coef * 2 * self.weights
        raise ValueError('Invalid regularization. Supported methods: l1, l2, elasticnet')

    def fit(self, X, y, verbose=False):
        random.seed(self.random_state)

        X = X.copy()
        y = y.copy()
        X.insert(0, "bias", 1)
        if self.weights is None:    
            self.weights = np.ones(X.shape[1])

        
        
        for epoch in range(1, self.n_iter + 1):
            X_batch, y_batch = self.get_batch(X, y, self.sgd_sample)

            step = self.learning_rate if type(self.learning_rate) in (int, float) else self.learning_rate(epoch)
            y_pred = X @ self.weights
            self.weights -= step * self.gr_mse(X, y_pred, y)

            self.loss_func_score = self.get_loss_metric(y_pred, y) + self.get_loss_regularization()
            if verbose and ((epoch % verbose == 0) | (epoch == 1)):
                if self.metric:
                    print(f"{epoch} | {self.metric} loss: {self.loss_func_score} | lr: {np.round(step, 5)}")
                else:
                    print(f"{epoch} | mse loss: {self.loss_func_score} | lr: {np.round(step, 5)}")
        self.best_score = self.loss_func_score

    def get_batch(self):
        

    def get_coef(self):
        return self.weights[1:]
    
    def get_best_score(self):
        return self.best_score

    def __str__(self) -> str:
        return f"{self.__class__.__name__} class: " + ", ".join("%s=%s" % item for item in vars(self).items())
    
    def __repr__(self) -> str:
        return f"{self.__class__.__name__} class: " + ", ".join("%s=%s" % item for item in vars(self).items())

In [6]:
reg = MyLineReg(100, metric='rmse', sgd_sample=5)

In [7]:
reg.fit(X, y, 2)

[654, 114, 25, 759, 281]
1 | rmse loss: 143.60045138779506 | lr: 0.1
[250, 228, 142, 754, 104]
2 | rmse loss: 115.58688750093506 | lr: 0.1
[692, 758, 913, 558, 89]
[604, 432, 32, 30, 95]
4 | rmse loss: 75.65422467693489 | lr: 0.1
[223, 238, 517, 616, 27]
[574, 203, 733, 665, 718]
6 | rmse loss: 50.54498882237576 | lr: 0.1
[558, 429, 225, 459, 603]
[284, 828, 890, 6, 777]
8 | rmse loss: 34.98148333517936 | lr: 0.1
[825, 163, 714, 432, 348]
[284, 159, 220, 980, 781]
10 | rmse loss: 25.646402741667007 | lr: 0.1
[344, 104, 94, 389, 99]
[367, 867, 352, 618, 270]
12 | rmse loss: 20.352461342132287 | lr: 0.1
[826, 44, 747, 470, 549]
[127, 996, 944, 387, 80]
14 | rmse loss: 17.556538287447793 | lr: 0.1
[565, 300, 849, 643, 633]
[906, 882, 370, 591, 196]
16 | rmse loss: 16.17355513928966 | lr: 0.1
[721, 71, 46, 677, 233]
[791, 296, 81, 875, 238]
18 | rmse loss: 15.518821561439578 | lr: 0.1
[887, 103, 389, 284, 464]
[650, 854, 373, 166, 379]
20 | rmse loss: 15.215453467201952 | lr: 0.1
[363, 214

In [243]:
reg.get_best_score()

11.997442020227814