In [81]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=1000, n_features=14, n_informative=10, noise=15, random_state=42)
X = pd.DataFrame(X)
y = pd.Series(y)
X.columns = [f'col_{col}' for col in X.columns]

In [100]:
class MyLineReg():
    def __init__(self, n_iter, learning_rate, metric=None):
        """ 
        metric: r2, mse, mae, mape, rmse
        """
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None
        self.metric = metric

        self.metric_map = {
            'mae': self.mae,
            'rmse': self.rmse,
            'r2': self.r2,
            'mape': self.mape,
            'mse': self.mse
        }

        if metric is not None and metric not in self.metric_map:
            raise ValueError(f"Invalid metric name '{metric}'. Supported metrics are: {', '.join(self.metric_map.keys())}")

    def predict(self, X):
        X = X.copy()
        X.insert(0, "bias", 1)
        return X @ self.weights

    def mae(self, X, y): # mean absolute error
        y_pred = X @ self.weights
        return 1 / len(y_pred) * np.sum(abs(y - y_pred))
    
    def rmse(self, X, y): # root mean square error
        return self.mse(X, y) ** 0.5
    
    def r2(self, X, y): #determination coef
        y_pred = X @ self.weights
        return 1 - np.sum((y - y_pred) ** 2) / np.sum((y - np.mean(y)) ** 2)
    
    def mape(self, X, y): #mean abs percentage error
        y_pred = X @ self.weights
        return 100 / len(y_pred) * np.sum(abs((y - y_pred) / y))

    def mse(self, X, y):
        y_pred = X @ self.weights
        error = y_pred - y
        return np.sum((error) ** 2) / len(y_pred)
    
    def gr_mse(self, X, y):
        y_pred = X @ self.weights
        error = y_pred - y
        return (2 / len(y_pred)) * X.T @ (error)

    def fit(self, X, y, verbose=False):
        X = X.copy()
        y = y.copy()
        X.insert(0, "bias", 1)
        if self.weights is None:    
            self.weights = np.ones(X.shape[1])

        for epoch in range(1, self.n_iter + 1):
            self.weights -= self.learning_rate * self.gr_mse(X, y)

            if verbose and epoch == 1:
                if self.metric:
                    metric_func = self.metric_map.get(self.metric)
                    score = metric_func(X, y)
                    print(f"start | loss: {self.mse(X, y)} | {self.metric}: {score}")
                else:
                    print(f"start | loss: {self.mse(X, y)}")

            if verbose and (epoch % verbose == 0):
                if self.metric:
                    metric_func = self.metric_map.get(self.metric)
                    score = metric_func(X, y)
                    print(f"{epoch} | loss: {self.mse(X, y)} | {self.metric}: {score}")
                else:
                    print(f"{epoch} | loss: {self.mse(X, y)}")
        if self.metric:
            self.best_score = self.metric_map.get(self.metric)(X, y)

    def get_coef(self):
        return self.weights[1:]
    
    def get_best_score(self):
        return self.best_score

    def __str__(self) -> str:
        return f"{self.__class__.__name__} class: " + ", ".join("%s=%s" % item for item in vars(self).items())
    
    def __repr__(self) -> str:
        return f"{self.__class__.__name__} class: " + ", ".join("%s=%s" % item for item in vars(self).items())

In [106]:
reg = MyLineReg(300, 0.01, 'mae')

In [107]:
reg.fit(X, y, 30)

start | loss: 19822.27161599988 | mae: 111.55513679241831
30 | loss: 6432.620103943331 | mae: 63.47483056684259
60 | loss: 2149.3809319907755 | mae: 36.61608218040928
90 | loss: 832.0335922185088 | mae: 22.808617356694597
120 | loss: 419.33129445725154 | mae: 16.268075407583645
150 | loss: 287.6864822359632 | mae: 13.510161579138433
180 | loss: 244.9565262138996 | mae: 12.51958267597529
210 | loss: 230.8547104766008 | mae: 12.195035785301014
240 | loss: 226.12735266068472 | mae: 12.074258717797877
270 | loss: 224.51929610084292 | mae: 12.025147041427331
300 | loss: 223.96488598569044 | mae: 12.008966479550269


In [108]:
reg.get_best_score()

12.008966479550269