In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import r2_score

In [2]:
RANDOM_STATE = 42
TEST_SIZE = 0.25

In [3]:
class LinearRegressor:
    
    def __init__(self, reg_coef = 0.0) -> None:
        self.lambda_ = reg_coef
        self.weights = None

    def fit(self, X_train: np.array, y_train: np.array) -> None:
        X_train = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
        self.weights = np.linalg.inv(X_train.T @ X_train + self.lambda_*np.ones(len(X_train[0]))) @ X_train.T @ y_train

    def predict(self, X_test: np.array) -> np.array:
        X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

        pred = X_test @ self.weights

        return pred

In [4]:
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE
    )

In [6]:
sc = StandardScaler()

sc.fit(X_train)

X_train = pd.DataFrame(sc.transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(sc.transform(X_test), columns=X_test.columns)

In [7]:
lr = LinearRegressor()
lr.fit(X_train, y_train)
pred = lr.predict(X_test)
r2_score(y_test, pred)

0.5910509795491352

In [8]:
lr = LinearRegressor(100)
lr.fit(X_train, y_train)
pred = lr.predict(X_test)
r2_score(y_test, pred)

0.5923211097894455