In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

# Загрузка данных

In [3]:
class Loader:
    def __init__(self):
        data = pd.read_csv('bikes_rent.csv')

        data = data.dropna()
        
        features = data.drop(columns=['cnt'], axis=1)
        target = data['cnt']

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(features, target, test_size=0.3, random_state=12)

In [4]:
loader = Loader()
X_train, X_test, y_train, y_test = loader.X_train, loader.X_test, loader.y_train, loader.y_test

# Метрики

In [5]:
# mse
def mse(y_pred, y_targ):
    y_pred = np.array(y_pred)
    y_targ = np.array(y_targ)
    return np.mean(np.square(y_targ - y_pred))

# rmse
def rmse(y_pred, y_targ):
    y_pred = np.array(y_pred)
    y_targ = np.array(y_targ)
    return (np.sum((y_targ - y_pred) ** 2) / len(y_pred)) ** 0.5

# r2
def r2(y_pred, y_targ):
    y_pred = np.array(y_pred)
    y_targ = np.array(y_targ)
    
    sse = np.sum((y_pred - y_targ) ** 2)
    sst = np.sum((y_pred - np.mean(y_pred)) ** 2)
    return abs(1 - sse / sst)

# Модель

In [6]:
class RidgeRegression:
    def __init__(self, alpha=1.0):
        self.alpha = alpha
        self.coef_ = None

    def train(self, X, y):
        X = np.array(X)
        y = np.array(y)

        U, S, Vt = np.linalg.svd(X, full_matrices=False)

        S_reg = np.diag(S / (S ** 2 + self.alpha))

        self.coef_ = Vt.T @ S_reg @ U.T @ y

    def predict(self, X):
        return np.array(X) @ self.coef_

    @staticmethod
    def select_optimal_alpha(X, y, alphas):
        best_alpha = None
        best_error = float('inf')

        for alpha in alphas:
            model = RidgeRegression(alpha=alpha)
            model.train(X, y)
            predictions = model.predict(X)
            error = np.mean((y - predictions) ** 2)

            if error < best_error:
                best_error = error
                best_alpha = alpha

        return best_alpha

In [7]:
# подбор регулязационного параметра
alphas = [0.1, 1, 10, 100]
optimal_alpha = RidgeRegression.select_optimal_alpha(X_train, y_train, alphas)

In [8]:
%%time
# ручной алгоритм
model = RidgeRegression(alpha=optimal_alpha)
model.train(X_train, y_train)
pred = pd.Series(model.predict(X_test))
print(f'r2: {r2(pred, y_test)}')
print(f'mse: {mse(pred, y_test)}')
print(f'rmse: {rmse(pred, y_test)}')

r2: 0.7788606268120315
mse: 798058.4887337552
rmse: 893.341193908439
CPU times: total: 0 ns
Wall time: 2.99 ms


In [9]:
%%time
# библиотечный алгоритм
ridge_model = Ridge(alpha=optimal_alpha)
ridge_model.fit(X_train, y_train)
pred = ridge_model.predict(X_test)
print(f'r2: {r2(pred, y_test)}')
print(f'mse: {mse(pred, y_test)}')
print(f'rmse: {rmse(pred, y_test)}')

r2: 0.7800864825953512
mse: 729181.039387566
rmse: 853.9209795921201
CPU times: total: 15.6 ms
Wall time: 15.6 ms
