In [222]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes

In [154]:
import sys
import os

# Add Folder Z (parent directory) to sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

In [4]:
data = load_diabetes()

In [13]:
X = data.data
y = data.target

In [258]:
model = Ridge(alpha=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print(model.intercept_)
print(model.coef_)
print('-' * 20)
print(f'MSE: {mean_squared_error(y_test, y_pred)}')

152.15977674264442
[  30.08027851  -56.94656112  272.40309751  171.47193794    2.37245896
  -21.02240868 -147.68188837  123.94359636  220.75763022  122.75923171]
--------------------
MSE: 3553.204135737771


In [269]:
if 'utils.MyRegression' in sys.modules:
    del sys.modules['utils.MyRegression']

# Now re-import the class
from utils.MyRegression import MyLinearRegression

In [280]:
my_model = MyLinearRegression(learning_rate=0.01, l2_ratio=1, early_stopping_rounds=100000, verbose=10000, )

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
my_model.fit(X_train, y_train)
y_pred = my_model.predict(X_test)

print(my_model.coef_)
print('-' * 20)
print(f'MSE: {mean_squared_error(y_test, y_pred)}')

Iteration: 0, MSE: 14535.162071068571
Iteration: 10000, MSE: 2913.206895170198
Iteration: 20000, MSE: 2913.206895170198
Iteration: 30000, MSE: 2913.206895170198
Iteration: 40000, MSE: 2913.206895170198
Iteration: 50000, MSE: 2913.206895170198
Iteration: 60000, MSE: 2913.206895170198
Iteration: 70000, MSE: 2913.206895170198
Iteration: 80000, MSE: 2913.206895170198
Iteration: 90000, MSE: 2913.206895170198
[ 1.52426811e+02  3.23918823e-01  1.15284511e-01  1.05642826e+00
  7.58249838e-01  3.54078590e-01  3.11363299e-01 -7.61133936e-01
  8.22483824e-01  9.90790347e-01  7.43452213e-01]
--------------------
MSE: 6282.781898169247


In [263]:
my_model.best_iteration

170

In [239]:
print([f"{coef:.6f}" for coef in my_model.coef_])

['50.808853', '0.303603', '0.138292', '1.057717', '0.759856', '0.298220', '0.264714', '-0.803350', '0.837406', '0.980922', '0.809666']


In [274]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge, SGDRegressor
import matplotlib.pyplot as plt

In [289]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=123)

# Обучение пользовательской модели
my_model = MyLinearRegression(l1_ratio=1, learning_rate=0.01, verbose=100)
my_model.fit(X_train, y_train)
y_pred = my_model.predict(X_test)

print(f'Custom Model Coefficients: {my_model.coef_}')
print('-' * 20)
print(f'Custom Model MSE: {mean_squared_error(y_test, y_pred)}')

# Обучение Ridge-регрессии из scikit-learn
ridge_model = Ridge(alpha=1, fit_intercept=True)
ridge_model.fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)
print(f'Scikit-learn Ridge MSE: {mean_squared_error(y_test, y_pred_ridge)}')

ridge_model = Lasso(alpha=1, fit_intercept=True)
ridge_model.fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)
print(f'Scikit-learn Lasso MSE: {mean_squared_error(y_test, y_pred_ridge)}')

ridge_model = LinearRegression()
ridge_model.fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)
print(f'Scikit-learn Linear Regression MSE: {mean_squared_error(y_test, y_pred_ridge)}')

Iteration: 0, MSE: 14537.805404441848
Iteration: 100, MSE: 3164.746753055705
Iteration: 200, MSE: 1782.0184136749126
Iteration: 300, MSE: 1593.6802789717544
Iteration: 400, MSE: 1566.92958635147
Iteration: 500, MSE: 1562.7171683662325
Iteration: 600, MSE: 1561.8356225421248
Iteration: 700, MSE: 1561.4977393651684
Iteration: 800, MSE: 1561.296162938811
Iteration: 900, MSE: 1561.122179036918
Iteration: 1000, MSE: 1560.9692927066087
Iteration: 1100, MSE: 1560.8372976137578
Iteration: 1200, MSE: 1560.7117702785258
Iteration: 1300, MSE: 1560.603849307717
Iteration: 1400, MSE: 1560.4964951314023
Iteration: 1500, MSE: 1560.399681832482
Iteration: 1600, MSE: 1560.3013128041007
Iteration: 1700, MSE: 1560.2113847929584
Iteration: 1800, MSE: 1560.1289042871751
Iteration: 1900, MSE: 1560.0528140981353
Iteration: 2000, MSE: 1559.9833313219428
Iteration: 2100, MSE: 1559.907666794054
Iteration: 2200, MSE: 1559.8391627340745
Iteration: 2300, MSE: 1559.7715336606452
Iteration: 2400, MSE: 1559.713701440

In [290]:
my_model.best_iteration

3507