# Линейная и полиномиальная регрессия, Lasso, Ridge и ElasticNet

In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from pandas.api.types import is_numeric_dtype
import numpy as np
from pprint import pprint, pformat
import copy
from pathlib import Path

import joblib

from my_lib import *
from my_config import *

In [2]:
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 50) # Устанавливаем максимальное количество отображаемых столбцов равным 50
pd.set_option('display.max_rows', 20) # Устанавливаем максимальное количество отображаемых строк равным 20
pd.options.display.float_format = '{:.2f}'.format # Устанавливаем формат отображения чисел с двумя знаками после запятой
pd.options.mode.use_inf_as_na = True # Настройка режима Pandas для рассмотрения бесконечностей (inf) как пропущенных значений (NA)

# Конфигурация формата отображения графиков в виде векторных изображений
%config InlineBackend.figure_format = 'svg'

# для построения графиков внутри Jupyter Notebook
%matplotlib inline

In [3]:
from sklearn.linear_model import LinearRegression # для построения моделей линейной регрессии
from sklearn.preprocessing import PolynomialFeatures # для преобразования исходных признаков в полиномиальные, для построения моделей полиномиальной регрессии

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, Ridge, Lasso, ElasticNet

from sklearn.model_selection import GridSearchCV


In [4]:
# Load the saved parameters object from the results folder.
# NOTE(review): presumably written by the target-preparation step (judging by
# the filename constant) - confirm. joblib files are pickle-based, so only
# load files you trust.
params = joblib.load(Path(result_foler) / params_filename_after_PrepareTarget)

In [None]:
# Empty results table: a train/test pair of columns for every metric,
# followed by the textual `coef` and `params` columns.
models_scores = pd.DataFrame(
    columns=[
        *(
            f"{metric}_{split}"
            for metric in ("r2_score", "mse", "rmse", "mae")
            for split in ("train", "test")
        ),
        "coef",
        "params",
    ]
)

In [5]:
# Candidate alpha values used by the Ridge and Lasso grid searches below.
# A suitable alpha depends on the sample size.
# NOTE(review): the recorded results select alpha=990 for Ridge and alpha=10
# for Lasso, i.e. the two edges of this grid - the range may need widening.

start, stop, step = 10, 1000, 10  # first value, exclusive upper bound, spacing
float_range = np.arange(start, stop, step)  # integer-valued grid despite the name
print(float_range)

[ 10  20  30  40  50  60  70  80  90 100 110 120 130 140 150 160 170 180
 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360
 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540
 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720
 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 900
 910 920 930 940 950 960 970 980 990]


In [None]:
%%time
scalers = ['NoScaler', 'StandardScaler', 'MaxAbsScaler', 'RobustScaler']
#scalers = ['StandardScaler']
#scalers = ['MinMaxScaler']
#scalers = ['MaxAbsScaler']
#scalers = ['RobustScaler']

y_train =joblib.load(Path(result_foler, y_train_template_filename_after_split % ""))
y_test = joblib.load(Path(result_foler, y_test_template_filename_after_split % ""))

scalers = ['NoScaler']
for scaler_name in scalers:
    X_train =joblib.load(Path(result_foler, X_train_template_filename_after_split % scaler_name))
    X_test = joblib.load(Path(result_foler, X_test_template_filename_after_split % scaler_name))

    # -------------------- LinearRegression() --------------------
    lin_reg = LinearRegression()
    lin_reg_fit = lin_reg.fit(X_train, y_train)
    Y_pred_train_lin = lin_reg_fit.predict(X_train) # train
    Y_pred_test_lin = lin_reg_fit.predict(X_test) # test
    add_scores(models_scores, f"{scaler_name}_LinearRegression", 
               y_train, Y_pred_train_lin, y_test, Y_pred_test_lin, 
               pformat(lin_reg_fit.get_params()), f"{lin_reg_fit.coef_}, const={lin_reg_fit.intercept_}")
    #display(models_scores)

    # -------------------- PolynomialFeatures() --------------------
    for d in [2]:
        poly_features = PolynomialFeatures(degree=d) # степень до 7, долго
        X_train_poly = poly_features.fit_transform(X_train)
        X_test_poly = poly_features.transform(X_test)
        poly_reg = LinearRegression()
        poly_reg_fit = poly_reg.fit(X_train_poly, y_train)    
        
        Y_pred_train_poly = poly_reg_fit.predict(X_train_poly)
        Y_pred_test_poly = poly_reg_fit.predict(X_test_poly)    

        add_scores(models_scores, f"{scaler_name}_PolynomialFeatures(degree={d})", 
                   y_train, Y_pred_train_poly, y_test, Y_pred_test_poly, 
                   pformat(poly_reg_fit.get_params()), f"{poly_reg_fit.coef_}, const={poly_reg_fit.intercept_}")
        #display(models_scores)
        

    # -------------------- ElasticNet() --------------------
    param_grid = {
        'alpha': [0.00005, 0.0005, 0.001, 0.01, 0.05, 0.06, 0.08, 1, 2, 3],
        'l1_ratio': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    }
    elastic_net = GridSearchCV(ElasticNet(), param_grid, scoring='r2', cv=10)        
    res_elastic_net_model = elastic_net.fit(X_train, y_train)

    # построим регрессию гребневую L2 с оптимальным параметром регуляризации, который мы подобрали перебором
    model_reg_elastic = ElasticNet(max_iter=1000, **res_elastic_net_model.best_params_) # alpha — величина регуляризации

    # обучение
    model_reg_elastic.fit(X_train, y_train)
    y_pred_test_elastic = model_reg_elastic.predict(X_test)
    y_pred_train_elastic = model_reg_elastic.predict(X_train)

    add_scores(models_scores, f"{scaler_name}_ElasticNet", 
               y_train, y_pred_train_elastic, y_test, y_pred_test_elastic, 
               pformat(res_elastic_net_model.best_params_), f"{model_reg_elastic.coef_}, const={model_reg_elastic.intercept_}")
    #display(models_scores)
    
    # -------------------- Ridge() --------------------
    param_grid = {
        'alpha': float_range
    }

    ridge = GridSearchCV(Ridge(), param_grid, scoring='r2', cv=10)        
    res_ridge_model = ridge.fit(X_train, y_train)

    # построим регрессию гребневую L2 с оптимальным параметром регуляризации, который мы подобрали перебором
    model_ridge = Ridge(max_iter=1000, **res_ridge_model.best_params_) # alpha — величина регуляризации

    # обучение
    model_ridge.fit(X_train, y_train)
    y_pred_test_elastic = model_ridge.predict(X_test)
    y_pred_train_elastic = model_ridge.predict(X_train)

    add_scores(models_scores, f"{scaler_name}_Ridge", 
               y_train, y_pred_train_elastic, y_test, y_pred_test_elastic, 
               pformat(res_ridge_model.best_params_), f"{model_ridge.coef_}, const={model_ridge.intercept_}")
    #display(models_scores)    
    
    # -------------------- Lasso() --------------------
    param_grid = {
        'alpha': float_range
    }

    lasso = GridSearchCV(Lasso(), param_grid, scoring='r2', cv=10)        
    res_lasso_model = lasso.fit(X_train, y_train)

    # построим регрессию гребневую L2 с оптимальным параметром регуляризации, который мы подобрали перебором
    model_lasso = Lasso(max_iter=1000, **res_lasso_model.best_params_) # alpha — величина регуляризации

    # обучение
    model_lasso.fit(X_train, y_train)
    y_pred_test_elastic = model_lasso.predict(X_test)
    y_pred_train_elastic = model_lasso.predict(X_train)

    add_scores(models_scores, f"{scaler_name}_Lasso", 
               y_train, y_pred_train_elastic, y_test, y_pred_test_elastic, 
               pformat(res_lasso_model.best_params_), f"{model_lasso.coef_}, const={model_lasso.intercept_}")

    display(models_scores)

Unnamed: 0,r2_score_train,r2_score_test,mse_train,mse_test,rmse_train,rmse_test,mae_train,mae_test,coef,params
NoScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-2.95775635e-01 -1.36629698e-05 -7.72765286e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=2),0.003853,0.000908,499348.093104,500846.440837,706.645663,707.705052,567.146356,567.867477,[-1.40996365e-03 1.20706909e+02 -1.13415401e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=3),0.010927,-0.015099,495802.347018,508870.843607,704.132336,713.351837,564.720963,571.704796,[ 29.89222004 -0.26230759 -12.15680861 ... 1...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_ElasticNet,0.002159,0.001856,500197.20226,500370.966523,707.24621,707.369045,567.756671,567.691477,[-2.94968404e-01 -1.29094741e-05 -7.28412455e-...,"{'alpha': 0.08, 'l1_ratio': 0.5}"
NoScaler_Ridge,0.002166,0.001837,500193.980641,500380.403497,707.243933,707.375716,567.745163,567.689678,[-2.95697774e-01 -1.39988976e-05 -7.69530454e-...,{'alpha': 990}
NoScaler_Lasso,0.001729,0.001653,500413.055334,500472.903446,707.398795,707.441095,567.942433,567.793794,[-2.42268144e-01 8.28590821e-05 -0.00000000e+...,{'alpha': 10}


Unnamed: 0,r2_score_train,r2_score_test,mse_train,mse_test,rmse_train,rmse_test,mae_train,mae_test,coef,params
NoScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-2.95775635e-01 -1.36629698e-05 -7.72765286e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=2),0.003853,0.000908,499348.093104,500846.440837,706.645663,707.705052,567.146356,567.867477,[-1.40996365e-03 1.20706909e+02 -1.13415401e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=3),0.010927,-0.015099,495802.347018,508870.843607,704.132336,713.351837,564.720963,571.704796,[ 29.89222004 -0.26230759 -12.15680861 ... 1...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_ElasticNet,0.002159,0.001856,500197.20226,500370.966523,707.24621,707.369045,567.756671,567.691477,[-2.94968404e-01 -1.29094741e-05 -7.28412455e-...,"{'alpha': 0.08, 'l1_ratio': 0.5}"
NoScaler_Ridge,0.002166,0.001837,500193.980641,500380.403497,707.243933,707.375716,567.745163,567.689678,[-2.95697774e-01 -1.39988976e-05 -7.69530454e-...,{'alpha': 990}
NoScaler_Lasso,0.001729,0.001653,500413.055334,500472.903446,707.398795,707.441095,567.942433,567.793794,[-2.42268144e-01 8.28590821e-05 -0.00000000e+...,{'alpha': 10}
StandardScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-3.99728896 -0.07956699 -1.12697186 -0.661788...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=2),0.003853,0.000907,499348.398218,500846.990304,706.645879,707.70544,567.147459,567.868853,[-1.33824138e+12 -3.67161077e+00 -7.19223708e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=3),0.015126,-0.011126,493697.456327,506879.208002,702.636077,711.954499,563.613853,570.786178,[ 9.88706151e+13 2.11904873e+12 -7.08280531e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_ElasticNet,0.002103,0.001908,500225.496583,500344.908166,707.266213,707.350626,567.795644,567.693859,[-2.85369867 0. -0.23643004 -0. ...,"{'alpha': 1, 'l1_ratio': 0.9}"


Unnamed: 0,r2_score_train,r2_score_test,mse_train,mse_test,rmse_train,rmse_test,mae_train,mae_test,coef,params
NoScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-2.95775635e-01 -1.36629698e-05 -7.72765286e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=2),0.003853,0.000908,499348.093104,500846.440837,706.645663,707.705052,567.146356,567.867477,[-1.40996365e-03 1.20706909e+02 -1.13415401e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=3),0.010927,-0.015099,495802.347018,508870.843607,704.132336,713.351837,564.720963,571.704796,[ 29.89222004 -0.26230759 -12.15680861 ... 1...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_ElasticNet,0.002159,0.001856,500197.20226,500370.966523,707.24621,707.369045,567.756671,567.691477,[-2.94968404e-01 -1.29094741e-05 -7.28412455e-...,"{'alpha': 0.08, 'l1_ratio': 0.5}"
NoScaler_Ridge,0.002166,0.001837,500193.980641,500380.403497,707.243933,707.375716,567.745163,567.689678,[-2.95697774e-01 -1.39988976e-05 -7.69530454e-...,{'alpha': 990}
NoScaler_Lasso,0.001729,0.001653,500413.055334,500472.903446,707.398795,707.441095,567.942433,567.793794,[-2.42268144e-01 8.28590821e-05 -0.00000000e+...,{'alpha': 10}
StandardScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-3.99728896 -0.07956699 -1.12697186 -0.661788...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=2),0.003853,0.000907,499348.398218,500846.990304,706.645879,707.70544,567.147459,567.868853,[-1.33824138e+12 -3.67161077e+00 -7.19223708e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=3),0.015126,-0.011126,493697.456327,506879.208002,702.636077,711.954499,563.613853,570.786178,[ 9.88706151e+13 2.11904873e+12 -7.08280531e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_ElasticNet,0.002103,0.001908,500225.496583,500344.908166,707.266213,707.350626,567.795644,567.693859,[-2.85369867 0. -0.23643004 -0. ...,"{'alpha': 1, 'l1_ratio': 0.9}"


Unnamed: 0,r2_score_train,r2_score_test,mse_train,mse_test,rmse_train,rmse_test,mae_train,mae_test,coef,params
NoScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-2.95775635e-01 -1.36629698e-05 -7.72765286e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=2),0.003853,0.000908,499348.093104,500846.440837,706.645663,707.705052,567.146356,567.867477,[-1.40996365e-03 1.20706909e+02 -1.13415401e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=3),0.010927,-0.015099,495802.347018,508870.843607,704.132336,713.351837,564.720963,571.704796,[ 29.89222004 -0.26230759 -12.15680861 ... 1...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_ElasticNet,0.002159,0.001856,500197.20226,500370.966523,707.24621,707.369045,567.756671,567.691477,[-2.94968404e-01 -1.29094741e-05 -7.28412455e-...,"{'alpha': 0.08, 'l1_ratio': 0.5}"
NoScaler_Ridge,0.002166,0.001837,500193.980641,500380.403497,707.243933,707.375716,567.745163,567.689678,[-2.95697774e-01 -1.39988976e-05 -7.69530454e-...,{'alpha': 990}
NoScaler_Lasso,0.001729,0.001653,500413.055334,500472.903446,707.398795,707.441095,567.942433,567.793794,[-2.42268144e-01 8.28590821e-05 -0.00000000e+...,{'alpha': 10}
StandardScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-3.99728896 -0.07956699 -1.12697186 -0.661788...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=2),0.003853,0.000907,499348.398218,500846.990304,706.645879,707.70544,567.147459,567.868853,[-1.33824138e+12 -3.67161077e+00 -7.19223708e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=3),0.015126,-0.011126,493697.456327,506879.208002,702.636077,711.954499,563.613853,570.786178,[ 9.88706151e+13 2.11904873e+12 -7.08280531e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_ElasticNet,0.002103,0.001908,500225.496583,500344.908166,707.266213,707.350626,567.795644,567.693859,[-2.85369867 0. -0.23643004 -0. ...,"{'alpha': 1, 'l1_ratio': 0.9}"


Unnamed: 0,r2_score_train,r2_score_test,mse_train,mse_test,rmse_train,rmse_test,mae_train,mae_test,coef,params
NoScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-2.95775635e-01 -1.36629698e-05 -7.72765286e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=2),0.003853,0.000908,499348.093104,500846.440837,706.645663,707.705052,567.146356,567.867477,[-1.40996365e-03 1.20706909e+02 -1.13415401e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=3),0.010927,-0.015099,495802.347018,508870.843607,704.132336,713.351837,564.720963,571.704796,[ 29.89222004 -0.26230759 -12.15680861 ... 1...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_ElasticNet,0.002159,0.001856,500197.20226,500370.966523,707.24621,707.369045,567.756671,567.691477,[-2.94968404e-01 -1.29094741e-05 -7.28412455e-...,"{'alpha': 0.08, 'l1_ratio': 0.5}"
NoScaler_Ridge,0.002166,0.001837,500193.980641,500380.403497,707.243933,707.375716,567.745163,567.689678,[-2.95697774e-01 -1.39988976e-05 -7.69530454e-...,{'alpha': 990}
NoScaler_Lasso,0.001729,0.001653,500413.055334,500472.903446,707.398795,707.441095,567.942433,567.793794,[-2.42268144e-01 8.28590821e-05 -0.00000000e+...,{'alpha': 10}
StandardScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-3.99728896 -0.07956699 -1.12697186 -0.661788...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=2),0.003853,0.000907,499348.398218,500846.990304,706.645879,707.70544,567.147459,567.868853,[-1.33824138e+12 -3.67161077e+00 -7.19223708e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=3),0.015126,-0.011126,493697.456327,506879.208002,702.636077,711.954499,563.613853,570.786178,[ 9.88706151e+13 2.11904873e+12 -7.08280531e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_ElasticNet,0.002103,0.001908,500225.496583,500344.908166,707.266213,707.350626,567.795644,567.693859,[-2.85369867 0. -0.23643004 -0. ...,"{'alpha': 1, 'l1_ratio': 0.9}"


CPU times: user 8h 40min 59s, sys: 2h 5min 11s, total: 10h 46min 11s
Wall time: 1h 11min 56s


In [7]:
# Show the full score table collected by the training cell above.
display(models_scores)

Unnamed: 0,r2_score_train,r2_score_test,mse_train,mse_test,rmse_train,rmse_test,mae_train,mae_test,coef,params
NoScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-2.95775635e-01 -1.36629698e-05 -7.72765286e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=2),0.003853,0.000908,499348.093104,500846.440837,706.645663,707.705052,567.146356,567.867477,[-1.40996365e-03 1.20706909e+02 -1.13415401e-...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_PolynomialFeatures(degree=3),0.010927,-0.015099,495802.347018,508870.843607,704.132336,713.351837,564.720963,571.704796,[ 29.89222004 -0.26230759 -12.15680861 ... 1...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
NoScaler_ElasticNet,0.002159,0.001856,500197.20226,500370.966523,707.24621,707.369045,567.756671,567.691477,[-2.94968404e-01 -1.29094741e-05 -7.28412455e-...,"{'alpha': 0.08, 'l1_ratio': 0.5}"
NoScaler_Ridge,0.002166,0.001837,500193.980641,500380.403497,707.243933,707.375716,567.745163,567.689678,[-2.95697774e-01 -1.39988976e-05 -7.69530454e-...,{'alpha': 990}
NoScaler_Lasso,0.001729,0.001653,500413.055334,500472.903446,707.398795,707.441095,567.942433,567.793794,[-2.42268144e-01 8.28590821e-05 -0.00000000e+...,{'alpha': 10}
StandardScaler_LinearRegression,0.002166,0.001835,500193.941475,500381.728489,707.243905,707.376652,567.743943,567.689547,[-3.99728896 -0.07956699 -1.12697186 -0.661788...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=2),0.003853,0.000907,499348.398218,500846.990304,706.645879,707.70544,567.147459,567.868853,[-1.33824138e+12 -3.67161077e+00 -7.19223708e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_PolynomialFeatures(degree=3),0.015126,-0.011126,493697.456327,506879.208002,702.636077,711.954499,563.613853,570.786178,[ 9.88706151e+13 2.11904873e+12 -7.08280531e+...,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
StandardScaler_ElasticNet,0.002103,0.001908,500225.496583,500344.908166,707.266213,707.350626,567.795644,567.693859,[-2.85369867 0. -0.23643004 -0. ...,"{'alpha': 1, 'l1_ratio': 0.9}"


In [8]:
# Print the stored coefficient text of every scored model, one label per entry.
print("Коэффициенты моделей:")
for model_name in models_scores.index:
    coef_text = models_scores.loc[model_name, "coef"]
    print(f'{model_name}: {coef_text}')

Коэффициенты моделей:
NoScaler_LinearRegression: [-2.95775635e-01 -1.36629698e-05 -7.72765286e-01 -5.94775651e-01
 -6.50742008e-02 -2.89393616e+00  3.26861851e+01  3.70497966e-01
  2.79125809e-03 -6.45158103e-01  1.59024802e+00 -4.26468420e-01
 -4.54424218e+00  1.13078296e+00 -7.18117523e-02 -1.93179347e+00
  3.56595109e+00  6.04841032e+00  7.83203681e-01  2.00492472e+00
 -3.22205547e+00 -1.61973621e+00 -5.45055937e-01 -6.53369773e+00
 -9.76220798e-01  5.06620339e+00], const=10171.739459929442
NoScaler_PolynomialFeatures(degree=2): [-1.40996365e-03  1.20706909e+02 -1.13415401e-01  1.19641583e+03
  5.73868607e+02  1.03509039e+02 -3.17820118e+02 -5.14838480e+03
  1.45852010e+02 -8.64604773e+00 -1.18151842e+03 -5.25065539e+01
  6.08767022e+01 -1.96145201e+04  1.74547691e+02 -5.11618164e+01
  3.12324354e+02  3.00873392e+03 -1.30155583e+03  7.01446394e+02
  1.24183672e+03 -4.36757302e+02  1.43206373e+03  3.49696378e+02
 -2.42571706e+03  5.78859548e+02  1.27022580e+03 -1.49232531e-03
 -1.146

In [9]:
# Print the stored (hyper)parameter text of every scored model.
print("Гиперпараметры моделей:")
for model_name in models_scores.index:
    params_text = models_scores.loc[model_name, "params"]
    print(f'{model_name}: {params_text}')

Гиперпараметры моделей:
NoScaler_LinearRegression: {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
NoScaler_PolynomialFeatures(degree=2): {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
NoScaler_PolynomialFeatures(degree=3): {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
NoScaler_ElasticNet: {'alpha': 0.08, 'l1_ratio': 0.5}
NoScaler_Ridge: {'alpha': 990}
NoScaler_Lasso: {'alpha': 10}
StandardScaler_LinearRegression: {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
StandardScaler_PolynomialFeatures(degree=2): {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
StandardScaler_PolynomialFeatures(degree=3): {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
StandardScaler_ElasticNet: {'alpha': 1, 'l1_ratio': 0.9}
StandardScaler_Ridge: {'alpha': 990}
StandardScaler_Lasso: {'alpha': 10}
MinMaxScaler_LinearRegression: {'copy_X': True, 'fit_

In [10]:
# Print train/test r2 for every scored model as an aligned text table
# (label padded to 45 characters, scores rounded to 8 decimals).
print("r2_score моделей:")
for model_name in models_scores.index:
    scores_row = models_scores.loc[model_name]
    r2_train = round(scores_row["r2_score_train"], 8)
    r2_test = round(scores_row["r2_score_test"], 8)
    print(f'{model_name:45}| train: {r2_train:10}  | test: {r2_test:10}')

r2_score моделей:
NoScaler_LinearRegression                    | train: 0.00216595  | test: 0.00183473
NoScaler_PolynomialFeatures(degree=2)        | train: 0.00385332  | test: 0.00090772
NoScaler_PolynomialFeatures(degree=3)        | train: 0.01092671  | test: -0.01509942
NoScaler_ElasticNet                          | train: 0.00215944  | test:  0.0018562
NoScaler_Ridge                               | train: 0.00216587  | test: 0.00183737
NoScaler_Lasso                               | train: 0.00172884  | test: 0.00165285
StandardScaler_LinearRegression              | train: 0.00216595  | test: 0.00183473
StandardScaler_PolynomialFeatures(degree=2)  | train: 0.00385272  | test: 0.00090662
StandardScaler_PolynomialFeatures(degree=3)  | train: 0.01512575  | test: -0.01112649
StandardScaler_ElasticNet                    | train:   0.002103  | test: 0.00190818
StandardScaler_Ridge                         | train: 0.00216591  | test: 0.00183613
StandardScaler_Lasso                         

В результате были построены модели:
  * линейная регрессия
  * полиномиальная регрессия с degree=2, 3
  * ElasticNet с подбором гиперпараметров
  * Ridge с подбором гиперпараметров
  * Lasso с подбором гиперпараметров

для нескольких наборов данных, отличающихся примененным алгоритмом нормализации данных - без нормализации, 'StandardScaler', 'MinMaxScaler', 'MaxAbsScaler', 'RobustScaler'.

Также проверялись разные подходы к подготовке набора данных:
  * с удалением столбцов с большой долей пропусков
  * с заполнением пропусков модой и с удалением строк, в которых есть пропуски
  * разные настройки OneHotEncoder


Результат был примерно один и тот же. В частности, метрика r2_score колебалась в диапазоне от 0.0016 до 0.0040, что крайне мало и показывает, что построенные модели непригодны для прогнозирования целевой функции.
Следовательно:
* либо методы регрессии не подходят для решения этой задачи
* либо где-то ошибка в коде
* либо в принципе неверный подход к подготовке данных и/или обучению моделей
* либо я неверно интерпретирую метрики

Предполагаю, что верна одна из двух последних причин. Нужна обратная связь.
