In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.ticker as mtick
plt.style.use('ggplot')
import seaborn as sns
pd.options.display.float_format = '{:,.0f}'.format
import dataframe_image as dfi
from scipy import stats
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV, ElasticNet, ElasticNetCV
lm = LinearRegression()
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
encoder = OneHotEncoder(sparse_output=False)
from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
#Import Imputeed Ames Data
Ames = pd.read_csv('Ames_HousePrice_Imputed.csv')

#Update with new Ames Data
Ames_Columns_Numeric = Ames[[
    '1stFlrSF', '2ndFlrSF', '3SsnPorch', 'BedroomAbvGr', 'BsmtFinSF1', 'BsmtFinSF2',
    'BsmtFullBath', 'BsmtHalfBath', 'BsmtUnfSF', 'EnclosedPorch', 'Fireplaces', 'FullBath',
    'GarageArea', 'GarageCars', 'GarageYrBlt', 'GrLivArea', 'HalfBath', 'KitchenAbvGr',
    'LotArea', 'LotFrontage', 'LowQualFinSF', 'MasVnrArea', 'MiscVal', 'MoSold',
    'OpenPorchSF', 'PoolArea', 'ScreenPorch', 'TotalBsmtSF', 'TotRmsAbvGrd', 'WoodDeckSF',
    'YearBuilt', 'YearRemodAdd', 'YrSold'
]]
Ames_Columns_Ordinal = Ames[[ 
    'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'BsmtQual',
    'Electrical', 'ExterCond', 'ExterQual', 'Fence', 'FireplaceQu', 'Functional',
    'GarageCond', 'GarageFinish', 'GarageQual', 'HeatingQC', 'KitchenQual',
    'LandSlope', 'LotShape', 'OverallCond', 'OverallQual', 'PavedDrive',
    'PoolQC', 'Utilities'
]]
Ames_Columns_Nominal = Ames[[ 
    'Alley', 'BldgType', 'CentralAir', 'Condition1', 'Condition2',
    'Exterior1st', 'Exterior2nd', 'Foundation', 'GarageType', 'Heating',
    'HouseStyle', 'LandContour', 'LotConfig', 'MasVnrType', 'MiscFeature',
    'MSSubClass', 'MSZoning', 'Neighborhood', 'RoofMatl', 'RoofStyle',
    'SaleCondition', 'SaleType', 'Street'
]].astype(str)

#Combination Table of Numeric & Ordinal Columns
Ames_Columns_Numeric_Ordinal = Ames[
    list(Ames_Columns_Numeric.columns) + list(Ames_Columns_Ordinal.columns)
]
#Combination Table of Nominal & Ordinal Columns
Ames_Columns_Nominal_Ordinal = Ames[
    list(Ames_Columns_Nominal.columns) + list(Ames_Columns_Ordinal.columns)
]

In [3]:
Y = Ames['SalePrice']
lm = LinearRegression()
encoder = OneHotEncoder(drop='first', sparse_output=False)

In [4]:
encoder_nominal = OneHotEncoder(drop='first', sparse_output=False)
encoder_ordinal = OneHotEncoder(drop='first', sparse_output=False)
Nominal_cols = Ames_Columns_Nominal.columns
Ordinal_cols = Ames_Columns_Ordinal.columns
Numeric_cols = Ames_Columns_Numeric.columns

# --- One-Hot Encode Nominal ---
Ames_Optimal_Nominal_One_Hot = encoder_nominal.fit_transform(Ames[Nominal_cols])
Ames_Optimal_Nominal_One_Hot_df = pd.DataFrame(
    Ames_Optimal_Nominal_One_Hot,
    columns=encoder_nominal.get_feature_names_out(Nominal_cols),
    index=Ames.index
)
#One-Hot Encode Ordinal
Ames_Optimal_Ordinal_One_Hot = encoder_ordinal.fit_transform(Ames[Ordinal_cols])
Ames_Optimal_Ordinal_One_Hot_df = pd.DataFrame(
    Ames_Optimal_Ordinal_One_Hot,
    columns=encoder_ordinal.get_feature_names_out(Ordinal_cols),
    index=Ames.index
)
#Merge Nominal & Ordinal Encodings
Ames_Optimal_Categorical_One_Hot = pd.merge(
    Ames_Optimal_Nominal_One_Hot_df,
    Ames_Optimal_Ordinal_One_Hot_df,
    left_index=True,
    right_index=True,
    how='left'
)
#Merge with Numeric Features
Ames_MLR_Optimal_Columns = pd.merge(
    Ames[Numeric_cols],
    Ames_Optimal_Categorical_One_Hot,
    left_index=True,
    right_index=True,
    how='left'
)

In [5]:
scaler_Y = StandardScaler()
scaler_X = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y.values.reshape(-1, 1)).flatten()
X_numeric_scaled = pd.DataFrame(
    scaler_X.fit_transform(Ames[Numeric_cols]),
    columns=Numeric_cols,
    index=Ames.index
)
X_scaled = pd.concat([X_numeric_scaled, Ames_Optimal_Categorical_One_Hot], axis=1)

In [6]:
kf = KFold(n_splits=5, shuffle=True, random_state=2)
pipelines = {
    'Ridge': Pipeline(steps=[('regressor', Ridge())]),
    'Lasso': Pipeline(steps=[('regressor', Lasso(max_iter=100))]),
    'ElasticNet': Pipeline(steps=[('regressor', ElasticNet(max_iter=10))])
}
cv_results = {}
for name, pipeline in pipelines.items():
    scores = cross_val_score(pipeline, X_scaled, Y_scaled, cv=kf, scoring='r2', n_jobs=-1)
    cv_results[name] = {
        'mean_R2': float(round(scores.mean(), 4)),
        'fold_R2': [float(round(s, 4)) for s in scores]
    }
print('Pipeline Cross_Val_Score Results (alpha=1)')
cv_results

Pipeline Cross_Val_Score Results (alpha=1)


{'Ridge': {'mean_R2': 0.9092,
  'fold_R2': [0.9119, 0.9331, 0.8414, 0.9273, 0.9323]},
 'Lasso': {'mean_R2': -0.0005,
  'fold_R2': [-0.0004, -0.0009, -0.001, -0.0001, -0.0001]},
 'ElasticNet': {'mean_R2': 0.2697,
  'fold_R2': [0.2789, 0.262, 0.285, 0.2611, 0.2614]}}

In [11]:
#Ridge Regression- Optimal Alpha via GridSearchCV
ridge = Ridge()
alpha_ridge_grid = np.linspace(1, 20, 100)

# Setup Grid Search for Ridge
ridge_grid = GridSearchCV(estimator=pipelines['Ridge'],
                          param_grid={'regressor__alpha': alpha_ridge_grid},
                          verbose=1)
ridge_grid.fit(X_scaled, Y_scaled)
 
#Best alpha & score
print('\nRidge Pipeline GridSearchCV Results\n')
ridge_best_alpha = ridge_grid.best_params_['regressor__alpha']
ridge_best_score = ridge_grid.best_score_
print(f"Best Alpha for Ridge: {ridge_best_alpha}")
print(f"Best CV Score: {round(ridge_best_score, 4)}")

Fitting 5 folds for each of 100 candidates, totalling 500 fits

Ridge Pipeline GridSearchCV Results

Best Alpha for Ridge: 2.5353535353535355
Best CV Score: 0.9169


In [12]:
#Ridge Regression- Optimal Alpha via RidgeCV
ridge = Ridge()
ridge_cv = RidgeCV(alphas=np.linspace(.1, 100, 100), cv=kf, scoring='r2')
ridge_cv.fit(X_scaled, Y_scaled)

print('RidgeCV Results\n')
alpha_opt_ridge = ridge_cv.alpha_
print(f"Optimal Ridge Alpha: {alpha_opt_ridge:.4f}")

#Fit Ridge with optimal alpha
ridge.set_params(alpha=alpha_opt_ridge)
ridge.fit(X_scaled, Y_scaled)
# Coefficients as a Series with feature names
ridge_coefs = pd.Series(ridge.coef_, index=X_scaled.columns)
ridge.score(X_scaled, Y_scaled)

#Scoring & Coefficients
cv_score_ridge = cross_val_score(ridge, X_scaled, Y_scaled, cv=kf, scoring='r2')
print(f"Mean R²: {cv_score_ridge.mean():.4f};  ", f"Fold R² scores: {', '.join([f'{s:.4f}' for s in cv_score_ridge])}")

ridge_coefs_formatted = ridge_coefs.sort_values(key=abs, ascending=False)
pd.set_option('display.float_format', lambda x: f'{x:.4f}')
print("\nTop 20 largest coefficients (by absolute value):")
print(ridge_coefs_formatted.head(20))

#Zero Coefficients
zero_coefs_ridge = ridge_coefs[ridge_coefs == 0].index.tolist()
print(f"\nNumber of features with zero coefficient: {len(zero_coefs_ridge)}")
print("Zero coefficient features:")
print(zero_coefs_ridge)

RidgeCV Results

Optimal Ridge Alpha: 6.1545
Mean R²: 0.9118;   Fold R² scores: 0.9182, 0.9333, 0.8431, 0.9276, 0.9369

Top 20 largest coefficients (by absolute value):
OverallQual_9            0.5830
Condition2_PosN         -0.5190
OverallQual_8            0.3788
Neighborhood_StoneBr     0.3442
RoofMatl_WdShngl         0.3421
Neighborhood_GrnHill     0.3394
Neighborhood_NoRidge     0.2685
Condition2_PosA          0.2590
OverallCond_2           -0.2449
OverallCond_8            0.2433
PoolQC_4                 0.2324
ExterQual_3              0.2239
GrLivArea                0.2065
OverallQual_3           -0.2054
Exterior1st_BrkFace      0.2049
OverallCond_1           -0.2018
BldgType_Twnhs          -0.1967
SaleCondition_Partial    0.1920
Neighborhood_Edwards    -0.1874
OverallCond_7            0.1858
dtype: float64

Number of features with zero coefficient: 0
Zero coefficient features:
[]


In [13]:
#Lasso Regression- Optimal Alpha via GridSearchCV
lasso = Lasso()
alpha_lasso_grid = np.linspace(.0001, .005, 100)
 
# Setup Grid Search for Ridge
lasso_grid = GridSearchCV(estimator=pipelines['Lasso'],
                          param_grid={'regressor__alpha': alpha_lasso_grid},
                          verbose=1)
lasso_grid.fit(X_scaled, Y_scaled)
print('\nLasso Pipeline GridSearchCV Results\n')

#Best alpha & score
lasso_best_alpha = lasso_grid.best_params_['regressor__alpha']
lasso_best_score = lasso_grid.best_score_
print(f"Best Alpha for lasso: {lasso_best_alpha:.6f}")
print(f"Best CV Score {round(lasso_best_score, 4)}")
lasso_cv_results_df = pd.DataFrame(lasso_grid.cv_results_)
lasso_best_row = lasso_cv_results_df.loc[lasso_cv_results_df['param_regressor__alpha'] == lasso_best_alpha]
lasso_fold_scores = [lasso_best_row[f'split{i}_test_score'].values[0] for i in range(5)]
lasso_results_df = pd.DataFrame({
    'Engineering': ['Pre'],
    'Model_Category': ['Penalized_MLR'],
	'Model_Scaling': ['Y, X_Numeric'],
	'Model': ['Lasso'],
    'Alpha': [lasso_best_alpha],
    'Mean_R²': [lasso_best_score],
    **{f'Fold_{i+1}_R²': [lasso_fold_scores[i]] for i in range(5)}
})

Fitting 5 folds for each of 100 candidates, totalling 500 fits

Lasso Pipeline GridSearchCV Results

Best Alpha for lasso: 0.000397
Best CV Score 0.9173


In [30]:
#Lasso Regression- Optimal Alpha via LassoCV
'''
lasso_cv = LassoCV(alphas=np.linspace(.0003, .0004, 100), cv=kf, n_jobs=-1, max_iter=5000)
lasso_cv.fit(X_scaled, Y_scaled)

print('LassoCV Results\n')
alpha_opt_lasso = lasso_cv.alpha_
print(f"Optimal Lasso Alpha: {alpha_opt_lasso:.10f}")

#Fit Lasso with optimal alpha
lasso = Lasso(alpha=alpha_opt_lasso, max_iter=5000)
lasso.fit(X_scaled, Y_scaled)

lasso_coefs = pd.Series(lasso.coef_, index=X_scaled.columns)
lasso_coefs_formatted = lasso_coefs.sort_values(key=abs, ascending=False)
pd.set_option('display.float_format', lambda x: f'{x:.10f}')

#Scores & Coefficients
cv_scores_lasso = cross_val_score(lasso, X_scaled, Y_scaled, cv=kf, scoring='r2')
print(f"\nMean R² Scores: {cv_scores_lasso.mean():.4f};  ", f"Fold R² scores: {', '.join([f'{s:.4f}' for s in cv_scores_lasso])}")

print("\nTop 20 largest coefficients:")
print(lasso_coefs_formatted.head(20))

#Zero Coefficients
zero_coefs_lasso = lasso_coefs[lasso_coefs == 0].index.tolist()
non_zero_coefs_lasso = lasso_coefs[~lasso_coefs.index.isin(zero_coefs_lasso)]
print(f"\nNumber of features with Non-Zero coefficient: {len(non_zero_coefs_lasso)}")
print(f"\nNumber of features with Zero coefficient: {len(zero_coefs_lasso)}")
print("Zero coefficient features:")
print(zero_coefs_lasso)
'''

LassoCV Results

Optimal Lasso Alpha: 0.0003454545

Mean R² Scores: 0.9097;   Fold R² scores: 0.9096, 0.9371, 0.8375, 0.9322, 0.9321

Top 20 largest coefficients:
Condition2_PosN         -1.5625979722
Neighborhood_GrnHill     1.0893454649
OverallQual_9            1.0167638329
OverallQual_8            0.6677477362
RoofMatl_WdShngl         0.5061729194
PoolQC_4                 0.5021957464
Neighborhood_StoneBr     0.4461727573
Neighborhood_NoRidge     0.3594875794
GrLivArea                0.3400547347
SaleCondition_Partial    0.2873529479
OverallQual_7            0.2824152690
OverallCond_1           -0.2821172896
OverallCond_8            0.2726662735
OverallCond_2           -0.2462571536
Neighborhood_Somerst     0.2455077772
ExterQual_3              0.2335892048
Neighborhood_NridgHt     0.2240534462
BldgType_Twnhs          -0.2205879726
Condition2_PosA          0.2126293264
OverallCond_7            0.2044201356
dtype: float64

Number of features with Non-Zero coefficient: 177

Number of 

In [58]:
#ElasticNet Regression- Optimal Alpha via GridSearchCV
l1_ratio = np.linspace(.01, 1, 100)
alpha_elastic_grid = np.linspace(.00001, .001, 10)

#Setup Grid Search for Ridge
elasticnet_grid = GridSearchCV(estimator=pipelines['ElasticNet'],
                               param_grid={'regressor__alpha': alpha_elastic_grid,
                                           'regressor__l1_ratio': l1_ratio},
                               verbose=1)
elasticnet_grid.fit(X_scaled, Y_scaled)


print('\nElasticNet Pipeline GridSearchCV Results\n')

#Best alpha & score
elasticnet_best_rho = elasticnet_grid.best_params_['regressor__l1_ratio']
elasticnet_best_alpha = elasticnet_grid.best_params_['regressor__alpha']
elasticnet_best_score = elasticnet_grid.best_score_
print(f"Best Rho for ElasticNet: {elasticnet_best_rho:.4f}")
print(f"Best Alpha for ElasticNet: {elasticnet_best_alpha:.6f}")
print(f"Best CV Score {round(elasticnet_best_score, 4)}")

Fitting 5 folds for each of 1000 candidates, totalling 5000 fits

ElasticNet Pipeline GridSearchCV Results

Best Rho for ElasticNet: 0.5100
Best Alpha for ElasticNet: 0.000780
Best CV Score 0.9146


In [48]:
#ElasticNet Regression- Optimal Alpha via ElasticNetCV
elastic_cv = ElasticNetCV(
    l1_ratio= np.linspace(.01, 1, 100),
    alphas=np.linspace(.00001, .001, 100),
    cv=5, max_iter=5000, n_jobs=-1
)
elastic_cv.fit(X_scaled, Y_scaled)

print('ElasticNetCV Results\n')
best_alpha_elastic = elastic_cv.alpha_
best_l1_elastic = elastic_cv.l1_ratio_
print(f"Optimal ElasticNet l1_ratio: {best_l1_elastic:.2f}")
print(f"Optimal ElasticNet Alpha: {best_alpha_elastic:.10f}")


#Build final ElasticNet model using best parameters
final_en = ElasticNet(alpha=best_alpha_elastic, l1_ratio=best_l1_elastic, max_iter=10000)
final_en.fit(X_scaled, Y_scaled)

#Scires
cv_scores_elasticnet = cross_val_score(final_en, X_scaled, Y_scaled, cv=kf, scoring='r2')
print(f"Mean CV R²: {cv_scores_elasticnet.mean():.4f};  ", f"Fold R² scores: {', '.join([f'{s:.4f}' for s in cv_scores_elasticnet])}\n")

#Coefficient Series
coef_elastic = pd.Series(final_en.coef_, index=X_scaled.columns)
zero_features_elastic = coef_elastic[coef_elastic == 0].index.tolist()
nonzero_features_elastic = coef_elastic[coef_elastic != 0]

top20_elastic = nonzero_features_elastic.reindex(nonzero_features_elastic.abs().sort_values(ascending=False).index).head(20)
pd.set_option('display.float_format', lambda x: f'{x:.4f}')
print("Top 20 largest non-zero coefficients (abs, 4 decimals):")
print(top20_elastic)

#Zero Coefficients
print(f"\nNumber of features with Non-Zero coefficient: {len(nonzero_features_elastic)}")
print(f"\nNumber of features with Zero coefficient: {len(zero_features_elastic)}")
print("Zero coefficient features:")
print(zero_features_elastic)

ElasticNetCV Results

Optimal ElasticNet l1_ratio: 0.33
Optimal ElasticNet Alpha: 0.0007600000
Mean CV R²: 0.9116;   Fold R² scores: 0.9156, 0.9357, 0.8400, 0.9312, 0.9356

Top 20 largest non-zero coefficients (abs, 4 decimals):
Condition2_PosN         -1.0358
OverallQual_9            0.8327
Neighborhood_GrnHill     0.6917
OverallQual_8            0.5363
RoofMatl_WdShngl         0.4595
Neighborhood_StoneBr     0.4057
PoolQC_4                 0.3821
Neighborhood_NoRidge     0.3171
GrLivArea                0.3084
Condition2_PosA          0.3024
SaleCondition_Partial    0.2724
OverallCond_8            0.2658
OverallCond_1           -0.2650
OverallCond_2           -0.2488
ExterQual_3              0.2440
BldgType_Twnhs          -0.2302
Neighborhood_Somerst     0.2063
Exterior1st_BrkFace      0.2051
Neighborhood_NridgHt     0.2002
OverallCond_7            0.1995
dtype: float64

Number of features with Non-Zero coefficient: 197

Number of features with Zero coefficient: 99
Zero coefficient fe

In [62]:
#ElasticNet.5 Regression where Rho = .5- Optimal Alpha via GridSearchCV
l1_ratio_5 = [.5]
alpha_elastic5_grid = np.linspace(.0007, .001, 100)
 
#Setup Grid Search for Ridge
elasticnet5_grid = GridSearchCV(estimator=pipelines['ElasticNet'],
                               param_grid={'regressor__alpha': alpha_elastic5_grid,
                                           'regressor__l1_ratio': l1_ratio_5},
                               verbose=1)
elasticnet5_grid.fit(X_scaled, Y_scaled)

print('\nElasticNet Pipeline GridSearchCV Results (Rho = .5)\n')

#Best alpha & score
elasticnet5_best_rho = elasticnet5_grid.best_params_['regressor__l1_ratio']
elasticnet5_best_alpha = elasticnet5_grid.best_params_['regressor__alpha']
elasticnet5_best_score = elasticnet5_grid.best_score_
print(f"Best Rho for ElasticNet: {elasticnet5_best_rho:.4f}")
print(f"Best Alpha for ElasticNet: {elasticnet5_best_alpha:.6f}")
print(f"Best CV Score {round(elasticnet5_best_score, 4)}")

Fitting 5 folds for each of 100 candidates, totalling 500 fits

ElasticNet Pipeline GridSearchCV Results (Rho = .5)

Best Rho for ElasticNet: 0.5000
Best Alpha for ElasticNet: 0.000797
Best CV Score 0.9146


In [60]:
#ElasticNetCV where Rho = .5
elastic5_cv = ElasticNetCV(
    l1_ratio=.5,
    alphas=np.linspace(.0001, .001, 100),
    cv=5,
    max_iter=5000,
    n_jobs=-1
)
elastic5_cv.fit(X_scaled, Y_scaled)

print('ElasticNetCV Results (Rho = .5)\n')
best_alpha_elastic5 = elastic5_cv.alpha_
best_l1_elastic5 = elastic5_cv.l1_ratio_
print(f"Optimal ElasticNet.5 l1_ratio: {best_l1_elastic5:.2f}")
print(f"Optimal ElasticNet.5 Alpha: {best_alpha_elastic5:.10f}")

#Build final ElasticNet model using best parameters
final_en = ElasticNet(alpha=best_alpha_elastic5, l1_ratio=best_l1_elastic5, max_iter=10000)
final_en.fit(X_scaled, Y_scaled)  #Fit final model

#Scores
cv_scores_elasticnet5 = cross_val_score(final_en, X_scaled, Y_scaled, cv=kf, scoring='r2')
print(f"Mean CV R²: {cv_scores_elasticnet5.mean():.4f};  ", f"Fold R² scores: {', '.join([f'{s:.4f}' for s in cv_scores_elasticnet5])}\n")

#Coefficients as a Series
coef_elastic5 = pd.Series(final_en.coef_, index=X_scaled.columns)

#Summary of coefficients
zero_features_elastic5 = coef_elastic5[coef_elastic5 == 0].index.tolist()
nonzero_features_elastic5 = coef_elastic5[coef_elastic5 != 0]

top20_elastic5 = nonzero_features_elastic5.reindex(nonzero_features_elastic5.abs().sort_values(ascending=False).index).head(20)
pd.set_option('display.float_format', lambda x: f'{x:.4f}')
print("Top 20 largest non-zero coefficients (abs, 4 decimals):")
print(top20_elastic5)

#Zero coefficients
print(f"\nNumber of features with Non-Zero coefficient: {len(nonzero_features_elastic5)}")
print(f"\nNumber of features with Zero coefficient: {len(zero_features_elastic5)}")
print("Zero coefficient features:")
print(zero_features_elastic5)

ElasticNetCV Results (Rho = .5)

Optimal ElasticNet.5 l1_ratio: 0.50
Optimal ElasticNet.5 Alpha: 0.0005727273
Mean CV R²: 0.9111;   Fold R² scores: 0.9141, 0.9362, 0.8389, 0.9317, 0.9347

Top 20 largest non-zero coefficients (abs, 4 decimals):
Condition2_PosN         -1.2084
OverallQual_9            0.9234
Neighborhood_GrnHill     0.8208
OverallQual_8            0.6103
RoofMatl_WdShngl         0.4799
PoolQC_4                 0.4281
Neighborhood_StoneBr     0.4229
GrLivArea                0.3401
Neighborhood_NoRidge     0.3352
Condition2_PosA          0.2897
SaleCondition_Partial    0.2784
OverallCond_1           -0.2753
OverallCond_8            0.2679
OverallQual_7            0.2483
OverallCond_2           -0.2476
ExterQual_3              0.2401
BldgType_Twnhs          -0.2278
Neighborhood_Somerst     0.2215
Neighborhood_NridgHt     0.2114
OverallCond_7            0.2011
dtype: float64

Number of features with Non-Zero coefficient: 191

Number of features with Zero coefficient: 105
Zer

In [63]:
#Set optimal hyperparameters
pipelines['Ridge'].set_params(regressor__alpha=ridge_best_alpha)
pipelines['Lasso'].set_params(regressor__alpha=lasso_best_alpha)
pipelines['ElasticNet'].set_params(regressor__alpha=elasticnet5_best_alpha, regressor__l1_ratio=0.5)

#Cross-validation
cv_results = {}
for name, pipeline in pipelines.items():
    scores = cross_val_score(pipeline, X_scaled, Y_scaled, cv=kf, scoring='r2', n_jobs=-1)
    cv_results[name] = {
        'mean_R2': float(round(scores.mean(), 4)),
        'fold_R2': [float(round(s, 4)) for s in scores]
    }
print('Pipeline Cross_Val_Score Results (Optimal Alpha)')
cv_results

Pipeline Cross_Val_Score Results (Optimal Alpha)


{'Ridge': {'mean_R2': 0.9111,
  'fold_R2': [0.9162, 0.934, 0.8422, 0.9279, 0.9352]},
 'Lasso': {'mean_R2': 0.9097,
  'fold_R2': [0.9102, 0.9369, 0.8364, 0.9321, 0.9328]},
 'ElasticNet': {'mean_R2': 0.9079,
  'fold_R2': [0.9115, 0.9347, 0.8312, 0.9284, 0.9338]}}

In [15]:
lasso_results_df

Unnamed: 0,Engineering,Model_Category,Model_Scaling,Model,Alpha,Mean_R²,Fold_1_R²,Fold_2_R²,Fold_3_R²,Fold_4_R²,Fold_5_R²
0,Pre,Penalized_MLR,"Y, X_Numeric",Lasso,0.0004,0.9173,0.8629,0.9276,0.9281,0.9357,0.9322


In [16]:
lasso_results_df.to_csv('Penalized_MLR_Scaling_Y_XNumeric.csv', index=False)