In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

print(plt.style.available)

plt.rcParams['figure.figsize'] = (12,8)
plt.rcParams['axes.titlesize'] = 14
plt.style.use('seaborn-deep')
sns.set_palette(palette='bright')

['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']


In [2]:
df_c = pd.read_excel('House_price_data.xlsx')

In [3]:
# independent variables
X = df_c.drop(['price'], axis=1)
# dependent variable
y = df_c['price']

In [4]:
df_c.dtypes

price                    float64
room_bed                 float64
room_bath                float64
ceil                     float64
sight                      int64
ceil_measure             float64
basement                 float64
living_measure15         float64
lot_measure15            float64
total_area               float64
years_old                float64
coast_1.0                  int64
furnished_1.0              int64
location_Bellevue          int64
location_Covington         int64
location_Federal Way       int64
location_Issaquah          int64
location_Kent              int64
location_Kirkland          int64
location_Maple Valley      int64
location_Others            int64
location_Redmond           int64
location_Renton            int64
location_Sammamish         int64
location_Seattle           int64
is_renovated_1             int64
condition_2.0              int64
condition_3.0              int64
condition_4.0              int64
condition_5.0              int64
quality_2 

## Train test split 

In [5]:
import statsmodels.api as sm
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)

### SVR 

In [7]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Create an instance of SVR
svr = SVR()

# Fit the SVR model to the training data
svr.fit(X_train, y_train)

# Make predictions on the training set
train_predictions_svr = svr.predict(X_train)

# Make predictions on the test set
test_predictions_svr = svr.predict(X_test)

# Calculate performance metrics for the training set
train_mse_svr = mean_squared_error(y_train, train_predictions_svr)
train_rmse_svr = np.sqrt(train_mse_svr)
train_mae_svr = mean_absolute_error(y_train, train_predictions_svr)
train_mape_svr = np.sqrt(mean_absolute_error(y_train, train_predictions_svr))
train_r2_svr = r2_score(y_train, train_predictions_svr)

# Calculate performance metrics for the test set
test_mse_svr = mean_squared_error(y_test, test_predictions_svr)
test_rmse_svr = np.sqrt(test_mse_svr)
test_mae_svr = mean_absolute_error(y_test, test_predictions_svr)
test_mape_svr = np.sqrt(mean_absolute_error(y_test, test_predictions_svr))
test_r2_svr = r2_score(y_test, test_predictions_svr)

# Print the performance metrics
print("SVR Model Performance:")
print("Train MSE:", round(train_mse_svr, 4))
print("Train RMSE:", round(train_rmse_svr, 4))
print("Train MAE:", round(train_mae_svr, 4))
print("Train MAPE:", round(train_mape_svr, 4))
print("Train R2:", round(train_r2_svr, 4))
print()
print("Test MSE:", round(test_mse_svr, 4))
print("Test RMSE:", round(test_rmse_svr, 4))
print("Test MAE:", round(test_mae_svr, 4))
print("Test MAPE:", round(test_mape_svr, 4))
print("Test R2:", round(test_r2_svr, 4))



SVR Model Performance:
Train MSE: 0.1636
Train RMSE: 0.4044
Train MAE: 0.2382
Train MAPE: 0.488
Train R2: 0.8359

Test MSE: 0.2027
Test RMSE: 0.4502
Test MAE: 0.2686
Test MAPE: 0.5183
Test R2: 0.7987


### Hyper tuned SVR  

In [8]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define the parameter grid for Grid Search
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'epsilon': [0.1, 0.01, 0.001]
}

# Create an instance of SVR
svr = SVR()

# Perform Grid Search to find the best hyperparameters
grid_search = GridSearchCV(estimator=svr, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and model
best_params_svr = grid_search.best_params_
best_model_svr = grid_search.best_estimator_

# Fit the SVR model with the best hyperparameters to the training data
best_model_svr.fit(X_train, y_train)

# Make predictions on the training set
train_predictions_svr = best_model_svr.predict(X_train)

# Make predictions on the test set
test_predictions_svr = best_model_svr.predict(X_test)

# Calculate performance metrics for the training set
train_mse_svr = mean_squared_error(y_train, train_predictions_svr)
train_rmse_svr = np.sqrt(train_mse_svr)
train_mae_svr = mean_absolute_error(y_train, train_predictions_svr)
train_mape_svr = np.sqrt(mean_absolute_error(y_train, train_predictions_svr))
train_r2_svr = r2_score(y_train, train_predictions_svr)

# Calculate performance metrics for the test set
test_mse_svr = mean_squared_error(y_test, test_predictions_svr)
test_rmse_svr = np.sqrt(test_mse_svr)
test_mae_svr = mean_absolute_error(y_test, test_predictions_svr)
test_mape_svr = np.sqrt(mean_absolute_error(y_test, test_predictions_svr))
test_r2_svr = r2_score(y_test, test_predictions_svr)

# Print the best hyperparameters and performance metrics
print("Best Hyperparameters (SVR):", best_params_svr)

# Print the performance metrics
print("SVR Model Performance:")
print("Train MSE:", round(train_mse_svr, 4))
print("Train RMSE:", round(train_rmse_svr, 4))
print("Train MAE:", round(train_mae_svr, 4))
print("Train MAPE:", round(train_mape_svr, 4))
print("Train R2:", round(train_r2_svr, 4))
print()
print("Test MSE:", round(test_mse_svr, 4))
print("Test RMSE:", round(test_rmse_svr, 4))
print("Test MAE:", round(test_mae_svr, 4))
print("Test MAPE:", round(test_mape_svr, 4))
print("Test R2:", round(test_r2_svr, 4))


Best Hyperparameters (SVR): {'C': 1, 'epsilon': 0.1, 'kernel': 'rbf'}
SVR Model Performance:
Train MSE: 0.1636
Train RMSE: 0.4044
Train MAE: 0.2382
Train MAPE: 0.488
Train R2: 0.8359

Test MSE: 0.2027
Test RMSE: 0.4502
Test MAE: 0.2686
Test MAPE: 0.5183
Test R2: 0.7987


## XGB REGRESSOR BASE MODEL

In [9]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Create an instance of XGBRegressor
xgb_model = XGBRegressor()

# Fit the model to the training data
xgb_model.fit(X_train, y_train)

# Make predictions on the train set
train_predictions_xgb = xgb_model.predict(X_train)

# Make predictions on the test set
test_predictions_xgb = xgb_model.predict(X_test)

# Calculate metrics for train set
train_mae_xgb = mean_absolute_error(y_train, train_predictions_xgb)
train_mape_xgb = np.sqrt(mean_absolute_error(y_train, train_predictions_xgb))
train_mse_xgb = mean_squared_error(y_train, train_predictions_xgb)
train_rmse_xgb = np.sqrt(train_mse_xgb)
train_r2_xgb = r2_score(y_train, train_predictions_xgb)

# Calculate metrics for test set
test_mae_xgb = mean_absolute_error(y_test, test_predictions_xgb)
test_mape_xgb = np.sqrt(mean_absolute_error(y_test, test_predictions_xgb))
test_mse_xgb = mean_squared_error(y_test, test_predictions_xgb)
test_rmse_xgb = np.sqrt(test_mse_xgb)
test_r2_xgb = r2_score(y_test, test_predictions_xgb)

# Print the train results
print("Train MAE (XGBRegressor):", round(train_mae_xgb, 4))
print("Train MAPE (XGBRegressor):", round(train_mape_xgb, 4))
print("Train MSE (XGBRegressor):", round(train_mse_xgb, 4))
print("Train RMSE (XGBRegressor):", round(train_rmse_xgb, 4))
print("Train R2 (XGBRegressor):", round(train_r2_xgb, 4))

# Print the test results
print("Test MAE (XGBRegressor):", round(test_mae_xgb, 4))
print("Test MAPE (XGBRegressor):", round(test_mape_xgb, 4))
print("Test MSE (XGBRegressor):", round(test_mse_xgb, 4))
print("Test RMSE (XGBRegressor):", round(test_rmse_xgb, 4))
print("Test R2 (XGBRegressor):", round(test_r2_xgb, 4))


Train MAE (XGBRegressor): 0.1865
Train MAPE (XGBRegressor): 0.4319
Train MSE (XGBRegressor): 0.0712
Train RMSE (XGBRegressor): 0.2668
Train R2 (XGBRegressor): 0.9286
Test MAE (XGBRegressor): 0.2715
Test MAPE (XGBRegressor): 0.5211
Test MSE (XGBRegressor): 0.1844
Test RMSE (XGBRegressor): 0.4294
Test R2 (XGBRegressor): 0.8169


## XGB REGRESSOR TUNED MODEL

In [10]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Create an instance of XGBRegressor
xgb_model = XGBRegressor()

# Define the parameter grid for Grid Search
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.1, 0.01, 0.001],
    'reg_alpha': [0.01, 0.1, 1],
    'reg_lambda': [0.01, 0.1, 1]
}

# Perform Grid Search to find the best hyperparameters
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and model
best_params_xgb = grid_search.best_params_
best_model_xgb = grid_search.best_estimator_

# Train the model with the best hyperparameters
best_model_xgb.fit(X_train, y_train)

# Make predictions on the train set
train_predictions_xgb = best_model_xgb.predict(X_train)

# Make predictions on the test set
test_predictions_xgb = best_model_xgb.predict(X_test)

# Calculate metrics for train set
train_mae_xgb = mean_absolute_error(y_train, train_predictions_xgb)
train_mape_xgb = np.sqrt(mean_absolute_error(y_train, train_predictions_xgb))
train_mse_xgb = mean_squared_error(y_train, train_predictions_xgb)
train_rmse_xgb = np.sqrt(train_mse_xgb)
train_r2_xgb = r2_score(y_train, train_predictions_xgb)

# Calculate metrics for test set
test_mae_xgb = mean_absolute_error(y_test, test_predictions_xgb)
test_mape_xgb = np.sqrt(mean_absolute_error(y_test, test_predictions_xgb))
test_mse_xgb = mean_squared_error(y_test, test_predictions_xgb)
test_rmse_xgb = np.sqrt(test_mse_xgb)
test_r2_xgb = r2_score(y_test, test_predictions_xgb)

# Print the best hyperparameters and metrics
print("Best Hyperparameters (XGBRegressor):", best_params_xgb)

# Print the train results
print("Train MAE (XGBRegressor):", round(train_mae_xgb, 4))
print("Train MAPE (XGBRegressor):", round(train_mape_xgb, 4))
print("Train MSE (XGBRegressor):", round(train_mse_xgb, 4))
print("Train RMSE (XGBRegressor):", round(train_rmse_xgb, 4))
print("Train R2 (XGBRegressor):", round(train_r2_xgb, 4))

# Print the test results
print("Test MAE (XGBRegressor):", round(test_mae_xgb, 4))
print("Test MAPE (XGBRegressor):", round(test_mape_xgb, 4))
print("Test MSE (XGBRegressor):", round(test_mse_xgb, 4))
print("Test RMSE (XGBRegressor):", round(test_rmse_xgb, 4))
print("Test R2 (XGBRegressor):", round(test_r2_xgb, 4))


Best Hyperparameters (XGBRegressor): {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 300, 'reg_alpha': 1, 'reg_lambda': 1}
Train MAE (XGBRegressor): 0.2173
Train MAPE (XGBRegressor): 0.4661
Train MSE (XGBRegressor): 0.0987
Train RMSE (XGBRegressor): 0.3142
Train R2 (XGBRegressor): 0.901
Test MAE (XGBRegressor): 0.2657
Test MAPE (XGBRegressor): 0.5155
Test MSE (XGBRegressor): 0.1752
Test RMSE (XGBRegressor): 0.4186
Test R2 (XGBRegressor): 0.826


## RF REGRESSOR TUNED MODEL

### RF TUNED MODEL ACTUAL VS PREDICTED SCATTER PLOT

## BOOSTING REGRESSOR BASE MODEL

## Bagging regressor hyper tuned model

In [11]:
from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Create the Bagging Regression model
bagging_model = BaggingRegressor(random_state=42)

# Define the parameter grid for Grid Search
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_samples': [0.5, 0.7, 0.9],
    'max_features': [0.5, 0.7, 0.9]
}

# Perform Grid Search to find the best hyperparameters
grid_search = GridSearchCV(estimator=bagging_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and model
best_params_bagging = grid_search.best_params_
best_model_bagging = grid_search.best_estimator_

# Train the model with the best hyperparameters
best_model_bagging.fit(X_train, y_train)

# Make predictions on the train set
train_predictions_bagging = best_model_bagging.predict(X_train)

# Make predictions on the test set
test_predictions_bagging = best_model_bagging.predict(X_test)

# Calculate metrics for train set
train_mae_bagging = mean_absolute_error(y_train, train_predictions_bagging)
train_mape_bagging = np.sqrt(mean_absolute_error(y_train, train_predictions_bagging))
train_mse_bagging = mean_squared_error(y_train, train_predictions_bagging)
train_rmse_bagging = np.sqrt(train_mse_bagging)
train_r2_bagging = r2_score(y_train, train_predictions_bagging)

# Calculate metrics for test set
test_mae_bagging = mean_absolute_error(y_test, test_predictions_bagging)
test_mape_bagging = np.sqrt(mean_absolute_error(y_test, test_predictions_bagging))
test_mse_bagging = mean_squared_error(y_test, test_predictions_bagging)
test_rmse_bagging = np.sqrt(test_mse_bagging)
test_r2_bagging = r2_score(y_test, test_predictions_bagging)

# Print the best hyperparameters and metrics
print("Best Hyperparameters (Bagging Regression):", best_params_bagging)

# Print the train results
print("Train MAE (Bagging Regression):", train_mae_bagging)
print("Train MAPE (Bagging Regression):", train_mape_bagging)
print("Train MSE (Bagging Regression):", train_mse_bagging)
print("Train RMSE (Bagging Regression):", train_rmse_bagging)
print("Train R2 (Bagging Regression):", train_r2_bagging)

# Print the test results
print("Test MAE (Bagging Regression):", test_mae_bagging)
print("Test MAPE (Bagging Regression):", test_mape_bagging)
print("Test MSE (Bagging Regression):", test_mse_bagging)
print("Test RMSE (Bagging Regression):", test_rmse_bagging)
print("Test R2 (Bagging Regression):", test_r2_bagging)


Best Hyperparameters (Bagging Regression): {'max_features': 0.9, 'max_samples': 0.7, 'n_estimators': 300}
Train MAE (Bagging Regression): 0.13898238015986641
Train MAPE (Bagging Regression): 0.3728034068512068
Train MSE (Bagging Regression): 0.05153041111456656
Train RMSE (Bagging Regression): 0.2270031081605857
Train R2 (Bagging Regression): 0.9483121375801444
Test MAE (Bagging Regression): 0.2723203394328679
Test MAPE (Bagging Regression): 0.521843213458667
Test MSE (Bagging Regression): 0.1951580649321646
Test RMSE (Bagging Regression): 0.4417669803552146
Test R2 (Bagging Regression): 0.8061584759849227


### Linear Regression 

In [12]:
from sklearn.linear_model import LinearRegression

In [13]:
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)
linear_regression_predictions_train = regression_model.predict(X_train)
linear_regression_predictions_test = regression_model.predict(X_test)
linear_regression_mae_train = mean_absolute_error(y_train, linear_regression_predictions_train)
linear_regression_mae_test = mean_absolute_error(y_test, linear_regression_predictions_test)
linear_regression_mse_train = mean_squared_error(y_train, linear_regression_predictions_train)
linear_regression_mse_test = mean_squared_error(y_test, linear_regression_predictions_test)
linear_regression_r2_train = r2_score(y_train, linear_regression_predictions_train)
linear_regression_r2_test = r2_score(y_test, linear_regression_predictions_test)
linear_regression_rmse_train = np.sqrt(mean_squared_error(y_train, linear_regression_predictions_train))
linear_regression_rmse_test = np.sqrt(mean_squared_error(y_test, linear_regression_predictions_test))
linear_regression_mape_train = np.mean(np.abs((y_train - linear_regression_predictions_train) / y_train)) * 100
linear_regression_mape_test = np.mean(np.abs((y_test - linear_regression_predictions_test) / y_test)) * 100
print("Linear Regression:")
print("Train MAE:", linear_regression_mae_train)
print("Test MAE:", linear_regression_mae_test)
print("Train MSE:", linear_regression_mse_train)
print("Test MSE:", linear_regression_mse_test)
print("Train RMSE:", linear_regression_rmse_train)
print("Test RMSE:", linear_regression_rmse_test)
print("Train R2:", linear_regression_r2_train)
print("Test R2:", linear_regression_r2_test)
print("Train MAPE:", linear_regression_mape_train)
print("Test MAPE:", linear_regression_mape_test)
print()

Linear Regression:
Train MAE: 0.3445648423093868
Test MAE: 0.33939630025042095
Train MSE: 0.2684411171563523
Test MSE: 0.26169021881243343
Train RMSE: 0.5181130351152655
Test RMSE: 0.5115566623673603
Train R2: 0.7307386603114516
Test R2: 0.7400751495867043
Train MAPE: 268.94823162407016
Test MAPE: 252.68368529351793



In [14]:
from sklearn.ensemble import StackingRegressor


In [15]:
base_models = [('Random Forest', best_model_rf), ('XGBoost', best_model_xgb), ('Bagging Regressor', best_model_bagging)]


NameError: name 'best_model_rf' is not defined

In [None]:
meta_model = best_model_xgb  # You can choose any of the base models as the meta-model

In [None]:
stacked_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)
stacked_model.fit(X_train, y_train)
train_predictions_stacked = stacked_model.predict(X_train)
test_predictions_stacked = stacked_model.predict(X_test)
train_mse_stacked = mean_squared_error(y_train, train_predictions_stacked)
train_rmse_stacked = np.sqrt(train_mse_stacked)
train_mae_stacked = mean_absolute_error(y_train, train_predictions_stacked)
train_mape_stacked = np.sqrt(mean_absolute_error(y_train, train_predictions_stacked))
train_r2_stacked = r2_score(y_train, train_predictions_stacked)


In [None]:
test_mse_stacked = mean_squared_error(y_test, test_predictions_stacked)
test_rmse_stacked = np.sqrt(test_mse_stacked)
test_mae_stacked = mean_absolute_error(y_test, test_predictions_stacked)
test_mpe_stacked = np.sqrt(mean_absolute_error(y_test, test_predictions_stacked))
test_r2_stacked = r2_score(y_test, test_predictions_stacked)
print("Stacked Model Performance:")
print("Train MSE:", train_mse_stacked)
print("Train RMSE:", train_rmse_stacked)
print("Train MAE:", train_mae_stacked)
print("Train MAPE:", train_mape_stacked)
print("Train R2:", train_r2_stacked)
print()
print("Test MSE:", test_mse_stacked)
print("Test RMSE:", test_rmse_stacked)
print("Test MAE:", test_mae_stacked)
print("Test MAPE:", test_mape_stacked)
print("Test R2:", test_r2_stacked)


In [16]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define the base models
base_models = [
    ('xgb', best_model_xgb),
    ('bagging', best_model_bagging),
    ('Linear Reg',regression_model),
    ('SVR',best_model_svr)
]

# Define the meta-model
meta_model = LinearRegression()

# Create the stacked model with regularization
stacked_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)

# Fit the stacked model to the training data
stacked_model.fit(X_train, y_train)

# Make predictions on the training set
train_predictions_stacked = stacked_model.predict(X_train)

# Make predictions on the test set
test_predictions_stacked = stacked_model.predict(X_test)

# Calculate performance metrics for the training set
train_mse_stacked = mean_squared_error(y_train, train_predictions_stacked)
train_rmse_stacked = np.sqrt(train_mse_stacked)
train_mae_stacked = mean_absolute_error(y_train, train_predictions_stacked)
train_mape_stacked = np.sqrt(mean_absolute_error(y_train, train_predictions_stacked))
train_r2_stacked = r2_score(y_train, train_predictions_stacked)


# Calculate performance metrics for the test set
test_mse_stacked = mean_squared_error(y_test, test_predictions_stacked)
test_rmse_stacked = np.sqrt(test_mse_stacked)
test_mae_stacked = mean_absolute_error(y_test, test_predictions_stacked)
test_mape_stacked = np.sqrt(mean_absolute_error(y_test, test_predictions_stacked))
test_r2_stacked = r2_score(y_test, test_predictions_stacked)

print("Stacked Model Performance:")
print("Train MSE:", train_mse_stacked)
print("Train RMSE:", train_rmse_stacked)
print("Train MAE:", train_mae_stacked)
print("Train MAPE:", train_mape_stacked)
print("Train R2:", train_r2_stacked)
print()
print("Test MSE:", test_mse_stacked)
print("Test RMSE:", test_rmse_stacked)
print("Test MAE:", test_mae_stacked)
print("Test MAPE:", test_mape_stacked)
print("Test R2:", test_r2_stacked)


Stacked Model Performance:
Train MSE: 0.0970057851058944
Train RMSE: 0.3114575173372677
Train MAE: 0.20903309991227517
Train MAPE: 0.4572013778547427
Train R2: 0.9026978134652184

Test MSE: 0.17063933777287021
Test RMSE: 0.41308514591167544
Test MAE: 0.26116582917543274
Test MAPE: 0.5110438622813435
Test R2: 0.8305117992314897


In [17]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define the base models
base_models = [
    ('xgb', best_model_xgb),
    ('bagging', best_model_bagging),
    ('Linear Reg',regression_model),
    ('SVR',best_model_svr)
]

# Define the meta-model
meta_model = SVR()

# Create the stacked model with regularization
stacked_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)

# Fit the stacked model to the training data
stacked_model.fit(X_train, y_train)

# Make predictions on the training set
train_predictions_stacked = stacked_model.predict(X_train)

# Make predictions on the test set
test_predictions_stacked = stacked_model.predict(X_test)

# Calculate performance metrics for the training set
train_mse_stacked = mean_squared_error(y_train, train_predictions_stacked)
train_rmse_stacked = np.sqrt(train_mse_stacked)
train_mae_stacked = mean_absolute_error(y_train, train_predictions_stacked)
train_mape_stacked = np.sqrt(mean_absolute_error(y_train, train_predictions_stacked))
train_r2_stacked = r2_score(y_train, train_predictions_stacked)


# Calculate performance metrics for the test set
test_mse_stacked = mean_squared_error(y_test, test_predictions_stacked)
test_rmse_stacked = np.sqrt(test_mse_stacked)
test_mae_stacked = mean_absolute_error(y_test, test_predictions_stacked)
test_mape_stacked = np.sqrt(mean_absolute_error(y_test, test_predictions_stacked))
test_r2_stacked = r2_score(y_test, test_predictions_stacked)

print("Stacked Model Performance:")
print("Train MSE:", train_mse_stacked)
print("Train RMSE:", train_rmse_stacked)
print("Train MAE:", train_mae_stacked)
print("Train MAPE:", train_mape_stacked)
print("Train R2:", train_r2_stacked)
print()
print("Test MSE:", test_mse_stacked)
print("Test RMSE:", test_rmse_stacked)
print("Test MAE:", test_mae_stacked)
print("Test MAPE:", test_mape_stacked)
print("Test R2:", test_r2_stacked)



Stacked Model Performance:
Train MSE: 0.10225078589283597
Train RMSE: 0.31976676796195685
Train MAE: 0.19228870063532272
Train MAPE: 0.43850735528075546
Train R2: 0.8974367865647201

Test MSE: 0.16978903667941503
Test RMSE: 0.4120546525394599
Test MAE: 0.25532792103400387
Test MAPE: 0.5052998328062299
Test R2: 0.8313563641736781


In [18]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define the base models
base_models = [
    ('xgb', best_model_xgb),
    ('bagging', best_model_bagging),
    ('Linear Reg',regression_model),
    ('SVR',best_model_svr)
]

# Define the meta-model
meta_model = XGBRegressor()

# Create the stacked model with regularization
stacked_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)

# Fit the stacked model to the training data
stacked_model.fit(X_train, y_train)

# Make predictions on the training set
train_predictions_stacked = stacked_model.predict(X_train)

# Make predictions on the test set
test_predictions_stacked = stacked_model.predict(X_test)

# Calculate performance metrics for the training set
train_mse_stacked = mean_squared_error(y_train, train_predictions_stacked)
train_rmse_stacked = np.sqrt(train_mse_stacked)
train_mae_stacked = mean_absolute_error(y_train, train_predictions_stacked)
train_mape_stacked = np.sqrt(mean_absolute_error(y_train, train_predictions_stacked))
train_r2_stacked = r2_score(y_train, train_predictions_stacked)


# Calculate performance metrics for the test set
test_mse_stacked = mean_squared_error(y_test, test_predictions_stacked)
test_rmse_stacked = np.sqrt(test_mse_stacked)
test_mae_stacked = mean_absolute_error(y_test, test_predictions_stacked)
test_mape_stacked = np.sqrt(mean_absolute_error(y_test, test_predictions_stacked))
test_r2_stacked = r2_score(y_test, test_predictions_stacked)

print("Stacked Model Performance:")
print("Train MSE:", train_mse_stacked)
print("Train RMSE:", train_rmse_stacked)
print("Train MAE:", train_mae_stacked)
print("Train MAPE:", train_mape_stacked)
print("Train R2:", train_r2_stacked)
print()
print("Test MSE:", test_mse_stacked)
print("Test RMSE:", test_rmse_stacked)
print("Test MAE:", test_mae_stacked)
print("Test MAPE:", test_mape_stacked)
print("Test R2:", test_r2_stacked)


Stacked Model Performance:
Train MSE: 0.1168760563001865
Train RMSE: 0.34187140316233894
Train MAE: 0.20547542108845576
Train MAPE: 0.45329396762857516
Train R2: 0.8827668286055719

Test MSE: 0.1846000992325944
Test RMSE: 0.42965113665926036
Test MAE: 0.26710962571375507
Test MAPE: 0.5168264947869401
Test R2: 0.8166452173984275


In [19]:


# Define the base models
base_models = [
    ('xgb', best_model_xgb),
    ('bagging', best_model_bagging),
    ('Linear Reg',regression_model),
    ('SVR',best_model_svr)
]

# Define the meta-model
meta_model = BaggingRegressor()

# Create the stacked model with regularization
stacked_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)

# Fit the stacked model to the training data
stacked_model.fit(X_train, y_train)

# Make predictions on the training set
train_predictions_stacked = stacked_model.predict(X_train)

# Make predictions on the test set
test_predictions_stacked = stacked_model.predict(X_test)

# Calculate performance metrics for the training set
train_mse_stacked = mean_squared_error(y_train, train_predictions_stacked)
train_rmse_stacked = np.sqrt(train_mse_stacked)
train_mae_stacked = mean_absolute_error(y_train, train_predictions_stacked)
train_mape_stacked = np.sqrt(mean_absolute_error(y_train, train_predictions_stacked))
train_r2_stacked = r2_score(y_train, train_predictions_stacked)


# Calculate performance metrics for the test set
test_mse_stacked = mean_squared_error(y_test, test_predictions_stacked)
test_rmse_stacked = np.sqrt(test_mse_stacked)
test_mae_stacked = mean_absolute_error(y_test, test_predictions_stacked)
test_mape_stacked = np.sqrt(mean_absolute_error(y_test, test_predictions_stacked))
test_r2_stacked = r2_score(y_test, test_predictions_stacked)

print("Stacked Model Performance:")
print("Train MSE:", train_mse_stacked)
print("Train RMSE:", train_rmse_stacked)
print("Train MAE:", train_mae_stacked)
print("Train MAPE:", train_mape_stacked)
print("Train R2:", train_r2_stacked)
print()
print("Test MSE:", test_mse_stacked)
print("Test RMSE:", test_rmse_stacked)
print("Test MAE:", test_mae_stacked)
print("Test MAPE:", test_mape_stacked)
print("Test R2:", test_r2_stacked)


Stacked Model Performance:
Train MSE: 0.12574495898825724
Train RMSE: 0.3546053566829712
Train MAE: 0.22358221713016008
Train MAPE: 0.4728448129462351
Train R2: 0.8738708269622528

Test MSE: 0.19717625917667253
Test RMSE: 0.44404533459622403
Test MAE: 0.2813481087940218
Test MAPE: 0.5304225756828435
Test R2: 0.8041538965264727
