In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tabulate import tabulate
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

In [35]:
# Load the dataset using Pandas
file_path = r"E:\Deployment of House price\archive\boston.csv"
boston_df = pd.read_csv(file_path)

# Create a new DataFrame 'dataset' using the entire 'boston_df'
dataset = boston_df.copy()

X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [36]:
models = [
    ('Neural Network', Sequential([
        Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1)
    ])),
    ('Linear Regression', LinearRegression()),
    ('Ridge Regression', Ridge()),
    ('Lasso Regression', Lasso()),
    ('ElasticNet', ElasticNet()),
    ('Decision Tree', DecisionTreeRegressor()),
    ('Random Forest', RandomForestRegressor()),
    ('Support Vector Machine', SVR()),
    ('XGBoost', XGBRegressor()),
    ('LightGBM', LGBMRegressor()),
    ('K-Nearest Neighbors', KNeighborsRegressor())
]

In [37]:
results = []

for name, model in models:
    if name == 'Neural Network':
        model.compile(optimizer=Adam(lr=0.0001), loss='mean_squared_error', metrics=['mae', 'mse'])
        history = model.fit(X_train, y_train, epochs=2000, batch_size=128, validation_data=(X_test, y_test), verbose=0)

        evaluation = model.evaluate(X_test, y_test)
        y_pred = model.predict(X_test)
        r_squared = r2_score(y_test, y_pred)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        evaluation = (mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred))

    results.append([name, evaluation[0], evaluation[1]])

# Display results in a table
headers = ["Model", "MSE", "R-Squared"]
print(tabulate(results, headers=headers))

  super().__init__(name, **kwargs)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 880
[LightGBM] [Info] Number of data points in the train set: 354, number of used features: 13
[LightGBM] [Info] Start training from score 23.015819
Model                        MSE    R-Squared
----------------------  --------  -----------
Neural Network          12.376       2.27021
Linear Regression       21.5174      0.711226
Ridge Regression        21.5487      0.710807
Lasso Regression        26.5325      0.643922
ElasticNet              27.4901      0.63107
Decision Tree           19.2857      0.741177
Random Forest            9.79755     0.868512
Support Vector Machine  25.9572      0.651643
XGBoost                  9.57647     0.871479
LightGBM                11.7014      0.842962
K-Nearest Neighbors     18.835       0.747225


In [45]:
# Display results for all models
headers = ["Model", "MSE", "R-Squared"]
print(tabulate(results, headers=headers))

# Sort models by closeness to RMSE value of 1
results.sort(key=lambda x: abs(np.sqrt(x[1]) - 1))

# Filter models with RMSE closest to 1
close_models = [(name, mse, r_squared) for name, mse, r_squared in results]

# Print models with RMSE closest to 1
if close_models:
    print("\nModels with RMSE closest to 1:")
    print(tabulate(close_models[:3], headers=headers))
else:
    print("No models found within the specified RMSE threshold.")


Model                        MSE    R-Squared
----------------------  --------  -----------
Neural Network          12.376       2.27021
Linear Regression       21.5174      0.711226
Ridge Regression        21.5487      0.710807
Lasso Regression        26.5325      0.643922
ElasticNet              27.4901      0.63107
Decision Tree           19.2857      0.741177
Random Forest            9.79755     0.868512
Support Vector Machine  25.9572      0.651643
XGBoost                  9.57647     0.871479
LightGBM                11.7014      0.842962
K-Nearest Neighbors     18.835       0.747225

Models with RMSE closest to 1:
Model               MSE    R-Squared
-------------  --------  -----------
XGBoost         9.57647     0.871479
Random Forest   9.79755     0.868512
LightGBM       11.7014      0.842962


In [52]:


# Define additional evaluation metrics
from sklearn.metrics import mean_absolute_error, explained_variance_score



for i in range(combinations):
    params = grid_search.cv_results_['params'][i]
    print(f"Combination {i+1}/{combinations}: {params}")
    stacking_regressor.set_params(**params)
    stacking_regressor.fit(X_train, y_train)
    y_pred_stacking = stacking_regressor.predict(X_test)
    
    mse_stacking = mean_squared_error(y_test, y_pred_stacking)
    mae_stacking = mean_absolute_error(y_test, y_pred_stacking)  # Calculate MAE
    evs_stacking = explained_variance_score(y_test, y_pred_stacking)  # Calculate Explained Variance Score
    r2_stacking = r2_score(y_test, y_pred_stacking)
    
    print("MSE:", mse_stacking)
    print("MAE:", mae_stacking)
    print("Explained Variance Score:", evs_stacking)
    print("R^2 Score:", r2_stacking)
    print("-------------------------------------------")


Combination 1/32: {'random_forest__max_depth': 10, 'random_forest__n_estimators': 100, 'xgboost__learning_rate': 0.1, 'xgboost__max_depth': 3, 'xgboost__n_estimators': 100}
MSE: 9.352039281206498
MAE: 2.1052778379120567
Explained Variance Score: 0.8745105518049241
R^2 Score: 0.8744913332356646
-------------------------------------------
Combination 2/32: {'random_forest__max_depth': 10, 'random_forest__n_estimators': 100, 'xgboost__learning_rate': 0.1, 'xgboost__max_depth': 3, 'xgboost__n_estimators': 150}
MSE: 9.014705550774869
MAE: 2.052745273258276
Explained Variance Score: 0.8791409478181021
R^2 Score: 0.8790185069876179
-------------------------------------------
Combination 3/32: {'random_forest__max_depth': 10, 'random_forest__n_estimators': 100, 'xgboost__learning_rate': 0.1, 'xgboost__max_depth': 5, 'xgboost__n_estimators': 100}
MSE: 8.869886254416471
MAE: 2.011056325887157
Explained Variance Score: 0.8822138959476454
R^2 Score: 0.8809620485255814
-----------------------------

In [51]:
# Get the best combination based on the mean_test_score
best_combination_index = grid_search.best_index_
best_combination_params = grid_search.cv_results_['params'][best_combination_index]
best_combination_mean_test_score = grid_search.cv_results_['mean_test_score'][best_combination_index]

# Set the best parameters to the stacking regressor
stacking_regressor.set_params(**best_combination_params)

# Fit the stacking regressor with the best parameters on the entire training set
stacking_regressor.fit(X_train, y_train)

# Predict on the test set with the best stacking regressor
y_pred_stacking = stacking_regressor.predict(X_test)

# Calculate final metrics
mse_stacking = mean_squared_error(y_test, y_pred_stacking)
r2_stacking = r2_score(y_test, y_pred_stacking)

# Print the best combination and its metrics
print("\nBest Combination:")
print(f"Parameters: {best_combination_params}")
print(f"Mean Test Score (neg MSE): {best_combination_mean_test_score}")
print("Final Metrics for the Best Combination:")
print("MSE:", mse_stacking)
print("R^2 Score:", r2_stacking)



Best Combination:
Parameters: {'random_forest__max_depth': 10, 'random_forest__n_estimators': 100, 'xgboost__learning_rate': 0.1, 'xgboost__max_depth': 5, 'xgboost__n_estimators': 150}
Mean Test Score (neg MSE): -12.92924203292074
Final Metrics for the Best Combination:
MSE: 8.884323161734745
R^2 Score: 0.8807682985919858


In [53]:

# Choose the best combination of hyperparameters
best_params = grid_search.best_params_

# Set the best hyperparameters to the stacking regressor
stacking_regressor.set_params(**best_params)

# Fit the stacking regressor on the entire training set
stacking_regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred_stacking = stacking_regressor.predict(X_test)

# Compare predictions with original values
predictions_comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred_stacking})
print(predictions_comparison)


     Actual  Predicted
173    23.6  23.526325
274    32.4  31.331410
491    13.6  15.434159
72     22.8  22.665266
452    16.1  17.906559
..      ...        ...
441    17.1  13.342366
23     14.5  14.871062
225    50.0  44.063010
433    14.3  15.321793
447    12.6  16.135467

[152 rows x 2 columns]


## Pickling The Model file For Deployment

In [58]:
# Choose the best combination of hyperparameters
best_params = grid_search.best_params_

# Set the best hyperparameters to the stacking regressor
stacking_regressor.set_params(**best_params)

# Fit the stacking regressor on the entire training set
stacking_regressor.fit(X_train, y_train)

# Save the model with the best parameters
pickle.dump(stacking_regressor, open('regmodel.pkl', 'wb'))


In [60]:
# Load the StackingRegressor model from the existing pickled file
loaded_stacking_regressor = pickle.load(open('regmodel.pkl', 'rb'))

# Make predictions on the validation set
y_pred_validation = loaded_stacking_regressor.predict(X_test)

# Save the new model with a different pickle name
new_pickle_name = 'new_regmodel.pkl'
pickle.dump(loaded_stacking_regressor, open(new_pickle_name, 'wb'))

# Print the predictions (y_pred_validation) or use them as needed
print("Predictions on Validation Set:", y_pred_validation)


Predictions on Validation Set: [23.52632477 31.33140996 15.4341594  22.66526626 17.90655899 21.82373786
 18.53372578 14.35962154 20.83836348 20.80488066 21.08068277 19.38093412
  7.83619736 21.3082965  19.17773611 24.85415195 19.01883444  7.9729043
 45.42404869 15.6262435  24.39267056 24.6558468  14.25999301 21.47846893
 15.41315834 15.43313713 21.00596616 12.89305873 20.3643901  21.33457047
 20.90658293 23.36751746 24.7730893  19.80434608 14.74071198 16.19288452
 34.03930417 18.38840179 22.01603409 24.29064813 16.27042263 29.80731375
 45.00363283 19.77778364 22.16579588 13.86434335 15.79736812 24.05451316
 18.88739697 27.9269177  21.68508578 36.11639861 17.79572212 26.63793209
 45.5917598  21.50911023 15.38244694 32.13771907 21.63017308 18.55872862
 23.49273266 35.08188139 30.81443367 18.9172922  25.82616932 16.36356388
 13.73998048 22.57433338 28.60794573 14.74910054 20.98313724 30.34479327
 10.4015078  20.96063748 21.36116974  5.67693786 20.32756939 46.86958212
 10.98165731 11.21603