# Import Libraries

In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.base import BaseEstimator, RegressorMixin
import matplotlib.pyplot as plt
import numpy as np

In [None]:
class KerasRegressor(RegressorMixin, BaseEstimator):
    """Minimal scikit-learn style wrapper around a small Keras network.

    The network is rebuilt from scratch on every ``fit`` call: two ReLU hidden
    layers (128 and 64 units) followed by a single linear output unit,
    compiled with the Adam optimizer and mean-squared-error loss.

    ``RegressorMixin`` is listed before ``BaseEstimator`` because scikit-learn
    expects mixins to the left of ``BaseEstimator``; with the reverse order,
    releases that use the ``__sklearn_tags__`` API do not recognise the class
    as a regressor, which breaks meta-estimators such as ``StackingRegressor``.
    """

    def __init__(self):
        # Holds the trained Keras model; stays None until fit() is called.
        self.model = None

    def fit(self, X, y, epochs=50, batch_size=32, verbose=1):
        """Build a fresh network sized to ``X`` and train it on ``(X, y)``.

        Args:
            X: 2-D array-like exposing ``.shape``; ``X.shape[1]`` sets the
                input width of the network.
            y: Regression targets, forwarded to Keras unchanged.
            epochs: Number of training epochs.
            batch_size: Mini-batch size used during training.
            verbose: Keras verbosity level for the training run.

        Returns:
            The estimator itself, so calls can be chained.
        """
        # Local import keeps this block self-contained; the file-level imports
        # only bring in Sequential and Dense.
        from tensorflow.keras.layers import Input

        input_dim = X.shape[1]
        # An explicit Input layer replaces the `input_dim=` keyword on the
        # first Dense layer, which newer Keras releases flag as deprecated.
        self.model = Sequential([
            Input(shape=(input_dim,)),
            Dense(128, activation='relu'),
            Dense(64, activation='relu'),
            Dense(1),
        ])
        self.model.compile(optimizer='adam', loss='mse')
        self.model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=verbose)
        return self

    def predict(self, X):
        """Return a flat array of predictions for ``X``.

        Raises:
            sklearn.exceptions.NotFittedError: If ``fit`` has not been called.
                This exception subclasses both ``ValueError`` and
                ``AttributeError``, so callers that caught the previous
                ``AttributeError`` keep working.
        """
        if self.model is None:
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This KerasRegressor instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        # Keras returns one column per output unit; flatten to a 1-D vector.
        return self.model.predict(X).flatten()

In [None]:
data = pd.read_csv("2w.csv")

# Feature Engineering

In [None]:
data['interaction'] = data['CF(PCU/min)'] * data['%_of_4W']
data['interaction*2'] = data['CF*2(PCU/min)'] * data['%_of_4W*2']

In [None]:
features = data[['CF(PCU/min)', 'CF*2(PCU/min)', '%_of_4W', '%_of_4W*2', 'interaction', 'interaction*2']].values
target = data['SD'].values

In [None]:
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [None]:
rf_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10, 20],
    'min_samples_leaf': [1, 2, 4]
}

In [None]:
rf_model = RandomForestRegressor(random_state=42)
grid_search_rf = GridSearchCV(rf_model, rf_params, cv=5, scoring='r2')
grid_search_rf.fit(X_train, y_train)

In [None]:
best_rf_model = grid_search_rf.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
r2_rf = r2_score(y_test, y_pred_rf)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
mse_rf = mean_squared_error(y_test, y_pred_rf)

In [None]:
print(f'Best Random Forest Regressor R2 Score: {r2_rf:.4f}')
print(f'Best Random Forest Regressor Hyperparameters: {grid_search_rf.best_params_}')

In [None]:
gb_params = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [None]:
gb_model = GradientBoostingRegressor(random_state=42)
grid_search_gb = GridSearchCV(gb_model, gb_params, cv=5, scoring='r2')
grid_search_gb.fit(X_train, y_train)

In [None]:
best_gb_model = grid_search_gb.best_estimator_
y_pred_gb = best_gb_model.predict(X_test)
r2_gb = r2_score(y_test, y_pred_gb)
mae_gb = mean_absolute_error(y_test, y_pred_gb)
mse_gb = mean_squared_error(y_test, y_pred_gb)

In [None]:
print(f'Best Gradient Boosting Regressor R2 Score: {r2_gb:.4f}')
print(f'Best Gradient Boosting Regressor Hyperparameters: {grid_search_gb.best_params_}')

In [None]:
svr_params = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

In [None]:
svr_model = SVR()
grid_search_svr = GridSearchCV(svr_model, svr_params, cv=5, scoring='r2')
grid_search_svr.fit(X_train, y_train)

In [None]:
best_svr_model = grid_search_svr.best_estimator_
y_pred_svr = best_svr_model.predict(X_test)
r2_svr = r2_score(y_test, y_pred_svr)
mae_svr = mean_absolute_error(y_test, y_pred_svr)
mse_svr = mean_squared_error(y_test, y_pred_svr)

In [None]:
print(f'Best Support Vector Regressor R2 Score: {r2_svr:.4f}')
print(f'Best Support Vector Regressor Hyperparameters: {grid_search_svr.best_params_}')

In [None]:
xgb_params = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_child_weight': [1, 3, 5]
}

In [None]:
xgb_model = XGBRegressor()
grid_search_xgb = GridSearchCV(xgb_model, xgb_params, cv=5, scoring='r2')
grid_search_xgb.fit(X_train, y_train)

In [None]:
best_xgb_model = grid_search_xgb.best_estimator_
y_pred_xgb = best_xgb_model.predict(X_test)
r2_xgb = r2_score(y_test, y_pred_xgb)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)

In [None]:
print(f'Best XGBoost Regressor R2 Score: {r2_xgb:.4f}')
print(f'Best XGBoost Regressor Hyperparameters: {grid_search_xgb.best_params_}')

In [None]:
adaboost_params = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2]
}

In [None]:
adaboost_model = AdaBoostRegressor()
grid_search_adaboost = GridSearchCV(adaboost_model, adaboost_params, cv=5, scoring='r2')
grid_search_adaboost.fit(X_train, y_train)

In [None]:
best_adaboost_model = grid_search_adaboost.best_estimator_
y_pred_adaboost = best_adaboost_model.predict(X_test)
r2_adaboost = r2_score(y_test, y_pred_adaboost)
mae_adaboost = mean_absolute_error(y_test, y_pred_adaboost)
mse_adaboost = mean_squared_error(y_test, y_pred_adaboost)

In [None]:
print(f'Best AdaBoost Regressor R2 Score: {r2_adaboost:.4f}')
print(f'Best AdaBoost Regressor Hyperparameters: {grid_search_adaboost.best_params_}')

In [None]:
lgbm_params = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

In [None]:
lgbm_model = LGBMRegressor()
grid_search_lgbm = GridSearchCV(lgbm_model, lgbm_params, cv=5, scoring='r2')
grid_search_lgbm.fit(X_train, y_train)

In [None]:
best_lgbm_model = grid_search_lgbm.best_estimator_
y_pred_lgbm = best_lgbm_model.predict(X_test)
r2_lgbm = r2_score(y_test, y_pred_lgbm)
mae_lgbm = mean_absolute_error(y_test, y_pred_lgbm)
mse_lgbm = mean_squared_error(y_test, y_pred_lgbm)

In [None]:
print(f'Best LightGBM Regressor R2 Score: {r2_lgbm:.4f}')
print(f'Best LightGBM Regressor Hyperparameters: {grid_search_lgbm.best_params_}')

In [None]:
nn_regressor = KerasRegressor()
nn_regressor.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

In [None]:
y_pred_nn = nn_regressor.predict(X_test)
r2_nn = r2_score(y_test, y_pred_nn)
mae_nn = mean_absolute_error(y_test, y_pred_nn)
mse_nn = mean_squared_error(y_test, y_pred_nn)

In [None]:
print(f'Neural Network R2 Score: {r2_nn:.4f}')

In [None]:
knn_params = {
    'n_neighbors': [3, 5, 10],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

In [None]:
knn_model = KNeighborsRegressor()
grid_search_knn = GridSearchCV(knn_model, knn_params, cv=5, scoring='r2')
grid_search_knn.fit(X_train, y_train)

In [None]:
best_knn_model = grid_search_knn.best_estimator_
y_pred_knn = best_knn_model.predict(X_test)
r2_knn = r2_score(y_test, y_pred_knn)
mae_knn = mean_absolute_error(y_test, y_pred_knn)
mse_knn = mean_squared_error(y_test, y_pred_knn)

In [None]:
print(f'Best K-Nearest Neighbors Regressor R2 Score: {r2_knn:.4f}')
print(f'Best K-Nearest Neighbors Regressor Hyperparameters: {grid_search_knn.best_params_}')

In [None]:
dt_params = {
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [None]:
dt_model = DecisionTreeRegressor()
grid_search_dt = GridSearchCV(dt_model, dt_params, cv=5, scoring='r2')
grid_search_dt.fit(X_train, y_train)

In [None]:
best_dt_model = grid_search_dt.best_estimator_
y_pred_dt = best_dt_model.predict(X_test)
r2_dt = r2_score(y_test, y_pred_dt)
mae_dt = mean_absolute_error(y_test, y_pred_dt)
mse_dt = mean_squared_error(y_test, y_pred_dt)

In [None]:
print(f'Best Decision Tree Regressor R2 Score: {r2_dt:.4f}')
print(f'Best Decision Tree Regressor Hyperparameters: {grid_search_dt.best_params_}')

In [None]:
mlp_params = {
    'hidden_layer_sizes': [(64, 32), (128, 64, 32)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001, 0.01]
}

In [None]:
mlp_model = MLPRegressor(max_iter=500)
grid_search_mlp = GridSearchCV(mlp_model, mlp_params, cv=5, scoring='r2')
grid_search_mlp.fit(X_train, y_train)

In [None]:
best_mlp_model = grid_search_mlp.best_estimator_
y_pred_mlp = best_mlp_model.predict(X_test)
r2_mlp = r2_score(y_test, y_pred_mlp)
mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
mse_mlp = mean_squared_error(y_test, y_pred_mlp)

In [None]:
print(f'Best MLP Regressor R2 Score: {r2_mlp:.4f}')
print(f'Best MLP Regressor Hyperparameters: {grid_search_mlp.best_params_}')

In [None]:
estimators = [
    ('random_forest', best_rf_model),
    ('gradient_boosting', best_gb_model),
    ('support_vector', best_svr_model),
    ('xgboost', best_xgb_model),
    ('adaboost', best_adaboost_model),
    ('lgbm', best_lgbm_model),
    ('neural_network', nn_regressor),
    ('knn', best_knn_model),
    ('decision_tree', best_dt_model),
    ('mlp', best_mlp_model)
]

In [None]:
stacked_model = StackingRegressor(estimators=estimators, final_estimator=RandomForestRegressor())

In [None]:
cross_val_r2 = cross_val_score(stacked_model, X_train, y_train, cv=5, scoring='r2')

In [None]:
highest_r2 = np.max(cross_val_r2)

In [None]:
print(f'Cross-validated R2 Score: {highest_r2:.4f}')

In [None]:
best_regressor = None
best_r2 = -float('inf')


In [None]:
for name, model in estimators:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    
    if r2 > best_r2:
        best_r2 = r2
        best_regressor = model
        best_y_pred = y_pred

In [None]:
# Line plot of the held-out targets against the best model's predictions,
# drawn in test-set row order.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, label='Actual')
ax.plot(best_y_pred, label='Prediction')
ax.set_xlabel('Data Points')
ax.set_ylabel('Service Delay (Sec/Veh)')
ax.legend()
ax.set_title('Actual vs. Predicted Values')
plt.show()

In [None]:
models = ['Random Forest', 'Gradient Boosting', 'Support Vector', 'XGBoost', 'AdaBoost', 'LightGBM', 'Neural Network', 'K-Nearest Neighbors', 'Decision Tree', 'MLP']
mae_values = [mae_rf, mae_gb, mae_svr, mae_xgb, mae_adaboost, mae_lgbm, mae_nn, mae_knn, mae_dt, mae_mlp]
mse_values = [mse_rf, mse_gb, mse_svr, mse_xgb, mse_adaboost, mse_lgbm, mse_nn, mse_knn, mse_dt, mse_mlp]
r2_values = [r2_rf, r2_gb, r2_svr, r2_xgb, r2_adaboost, r2_lgbm, r2_nn, r2_knn, r2_dt, r2_mlp]

In [None]:
for model, mae, mse, r2 in zip(models, mae_values, mse_values, r2_values):
    print(f'Model: {model}')
    print(f'Mean Absolute Error (MAE): {mae:.4f}')
    print(f'Mean Squared Error (MSE): {mse:.4f}')
    print(f'R2 Score: {r2:.4f}')
    print('-' * 40)

In [None]:
plt.figure(figsize=(10, 6))
plt.bar(models, mae_values, alpha=0.8, label='MAE')
plt.title('MAE for Different Models')
plt.ylabel('Mean Absolute Error')
plt.xlabel('Various Models')
plt.xticks(rotation=45, ha='right')
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(10, 6))
plt.bar(models, mse_values, alpha=0.8, label='MSE', color='orange')
plt.title('MSE for Different Models')
plt.ylabel('MSE')
plt.xlabel('Various ML Models')
plt.xticks(rotation=45, ha='right')
plt.legend()
plt.show()

In [None]:
# Bar chart of the test-set R2 score for each model.
plt.figure(figsize=(12, 6))
plt.bar(models, r2_values, alpha=0.8, label='R2 Score', color='indigo')
plt.title('R2 Score for Different Models')
# Axis labels added so this chart matches the MAE and MSE charts.
plt.ylabel('R2 Score')
plt.xlabel('Various ML Models')
# Rotate the model names so the longer labels do not overlap.
plt.xticks(rotation=45, ha='right')
plt.legend()
plt.show()
