In [63]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, StackingRegressor
from sklearn.neural_network import MLPRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import r2_score
from bayes_opt import BayesianOptimization
import pickle

In [None]:
# Execute the shared plotting notebooks so their definitions become available in this
# notebook's namespace.
# NOTE(review): presumably these notebooks define the `Scatter_Plot` and `qq_plot` helpers
# that Stacking_Regression calls below — confirm.
# NOTE(review): these are absolute, machine-specific Windows paths, so this cell fails on any
# other checkout location; consider a path relative to a configurable project root.
%run D:/UTD/UTDFall2023/Calibration-of-LoRaNodes-using-Super-Learners/plots/Scatter_Plot.ipynb
%run D:/UTD/UTDFall2023/Calibration-of-LoRaNodes-using-Super-Learners/plots/QQ_Plot.ipynb

In [66]:
def _build_stacked_regressor(param_dict, n_estimators, learning_rate, max_depth,
                             min_samples_split, min_samples_leaf):
    """Build an unfitted stacking ensemble (the "super learner").

    The base learners are a random forest, a bagged decision tree, a LightGBM
    regressor and an MLP; the meta learner is a gradient-boosting regressor.
    The same builder is used for every optimisation trial and for the final
    model, so the tuned meta-learner parameters always refer to the ensemble
    that is actually reported.

    Parameters
    ----------
    param_dict : indexable by 0, 1 and 2
        ``param_dict[0]`` holds the random-forest settings (``n_estimators``,
        ``max_depth``, ``min_samples_leaf``, ``min_samples_split``),
        ``param_dict[1]`` the bagging settings (``n_estimators``,
        ``max_samples``, ``max_features``) and ``param_dict[2]`` the LightGBM
        settings (``n_estimators``, ``max_depth``, ``learning_rate``,
        ``num_leaves``, ``min_child_samples``). Values are passed through
        unchanged.
    n_estimators, max_depth, min_samples_split, min_samples_leaf : number
        Meta-learner settings; truncated to ``int`` because the Bayesian
        optimiser proposes floats.
    learning_rate : float
        Meta-learner learning rate, passed through unchanged.

    Returns
    -------
    StackingRegressor
        A new, unfitted estimator.
    """
    rf_params, bagging_params, lgbm_params = param_dict[0], param_dict[1], param_dict[2]

    base_learners = [
        ('rf', RandomForestRegressor(
            n_estimators=rf_params['n_estimators'],
            max_depth=rf_params['max_depth'],
            min_samples_leaf=rf_params['min_samples_leaf'],
            min_samples_split=rf_params['min_samples_split'],
            random_state=0,
        )),
        ('bagging', BaggingRegressor(
            estimator=DecisionTreeRegressor(),
            n_estimators=bagging_params['n_estimators'],
            max_samples=bagging_params['max_samples'],
            max_features=bagging_params['max_features'],
            random_state=42,
        )),
        ('lgbm', LGBMRegressor(
            n_estimators=lgbm_params['n_estimators'],
            max_depth=lgbm_params['max_depth'],
            learning_rate=lgbm_params['learning_rate'],
            num_leaves=lgbm_params['num_leaves'],
            min_child_samples=lgbm_params['min_child_samples'],
            random_state=1,
        )),
        ('nn', MLPRegressor(hidden_layer_sizes=(64, 64), activation='relu',
                            random_state=1, max_iter=500)),
    ]

    meta_learner = GradientBoostingRegressor(
        n_estimators=int(n_estimators),
        learning_rate=learning_rate,
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        random_state=1,
    )

    return StackingRegressor(estimators=base_learners, final_estimator=meta_learner)


def Stacking_Regression(X_train, X_test, y_train, y_test, filtered_data, param_dict, v=None):
    """Tune, fit and evaluate a stacking regressor, then plot its predictions.

    The gradient-boosting meta learner is tuned with Bayesian optimisation
    (5 random initial points followed by 15 guided iterations). A final model
    is then fitted with the best parameters, train/test R2 scores are printed,
    and the scatter and QQ plots are drawn.

    Parameters
    ----------
    X_train, X_test
        Feature matrices for fitting and evaluation.
    y_train, y_test
        Targets; they must expose ``.index`` (e.g. pandas Series) because the
        index is reused for the result frames.
    filtered_data
        Unused; kept so existing callers keep working.
    param_dict
        Base-learner hyper-parameters, see ``_build_stacked_regressor``.
    v : optional
        Label forwarded to ``Scatter_Plot`` and ``qq_plot``; the value
        ``'dCn'`` selects the ``'dCn'`` unit, anything else ``'pm_conc'``.
        When omitted, the notebook-level global ``v`` is used, which is the
        behaviour this function had before the parameter existed.

    Returns
    -------
    float
        R2 score on the test set, rounded to two decimals.

    Raises
    ------
    NameError
        If ``v`` is neither passed nor defined as a global.
    """
    ml_type = "SL"

    # Resolve the plot label before any training so a missing value fails fast
    # instead of after the whole optimisation has run.
    if v is None:
        if 'v' not in globals():
            raise NameError(
                "Stacking_Regression needs `v`: pass it as an argument or define a global `v`."
            )
        v = globals()['v']

    def stacking_regressor_cv(n_estimators, learning_rate, max_depth, min_samples_split, min_samples_leaf):
        """Objective for the optimiser: test-set R2 of one candidate stack."""
        # NOTE(review): candidates are scored on the same test set that is used for the
        # final report, so the reported test R2 is optimistically biased. Consider scoring
        # on a validation split or via cross-validation of the training data.
        candidate = _build_stacked_regressor(
            param_dict, n_estimators, learning_rate, max_depth,
            min_samples_split, min_samples_leaf,
        )
        candidate.fit(X_train, y_train)
        return r2_score(y_test, candidate.predict(X_test))

    # Search space for the meta learner (bounds are continuous; integer-valued
    # settings are truncated inside the builder).
    pbounds = {
        'n_estimators': (50, 200),
        'learning_rate': (0.01, 0.2),
        'max_depth': (3, 10),
        'min_samples_split': (2, 10),
        'min_samples_leaf': (1, 5)
    }

    optimizer = BayesianOptimization(
        f=stacking_regressor_cv,
        pbounds=pbounds,
        random_state=1
    )
    optimizer.maximize(init_points=5, n_iter=15)

    optimal_params = optimizer.max['params']
    print("Optimal Parameters:", optimal_params)

    # Final model: the very same ensemble that was tuned, with the best meta-learner settings.
    final_stacked_regressor = _build_stacked_regressor(
        param_dict,
        optimal_params['n_estimators'],
        optimal_params['learning_rate'],
        optimal_params['max_depth'],
        optimal_params['min_samples_split'],
        optimal_params['min_samples_leaf'],
    )
    final_stacked_regressor.fit(X_train, y_train)

    predict_train = final_stacked_regressor.predict(X_train)
    predict_test = final_stacked_regressor.predict(X_test)

    # Actual vs. predicted, tagged by split and aligned on the original target index.
    train_df = pd.DataFrame({'Actual': y_train, 'Predicted': predict_train, 'Category': 'Training'}, index=y_train.index)
    test_df = pd.DataFrame({'Actual': y_test, 'Predicted': predict_test, 'Category': 'Testing'}, index=y_test.index)

    # Interleave both splits back into index order for plotting.
    combined_df = pd.concat([train_df, test_df]).sort_index()

    # `r2_score` is the name imported at the top of the notebook; `metrics` is not imported.
    r2_score_train = round(r2_score(y_train, predict_train), 2)
    r2_score_test = round(r2_score(y_test, predict_test), 2)

    print('R2 Score Train:', r2_score_train)
    print('R2 Score Test:', r2_score_test)

    unit = 'dCn' if v == 'dCn' else 'pm_conc'

    Scatter_Plot(combined_df, train_df, test_df, r2_score_train, r2_score_test, v, unit, ml_type)
    qq_plot(test_df, v, unit, ml_type)

    # Only the rounded test-set R2 is returned (not the fitted model).
    return r2_score_test
