# Combining Methods: Ensembles

#### Import libraries

In [1]:
import os
import numpy as np 
import pandas as pd 
import math
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, make_scorer, fbeta_score
import statsmodels.api as sm 
import matplotlib.pyplot as plt 
import seaborn as sns 
from dmba import classificationSummary, gainsChart, liftChart 

SEED = 1

# Problem 13.1: Acceptance of Consumer Loan

Universal Bank has begun a program to encourage its existing customers to borrow via a consumer loan program.  The bank has promoted the loan to 5000 customers, of whom 480 accepted the offer. The data are available in file __UniversalBank.csv__. The bank now wants to develop a model to predict which customers have the greatest probability of accepting the loan, to reduce promotion costs and send the offer only to a subset of its customers. 

We will develop several models, then combine them in an ensemble. The models we will use are 
1. logistic regression, 
2. $k$-nearest neighbors with $k=3$, and 
3. classification trees
4. Naive Bayes

**Create a dataframe for the `UniversalBank.csv` data**

- Bin the following variables so they can be used in Naive Bayes:
  - Age (5 bins)
  - Experience (10 bins)
  - Income (5 bins)
  - CC Average (6 bins)
  - Mortgage (10 bins)
- Education and Family can be used as is, without binning
- ID and Zip code can be ignored
- Use one-hot-encoding to convert the categorical data into indicator variables
- Partition the data: 60% training, 40% validation.

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
bank_df = pd.read_csv(os.path.join('..', 'data', 'UniversalBank.csv'))
# Drop ID and zip code columns
bank_df.drop(columns=['ID', 'ZIP Code'], inplace=True)
bank_df.columns = [c.replace(' ','_') for c in bank_df.columns]

bank_df.info()

In [None]:
# Convert continuous variables into bins
bank_df['Age'] = pd.cut(bank_df['Age'], 5, labels=range(1, 6)).astype('category')
bank_df['Experience'] = pd.cut(bank_df['Experience'], 10, labels=range(1, 11)).astype('category')
bank_df['Income'] = pd.cut(bank_df['Income'], 5, labels=range(1, 6)).astype('category')
bank_df['CCAvg'] = pd.cut(bank_df['CCAvg'], 6, labels=range(1, 7)).astype('category')
bank_df['Mortgage'] = pd.cut(bank_df['Mortgage'], 10, labels=range(1, 11)).astype('category')

# Use one-hot-encoding for the categorical variables
bank_df = pd.get_dummies(bank_df, prefix_sep='_')

X = bank_df.drop(columns=['Personal_Loan'])
X = pd.get_dummies(X)

y = bank_df['Personal_Loan']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=SEED)
print('Training set:', X_train.shape, 'Test set:', X_test.shape)

<h3 style="color:teal"> Expected Output: </h3>

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Age                 5000 non-null   int64  
 1   Experience          5000 non-null   int64  
 2   Income              5000 non-null   int64  
 3   Family              5000 non-null   int64  
 4   CCAvg               5000 non-null   float64
 5   Education           5000 non-null   int64  
 6   Mortgage            5000 non-null   int64  
 7   Personal_Loan       5000 non-null   int64  
 8   Securities_Account  5000 non-null   int64  
 9   CD_Account          5000 non-null   int64  
 10  Online              5000 non-null   int64  
 11  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(11)
memory usage: 468.9 KB


Training set: (3000, 42) Test set: (2000, 42)


### 13.1.a

Fit models to the data for 
1. logistic regression, 
2. $k$-nearest neighbors with $k=3$, 
3. classification trees, and 
4. Naive Bayes. 

Use Personal Loan as the outcome variable.  Report the validation confusion matrix for each of the models.

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
# Logistic regression

classificationSummary(y_test, logit_reg.predict(X_test))

# k-nearest neighbors

classificationSummary(y_test, knn.predict(X_test))

In [None]:
# classification tree
# user grid search to find optimized tree
param_grid = {
    'max_depth': [5, 10, 15, 20, 25], 
    'min_impurity_decrease': [0, 0.001, 0.005, 0.01], 
    'min_samples_split': [10, 20, 30, 40, 50], 
}
gridSearch = GridSearchCV(DecisionTreeClassifier(random_state=SEED), param_grid, cv=5, n_jobs=-1)
gridSearch.fit(X_train, y_train)
print('Initial parameters: ', gridSearch.best_params_)

# Run 2nd grid search here


print('Improved parameters: ', gridSearch.best_params_)

dt = gridSearch.best_estimator_

classificationSummary(y_test, dt.predict(X_test))

<h3 style="color:teal"> Expected Output: </h3>

Confusion Matrix (Accuracy 0.9490)

       Prediction
Actual    0    1
     0 1773   34
     1   68  125
Confusion Matrix (Accuracy 0.9350)

       Prediction
Actual    0    1
     0 1796   11
     1  119   74


Initial parameters:  {'max_depth': 10, 'min_impurity_decrease': 0.001, 'min_samples_split': 10}
Improved parameters:  {'max_depth': 7, 'min_impurity_decrease': 0, 'min_samples_split': 15}
Confusion Matrix (Accuracy 0.9670)

       Prediction
Actual    0    1
     0 1789   18
     1   48  145


In [6]:
# Naive-Bayes
nb = MultinomialNB(alpha=0.01)
nb.fit(X_train, y_train)
classificationSummary(y_test, nb.predict(X_test))

Confusion Matrix (Accuracy 0.8860)

       Prediction
Actual    0    1
     0 1657  150
     1   78  115


### 13.1.b/c
Create a data frame with the actual outcome, predicted probabilities/outcome for each of the models. Display the first 10 rows.

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
result = pd.DataFrame({
    'actual': y_test,
    'log_reg_prob': logit_reg.predict_proba(X_test)[:, 1],
    'log_reg_pred': logit_reg.predict(X_test),
    'knn_prob': knn.predict_proba(X_test)[:, 1],
    'knn_pred': knn.predict(X_test),
    'dt_prob': dt.predict_proba(X_test)[:, 1],
    'dt_pred': dt.predict(X_test),
    'nb_prob': nb.predict_proba(X_test)[:, 1],
    'nb_pred': nb.predict(X_test),
})

pred_cols = ['log_reg_pred', 
             'knn_pred', 
             'dt_pred', 
             'nb_pred']

# Calculate voting majority

prob_cols = ['log_reg_prob', 
             'knn_prob', 
             'dt_prob', 
             'nb_prob']

# Calculate average probability and prediction


result.head(10)

In [None]:
print('Majority vote')
classificationSummary(result['actual'], result['majority'])

print('Average probability')
classificationSummary(result['actual'], result['average_pred'])

<h3 style="color:teal"> Expected Output: </h3>

Unnamed: 0,actual,log_reg_prob,log_reg_pred,knn_prob,knn_pred,dt_prob,dt_pred,nb_prob,nb_pred,majority,average,average_pred
2764,0,0.001906725,0,0.0,0,0.0,0,0.004384,0,0,0.001572781,0
4767,0,1.346563e-07,0,0.0,0,0.0,0,1e-06,0,0,2.846322e-07,0
3814,0,3.91299e-07,0,0.0,0,0.0,0,2e-06,0,0,6.143519e-07,0
3499,0,0.01116179,0,0.0,0,0.0,0,0.126946,0,0,0.03452705,0
2735,0,0.00281648,0,0.0,0,0.007463,0,0.005312,0,0,0.003897707,0
3922,0,4.139157e-06,0,0.0,0,0.0,0,2e-06,0,0,1.439887e-06,0
2701,0,0.001161518,0,0.0,0,0.0,0,0.002338,0,0,0.000874956,0
1179,0,0.2154792,0,0.0,0,0.270833,0,0.08102,0,0,0.1418332,0
932,0,0.6325349,1,0.333333,0,0.8,1,0.572092,1,1,0.58449,1
792,0,0.703461,1,0.666667,1,0.871795,1,0.672648,1,1,0.7286426,1


Majority vote
Confusion Matrix (Accuracy 0.9495)

       Prediction
Actual    0    1
     0 1797   10
     1   91  102
Average probability
Confusion Matrix (Accuracy 0.9595)

       Prediction
Actual    0    1
     0 1796   11
     1   70  123


### 13.1.d

Compare the error rates for the four individual methods and the two ensemble methods.

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
models = {'Logistic regression': 'log_reg_pred', 
          'k-Nearest Neighbor': 'knn_pred', 
          'Decision Tree': 'dt_pred', 
          'Naive Bayes': 'nb_pred',
          'Majority Vote': 'majority',
          'Average Probability': 'average_pred'}
error_rates = []

# Loop through dict and append error rates


pd.DataFrame(error_rates)

<h3 style="color:teal"> Expected Output: </h3>

Unnamed: 0,Model,Error Rate
0,Logistic regression,0.949
1,k-Nearest Neighbor,0.935
2,Decision Tree,0.967
3,Naive Bayes,0.886
4,Majority Vote,0.9495
5,Average Probability,0.9595


**Use F2 Score as a Custom Evaluation Metric**

In this dataset, using `accuracy_score` as your evaluation metric may not be appropriate due to the class imbalance — only about 10% of customers have previously accepted the personal loan offer. 

While precision and recall provide helpful insights, we often want a single metric that balances both. The **F1 Score** does this by giving equal weight to precision and recall. However, in situations like this — where *missing positives* (False Negatives) is more costly than False Positives — the **F2 Score** is a better choice because it gives more importance to recall.

### Important Note:
> In `sklearn`, the F2 Score (`fbeta_score` with `beta=2`) is *not* directly available as a built-in scoring option for tools like `cross_val_score` or `GridSearchCV`. To use it, you’ll need to create a custom scoring function.

**Create Two Examples of Custom F2 Scorers:**
- Example 1: Using `fbeta_score` directly with `make_scorer`
- Example 2: Writing your own custom scoring function to calculate the F2 score


Use both of your custom scorers to calculate and display the F2 Score for your Decision Tree model from earlier.

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
# Custom function to calculate F2 Score manually


# Show how to use make_scorer using fbeta_score directly


dt_pred = dt.predict(X_test)

print(f'Decision Tree Accuracy: {accuracy_score(y_test, dt_pred):.2%}')
print(f'Decision Tree F2 Score: {custom_f2_score(y_test, dt_pred):.2%}')

<h3 style="color:teal"> Expected Output: </h3>

Decision Tree Accuracy: 96.70%
Decision Tree F2 Score: 77.54%


**Using VotingClassifier for Hard and Soft Voting**

Previously, we manually combined predictions from multiple models using majority vote and average probabilities. Instead of doing this manually, we can simplify the process by using `VotingClassifier` from `sklearn` — an ensemble learning method that automatically handles hard and soft voting.

1. Recreate your ensemble from earlier using `VotingClassifier` in two different ways:
   - Hard Voting → `voting='hard'` (majority rule based on predicted class labels)
   - Soft Voting → `voting='soft'` (based on the average of predicted probabilities)

2. For each version (hard and soft voting), display the following evaluation metrics:
   - `accuracy_score`
   - `F2 Score` (using your custom scorer from the previous step)

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
# Create VotingClassifier for hard


vc_hard_pred = vc_hard.predict(X_test)

print(f'Voting Classifier Hard Vote Accuracy: {accuracy_score(y_test, vc_hard_pred):.2%}')
print(f'Voting Classifier Hard Vote F2 Score: {custom_f2_score(y_test, vc_hard_pred):.2%}')

In [None]:
# Now create VotingClassifier for soft


vc_soft_pred = vc_soft.predict(X_test)

print(f'Voting Classifier Soft Vote Accuracy: {accuracy_score(y_test, vc_soft_pred):.2%}')
print(f'Voting Classifier Soft Vote F2 Score: {custom_f2_score(y_test, vc_soft_pred):.2%}')

<h3 style="color:teal"> Expected Output: </h3>

Voting Classifier Hard Vote Accuracy: 94.95%
Voting Classifier Hard Vote F2 Score: 57.69%


Voting Classifier Soft Vote Accuracy: 95.95%
Voting Classifier Soft Vote F2 Score: 67.88%


**Train a Meta-Learner for Stacking**

In this step, you will create a *meta-learner* to combine the predictions from the base models.

- Train a simple `LogisticRegression` model using the *training predictions* generated from your base models as the input features.

> *Reminder:* The meta-learner is trained on the outputs of your base models — not the original dataset — and will learn how to best combine those predictions to make the final classification.

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
# Create dataframe for model predictions


X = train_result[pred_cols]
X = sm.add_constant(X)

logit_reg2 = sm.Logit(y_train, X).fit()

logit_reg2.summary()

In [None]:
# Create predictions on test


logit_reg2_pred = (logit_reg2.predict(X_test2) > 0.50).astype(int)

print(f'Stacked Logistic Regression Accuracy: {accuracy_score(y_test, logit_reg2_pred):.2%}')
print(f'Stacked Logistic Regression F2 Score: {custom_f2_score(y_test, logit_reg2_pred):.2%}')

<h3 style="color:teal"> Expected Output: </h3>

Optimization terminated successfully.
         Current function value: 0.074166
         Iterations 9


0,1,2,3
Dep. Variable:,Personal_Loan,No. Observations:,3000.0
Model:,Logit,Df Residuals:,2995.0
Method:,MLE,Df Model:,4.0
Date:,"Sun, 13 Apr 2025",Pseudo R-squ.:,0.7649
Time:,20:59:41,Log-Likelihood:,-222.5
converged:,True,LL-Null:,-946.37
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-4.3962,0.175,-25.145,0.000,-4.739,-4.054
log_reg_pred,2.4442,0.500,4.892,0.000,1.465,3.423
knn_pred,5.1360,0.627,8.192,0.000,3.907,6.365
dt_pred,5.1535,0.400,12.883,0.000,4.369,5.937
nb_pred,-0.2656,0.477,-0.557,0.578,-1.201,0.670


Stacked Logistic Regression Accuracy: 96.65%
Stacked Logistic Regression F2 Score: 79.28%


**Use `StackingClassifier` Instead**

Instead of manually generating and combining predictions from your base models, you can simplify the process by using `sklearn`'s built-in `StackingClassifier`.

- Recreate your stacked model using `StackingClassifier` to automatically handle combining your base models and training the meta-learner.  
- Use the same base models and meta-learner from your previous stacking example.

After fitting your `StackingClassifier`, display the following performance metrics on the test set:
- `accuracy_score`
- F2 Score (using your custom F2 scorer)

> 🎯 *Tip:* This approach saves time and ensures consistent evaluation without needing to manually generate training and test predictions from each base model.

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
# Create StackingClassifier


sc_pred = sc.predict(X_test)

print(f'Stacking Classifier Accuracy: {accuracy_score(y_test, sc_pred):.2%}')
print(f'Stacking Classifier F2 Score: {custom_f2_score(y_test, sc_pred):.2%}')

<h3 style="color:teal"> Expected Output: </h3>

Stacking Classifier Accuracy: 96.95%
Stacking Classifier F2 Score: 77.96%


**Tune an AdaBoost Model Using the Custom F2 Scorer**

In this step, you will tune an `AdaBoostClassifier` using `GridSearchCV` and your custom F2 scoring function from earlier.

---

### Parameter Grid to Search:

Use the following parameters for your grid search:

```python
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0, 10.0],
    'estimator__max_depth': [1, 3, 5]
}
```

> *Note:* The `estimator__max_depth` parameter controls the depth of the base decision tree used within AdaBoost.

1. Perform a grid search using `GridSearchCV` with:
   - The parameter grid above  
   - 5-fold cross-validation  
   - Your custom F2 scorer  

2. Display:
   - The best parameter combination  
   - The best F2 Score from cross-validation  

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
# Create Base weak learner decision tree

# Define AdaBoost Classifier

# Parameter grid to search
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0, 10.0],
    'estimator__max_depth': [1, 3, 5]
}

# Grid Search with 5-fold CV


grid_search.fit(X_train, y_train)

# Best parameters
print(f'Best AdaBoost Parameters:" {grid_search.best_params_}')
# Best score
print(f'Best AdaBoost Grid Search Score: {np.mean(grid_search.best_score_):.2%}')

<h3 style="color:teal"> Expected Output: </h3>

Best AdaBoost Parameters:" {'estimator__max_depth': 5, 'learning_rate': 1.0, 'n_estimators': 100}
Best AdaBoost Grid Search Score: 87.00%


**Demonstrate Cross-Validation using the Custom Scorer**

<h4 style="color:blue"> Write Your Code Below: </h4>

In [None]:
base_estimator2 = DecisionTreeClassifier(max_depth=5, random_state=SEED)

ada2 = AdaBoostClassifier(estimator=base_estimator2, learning_rate=1.0, n_estimators=100, random_state=SEED).fit(X_train, y_train)

# Run cross_val_score using custom scorer


print(f'AdaBoost Cross Validation Scores: {np.mean(ada_scores):.2%}')

ada_pred = ada2.predict(X_test)
print(f'AdaBoost Accuracy: {accuracy_score(y_test, ada_pred):.2%}')
print(f'AdaBoost F2 Score: {custom_f2_score(y_test, ada_pred):.2%}')

<h3 style="color:teal"> Expected Output: </h3>

AdaBost Cross Validation Scores: 87.00%
AdaBoost Accuracy: 96.75%
AdaBoost F2 Score: 81.24%
