# Model Stacking

- Ensemble with different types of classifiers
    - Ex. Decision trees, logistic regression, are fitted on the same training data
    - Results are combined based on:
        - Majority voting (classification)
        - Average (regression)
- Ensemble with one type of classifier
    - Boostrap samples are drawn from the training data
    - Model fitted on each boostrap sample
    - All the results are combined to create an ensemble
    - Suitable for highly flexible models that are prone to overfitting / high variance

## Combining Methods

- Majority voting / average
- Method of application of meta-classifiers on outcomes
    - Binary outcome form individual classifiers
    - Meta-classifier is applied on top of these classifier outcomes
- Method of application of meta-classifiers on probabilities
    - Probabilities are obtained from individual classifiers
    - Applying meta-classifier

In [5]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

In [6]:
df = pd.read_csv('employee_attrition.csv')
num_col = list(df.describe().columns)
col_categorical = list(set(df.columns).difference(num_col))
remove_list = ['EmployeeCount', 'EmployeeNumber', 'StandardHours']
col_numerical = [
    e for e in num_col if e not in remove_list
]
attrition_to_num = {
    'Yes' : 0,
    'No' : 1
}
df['Attrition_num'] = df['Attrition'].map(attrition_to_num)
col_categorical.remove('Attrition')
df_cat = pd.get_dummies(df[col_categorical])
X = pd.concat([df[col_numerical], df_cat], axis=1)
y = df['Attrition_num']

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [8]:
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn import preprocessing

def printScore(clf, X_train, X_test, y_train, y_test, train=True):
    lb = preprocessing.LabelBinarizer()
    lb.fit(y_train)
    if train:
        res = clf.predict(X_train)
        print('Train Results:\n')
        print('Accuracy: %.2f\n' % accuracy_score(y_train, res))
        print('Classification Report: \n {} \n'.format(classification_report(y_train, res)))
        print('Confusion Matrix: \n {} \n'.format(confusion_matrix(y_train, res)))
        print('ROC AUC: {0:.4f}\n'.format(roc_auc_score(lb.transform(y_train), lb.transform(res))))
    else:
        res_test = clf.predict(X_test)
        print('Test Results:\n')
        print('Accuracy: %.2f\n' % accuracy_score(y_test, res_test))
        print('Classification Report: \n {} \n'.format(classification_report(y_test, res_test)))
        print('Confusion Matrix: \n {} \n'.format(confusion_matrix(y_test, res_test)))
        print('ROC AUC: {0:.4f}\n'.format(roc_auc_score(lb.transform(y_test), lb.transform(res_test))))

In [9]:
from sklearn.tree import DecisionTreeClassifier
tree_clf = DecisionTreeClassifier()
tree_clf.fit(X_train, y_train);

In [10]:
printScore(tree_clf, X_train, X_test, y_train, y_test)
printScore(tree_clf, X_train, X_test, y_train, y_test, train=False)

Train Results:

Accuracy: 1.00

Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       182
           1       1.00      1.00      1.00       920

    accuracy                           1.00      1102
   macro avg       1.00      1.00      1.00      1102
weighted avg       1.00      1.00      1.00      1102
 

Confusion Matrix: 
 [[182   0]
 [  0 920]] 

ROC AUC: 1.0000

Test Results:

Accuracy: 0.79

Classification Report: 
               precision    recall  f1-score   support

           0       0.27      0.25      0.26        55
           1       0.87      0.88      0.88       313

    accuracy                           0.79       368
   macro avg       0.57      0.57      0.57       368
weighted avg       0.78      0.79      0.78       368
 

Confusion Matrix: 
 [[ 14  41]
 [ 37 276]] 

ROC AUC: 0.5682



In [11]:
from sklearn.ensemble import RandomForestClassifier

In [12]:
rf_clf = RandomForestClassifier(n_estimators=100)
rf_clf.fit(X_train, y_train);

In [13]:
printScore(rf_clf, X_train, X_test, y_train, y_test)
printScore(rf_clf, X_train, X_test, y_train, y_test, train=False)

Train Results:

Accuracy: 1.00

Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       182
           1       1.00      1.00      1.00       920

    accuracy                           1.00      1102
   macro avg       1.00      1.00      1.00      1102
weighted avg       1.00      1.00      1.00      1102
 

Confusion Matrix: 
 [[182   0]
 [  0 920]] 

ROC AUC: 1.0000

Test Results:

Accuracy: 0.87

Classification Report: 
               precision    recall  f1-score   support

           0       0.90      0.16      0.28        55
           1       0.87      1.00      0.93       313

    accuracy                           0.87       368
   macro avg       0.89      0.58      0.60       368
weighted avg       0.88      0.87      0.83       368
 

Confusion Matrix: 
 [[  9  46]
 [  1 312]] 

ROC AUC: 0.5802



In [14]:
# probability comparison between both estimators
en_en = pd.DataFrame()
en_en['tree'] = pd.DataFrame(tree_clf.predict_proba(X_train))[1]
en_en['rf'] = pd.DataFrame(rf_clf.predict_proba(X_train))[1]
en_en = pd.concat([en_en, pd.DataFrame(y_train).reset_index(drop=True)], axis=1)
en_en.head()

Unnamed: 0,tree,rf,Attrition_num
0,1.0,1.0,1
1,1.0,0.94,1
2,0.0,0.21,0
3,0.0,0.09,0
4,1.0,0.99,1


# Meta Classifier

In [15]:
from sklearn.linear_model import LogisticRegression

In [16]:
m_clf = LogisticRegression(fit_intercept=False, solver='lbfgs')
m_clf.fit(en_en[['tree', 'rf']], en_en['Attrition_num'])

LogisticRegression(fit_intercept=False)

In [17]:
en_test = pd.DataFrame()
en_test['tree'] = pd.DataFrame(tree_clf.predict_proba(X_test))[1]
en_test['rf'] = pd.DataFrame(rf_clf.predict_proba(X_test))[1]
en_test['combined'] = m_clf.predict(en_test[['tree', 'rf']])
en_test = pd.concat([en_test, pd.DataFrame(y_test).reset_index(drop=True)], axis=1)
en_test

Unnamed: 0,tree,rf,combined,Attrition_num
0,1.0,0.82,1,1
1,1.0,0.86,1,1
2,1.0,0.97,1,1
3,1.0,0.93,1,0
4,1.0,0.85,1,1
...,...,...,...,...
363,1.0,0.88,1,1
364,1.0,0.97,1,1
365,1.0,0.94,1,1
366,1.0,0.84,1,1


In [18]:
print(pd.crosstab(en_test['Attrition_num'], en_test['combined']))

combined        0    1
Attrition_num         
0              14   41
1              37  276


## Ensemble of Ensembles - 2

In [19]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier

In [20]:
class_weight = {0:0.834, 1:0.166}

In [21]:
pd.Series(list(y_train)).value_counts() / pd.Series(list(y_train)).count()

1    0.834846
0    0.165154
dtype: float64

In [23]:
forest = RandomForestClassifier(
    class_weight=class_weight, n_estimators=100
)

In [24]:
ada = AdaBoostClassifier(
    base_estimator=forest, n_estimators=100,
    learning_rate=0.5, random_state=42
)

In [25]:
ada.fit(X_train, y_train.ravel())

AdaBoostClassifier(base_estimator=RandomForestClassifier(class_weight={0: 0.834,
                                                                       1: 0.166}),
                   learning_rate=0.5, n_estimators=100, random_state=42)

In [26]:
printScore(ada, X_train, X_test, y_train, y_test)
printScore(ada, X_train, X_test, y_train, y_test, train=False)

Train Results:

Accuracy: 1.00

Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       182
           1       1.00      1.00      1.00       920

    accuracy                           1.00      1102
   macro avg       1.00      1.00      1.00      1102
weighted avg       1.00      1.00      1.00      1102
 

Confusion Matrix: 
 [[182   0]
 [  0 920]] 

ROC AUC: 1.0000

Test Results:

Accuracy: 0.86

Classification Report: 
               precision    recall  f1-score   support

           0       1.00      0.09      0.17        55
           1       0.86      1.00      0.93       313

    accuracy                           0.86       368
   macro avg       0.93      0.55      0.55       368
weighted avg       0.88      0.86      0.81       368
 

Confusion Matrix: 
 [[  5  50]
 [  0 313]] 

ROC AUC: 0.5455



In [33]:
bag_clf = BaggingClassifier(
    base_estimator=ada, n_estimators=50, 
    max_samples=1, max_features=1, bootstrap=True, 
    bootstrap_features=False, n_jobs=-1,random_state=42
)

In [30]:
bag_clf.fit(X_train, y_train.ravel())

BaggingClassifier(base_estimator=AdaBoostClassifier(base_estimator=RandomForestClassifier(class_weight={0: 0.834,
                                                                                                        1: 0.166}),
                                                    learning_rate=0.5,
                                                    n_estimators=100,
                                                    random_state=42),
                  max_features=1, max_samples=1, n_estimators=50, n_jobs=-1,
                  random_state=42)

In [31]:
printScore(bag_clf, X_train, X_test, y_train, y_test)
printScore(bag_clf, X_train, X_test, y_train, y_test, train=False)

Train Results:

Accuracy: 0.83

Classification Report: 
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       182
           1       0.83      1.00      0.91       920

    accuracy                           0.83      1102
   macro avg       0.42      0.50      0.45      1102
weighted avg       0.70      0.83      0.76      1102
 

Confusion Matrix: 
 [[  0 182]
 [  0 920]] 

ROC AUC: 0.5000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Results:

Accuracy: 0.85

Classification Report: 
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        55
           1       0.85      1.00      0.92       313

    accuracy                           0.85       368
   macro avg       0.43      0.50      0.46       368
weighted avg       0.72      0.85      0.78       368
 

Confusion Matrix: 
 [[  0  55]
 [  0 313]] 

ROC AUC: 0.5000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
