# Imports and Setup

## Imports

In [None]:
# Evaluation metrics
from sklearn import metrics

# Train-test split and hyperparameter tuning
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

# Import classification models
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, BaggingClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier

## Test-train split

In [None]:
# Input: X holds the features, y holds the labels.
# Hold out 30% of the rows as the test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# Estimate Ensemble Models

## Bagging Estimator

In [None]:
# Bagging ensemble with library-default hyperparameters; only the seed is fixed.
bagging_estimator = BaggingClassifier(random_state=1)
# Fit on the training split (the fitted estimator is the cell's displayed output).
bagging_estimator.fit(X_train, y_train)

## Gradient Boosting

In [None]:
# Gradient boosting with library-default hyperparameters; only the seed is fixed.
gb_estimator = GradientBoostingClassifier(random_state=1)
# Fit on the training split (the fitted estimator is the cell's displayed output).
gb_estimator.fit(X_train, y_train)

## AdaBoost

In [None]:
# AdaBoost with library-default hyperparameters; only the seed is fixed.
ada_estimator = AdaBoostClassifier(random_state=1)
# Fit on the training split (the fitted estimator is the cell's displayed output).
ada_estimator.fit(X_train, y_train)

## XGBoost

In [None]:
# XGBoost classifier evaluated with log loss; the seed is fixed for repeatable runs.
xgb_estimator = XGBClassifier(
    eval_metric='logloss',
    random_state=1,
)
# Fit on the training split (the fitted estimator is the cell's displayed output).
xgb_estimator.fit(X_train, y_train)

## Random Forest

In [None]:
# Random forest with bootstrap sampling and out-of-bag scoring switched on.
# class_weight="balanced" asks scikit-learn to weight classes by their
# frequency in the training labels; the seed is fixed for repeatable runs.
rf_estimator = RandomForestClassifier(
    bootstrap=True,
    oob_score=True,
    class_weight="balanced",
    random_state=1,
)
# Fit on the training split (the fitted estimator is the cell's displayed output).
rf_estimator.fit(X_train, y_train)

# Hyperparameter Tuning Example

In [None]:
# 1) Fresh, untuned random forest that serves as the template for the search.
#    It gets its own name so `rf_tuned` only ever refers to the tuned model.
rf_base = RandomForestClassifier(class_weight="balanced", random_state=1)

# 2) Parameter grid to search.
#    Plain Python lists replace the earlier np.arange calls for two reasons:
#      * `np` is not imported in the notebook's import cell, so a fresh-kernel
#        run would fail with NameError;
#      * np.arange with a float step yields values such as 0.30000000000000004
#        and may include or drop the end point depending on rounding.
#    The lists spell out the intended half-open ranges [5, 10) and [0.2, 0.8).
parameters = {
    "n_estimators": [100, 200, 300],
    "min_samples_leaf": [5, 6, 7, 8, 9],
    "max_features": [0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
    "max_samples": [0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
}

# 3) Scorer used to rank parameter combinations.
#    NOTE: despite its name this is an F1 scorer, not accuracy. The name is
#    kept so any other cell that refers to `acc_scorer` keeps working.
acc_scorer = metrics.make_scorer(metrics.f1_score)

# 4) Exhaustive grid search with 5-fold cross-validation on the training split.
grid_obj = GridSearchCV(rf_base, parameters, scoring=acc_scorer, cv=5)
grid_obj.fit(X_train, y_train)

# 5) Select the best estimator.
#    GridSearchCV refits the best parameter combination on the full training
#    data by default (refit=True), so an additional .fit() call would only
#    repeat that work and is omitted.
rf_tuned = grid_obj.best_estimator_
rf_tuned

# Score Models Example

In [None]:
# Predict once per split; every metric below is derived from these cached
# predictions.
pred_train = rf_estimator.predict(X_train)
pred_test = rf_estimator.predict(X_test)

# Accuracy. accuracy_score on the cached predictions gives the same value as
# rf_estimator.score(X, y) but avoids running predict a second time.
train_acc = metrics.accuracy_score(y_train, pred_train)
test_acc = metrics.accuracy_score(y_test, pred_test)

# Recall, precision and F1 use scikit-learn's default arguments.
# NOTE(review): those defaults assume a binary target whose positive class is
# labelled 1 - confirm against how y is encoded.
train_recall = metrics.recall_score(y_train, pred_train)
test_recall = metrics.recall_score(y_test, pred_test)

train_precision = metrics.precision_score(y_train, pred_train)
test_precision = metrics.precision_score(y_test, pred_test)

train_f1 = metrics.f1_score(y_train, pred_train)
test_f1 = metrics.f1_score(y_test, pred_test)