# Bagging Classifier

In [207]:
import pickle 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import recall_score, hamming_loss, f1_score, precision_score 


### Import the pickled dataframes from the Eda_cleaning notebook:

In [14]:
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load pickles produced by a trusted source (here, presumably the
# Eda_cleaning notebook -- confirm).
with open("pump_predictors_data_cleaned.pickle", "rb") as f: # "rb" opens the file for reading in binary mode
    dum_df = pickle.load(f)
    
    
with open("pump_target_data_cleaned.pickle", "rb") as f: # "rb" opens the file for reading in binary mode
    target = pickle.load(f)

In [15]:
# Train/test split.
# random_state pins the shuffle so the same rows land in train and test on
# every Restart & Run All; without it every score below changes between runs.
X_train, X_test, y_train, y_test = train_test_split(dum_df, target, random_state=42)



In [208]:
def get_score(model, X_train, y_train, y_train_hat):
    """Print a small classification report for one set of predictions.

    Parameters
    ----------
    model
        Fitted estimator; only its ``score(X_train, y_train)`` method is used,
        and the result is reported under the "Accuracy" label.
    X_train
        Features passed to ``model.score``.
    y_train
        True labels.
    y_train_hat
        Predicted labels compared against ``y_train``.

    Returns
    -------
    None
        The report is printed, not returned.
    """
    weighted = 'weighted'
    recall_value = recall_score(y_train, y_train_hat, average=weighted)
    hamming_value = hamming_loss(y_train, y_train_hat)
    f1_value = f1_score(y_train, y_train_hat, average=weighted)
    precision_value = precision_score(y_train, y_train_hat, average=weighted)
    accuracy_value = model.score(X_train, y_train)

    pad = " " * 8
    # The report starts with an empty line, and the first three metric lines
    # end with a single trailing space; both are kept so the printed text is
    # unchanged.
    report_lines = [
        "",
        f"{pad}Recall Score: {recall_value} ",
        f"{pad}Hamming Loss: {hamming_value} ",
        f"{pad}f1 Score: {f1_value} ",
        f"{pad}Precision Score: {precision_value}",
        f"{pad}Accuracy: {accuracy_value}",
    ]
    print("\n".join(report_lines))

In [16]:
# Bagging ensemble of 200 base estimators; the float values make each estimator
# draw 40% of the rows (max_samples) and 60% of the columns (max_features).
# random_state pins that random sampling so the fitted model, and every score
# derived from it, is reproducible across kernel restarts.
bt2 = BaggingClassifier(
    max_features=0.6,
    max_samples=0.4,
    n_estimators=200,
    random_state=42,
)

bt2.fit(X_train, y_train)

# These predictions are made on the same data the model was fit on, so the
# scores printed below describe training fit, not held-out performance.
y_train_pred_bt2 = bt2.predict(X_train)

get_score(bt2, X_train, y_train, y_train_pred_bt2)


        Recall Score: 0.904736251402918 
        Hamming Loss: 0.09526374859708193 
        f1 Score: 0.9010870919337772 
        Precision Score: 0.9061715069016353
        Accuracy: 0.904736251402918


In [17]:
# 7-fold cross-validation of bt2 on the training split; one score per fold,
# then the average across folds is reported as a percentage.
bt2_cv_score = cross_val_score(bt2, X_train, y_train, cv=7)
mean_bt2_cv_score = bt2_cv_score.mean()
print(f"Mean Cross Validation Best Param Score: {mean_bt2_cv_score :.2%}")



Mean Cross Validation Best Param Score: 79.94%
