# Performance Metrics

In [20]:
from sklearn.model_selection import cross_validate
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score

In [19]:
X_train = bank_train_set.drop(["duration", "y"], axis=1)
y_train = bank_train_set["y"].astype("int").to_numpy()
X_test = bank_test_set.drop(["duration", "y"], axis=1)
y_test = bank_test_set["y"].astype("int").to_numpy()

In [21]:
scoring = ["f1", "precision", "recall", "roc_auc"]
# Initialize Model
nb_model = GaussianNB()
logit_model = LogisticRegression(class_weight="balanced")
knn_model = KNeighborsClassifier(n_neighbors=5)
# Train model and get CV results 
nb_cv = cross_validate(nb_model, X_train, y_train, scoring=scoring, cv = 5)
logit_cv = cross_validate(logit_model, X_train, y_train, scoring=scoring, cv = 5)
knn_cv = cross_validate(knn_model, X_train, y_train, scoring=scoring, cv = 5)
# Calculate CV result mean
nb_result = pd.DataFrame(nb_cv).mean().rename("Naive Bayes")
logit_result = pd.DataFrame(logit_cv).mean().rename("Logistic Regression")
knn_result = pd.DataFrame(knn_cv).mean().rename("KNN")
# Store and output result
result = pd.concat([nb_result, logit_result, knn_result], axis=1)
result

Unnamed: 0,Naive Bayes,Logistic Regression,KNN
fit_time,0.014816,0.084774,0.612159
score_time,0.011017,0.007934,10.643387
test_f1,0.311337,0.419526,0.335483
test_precision,0.199409,0.312006,0.507897
test_recall,0.771852,0.640625,0.25054
test_roc_auc,0.750778,0.781366,0.706039


In [23]:
from sklearn.dummy import DummyClassifier


In [24]:
X_test = preprocessor.transform(bank_test_set.drop(["duration", "y"], axis=1))
y_test = bank_test_set["y"].astype("int").to_numpy()
# Initialize and fit Model
dummy_model = DummyClassifier(strategy="prior").fit(X_train, y_train)
nb_model = GaussianNB().fit(X_train, y_train)
logit_model = LogisticRegression(class_weight="balanced").fit(X_train, y_train)
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
# Predict and calculate score
dummy_predict = dummy_model.predict(X_test)
dummy_f1 = f1_score(y_test, dummy_predict)
dummy_precision = precision_score(y_test, dummy_predict)
dummy_recall = recall_score(y_test, dummy_predict)
dummy_roc_auc = roc_auc_score(y_test, dummy_predict)
nb_predict = nb_model.predict(X_test)
nb_f1 = f1_score(y_test, nb_predict)
nb_precision = precision_score(y_test, nb_predict)
nb_recall = recall_score(y_test, nb_predict)
nb_roc_auc = roc_auc_score(y_test, nb_predict)
logit_predict = logit_model.predict(X_test)
logit_f1 = f1_score(y_test, logit_predict)
logit_precision = precision_score(y_test, logit_predict)
logit_recall = recall_score(y_test, logit_predict)
logit_roc_auc = roc_auc_score(y_test, logit_predict)
knn_predict = knn_model.predict(X_test)
knn_f1 = f1_score(y_test, knn_predict)
knn_precision = precision_score(y_test, knn_predict)
knn_recall = recall_score(y_test, knn_predict)
knn_roc_auc = roc_auc_score(y_test, knn_predict)
# Store and output result
result = pd.DataFrame(data={"Dummy Classifier": [dummy_f1, dummy_precision, dummy_recall, dummy_roc_auc],
                            "Naive Bayes": [nb_f1, nb_precision, nb_recall, nb_roc_auc],
                            "Logistic Regression": [logit_f1, logit_precision, logit_recall, logit_roc_auc],
                            "KNN": [knn_f1, knn_precision, knn_recall, knn_roc_auc]},
                       index=["F1 Score", "Precision Score", "Recall Score", "ROC AUC Score"])
result

Unnamed: 0,Dummy Classifier,Naive Bayes,Logistic Regression,KNN
F1 Score,0.0,0.345568,0.44007,0.372325
Precision Score,0.0,0.224894,0.326552,0.550633
Recall Score,0.0,0.74569,0.674569,0.28125
ROC AUC Score,0.5,0.709712,0.748981,0.626056
