In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import joblib
from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# Resolve the project root from the current working directory; every path
# below is built relative to it.
cwd = os.getcwd()
print(cwd)

# Load the test dataset from the local "data" folder (semicolon-separated CSV)
bank_test = pd.read_csv(os.path.join(cwd, "data", "bank-additional.csv"), sep=";")

# Split dataset into X_test (features) and y_test (target)
X_test = bank_test.drop(columns=["y"])
# Map output: "yes" -> 1, "no" -> 0
y_test = bank_test["y"].map({"yes": 1, "no": 0})
# Series.map turns any label that is missing from the dict into NaN; fail
# here with a clear message instead of deep inside the metric functions.
if y_test.isna().any():
    unexpected = sorted(bank_test.loc[y_test.isna(), "y"].astype(str).unique())
    raise ValueError(f"Unexpected values in target column 'y': {unexpected}")

# Best trained model, relative to the project root. The path is assembled
# with os.path.join instead of a hard-coded backslash: "\R" is an invalid
# escape sequence in a normal string literal, and a backslash separator only
# resolves on Windows.
model_id = os.path.join("saved_pipelines", "Random_Forest_pipeline_1762872489.pkl")

# Load corresponding pipeline.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load pipeline files that come from a trusted source.
pipeline = joblib.load(os.path.join(cwd, model_id))

# Predict
y_pred = pipeline.predict(X_test)

# Report the classification metrics on the test set. Recall is the metric to
# maximize because the y distribution is very imbalanced.
# Each entry pairs the printed label with the scoring function to call; the
# order of the tuple is the order of the printed lines.
for label, metric in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 score :", f1_score),
):
    print(label, metric(y_test, y_pred))

# Build the confusion matrix from true vs. predicted labels, then draw it
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()
