## <span style='color:#ff5f27'> 📝 Imports </span>

In [None]:
from xgboost import XGBClassifier, plot_importance
import matplotlib.pyplot as plt
import os
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    confusion_matrix,
)

## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks; the returned project handle is reused by later cells
# (for the feature store here and for the model registry further down).
project = hopsworks.login()

# Handle to the project's feature store, used to look up feature groups and feature views.
fs = project.get_feature_store() 

In [None]:
# Look up version 1 of the "cheque_fg" feature group in the feature store
cheque_fg = fs.get_feature_group(name="cheque_fg", version=1)

## <span style="color:#ff5f27;">🪄 Feature View Creation </span>


In [None]:
# Get-or-create version 1 of the 'cheque_fraud_detection' feature view.
# The query selects three columns from the cheque feature group; 'valid' is
# declared as the label, which leaves the other two as model inputs.
feature_view = fs.get_or_create_feature_view(
    name="cheque_fraud_detection",
    version=1,
    description='Parsed Cheque Data for Fraud Detection',
    labels=['valid'],
    query=cheque_fg.select(
        [
            "spelling_is_correct",
            "amount_letter_number_match",
            "valid",
        ]
    ),
)

In [None]:
# Split the feature view's data into train and test sets (test_size=0.2)
X_train, X_test, y_train, y_test = feature_view.train_test_split(test_size=0.2)

# Preview the first three rows of the training features
X_train.head(3)

In [None]:
# Preview the first three rows of the training labels
y_train.head(3)

## <span style="color:#ff5f27;">🧬 Model Building </span>


In [None]:
# Create an XGBoost classifier; no arguments are passed, so the library defaults apply
xgb_classifier = XGBClassifier()

# Fit the classifier on the training split. As the last expression in the cell,
# the value returned by fit() is what the notebook displays.
xgb_classifier.fit(X_train, y_train)

## <span style="color:#ff5f27;">⚖️ Model Validation </span>

In [None]:
# Predict labels for the held-out test split
y_pred = xgb_classifier.predict(X_test)

# Confusion matrix of true vs. predicted labels
cm = confusion_matrix(y_test, y_pred)

# Accuracy plus macro-averaged precision, recall and F1
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Metrics rounded to two decimals and stored as strings; this dict is passed
# as `metrics` when the model is registered further down.
res_dict = {
    metric_name: str(round(metric_value, 2))
    for metric_name, metric_value in (
        ("Accuracy", accuracy),
        ("Precision", precision),
        ("Recall", recall),
        ("F1 Score", f1),
    )
}

# Report every metric on its own line, followed by the confusion matrix
print(
    f"⛳️ Accuracy: {accuracy:.2f}",
    f"⛳️ Precision: {precision:.2f}",
    f"⛳️ Recall: {recall:.2f}",
    f"⛳️ F1 Score: {f1:.2f}",
    f"⛳️ Confusion Matrix:\n{cm}",
    sep="\n",
)

In [None]:
# Local staging directory for the model artifacts, with an "images" subfolder for plots
model_dir = "cheque_fraud_detection_model"
images_dir = os.path.join(model_dir, "images")
# makedirs creates both levels in one call; exist_ok=True makes re-running the cell safe
os.makedirs(images_dir, exist_ok=True)

In [None]:
# Plot the trained classifier's feature importances with XGBoost's plot_importance helper
plot_importance(xgb_classifier)

# Build the path with os.path.join, the same way images_dir itself is built,
# rather than concatenating a hard-coded "/" separator.
feature_importance_path = os.path.join(images_dir, "feature_importance.png")

# Save before show(): after the figure has been shown it may no longer be the
# current figure, and savefig would then write an empty image.
plt.savefig(feature_importance_path)
plt.show()

## <span style="color:#ff5f27;">🗄 Model Registry </span>

In [None]:
# Retrieve the project's model registry handle; it is used below to register the trained model
mr = project.get_model_registry()

In [None]:
# Write the trained classifier to model.json inside the local model directory,
# the same directory that is later handed to the registry's save() call.
xgb_classifier.save_model(os.path.join(model_dir, "model.json"))

In [None]:
# Register a model entry in the registry. It carries the evaluation metrics,
# the feature view it was trained from, and an input example taken from
# X_test via sample() (presumably a single random row, passed as a raw array).
cheque_fraud_detection_model = mr.python.create_model(
    name="cheque_fraud_detection_model",
    description="Cheque Fraud Detection Model",
    feature_view=feature_view,
    metrics=res_dict,
    input_example=X_test.sample().values,
)

# Upload the contents of the local model directory as the artifacts of this registry entry
cheque_fraud_detection_model.save(model_dir)

---