In [None]:
from sklearn.model_selection import cross_val_score, RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import joblib
import numpy as np
import matplotlib.pyplot as plt

# Model selection for the random forest: baseline cross-validation,
# randomized hyperparameter search, persistence of the winner, and a
# held-out evaluation.
# NOTE(review): rf_model, X_train, y_train, X_test, y_test and
# RandomForestClassifier are not defined in this cell; they are assumed to
# come from earlier cells -- confirm before running this cell standalone.

# Cross-validation
# Baseline: 5-fold CV of the existing model on the training split. No
# `scoring` is given, so the estimator's own `score` method is used
# (accuracy for scikit-learn classifiers).
cv_scores = cross_val_score(rf_model, X_train, y_train, cv=5)
print("CV Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

# Hyperparameter tuning
# Candidate values for each hyperparameter (3 x 3 x 3 x 3 = 81 combinations).
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}

# Evaluate 10 randomly drawn combinations with 5-fold CV instead of all 81.
# Both the forest and the sampler are seeded so the search is reproducible.
search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    scoring="accuracy",
    random_state=42,
    n_jobs=-1,
)
search.fit(X_train, y_train)

print("Best Parameters:", search.best_params_)
print("Best CV Accuracy:", search.best_score_)

# Save the best model
# Written relative to the current working directory.
best_model = search.best_estimator_
joblib.dump(best_model, "best_random_forest_model.pkl")

# Confusion matrix for best model
y_pred_best = best_model.predict(X_test)

# Sorted union of the true and predicted labels: the same label set
# confusion_matrix derives by default, so `cm` is unchanged. Passing it
# explicitly to both calls keeps the matrix rows/columns and the plot ticks
# aligned, and makes the axes show the real class labels rather than the
# positional indices 0..n-1 that ConfusionMatrixDisplay falls back to.
class_labels = np.union1d(y_test, y_pred_best)
cm = confusion_matrix(y_test, y_pred_best, labels=class_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot()
# Title the axes the display actually drew on rather than relying on
# pyplot's implicit "current axes".
disp.ax_.set_title("Confusion Matrix: Best Random Forest")
plt.show()

# Classification report
print(classification_report(y_test, y_pred_best))