# Predicting the Likelihood of Marketing Engagement


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve, auc
import matplotlib.pyplot as plt

class RandomForestWrapper:
    """Thin convenience wrapper around scikit-learn's ``RandomForestClassifier``.

    The wrapper builds the classifier in ``__init__``, forwards ``fit``,
    ``predict`` and ``predict_proba`` to it, and adds ``evaluate``, which
    prints a diagnostic report (feature importances, accuracy / precision /
    recall, AUC) and plots the in-sample and out-of-sample ROC curves.

    ``evaluate`` assumes a binary target: precision and recall are computed
    with scikit-learn's default arguments, and the ROC curve is built from
    column 1 of ``predict_proba``.

    Attributes:
        num_estimators: Number of trees requested for the forest.
        max_features: Value forwarded as the forest's ``max_features``.
        random_state: Seed forwarded to the forest; ``None`` leaves it unseeded.
        rf_model: The underlying ``RandomForestClassifier`` instance.
    """

    def __init__(self, num_estimators: int = 100, max_features="sqrt", random_state=None):
        """Create the underlying forest.

        Args:
            num_estimators: Passed to the classifier as ``n_estimators``.
            max_features: Passed to the classifier as ``max_features``.
            random_state: Passed to the classifier as ``random_state``. Set it
                to an integer to make training reproducible; the default
                ``None`` keeps the previous (unseeded) behaviour.
        """
        self.num_estimators = num_estimators
        self.max_features = max_features
        self.random_state = random_state
        self.rf_model = RandomForestClassifier(
            n_estimators=num_estimators,
            max_features=max_features,
            random_state=random_state,
        )

    def fit(self, X_train, y_train):
        """Fit the forest on the training data.

        Returns:
            This wrapper, so calls can be chained
            (``RandomForestWrapper().fit(X, y).predict(X_new)``).
        """
        self.rf_model.fit(X_train, y_train)
        return self

    def predict(self, X_test):
        """Return the forest's predicted class labels for ``X_test``."""
        return self.rf_model.predict(X_test)

    def predict_proba(self, X_test):
        """Return the forest's per-class probability estimates for ``X_test``."""
        return self.rf_model.predict_proba(X_test)

    def evaluate(self, X_train, y_train, X_test, y_test):
        """Print a diagnostic report and plot train/test ROC curves.

        The report contains, in order: the first five test predictions of the
        first tree in the forest, the feature-importance table, accuracy /
        precision / recall on both splits, and the AUC on both splits. A
        matplotlib figure with both ROC curves is then shown.

        The model must already be fitted. ``X_train`` may be a DataFrame or
        any array-like; when it has no ``columns`` attribute, features are
        labelled ``feature_0``, ``feature_1``, ...

        Args:
            X_train: Features the model was trained on.
            y_train: Binary labels matching ``X_train``.
            X_test: Held-out features.
            y_test: Binary labels matching ``X_test``.

        Returns:
            None. All results are printed or plotted.
        """
        self._print_first_tree_preview(X_test)
        self._print_feature_importances(X_train)

        train_scores = self._label_metrics(X_train, y_train)
        test_scores = self._label_metrics(X_test, y_test)
        for metric_name in ("Accuracy", "Precision", "Recall"):
            print(f"Train {metric_name}:", train_scores[metric_name])
            print(f"Test {metric_name}:", test_scores[metric_name])

        train_roc = self._roc_points(X_train, y_train)
        test_roc = self._roc_points(X_test, y_test)
        print("Train AUC:", train_roc[2])
        print("Test AUC:", test_roc[2])

        self._plot_roc(train_roc, test_roc)

    def _print_first_tree_preview(self, X_test, n_rows=5):
        """Print the first ``n_rows`` test predictions of the forest's first tree."""
        first_tree = self.rf_model.estimators_[0]
        # The forest fits its trees on plain arrays, so handing a DataFrame to
        # a single tree triggers scikit-learn's feature-name mismatch warning.
        features = X_test.to_numpy() if hasattr(X_test, "to_numpy") else X_test
        # Individual trees of a forest predict encoded class indices (as
        # floats); map them back through the forest's ``classes_`` so the
        # preview shows the original labels.
        encoded = first_tree.predict(features)
        labels = self.rf_model.classes_[encoded.astype(int)]
        print("Predictions from the first sub-estimator:")
        print(labels[:n_rows])

    def _print_feature_importances(self, X_train):
        """Print a table pairing each feature importance with its feature name."""
        importances = self.rf_model.feature_importances_
        names = getattr(X_train, "columns", None)
        if names is None:
            # Array-like input carries no column labels; generate positional ones.
            names = [f"feature_{idx}" for idx in range(len(importances))]
        importance_table = pd.DataFrame(
            list(zip(importances, names)),
            columns=['feature.importance', 'feature'],
        )
        print("Feature Importances:")
        print(importance_table)

    def _label_metrics(self, X, y):
        """Return accuracy, precision and recall of the forest on ``(X, y)``."""
        predicted = self.rf_model.predict(X)
        return {
            "Accuracy": accuracy_score(y, predicted),
            "Precision": precision_score(y, predicted),
            "Recall": recall_score(y, predicted),
        }

    def _roc_points(self, X, y):
        """Return ``(fpr, tpr, auc)`` for the forest's scores on ``(X, y)``."""
        # Column 1 holds the probability of the second entry of ``classes_``.
        positive_scores = self.rf_model.predict_proba(X)[:, 1]
        fpr, tpr, _ = roc_curve(y, positive_scores)
        return fpr, tpr, auc(fpr, tpr)

    @staticmethod
    def _plot_roc(train_roc, test_roc):
        """Draw the in-sample and out-of-sample ROC curves on one figure."""
        train_fpr, train_tpr, train_auc = train_roc
        test_fpr, test_tpr, test_auc = test_roc

        plt.figure(figsize=(10, 7))
        plt.plot(test_fpr, test_tpr, color='darkorange',
                 label='Out-Sample ROC curve (area = %0.4f)' % test_auc)
        plt.plot(train_fpr, train_tpr, color='navy',
                 label='In-Sample ROC curve (area = %0.4f)' % train_auc)
        # Diagonal reference line.
        plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
        plt.grid()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('RandomForest Model ROC Curve')
        plt.legend(loc="lower right")
        plt.show()
