In [6]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [7]:
# Import data
# The path uses forward slashes so it resolves on Windows, Linux and macOS alike
# (a backslash-separated string is treated as a single file name on POSIX systems).
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# when it comes from a trusted source.
data_new = pd.read_pickle('data/04_fct/fct_demographic_offers_and_transactions.pkl')

# Filter data for male and female customers using the one-hot gender flag columns
male_data = data_new[data_new['gender_M'] == 1]
female_data = data_new[data_new['gender_F'] == 1]

In [8]:
# Female customers: predict `offer_viewed` from the three channel columns.
features_female = female_data.loc[:, ['mobile', 'social', 'web']]
target_female = female_data['offer_viewed']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
(X_train_female, X_test_female,
 y_train_female, y_test_female) = train_test_split(
    features_female,
    target_female,
    test_size=0.3,
    random_state=42,
)

# Fit a seeded random forest on the training portion (`fit` returns the estimator itself).
rf_clf_female = RandomForestClassifier(random_state=42).fit(X_train_female, y_train_female)

# Score the held-out rows and measure plain accuracy.
y_rf_pred_female = rf_clf_female.predict(X_test_female)
rf_accuracy_female = accuracy_score(y_test_female, y_rf_pred_female)

# Pair each feature column with the importance reported by the fitted forest.
rf_feature_importances_female = rf_clf_female.feature_importances_
rf_importance_df_female = pd.DataFrame({
    'Feature': features_female.columns.tolist(),
    'Importance': rf_feature_importances_female,
})

rf_accuracy_female, rf_importance_df_female


(0.7986263334794681,
   Feature  Importance
 0  mobile    0.233115
 1  social    0.732477
 2     web    0.034408)

In [9]:
# Male customers: predict `offer_viewed` from the three channel columns.
features_male = male_data.loc[:, ['mobile', 'social', 'web']]
target_male = male_data['offer_viewed']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
(X_train_male, X_test_male,
 y_train_male, y_test_male) = train_test_split(
    features_male,
    target_male,
    test_size=0.3,
    random_state=42,
)

# Fit a seeded random forest on the training portion (`fit` returns the estimator itself).
rf_clf_male = RandomForestClassifier(random_state=42).fit(X_train_male, y_train_male)

# Score the held-out rows and measure plain accuracy.
y_rf_pred_male = rf_clf_male.predict(X_test_male)
rf_accuracy_male = accuracy_score(y_test_male, y_rf_pred_male)

# Pair each feature column with the importance reported by the fitted forest.
rf_feature_importances_male = rf_clf_male.feature_importances_
rf_importance_df_male = pd.DataFrame({
    'Feature': features_male.columns.tolist(),
    'Importance': rf_feature_importances_male,
})

rf_accuracy_male, rf_importance_df_male


(0.7933431641036444,
   Feature  Importance
 0  mobile    0.194094
 1  social    0.764368
 2     web    0.041538)

In [12]:
# Permutation importance on the held-out test sets.
#
# `rf_clf_female` and `rf_clf_male` were already fitted above on exactly the
# same training data with the same `random_state=42`, so they are reused here
# instead of being retrained: refitting would only rebuild identical forests.

# Calculate permutation importance for female customers
# (each feature is shuffled `n_repeats` times; the seed makes the shuffles repeatable)
perm_importance_female = permutation_importance(
    rf_clf_female, X_test_female, y_test_female, n_repeats=10, random_state=42
)

# Calculate permutation importance for male customers
perm_importance_male = permutation_importance(
    rf_clf_male, X_test_male, y_test_male, n_repeats=10, random_state=42
)

# Create DataFrames to display permutation importances.
# Feature labels are taken from the evaluated frames so that they always line
# up with the column order the importances were computed in.
perm_importance_df_female = pd.DataFrame({
    'Feature': list(X_test_female.columns),
    'Importance': perm_importance_female.importances_mean,
    'Importance STD': perm_importance_female.importances_std,
})

perm_importance_df_male = pd.DataFrame({
    'Feature': list(X_test_male.columns),
    'Importance': perm_importance_male.importances_mean,
    'Importance STD': perm_importance_male.importances_std,
})


perm_importance_df_female, perm_importance_df_male


(  Feature  Importance  Importance STD
 0  mobile    0.029373        0.002633
 1  social    0.014000        0.002043
 2     web    0.003829        0.001550,
   Feature  Importance  Importance STD
 0  mobile    0.032505        0.001073
 1  social    0.020876        0.001315
 2     web    0.000000        0.000000)

Conclusion
* Mobile Channel: The most important channel for both genders, and slightly more important for males than for females.
* Social Channel: More important for males than females.
* Web Channel: Shows some importance for females but no importance for males.

In [15]:


# Held-out evaluation of the two fitted forests.
# For each gender: predicted labels, the probability in column 1 of
# `predict_proba` (used for ROC-AUC), and a dictionary of summary metrics.

# --- Female customers ---
female_proba_matrix = rf_clf_female.predict_proba(X_test_female)
y_test_prob_female = female_proba_matrix[:, 1]
y_test_pred_female = rf_clf_female.predict(X_test_female)

results_female = {
    "Accuracy": accuracy_score(y_test_female, y_test_pred_female),
    "Confusion Matrix": confusion_matrix(y_test_female, y_test_pred_female),
    "Classification Report": classification_report(y_test_female, y_test_pred_female),
    "ROC-AUC Score": roc_auc_score(y_test_female, y_test_prob_female),
}

# Keep the individual metrics available under their own names as well.
accuracy_female = results_female["Accuracy"]
conf_matrix_female = results_female["Confusion Matrix"]
class_report_female = results_female["Classification Report"]
roc_auc_female = results_female["ROC-AUC Score"]

# --- Male customers ---
male_proba_matrix = rf_clf_male.predict_proba(X_test_male)
y_test_prob_male = male_proba_matrix[:, 1]
y_test_pred_male = rf_clf_male.predict(X_test_male)

results_male = {
    "Accuracy": accuracy_score(y_test_male, y_test_pred_male),
    "Confusion Matrix": confusion_matrix(y_test_male, y_test_pred_male),
    "Classification Report": classification_report(y_test_male, y_test_pred_male),
    "ROC-AUC Score": roc_auc_score(y_test_male, y_test_prob_male),
}

# Keep the individual metrics available under their own names as well.
accuracy_male = results_male["Accuracy"]
conf_matrix_male = results_male["Confusion Matrix"]
class_report_male = results_male["Classification Report"]
roc_auc_male = results_male["ROC-AUC Score"]

# Show both result dictionaries as the cell output.
results_female, results_male


({'Accuracy': 0.7986263334794681,
  'Confusion Matrix': array([[ 427, 1115],
         [ 263, 5038]], dtype=int64),
  'Classification Report': '              precision    recall  f1-score   support\n\n           0       0.62      0.28      0.38      1542\n           1       0.82      0.95      0.88      5301\n\n    accuracy                           0.80      6843\n   macro avg       0.72      0.61      0.63      6843\nweighted avg       0.77      0.80      0.77      6843\n',
  'ROC-AUC Score': 0.8112081977533545},
 {'Accuracy': 0.7933431641036444,
  'Confusion Matrix': array([[ 633, 1653],
         [ 309, 6899]], dtype=int64),
  'Classification Report': '              precision    recall  f1-score   support\n\n           0       0.67      0.28      0.39      2286\n           1       0.81      0.96      0.88      7208\n\n    accuracy                           0.79      9494\n   macro avg       0.74      0.62      0.63      9494\nweighted avg       0.77      0.79      0.76      9494\n',


- **ROC-AUC Score**: 0.8404

### Analysis

1. **Accuracy**:
 - The accuracy is similar for both genders, with females at 79.86% and males at 79.33%.

2. **Confusion Matrix**:
 - Both genders show a higher number of false positives (1115 for females and 1653 for males) compared to false negatives (263 for females and 309 for males).
 - The model performs well in identifying true positives for both genders.

3. **Classification Report**:
 - **Precision**: Slightly higher for females (0.82) than for males (0.81) for the positive class.
 - **Recall**: High for the positive class for both genders, with males at 0.96 and females at 0.95; recall for the negative class is much lower (0.28 for both genders).
 - **F1-Score**: Consistently higher for the positive class for both genders.

4. **ROC-AUC Score**:
 - The ROC-AUC score is higher for males (0.8404) compared to females (0.8112), indicating better performance in distinguishing between classes for male customers.

### Conclusion

- The model performs similarly for both genders, with slight differences in precision, recall, and ROC-AUC scores.
- The mobile and social channels are the most important for both genders, with the mobile channel being slightly more important for males.
- The web channel has limited importance overall, with some importance for females but none for males.
