In [None]:
import matplotlib as plt
import numpy as np
import pandas as pd

def _plot_roi_curves(roi_df, optimal_threshold):
    """Plot ROI and targeting metrics against the probability threshold, then show the figure."""
    # Imported locally on purpose: the `plt` alias at the top of this file is
    # bound to the top-level `matplotlib` package, whose `figure` attribute is
    # a submodule (not callable), so the pyplot functions are not reachable
    # through it.
    import matplotlib.pyplot as plt

    threshold_axis = roi_df['threshold']
    fig, (roi_ax, share_ax) = plt.subplots(2, 1, figsize=(14, 8))

    # Top panel: ROI of the targeted campaign vs. contacting everyone.
    roi_ax.plot(threshold_axis, roi_df['targeted_roi'], 'b-', marker='o',
                label='Targeted ROI (%)')
    roi_ax.plot(threshold_axis, roi_df['baseline_roi'], 'r--', label='Baseline ROI (%)')
    roi_ax.axvline(x=optimal_threshold, color='green', linestyle='--',
                   label=f'Optimal threshold: {optimal_threshold:.1f}')
    roi_ax.set_title('ROI Optimization by Threshold')
    roi_ax.set_xlabel('Probability Threshold')
    roi_ax.set_ylabel('ROI (%)')
    roi_ax.legend()
    roi_ax.grid(True, alpha=0.3)

    # Bottom panel: share of customers contacted and their conversion rate.
    share_ax.plot(threshold_axis, roi_df['targeted_percentage'], 'g-', marker='o',
                  label='% Customers Contacted')
    share_ax.plot(threshold_axis, roi_df['conversion_rate'], color='purple', marker='s',
                  label='Conversion Rate (%)')
    share_ax.axvline(x=optimal_threshold, color='green', linestyle='--')
    share_ax.set_xlabel('Probability Threshold')
    share_ax.set_ylabel('Percentage (%)')
    share_ax.legend()
    share_ax.grid(True, alpha=0.3)

    fig.tight_layout()
    plt.show()


def calculate_roi(model, X_test, y_test, df_original, avg_deposit=5000, profit_margin=0.05,
                  contact_cost=10, *, thresholds=None, show_plot=True):
    """Calculate ROI and other business metrics for marketing campaign optimization.

    For each probability threshold, the customers whose predicted probability
    is at or above the threshold are treated as "contacted". The resulting
    cost, revenue, profit and ROI are compared with a baseline in which every
    customer in ``y_test`` is contacted.

    Args:
        model: Object exposing ``predict_proba(X_test)``; column 1 of the
            result is used as the score for each row.
        X_test: Feature data passed unchanged to ``model.predict_proba``.
        y_test: Actual labels, one per row of ``X_test``. Entries equal to 1
            are counted as conversions.
        df_original: Not used by this function; kept so existing call sites
            continue to work.
        avg_deposit: Multiplied by ``profit_margin`` to obtain the revenue
            credited for each conversion.
        profit_margin: Fraction of ``avg_deposit`` counted as revenue.
        contact_cost: Cost charged for every contacted customer.
        thresholds: Optional iterable of probability cut-offs to evaluate.
            Defaults to 0.1, 0.2, ..., 0.8.
        show_plot: When true (the default), draw the threshold curves with
            matplotlib. Pass ``False`` to skip plotting (e.g. headless runs).

    Returns:
        A tuple ``(roi_df, optimal_threshold)`` where ``roi_df`` is a
        DataFrame with one row per threshold and ``optimal_threshold`` is the
        threshold whose ``targeted_roi`` is largest (first one on ties).

    Raises:
        ValueError: If ``y_test`` and the predicted scores differ in length,
            or if ``thresholds`` is empty.
    """
    # Work on plain arrays so a non-default index on y_test cannot affect the
    # row-by-row pairing of labels and scores.
    scores = np.asarray(model.predict_proba(X_test))[:, 1]
    actual = np.asarray(y_test).ravel()
    if actual.shape[0] != scores.shape[0]:
        raise ValueError(
            f"y_test has {actual.shape[0]} rows but the model returned "
            f"{scores.shape[0]} predictions"
        )

    if thresholds is None:
        # Integer range divided by 10 gives the exact doubles for 0.1 ... 0.8.
        # np.arange(0.1, 0.9, 0.1) accumulates float error (0.30000000000000004,
        # 0.7000000000000001), which wrongly drops scores of exactly 0.3 / 0.7.
        thresholds = np.arange(1, 9) / 10
    else:
        thresholds = np.asarray(thresholds, dtype=float).ravel()
    if thresholds.size == 0:
        raise ValueError("thresholds must contain at least one value")

    # Baseline approach (contact everyone) -- independent of the threshold,
    # so it is computed once instead of on every loop iteration.
    total_contacts = len(actual)
    baseline_cost = total_contacts * contact_cost
    baseline_conversions = actual.sum()
    baseline_revenue = baseline_conversions * avg_deposit * profit_margin
    baseline_profit = baseline_revenue - baseline_cost
    baseline_roi = baseline_profit / baseline_cost if baseline_cost > 0 else 0

    is_conversion = actual == 1
    roi_metrics = []

    for threshold in thresholds:
        contacted = scores >= threshold
        targeted_contacts = int(contacted.sum())
        true_positives = int((contacted & is_conversion).sum())

        # Targeted approach. A threshold that selects nobody has zero cost;
        # its ROI is reported as 0 rather than dividing by zero.
        targeted_cost = targeted_contacts * contact_cost
        targeted_revenue = true_positives * avg_deposit * profit_margin
        targeted_profit = targeted_revenue - targeted_cost
        targeted_roi = targeted_profit / targeted_cost if targeted_cost > 0 else 0

        roi_metrics.append({
            'threshold': float(threshold),
            'targeted_contacts': targeted_contacts,
            'total_contacts': total_contacts,
            'targeted_percentage': (
                targeted_contacts / total_contacts * 100 if total_contacts > 0 else 0
            ),
            'true_positives': true_positives,
            'conversion_rate': (
                true_positives / targeted_contacts * 100 if targeted_contacts > 0 else 0
            ),
            'baseline_roi': baseline_roi * 100,
            'targeted_roi': targeted_roi * 100,
            'roi_improvement': (targeted_roi - baseline_roi) * 100,
            'cost_reduction': baseline_cost - targeted_cost,
            'profit_improvement': targeted_profit - baseline_profit,
        })

    roi_df = pd.DataFrame(roi_metrics)

    # Optimal threshold = highest targeted ROI (idxmax returns the first maximum).
    optimal_idx = roi_df['targeted_roi'].idxmax()
    optimal_threshold = roi_df.loc[optimal_idx, 'threshold']

    if show_plot:
        _plot_roi_curves(roi_df, optimal_threshold)

    # Business impact report at optimal threshold
    optimal_result = roi_df.loc[optimal_idx]
    print(f"\nBusiness Impact Analysis (Optimal Threshold = {optimal_threshold:.2f}):")
    print(f"Contact only {optimal_result['targeted_percentage']:.1f}% of customers")
    print(f"Conversion rate: {optimal_result['conversion_rate']:.2f}%")
    print(f"ROI improvement: {optimal_result['roi_improvement']:.2f}%")
    print(f"Cost reduction: ${optimal_result['cost_reduction']:,.2f}")
    print(f"Profit improvement: ${optimal_result['profit_improvement']:,.2f}")

    return roi_df, optimal_threshold
