In [3]:
import pandas as pd
import numpy as np
from IPython.display import display, Markdown

# --- 1. Model Performance Summary Table ---
# Render the section heading as Markdown, then a plain-text rule on stdout.
display(Markdown("## 1. Model Performance Summary"))
print("---------------------------------------------------------------------------------")

# One tuple per table row, in column order. Scores are hard-coded display
# strings (presumably transcribed from the final evaluation metrics —
# TODO confirm against the evaluation cell whenever the model is retrained).
_performance_columns = ['Metric', 'Score', 'Target', 'Interpretation']
_performance_rows = [
    ('AUC-ROC', '0.9805', '↑ (Excellent)',
     'Excellent overall discriminative power.'),
    ('Geometric Mean (G-Mean)', '0.9563', '↑ (Strong Balance)',
     'Strong balance between performance on fraud and non-fraud cases.'),
    ('Recall (Fraud)', '0.9876', '≈ 1.0 (Critical)',
     'Near-Perfect: Detected 98.76% of all fraud/leakage cases.'),
    ('False Negatives (FN)', '3', '≈ 0',
     'Only 3 out of 241 fraud cases were missed (Goal Achieved).'),
    ('False Positives (FP)', '2,203', '↓ (Operational Cost)',
     'High operational cost: 90% of all flagged transactions were clean.'),
]

# Transpose the rows into the column -> list-of-values mapping that the
# DataFrame constructor consumes.
performance_data = {
    column: list(values)
    for column, values in zip(_performance_columns, zip(*_performance_rows))
}
df_performance = pd.DataFrame(performance_data)

# Convert to a left-aligned Markdown table (no index column) and render it.
_performance_table_md = df_performance.to_markdown(
    index=False,
    numalign="left",
    stralign="left",
)
display(Markdown(_performance_table_md))

# --- 2. Operational Strategy Table ---
# Render the section heading as Markdown, then a plain-text rule on stdout.
display(Markdown("\n## 2. Operational Strategy"))
print("---------------------------------------------------------------------------------")

# One tuple per table row: (component, strategy, rationale).
_operational_columns = ['Component', 'Strategy', 'Rationale']
_operational_rows = [
    (
        'Alert Action',
        'Manual Review Queue. Do NOT auto-block transactions.',
        'The 99% Recall is worth the operational cost of managing the high FP volume (2,203 FPs).',
    ),
    (
        'Priority Shift',
        'Assurance priority shifts from pattern discovery to efficient False Positive filtering within the review queue.',
        'The model flags everything; analysts must quickly clear the benign alerts.',
    ),
]

# Transpose the rows into the column -> list-of-values mapping that the
# DataFrame constructor consumes.
operational_data = {
    column: list(values)
    for column, values in zip(_operational_columns, zip(*_operational_rows))
}
df_operational = pd.DataFrame(operational_data)

# Convert to a left-aligned Markdown table (no index column) and render it.
_operational_table_md = df_operational.to_markdown(
    index=False,
    numalign="left",
    stralign="left",
)
display(Markdown(_operational_table_md))

# --- 3. Actionable Business Insights (Odds Ratio Analysis) Table ---
# Render the section heading as Markdown, then a plain-text rule on stdout.
display(Markdown("\n## 3. Actionable Business Insights (Odds Ratio Analysis)"))
print("---------------------------------------------------------------------------------")

# One tuple per feature: (feature name, odds ratio, narrative impact).
# The odds ratios are hard-coded floats (presumably transcribed from the
# fitted model's output — TODO confirm against the modelling cell).
_odds_ratio_columns = ['Feature', 'Odds Ratio', 'Impact on Fraud Likelihood']
_odds_ratio_rows = [
    ('Transaction_Type_P2P_Transfer', 13249.662,
     'Extreme Risk: Odds of fraud are over 13,000x higher in this channel.'),
    ('Amount_ETB', 4.216,
     'High-Value Indicator: 4.2x increase in odds for every standard deviation increase.'),
    ('System_Fee_ETB', 4.149,
     'High-Value Indicator (correlates with Amount).'),
    ('Billing_System_Status_TIMEOUT', 1.515,
     'System Leakage: 51.5% increase in odds of leakage/fraud.'),
    ('Transaction_Type_Agent_Cash_Out', 1.427,
     'High-Risk Channel: 42.7% more likely to be fraudulent.'),
    ('Receiver_Region_Oromia', 1.175,
     'Geographic Risk: 17.5% more likely to be fraudulent.'),
    ('Is_Cross_Region', 1.073,
     'Slight Mobility Risk: 7.3% higher risk.'),
    ('Txn_Count_Sender_1H', 1.056,
     'Velocity Indicator: 5.6% increase in odds of fraud.'),
    ('Agent_Cust_Pair_Count_7D', 0.931,
     'Slightly Decreases Odds (Unexpected finding).'),
    ('Receiver_Region_South Ethiopia', 0.722,
     'Low Risk.'),
    ('Sender_Region_South Ethiopia', 0.025,
     'Major Safety Indicator (Low risk for current fraud profile).'),
    ('Sender_Region_Oromia', 0.020,
     'Major Safety Indicator (Low risk for current fraud profile).'),
    ('Transaction_Type_Airtime_Purchase', 0.005,
     'Very Low Risk.'),
    ('Transaction_Type_Bill_Payment', 0.001,
     'Very Low Risk.'),
]

# Transpose the rows into the column -> list-of-values mapping that the
# DataFrame constructor consumes.
odds_ratio_data = {
    column: list(values)
    for column, values in zip(_odds_ratio_columns, zip(*_odds_ratio_rows))
}

# Order features from highest to lowest odds ratio and renumber the index
# so the rendered index column doubles as a rank.
df_odds_ratio = (
    pd.DataFrame(odds_ratio_data)
    .sort_values(by='Odds Ratio', ascending=False)
    .reset_index(drop=True)
)


def _strategic_action(row):
    """Return the recommended business action for one odds-ratio table row.

    Rules are checked in order and the first match wins:
    an odds ratio above 1000, then the billing-timeout feature, then the
    agent cash-out / receiver-Oromia features; everything else falls through
    to the default low-priority action.
    """
    if row['Odds Ratio'] > 1000:
        return "Implement Strict Controls (2FA, Velocity Limits)"

    feature = row['Feature']
    if feature == 'Billing_System_Status_TIMEOUT':
        return "Engineering Priority (Investigate Root Cause)"
    if feature in ('Transaction_Type_Agent_Cash_Out', 'Receiver_Region_Oromia'):
        return "Audit Agent Network/Targeted Training"
    return "Standard Monitoring/Low Priority"


# Add the Strategic Business Action column, evaluated row by row.
df_odds_ratio['Strategic Business Action'] = df_odds_ratio.apply(_strategic_action, axis=1)

# Render with the index column shown and floats fixed at three decimals.
_odds_ratio_table_md = df_odds_ratio.to_markdown(
    index=True,
    floatfmt=".3f",
    numalign="left",
    stralign="left",
)
display(Markdown(_odds_ratio_table_md))

# --- 4. Export Odds Ratio Data to CSV ---
# The path is relative, so the file is written to the current working directory.
CSV_FILE_NAME = 'final_odds_ratio_analysis.csv'

# Write the table without the positional index column, then confirm on stdout.
df_odds_ratio.to_csv(path_or_buf=CSV_FILE_NAME, index=False)
print(f"\nOdds Ratio Analysis successfully exported to {CSV_FILE_NAME}")

## 1. Model Performance Summary

---------------------------------------------------------------------------------


| Metric                  | Score   | Target               | Interpretation                                                     |
|:------------------------|:--------|:---------------------|:-------------------------------------------------------------------|
| AUC-ROC                 | 0.9805  | ↑ (Excellent)        | Excellent overall discriminative power.                            |
| Geometric Mean (G-Mean) | 0.9563  | ↑ (Strong Balance)   | Strong balance between performance on fraud and non-fraud cases.   |
| Recall (Fraud)          | 0.9876  | ≈ 1.0 (Critical)     | Near-Perfect: Detected 98.76% of all fraud/leakage cases.          |
| False Negatives (FN)    | 3       | ≈ 0                  | Only 3 out of 241 fraud cases were missed (Goal Achieved).         |
| False Positives (FP)    | 2,203   | ↓ (Operational Cost) | High operational cost: 90% of all flagged transactions were clean. |


## 2. Operational Strategy

---------------------------------------------------------------------------------


| Component      | Strategy                                                                                                        | Rationale                                                                                |
|:---------------|:----------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------|
| Alert Action   | Manual Review Queue. Do NOT auto-block transactions.                                                            | The 99% Recall is worth the operational cost of managing the high FP volume (2,203 FPs). |
| Priority Shift | Assurance priority shifts from pattern discovery to efficient False Positive filtering within the review queue. | The model flags everything; analysts must quickly clear the benign alerts.               |


## 3. Actionable Business Insights (Odds Ratio Analysis)

---------------------------------------------------------------------------------


|    | Feature                           | Odds Ratio   | Impact on Fraud Likelihood                                                         | Strategic Business Action                        |
|:---|:----------------------------------|:-------------|:-----------------------------------------------------------------------------------|:-------------------------------------------------|
| 0  | Transaction_Type_P2P_Transfer     | 13249.662    | Extreme Risk: Odds of fraud are over 13,000x higher in this channel.               | Implement Strict Controls (2FA, Velocity Limits) |
| 1  | Amount_ETB                        | 4.216        | High-Value Indicator: 4.2x increase in odds for every standard deviation increase. | Standard Monitoring/Low Priority                 |
| 2  | System_Fee_ETB                    | 4.149        | High-Value Indicator (correlates with Amount).                                     | Standard Monitoring/Low Priority                 |
| 3  | Billing_System_Status_TIMEOUT     | 1.515        | System Leakage: 51.5% increase in odds of leakage/fraud.                           | Engineering Priority (Investigate Root Cause)    |
| 4  | Transaction_Type_Agent_Cash_Out   | 1.427        | High-Risk Channel: 42.7% more likely to be fraudulent.                             | Audit Agent Network/Targeted Training            |
| 5  | Receiver_Region_Oromia            | 1.175        | Geographic Risk: 17.5% more likely to be fraudulent.                               | Audit Agent Network/Targeted Training            |
| 6  | Is_Cross_Region                   | 1.073        | Slight Mobility Risk: 7.3% higher risk.                                            | Standard Monitoring/Low Priority                 |
| 7  | Txn_Count_Sender_1H               | 1.056        | Velocity Indicator: 5.6% increase in odds of fraud.                                | Standard Monitoring/Low Priority                 |
| 8  | Agent_Cust_Pair_Count_7D          | 0.931        | Slightly Decreases Odds (Unexpected finding).                                      | Standard Monitoring/Low Priority                 |
| 9  | Receiver_Region_South Ethiopia    | 0.722        | Low Risk.                                                                          | Standard Monitoring/Low Priority                 |
| 10 | Sender_Region_South Ethiopia      | 0.025        | Major Safety Indicator (Low risk for current fraud profile).                       | Standard Monitoring/Low Priority                 |
| 11 | Sender_Region_Oromia              | 0.020        | Major Safety Indicator (Low risk for current fraud profile).                       | Standard Monitoring/Low Priority                 |
| 12 | Transaction_Type_Airtime_Purchase | 0.005        | Very Low Risk.                                                                     | Standard Monitoring/Low Priority                 |
| 13 | Transaction_Type_Bill_Payment     | 0.001        | Very Low Risk.                                                                     | Standard Monitoring/Low Priority                 |


Odds Ratio Analysis successfully exported to final_odds_ratio_analysis.csv
