In [1]:
import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

from config import fetch_data


## **STATISTICAL COMPARISON**

### Segment Comparision Error Bars

In [2]:
segment_comparison_error_bars_query = "SELECT * FROM gold.analytics_customer_segment_statistical_comparison"
segment_comparison_error_bars_df = fetch_data(segment_comparison_error_bars_query)

segment_comparison_error_bars_df = segment_comparison_error_bars_df.sort_values("mean_clv")

fig = go.Figure()

fig.add_trace(go.Scatter(
    y=segment_comparison_error_bars_df["customer_segment"],
    x=segment_comparison_error_bars_df["mean_clv"],
    error_x=dict(
        type="data",
        symmetric=False,
        array=segment_comparison_error_bars_df["clv_ci_upper"] - segment_comparison_error_bars_df["mean_clv"],
        arrayminus=segment_comparison_error_bars_df["mean_clv"] - segment_comparison_error_bars_df["clv_ci_lower"],
        thickness=1.5,
        width=4,
        color="rgba(99,110,250,0.6)"
    ),
    mode="markers",
    marker=dict(
        size=14,
        color="#636EFA",
        line=dict(width=1.5, color="white")
    ),
    name="Mean CLV",
    customdata=segment_comparison_error_bars_df[["clv_ci_lower", "clv_ci_upper", "sample_size"]],
    hovertemplate=(
        "<b>%{y}</b><br>"
        "Mean CLV: <b>$%{x:,.2f}</b><br>"
        "95% CI: $%{customdata[0]:,.2f} – $%{customdata[1]:,.2f}<br>"
        "Customers: %{customdata[2]:,}"
        "<extra></extra>"
    )
))

fig.update_layout(
    title=dict(
        text="Customer Lifetime Value by Segment<br><sup>Mean CLV with 95% Confidence Intervals</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    xaxis=dict(
        title="Mean CLV ($)",
        showgrid=True,
        gridcolor="rgba(0,0,0,0.05)",
        zeroline=False
    ),
    yaxis=dict(
        title="Customer Segment",
        showgrid=False
    ),
    template="plotly_white",
    height=550,
    margin=dict(l=140, r=40, t=80, b=40),
)

fig.show()

In [3]:
segment_comparison_error_bars_df

Unnamed: 0,customer_segment,sample_size,mean_clv,stddev_clv,median_clv,q1_clv,q3_clv,clv_ci_lower,clv_ci_upper,mean_income,...,median_income,mean_credit_score,stddev_credit_score,mean_transactions,stddev_transactions,mean_balance,stddev_balance,mean_churn_risk_pct,stddev_churn_risk_pct,last_updated
3,Premium,4059,50170.27,28555.91,50114.0,25681.0,74846.0,49291.77,51048.77,261514.68,...,259131.0,549.0,142.67,0.13,0.36,-66562.87,256334.37,49.58,29.07,2025-12-26 03:41:29.050037+00:00
2,Mass Market,4039,50269.85,28573.79,49727.0,25471.5,75558.0,49388.63,51151.08,261470.79,...,263099.0,550.0,143.68,0.13,0.37,-58186.1,251651.6,50.55,28.84,2025-12-26 03:41:29.050037+00:00
1,Affluent,4033,50527.47,28911.47,50412.0,25603.0,76306.0,49635.17,51419.77,260765.47,...,262077.0,549.0,145.45,0.12,0.36,-62243.08,254752.42,50.16,28.79,2025-12-26 03:41:29.050037+00:00
0,Business,3981,50908.54,28677.18,51036.0,25441.0,75921.0,50017.71,51799.37,258801.06,...,257499.0,549.0,145.71,0.12,0.37,-52058.43,248106.76,50.11,28.52,2025-12-26 03:41:29.050037+00:00


### Segment Metrics Forest Plot

In [4]:
segment_metrics_forest_plot_query = "SELECT * FROM gold.analytics_customer_segment_statistical_comparison"
segment_metrics_forest_plot_df = fetch_data(segment_metrics_forest_plot_query)

fig = make_subplots(
    rows=1, cols=3,
    shared_yaxes=True,
    subplot_titles=[
        "Customer Lifetime Value",
        "Annual Income",
        "Credit Score"
    ],
    horizontal_spacing=0.08
)

# ---------- CLV ----------
fig.add_trace(
    go.Scatter(
        x=segment_metrics_forest_plot_df["mean_clv"],
        y=segment_metrics_forest_plot_df["customer_segment"],
        mode="markers",
        marker=dict(
            size=12,
            color="#636EFA",
            line=dict(width=1.5, color="white")
        ),
        error_x=dict(
            type="data",
            symmetric=False,
            array=segment_metrics_forest_plot_df["clv_ci_upper"] - segment_metrics_forest_plot_df["mean_clv"],
            arrayminus=segment_metrics_forest_plot_df["mean_clv"] - segment_metrics_forest_plot_df["clv_ci_lower"],
            thickness=1.5,
            width=4,
            color="rgba(99,110,250,0.6)"
        ),
        hovertemplate=(
            "<b>%{y}</b><br>"
            "Mean CLV: <b>$%{x:,.2f}</b><br>"
            "<extra></extra>"
        ),
        showlegend=False
    ),
    row=1, col=1
)

# ---------- Income ----------
fig.add_trace(
    go.Scatter(
        x=segment_metrics_forest_plot_df["mean_income"],
        y=segment_metrics_forest_plot_df["customer_segment"],
        mode="markers",
        marker=dict(
            size=12,
            color="#00CC96",
            line=dict(width=1.5, color="white")
        ),
        hovertemplate=(
            "<b>%{y}</b><br>"
            "Mean Income: <b>$%{x:,.2f}</b>"
            "<extra></extra>"
        ),
        showlegend=False
    ),
    row=1, col=2
)

# ---------- Credit Score ----------
fig.add_trace(
    go.Scatter(
        x=segment_metrics_forest_plot_df["mean_credit_score"],
        y=segment_metrics_forest_plot_df["customer_segment"],
        mode="markers",
        marker=dict(
            size=12,
            color="#EF553B",
            line=dict(width=1.5, color="white")
        ),
        hovertemplate=(
            "<b>%{y}</b><br>"
            "Mean Credit Score: <b>%{x:.0f}</b>"
            "<extra></extra>"
        ),
        showlegend=False
    ),
    row=1, col=3
)

# ---------- Axis Styling ----------
fig.update_xaxes(
    title_text="Mean CLV ($)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False,
    row=1, col=1
)

fig.update_xaxes(
    title_text="Mean Income ($)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False,
    row=1, col=2
)

fig.update_xaxes(
    title_text="Credit Score",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False,
    row=1, col=3
)

fig.update_yaxes(
    title_text="Customer Segment",
    showgrid=False
)

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="Statistical Comparison Across Customer Segments<br>"
             "<sup>Point estimates shown; CLV includes 95% confidence intervals</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    template="plotly_white",
    height=550,
    margin=dict(l=160, r=40, t=90, b=40)
)

fig.show()

In [5]:
segment_metrics_forest_plot_df

Unnamed: 0,customer_segment,sample_size,mean_clv,stddev_clv,median_clv,q1_clv,q3_clv,clv_ci_lower,clv_ci_upper,mean_income,...,median_income,mean_credit_score,stddev_credit_score,mean_transactions,stddev_transactions,mean_balance,stddev_balance,mean_churn_risk_pct,stddev_churn_risk_pct,last_updated
0,Business,3981,50908.54,28677.18,51036.0,25441.0,75921.0,50017.71,51799.37,258801.06,...,257499.0,549.0,145.71,0.12,0.37,-52058.43,248106.76,50.11,28.52,2025-12-26 03:41:29.050037+00:00
1,Affluent,4033,50527.47,28911.47,50412.0,25603.0,76306.0,49635.17,51419.77,260765.47,...,262077.0,549.0,145.45,0.12,0.36,-62243.08,254752.42,50.16,28.79,2025-12-26 03:41:29.050037+00:00
2,Mass Market,4039,50269.85,28573.79,49727.0,25471.5,75558.0,49388.63,51151.08,261470.79,...,263099.0,550.0,143.68,0.13,0.37,-58186.1,251651.6,50.55,28.84,2025-12-26 03:41:29.050037+00:00
3,Premium,4059,50170.27,28555.91,50114.0,25681.0,74846.0,49291.77,51048.77,261514.68,...,259131.0,549.0,142.67,0.13,0.36,-66562.87,256334.37,49.58,29.07,2025-12-26 03:41:29.050037+00:00


## **A/B TESTING**

### A/B Test Channel Comparision

In [6]:
ab_test_channel_comparison_query = "SELECT * FROM gold.analytics_ab_test_channel_performance ORDER BY sample_size DESC"
ab_test_channel_comparison_df = fetch_data(ab_test_channel_comparison_query)

fig = go.Figure()

fig.add_trace(go.Bar(
    x=ab_test_channel_comparison_df["channel"],
    y=ab_test_channel_comparison_df["mean_transaction_amount"],
    error_y=dict(
        type="data",
        symmetric=False,
        array=ab_test_channel_comparison_df["amount_ci_upper"] - ab_test_channel_comparison_df["mean_transaction_amount"],
        arrayminus=ab_test_channel_comparison_df["mean_transaction_amount"] - ab_test_channel_comparison_df["amount_ci_lower"],
        thickness=1.5,
        width=4,
        color="rgba(0,204,150,0.6)"
    ),
    marker=dict(
        color="#00CC96",
        line=dict(width=1.2, color="white")
    ),
    customdata=ab_test_channel_comparison_df["sample_size"],
    hovertemplate=(
        "<b>%{x}</b><br>"
        "Mean Transaction: <b>$%{y:,.2f}</b><br>"
        "Sample Size: %{customdata:,}"
        "<extra></extra>"
    ),
    name="Mean Transaction Amount"
))

# ---------- Axis Styling ----------
fig.update_xaxes(
    title_text="Channel",
    tickangle=-20,
    showgrid=False
)

fig.update_yaxes(
    title_text="Mean Transaction Amount ($)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False
)

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="A/B Test Channel Performance Comparison<br>"
             "<sup>Mean transaction amount with 95% confidence intervals</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    template="plotly_white",
    height=520,
    margin=dict(l=60, r=40, t=90, b=80)
)

fig.show()

In [7]:
ab_test_channel_comparison_df

Unnamed: 0,channel,sample_size,mean_transaction_amount,stddev_transaction_amount,amount_ci_lower,amount_ci_upper,fraud_rate_pct,fraud_rate_se,decline_rate_pct,mean_processing_ms,avg_transactions_per_customer,unique_customers,total_volume,last_updated
0,ATM,561,1511.29,1100.04,1420.26,1602.32,0.0,0.0,47.77,2446.16,1.01,554,847833.76,2025-12-26 03:41:26.590091+00:00
1,POS,553,1611.21,1187.32,1512.25,1710.17,0.0,0.0,46.65,2710.78,1.02,542,890999.7,2025-12-26 03:41:26.590091+00:00
2,Online,545,1491.1,1167.33,1393.09,1589.1,0.55,0.62,48.62,2621.96,1.02,535,812646.82,2025-12-26 03:41:26.590091+00:00
3,Mobile,533,1536.0,1066.51,1445.45,1626.54,0.0,0.0,48.22,2458.71,1.02,524,818687.14,2025-12-26 03:41:26.590091+00:00
4,Branch,522,1547.24,1173.99,1446.52,1647.95,0.0,0.0,47.32,2527.3,1.01,516,807657.02,2025-12-26 03:41:26.590091+00:00


### A/B Test Metrics Comparison

In [8]:
ab_test_metrics_comparison_query = "SELECT * FROM gold.analytics_ab_test_channel_performance"
ab_test_metrics_comparison_df = fetch_data(ab_test_metrics_comparison_query)


fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=[
        "Mean Transaction Amount",
        "Fraud Rate",
        "Decline Rate",
        "Avg Transactions per Customer"
    ],
    horizontal_spacing=0.08,
    vertical_spacing=0.12
)

# ---------- Transaction Amount ----------
fig.add_trace(
    go.Bar(
        x=ab_test_metrics_comparison_df["channel"],
        y=ab_test_metrics_comparison_df["mean_transaction_amount"],
        marker=dict(color="#636EFA", line=dict(color="white", width=1)),
        hovertemplate="<b>%{x}</b><br>$%{y:,.2f}<extra></extra>"
    ),
    row=1, col=1
)

# ---------- Fraud Rate ----------
fig.add_trace(
    go.Bar(
        x=ab_test_metrics_comparison_df["channel"],
        y=ab_test_metrics_comparison_df["fraud_rate_pct"],
        marker=dict(color="#EF553B", line=dict(color="white", width=1)),
        hovertemplate="<b>%{x}</b><br>%{y:.2f}%<extra></extra>"
    ),
    row=1, col=2
)

# ---------- Decline Rate ----------
fig.add_trace(
    go.Bar(
        x=ab_test_metrics_comparison_df["channel"],
        y=ab_test_metrics_comparison_df["decline_rate_pct"],
        marker=dict(color="#FFA15A", line=dict(color="white", width=1)),
        hovertemplate="<b>%{x}</b><br>%{y:.2f}%<extra></extra>"
    ),
    row=2, col=1
)

# ---------- Transactions / Customer ----------
fig.add_trace(
    go.Bar(
        x=ab_test_metrics_comparison_df["channel"],
        y=ab_test_metrics_comparison_df["avg_transactions_per_customer"],
        marker=dict(color="#00CC96", line=dict(color="white", width=1)),
        hovertemplate="<b>%{x}</b><br>%{y:.2f}<extra></extra>"
    ),
    row=2, col=2
)

# ---------- Axis Styling ----------
fig.update_yaxes(
    title_text="Amount ($)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False,
    row=1, col=1
)

fig.update_yaxes(
    title_text="Rate (%)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False,
    row=1, col=2
)

fig.update_yaxes(
    title_text="Rate (%)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False,
    row=2, col=1
)

fig.update_yaxes(
    title_text="Transactions / Customer",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False,
    row=2, col=2
)

fig.update_xaxes(
    tickangle=-20,
    showgrid=False
)

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="Channel Performance: Multi-Metric A/B Test Comparison<br>"
             "<sup>Behavioral and risk metrics shown per channel</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    template="plotly_white",
    height=720,
    margin=dict(l=60, r=40, t=100, b=80),
    showlegend=False
)

fig.show()

In [9]:
ab_test_metrics_comparison_df

Unnamed: 0,channel,sample_size,mean_transaction_amount,stddev_transaction_amount,amount_ci_lower,amount_ci_upper,fraud_rate_pct,fraud_rate_se,decline_rate_pct,mean_processing_ms,avg_transactions_per_customer,unique_customers,total_volume,last_updated
0,ATM,561,1511.29,1100.04,1420.26,1602.32,0.0,0.0,47.77,2446.16,1.01,554,847833.76,2025-12-26 03:41:26.590091+00:00
1,POS,553,1611.21,1187.32,1512.25,1710.17,0.0,0.0,46.65,2710.78,1.02,542,890999.7,2025-12-26 03:41:26.590091+00:00
2,Online,545,1491.1,1167.33,1393.09,1589.1,0.55,0.62,48.62,2621.96,1.02,535,812646.82,2025-12-26 03:41:26.590091+00:00
3,Mobile,533,1536.0,1066.51,1445.45,1626.54,0.0,0.0,48.22,2458.71,1.02,524,818687.14,2025-12-26 03:41:26.590091+00:00
4,Branch,522,1547.24,1173.99,1446.52,1647.95,0.0,0.0,47.32,2527.3,1.01,516,807657.02,2025-12-26 03:41:26.590091+00:00


## **MODEL VALIDATION**

### Fraud detection ROC curve

In [10]:
fraud_detection_roc_curve_query = "SELECT * FROM gold.analytics_fraud_detection_accuracy ORDER BY recall_pct"
fraud_detection_roc_curve_df = fetch_data(fraud_detection_roc_curve_query)

fig = go.Figure()

# ---------- Precision–Recall Curve ----------
fig.add_trace(go.Scatter(
    x=fraud_detection_roc_curve_df["recall_pct"],
    y=fraud_detection_roc_curve_df["precision_pct"],
    mode="lines+markers",
    marker=dict(
        size=11,
        color=fraud_detection_roc_curve_df["f1_score_pct"],
        colorscale="Viridis",
        showscale=True,
        colorbar=dict(
            title="F1 Score (%)",
            thickness=14
        ),
        line=dict(width=1, color="white")
    ),
    line=dict(width=2, color="rgba(50,50,50,0.6)"),
    text=fraud_detection_roc_curve_df["predicted_risk"],
    hovertemplate=(
        "<b>Risk Threshold: %{text}</b><br>"
        "Recall: %{x:.1f}%<br>"
        "Precision: %{y:.1f}%<br>"
        "F1 Score: <b>%{marker.color:.1f}%</b>"
        "<extra></extra>"
    ),
    name="Model Performance"
))

# ---------- Best F1 Annotation ----------
best_f1 = fraud_detection_roc_curve_df.loc[fraud_detection_roc_curve_df["f1_score_pct"].idxmax()]

fig.add_trace(go.Scatter(
    x=[best_f1["recall_pct"]],
    y=[best_f1["precision_pct"]],
    mode="markers",
    marker=dict(
        size=16,
        symbol="star",
        color="gold",
        line=dict(width=1.5, color="black")
    ),
    hoverinfo="skip",
    showlegend=False
))

fig.add_annotation(
    x=best_f1["recall_pct"],
    y=best_f1["precision_pct"],
    text="Best F1 Trade-off",
    showarrow=True,
    arrowhead=3,
    ax=40,
    ay=-40,
    font=dict(size=12)
)

# ---------- Axis Styling ----------
fig.update_xaxes(
    title="Recall (Sensitivity %)",
    range=[0, 100],
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False
)

fig.update_yaxes(
    title="Precision (%)",
    range=[0, 100],
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False
)

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="Fraud Detection Model — Precision–Recall Curve<br>"
             "<sup>Threshold-based trade-offs; color indicates F1 score</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    template="plotly_white",
    height=620,
    margin=dict(l=70, r=60, t=100, b=60),
    showlegend=False
)

fig.show()

In [11]:
fraud_detection_roc_curve_df

Unnamed: 0,predicted_risk,total_predictions,true_positive,false_positive,false_negative,true_negative,precision_pct,recall_pct,specificity_pct,f1_score_pct,accuracy_pct,false_positive_rate_pct,last_updated
0,Low Risk,2713,2,2711,2711,2,0.07,0.07,0.07,0.07,0.15,99.93,2025-12-26 03:41:29.211381+00:00
1,High Risk,1,1,0,0,1,100.0,100.0,100.0,100.0,200.0,0.0,2025-12-26 03:41:29.211381+00:00


### Fraud Confusion Matrix Heatmap

In [12]:
fraud_confusion_matrix_heatmap_query = "SELECT * FROM gold.analytics_fraud_detection_accuracy"
fraud_confusion_matrix_heatmap_df = fetch_data(fraud_confusion_matrix_heatmap_query)

n_panels = len(fraud_confusion_matrix_heatmap_df)

fig = make_subplots(
    rows=1, cols=n_panels,
    subplot_titles=fraud_confusion_matrix_heatmap_df['predicted_risk'].tolist(),
    horizontal_spacing=0.05
)

# Find max value for consistent color scaling
max_val = max(fraud_confusion_matrix_heatmap_df[['true_positive', 'false_negative', 
                                                 'false_positive', 'true_negative']].max())

for i, (_, row) in enumerate(fraud_confusion_matrix_heatmap_df.iterrows(), 1):
    matrix = [[row['true_positive'], row['false_negative']],
              [row['false_positive'], row['true_negative']]]

    fig.add_trace(
        go.Heatmap(
            z=matrix,
            x=['Predicted Fraud', 'Predicted Legit'],
            y=['Actual Fraud', 'Actual Legit'],
            text=np.array(matrix),
            texttemplate='%{text}',
            colorscale='RdYlGn_r',
            zmin=0,
            zmax=max_val,
            showscale=(i == n_panels),
            hovertemplate='<b>%{y}</b> / <b>%{x}</b><br>Count: %{z}<extra></extra>'
        ),
        row=1, col=i
    )

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="Fraud Detection Confusion Matrices by Risk Level<br>"
             "<sup>Counts of TP, FP, FN, TN per predicted risk level</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    template="plotly_white",
    height=500,
    margin=dict(l=50, r=50, t=90, b=50)
)

fig.show()


In [13]:
fraud_confusion_matrix_heatmap_df

Unnamed: 0,predicted_risk,total_predictions,true_positive,false_positive,false_negative,true_negative,precision_pct,recall_pct,specificity_pct,f1_score_pct,accuracy_pct,false_positive_rate_pct,last_updated
0,High Risk,1,1,0,0,1,100.0,100.0,100.0,100.0,200.0,0.0,2025-12-26 03:41:29.211381+00:00
1,Low Risk,2713,2,2711,2711,2,0.07,0.07,0.07,0.07,0.15,99.93,2025-12-26 03:41:29.211381+00:00


## **HYPOTHESIS TESTING**

### Credit Approval Hypothesis Test

In [14]:
credit_approval_hypothesis_test_query = """
    SELECT * FROM gold.analytics_credit_approval_hypothesis_test 
    WHERE sample_size >= 30 
    ORDER BY approval_rate_pct DESC LIMIT 20
"""
credit_approval_hypothesis_test_df = fetch_data(credit_approval_hypothesis_test_query)

credit_approval_hypothesis_test_df['label'] = (credit_approval_hypothesis_test_df['credit_score_band'] + ' - ' + credit_approval_hypothesis_test_df['income_bracket'] + ' - ' + credit_approval_hypothesis_test_df['dti_category'])

fig = go.Figure()

fig.add_trace(go.Scatter(
    y=credit_approval_hypothesis_test_df["label"],
    x=credit_approval_hypothesis_test_df["approval_rate_pct"],
    mode="markers",
    marker=dict(
        size=12,
        color="#636EFA",
        line=dict(width=1.5, color="white")
    ),
    error_x=dict(
        type="data",
        symmetric=False,
        array=credit_approval_hypothesis_test_df["approval_rate_ci_upper"] - credit_approval_hypothesis_test_df["approval_rate_pct"],
        arrayminus=credit_approval_hypothesis_test_df["approval_rate_pct"] - credit_approval_hypothesis_test_df["approval_rate_ci_lower"],
        thickness=1.5,
        width=4,
        color="rgba(99,110,250,0.6)"
    ),
    customdata=credit_approval_hypothesis_test_df["sample_size"],
    hovertemplate=(
        "<b>%{y}</b><br>"
        "Approval Rate: <b>%{x:.1f}%</b><br>"
        "Sample Size: %{customdata:,}<extra></extra>"
    )
))

# ---------- Axis Styling ----------
fig.update_xaxes(
    title="Approval Rate (%)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False
)

fig.update_yaxes(
    title="",
    showgrid=False
)

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="Credit Approval Rates with 95% Confidence Intervals<br>"
             "<sup>Top 20 segments by approval rate</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    template="plotly_white",
    height=700,
    margin=dict(l=250, r=60, t=100, b=60)
)

fig.show()

In [15]:
credit_approval_hypothesis_test_df

Unnamed: 0,credit_score_band,income_bracket,employment_status,dti_category,sample_size,approved_applications,approval_rate_pct,approval_rate_se_pct,approval_rate_ci_lower,approval_rate_ci_upper,mean_credit_score,mean_income,mean_dti_pct,mean_requested_amount,approval_odds,last_updated,label
0,Poor,Very High,Employed,Good,53,0,0.0,0.0,0.0,0.0,440.0,352254.85,24.91,54052.15,0.0,2025-12-26 03:41:27.347726+00:00,Poor - Very High - Good
1,Poor,Very High,Retired,Good,61,0,0.0,0.0,0.0,0.0,454.0,325260.08,25.9,48204.82,0.0,2025-12-26 03:41:27.347726+00:00,Poor - Very High - Good
2,Poor,Very High,Retired,Poor,57,0,0.0,0.0,0.0,0.0,454.0,317523.61,53.33,53255.63,0.0,2025-12-26 03:41:27.347726+00:00,Poor - Very High - Poor
3,Poor,Very High,Self-Employed,Good,52,0,0.0,0.0,0.0,0.0,443.0,336546.67,24.62,51250.48,0.0,2025-12-26 03:41:27.347726+00:00,Poor - Very High - Good
4,Poor,Very High,Student,Good,56,0,0.0,0.0,0.0,0.0,425.0,338625.96,24.29,56084.23,0.0,2025-12-26 03:41:27.347726+00:00,Poor - Very High - Good
5,Poor,Very High,Student,Poor,50,0,0.0,0.0,0.0,0.0,438.0,319155.24,52.2,48623.1,0.0,2025-12-26 03:41:27.347726+00:00,Poor - Very High - Poor
6,Poor,Very High,Unemployed,Good,58,0,0.0,0.0,0.0,0.0,443.0,325922.14,24.66,51812.59,0.0,2025-12-26 03:41:27.347726+00:00,Poor - Very High - Good


### Churn Confidence Intervals

In [16]:
churn_confidence_intervals_query = " SELECT * FROM gold.analytics_churn_rate_confidence_intervals ORDER BY churn_rate_pct DESC LIMIT 20"
churn_confidence_intervals_df = fetch_data(churn_confidence_intervals_query)


churn_confidence_intervals_df['label'] = (churn_confidence_intervals_df['customer_segment'] + ' - ' +
                churn_confidence_intervals_df['age_group'] + ' - ' +
                churn_confidence_intervals_df['tenure_group'])

fig = go.Figure()

fig.add_trace(go.Scatter(
    y=churn_confidence_intervals_df["label"],
    x=churn_confidence_intervals_df["churn_rate_pct"],
    mode="markers",
    marker=dict(
        size=12,
        color="#EF553B",
        line=dict(width=1.5, color="white")
    ),
    error_x=dict(
        type="data",
        symmetric=False,
        array=churn_confidence_intervals_df["churn_rate_ci_upper"] - churn_confidence_intervals_df["churn_rate_pct"],
        arrayminus=churn_confidence_intervals_df["churn_rate_pct"] - churn_confidence_intervals_df["churn_rate_ci_lower"],
        thickness=1.5,
        width=4,
        color="rgba(239,85,59,0.6)"
    ),
    customdata=churn_confidence_intervals_df["total_customers"],
    hovertemplate=(
        "<b>%{y}</b><br>"
        "Churn Rate: <b>%{x:.2f}%</b><br>"
        "Total Customers: %{customdata:,}<extra></extra>"
    )
))

# ---------- Axis Styling ----------
fig.update_xaxes(
    title="Churn Rate (%)",
    showgrid=True,
    gridcolor="rgba(0,0,0,0.05)",
    zeroline=False
)

fig.update_yaxes(
    title="",
    showgrid=False
)

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="Churn Rate Estimates with 95% Confidence Intervals<br>"
             "<sup>Top 20 segments by churn rate</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    template="plotly_white",
    height=800,
    margin=dict(l=250, r=60, t=100, b=60)
)

fig.show()

In [17]:
churn_confidence_intervals_df

Unnamed: 0,customer_segment,age_group,tenure_group,total_customers,churned_customers,churn_rate_pct,churn_rate_se_pct,churn_rate_ci_lower,churn_rate_ci_upper,high_risk_pct,mean_churn_score_pct,last_updated,label
0,Premium,35-44,6-12 months,35.0,11.0,31.43,7.85,16.05,46.81,54.29,63.37,2025-12-26 03:41:27.121213+00:00,Premium - 35-44 - 6-12 months
1,Business,45-54,12-24 months,61.0,19.0,31.15,5.93,19.53,42.77,44.26,54.24,2025-12-26 03:41:27.121213+00:00,Business - 45-54 - 12-24 months
2,Affluent,25-34,6-12 months,36.0,11.0,30.56,7.68,15.51,45.6,33.33,48.52,2025-12-26 03:41:27.121213+00:00,Affluent - 25-34 - 6-12 months
3,Business,18-24,0-6 months,33.0,10.0,30.3,8.0,14.62,45.98,48.48,55.05,2025-12-26 03:41:27.121213+00:00,Business - 18-24 - 0-6 months
4,Affluent,25-34,0-6 months,31.0,9.0,29.03,8.15,13.05,45.01,48.39,55.64,2025-12-26 03:41:27.121213+00:00,Affluent - 25-34 - 0-6 months
5,Mass Market,35-44,6-12 months,31.0,9.0,29.03,8.15,13.05,45.01,29.03,45.92,2025-12-26 03:41:27.121213+00:00,Mass Market - 35-44 - 6-12 months
6,Business,65+,12-24 months,31.0,9.0,29.03,8.15,13.05,45.01,35.48,56.03,2025-12-26 03:41:27.121213+00:00,Business - 65+ - 12-24 months
7,Affluent,65+,12-24 months,38.0,11.0,28.95,7.36,14.53,43.37,44.74,48.65,2025-12-26 03:41:27.121213+00:00,Affluent - 65+ - 12-24 months
8,Affluent,45-54,24+ months,896.0,258.0,28.79,1.51,25.83,31.76,41.29,51.29,2025-12-26 03:41:27.121213+00:00,Affluent - 45-54 - 24+ months
9,Premium,25-34,6-12 months,32.0,9.0,28.13,7.95,12.55,43.7,37.5,40.04,2025-12-26 03:41:27.121213+00:00,Premium - 25-34 - 6-12 months


## **DISTRIBUTION ANALYSIS**

### Clv Distribution By Segment

In [18]:
clv_distribution_by_segment_query = "SELECT * FROM gold.analytics_customer_lifetime_value_distribution"
clv_distribution_by_segment_df = fetch_data(clv_distribution_by_segment_query)

segments = clv_distribution_by_segment_df['customer_segment'].unique()
colors = px.colors.qualitative.Set2

fig = go.Figure()

for i, segment in enumerate(segments):
    segment_data = clv_distribution_by_segment_df[
        clv_distribution_by_segment_df['customer_segment'] == segment
    ]
    
    fig.add_trace(go.Box(
        y=segment_data['mean_clv'],
        x=[segment]*len(segment_data),
        name=segment,
        boxmean='sd',  # show mean and SD
        marker_color=colors[i % len(colors)],
        line=dict(width=1.5),
        hovertemplate="<b>%{x}</b><br>CLV: $%{y:,.2f}<extra></extra>"
    ))

# ---------- Layout ----------
fig.update_layout(
    title=dict(
        text="Customer Lifetime Value Distribution by Segment<br>"
             "<sup>Boxplots show mean ± SD</sup>",
        x=0.01,
        font=dict(size=18)
    ),
    xaxis_title="Customer Segment",
    yaxis_title="CLV ($)",
    template="plotly_white",
    height=520,
    margin=dict(l=60, r=40, t=90, b=60),
    showlegend=False
)

fig.show()

In [19]:
clv_distribution_by_segment_df

Unnamed: 0,clv_bin,customer_segment,customer_count,pct_of_segment,mean_clv,median_clv,q1_clv,q3_clv,min_clv,max_clv,stddev_clv,skewness_indicator,last_updated
0,1K-5K,Affluent,176,4.36,3290.97,3595.5,2170.75,4422.75,1009.0,4983.0,1214.02,-0.25,2025-12-26 03:41:28.519410+00:00
1,5K-10K,Affluent,211,5.23,7429.18,7372.0,6244.0,8693.5,5036.0,9985.0,1433.4,0.04,2025-12-26 03:41:28.519410+00:00
2,10K-25K,Affluent,597,14.8,17370.43,17329.0,13683.0,21002.0,10018.0,24991.0,4303.21,0.01,2025-12-26 03:41:28.519410+00:00
3,25K-50K,Affluent,1016,25.19,37265.19,37645.0,30772.5,43154.0,25095.0,49999.0,7184.79,-0.05,2025-12-26 03:41:28.519410+00:00
4,50K+,Affluent,2033,50.41,75454.47,76130.0,62894.0,87711.0,50005.0,99998.0,14432.05,-0.05,2025-12-26 03:41:28.519410+00:00
5,1K-5K,Business,158,3.97,3025.04,3055.0,2152.75,4004.5,1011.0,4986.0,1120.57,-0.03,2025-12-26 03:41:28.519410+00:00
6,5K-10K,Business,198,4.97,7571.07,7687.0,6336.75,8797.25,5070.0,9984.0,1438.85,-0.08,2025-12-26 03:41:28.519410+00:00
7,10K-25K,Business,609,15.3,17862.84,18035.0,13759.0,21896.0,10001.0,24952.0,4403.82,-0.04,2025-12-26 03:41:28.519410+00:00
8,25K-50K,Business,981,24.64,37416.15,37587.0,31194.0,43515.0,25004.0,49966.0,7239.45,-0.02,2025-12-26 03:41:28.519410+00:00
9,50K+,Business,2035,51.12,75236.44,75394.0,62597.5,87866.5,50035.0,99976.0,14491.49,-0.01,2025-12-26 03:41:28.519410+00:00


### Transaction Normality Test

In [20]:
transaction_normality_test_query = """
    SELECT * FROM gold.analytics_transaction_amount_normality_test 
    WHERE sample_size >= 100 
    ORDER BY sample_size DESC LIMIT 10
"""
transaction_normality_test_df = fetch_data(transaction_normality_test_query)
fig = go.Figure()

for _, row in transaction_normality_test_df.iterrows():
    label = f"{row['merchant_category']} - {row['channel']}"

    fig.add_trace(go.Bar(
        name=label,
        x=[label],
        y=[row['mean_amount']],
        error_y=dict(
            type='data',
            array=[row['stddev_amount']],
            visible=True
        ),
        marker_color=px.colors.qualitative.Set3[_ % len(
            px.colors.qualitative.Set3)],
        hovertemplate=f'<b>{label}</b><br>Mean: $%{{y:.2f}}<br>StdDev: ${row["stddev_amount"]:.2f}<br>CV: {row["cv_pct"]:.1f}%<extra></extra>'
    ))

fig.update_layout(
    title="Transaction Amount Distribution (Mean ± StdDev) - Top 10 by Volume",
    xaxis_title="Category - Channel",
    yaxis_title="Amount ($)",
    height=600,
    showlegend=False
)
fig.show()

In [21]:
transaction_normality_test_df

Unnamed: 0,merchant_category,channel,sample_size,mean_amount,median_amount,stddev_amount,q1_amount,q3_amount,iqr,min_amount,max_amount,cv_pct,skewness_indicator,lower_outlier_bound,upper_outlier_bound,last_updated
