In [14]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import shap
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import confusion_matrix, roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the pre-encoded dataset; every column except the target is a feature.
target = 'decoded_target'
df_encoded = pd.read_csv('dataset/bank_to_build.csv')
features = list(df_encoded.drop(columns=[target]).columns)

X = df_encoded[features]
y = df_encoded[target]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/std used to transform the training data
# (data leakage), and the scaler saved later would carry that leakage too.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix in the same scaled space, kept under its original name
# in case later cells rely on it.
X_scaled = scaler.transform(X)

# Fit final model (fixed 300 boosting iterations, early stopping disabled)
final_model = HistGradientBoostingClassifier(max_iter=300,
                                             early_stopping=False,
                                             learning_rate=0.01,
                                             max_leaf_nodes=15,
                                             random_state=42)
final_model.fit(X_train, y_train)

# Hard class predictions and the probability of the second class
# (column 1 of predict_proba) for the held-out rows.
y_pred = final_model.predict(X_test)
y_proba = final_model.predict_proba(X_test)[:, 1]

# --- 1. Confusion Matrix ---
# Count matrix of actual vs. predicted classes on the held-out set.
cm = confusion_matrix(y_test, y_pred)

# Tick labels for both axes.
# NOTE(review): assumes the classes sort as negative ('No') then positive ('Yes') - confirm.
labels = ['No', 'Yes']
axis_titles = dict(x="Predicted", y="Actual", color="Count")

cm_fig = px.imshow(
    cm,
    labels=axis_titles,
    x=labels,
    y=labels,
    text_auto=True,  # print the count inside each cell
    title="Confusion Matrix",
)
cm_fig.update_layout(width=500, height=400)
cm_fig.show()

# --- 2. ROC Curve ---
# False/true positive rates across thresholds, and the area under that curve.
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

# Model curve plus the dashed diagonal used as the random reference.
roc_fig = go.Figure(data=[
    go.Scatter(x=fpr, y=tpr, mode='lines', name='ROC Curve',
               line=dict(color='blue')),
    go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Random',
               line=dict(dash='dash')),
])
roc_fig.update_layout(
    title=f"ROC Curve (AUC = {roc_auc:.4f})",
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    width=600,
    height=500,
)
roc_fig.show()

# --- 3. Lift Curve ---
# Rank the test clients from the highest to the lowest predicted probability.
lift_df = (
    pd.DataFrame({'y_true': y_test.values, 'y_proba': y_proba})
    .sort_values('y_proba', ascending=False)
)

# Share of clients contacted after each ranked row: 1/n, 2/n, ..., 1.
contacted_share = np.arange(1, len(lift_df) + 1) / len(lift_df)

lift_df['cum_response'] = lift_df['y_true'].cumsum()
lift_df['total_positives'] = lift_df['y_true'].sum()
# Random targeting captures positives in proportion to the share contacted, so
# the reference line must be y = x on the plotted x grid. np.linspace(0, 1, n)
# starts at 0 while the x grid starts at 1/n, which shifted the line off the
# diagonal.
lift_df['baseline'] = contacted_share
# Fraction of all positives captured so far (cumulative gain).
lift_df['cum_gain'] = lift_df['cum_response'] / lift_df['total_positives']
lift_df['percent_contacted'] = contacted_share

# Area under the cumulative-gain curve (trapezoidal rule over the x grid).
alift = np.trapezoid(lift_df['cum_gain'], lift_df['percent_contacted'])

lift_fig = go.Figure()
lift_fig.add_trace(go.Scatter(
    x=lift_df['percent_contacted'],
    y=lift_df['cum_gain'],
    mode='lines',
    name='Lift Curve',
    line=dict(color='blue')
))
lift_fig.add_trace(go.Scatter(
    x=lift_df['percent_contacted'],
    y=lift_df['baseline'],
    mode='lines',
    name='Random Targeting',
    line=dict(dash='dash', color='gray')
))
lift_fig.update_layout(title=f'Cumulative Lift Curve (ALIFT = {alift:.4f})',
                       xaxis_title='Proportion of Contacted Clients',
                       yaxis_title='Cumulative Gain',
                       width=700,
                       height=500)
lift_fig.show()

# --- 4. SHAP Summary Plot (Top 7) ---
# Re-attach the feature names to the scaled test matrix before explaining it.
X_test_df = pd.DataFrame(X_test, columns=features)
explainer = shap.Explainer(final_model)
shap_values = explainer(X_test_df)

# Mean absolute SHAP value per feature, keeping the seven largest.
shap_values_df = pd.DataFrame(shap_values.values, columns=features)
mean_abs_shap = shap_values_df.abs().mean()
shap_means = mean_abs_shap.sort_values(ascending=False).head(7)

# Two-column frame (feature name, mean |SHAP|) for the bar chart.
shap_bar_df = (
    shap_means
    .rename_axis('Feature')
    .reset_index(name='Mean SHAP Value')
)

shap_fig = px.bar(
    shap_bar_df,
    x='Mean SHAP Value',
    y='Feature',
    orientation='h',
    color='Mean SHAP Value',
    color_continuous_scale='Viridis',
    title="Top 7 SHAP Feature Importances",
)
# Reverse the y axis so the most important feature is drawn at the top.
shap_fig.update_layout(yaxis=dict(autorange='reversed'), width=700, height=450)
shap_fig.show()

In [15]:
import plotly.graph_objects as go

def plot_lift_curve_plotly(y_true, y_scores):
    """Plot a lift curve with plotly and return its summary statistics.

    Samples are ranked by descending score. After the top ``k`` samples the
    lift is ``cumulative positives / (k * overall positive rate)``, i.e. how
    many times more positives were captured than random targeting would
    capture on average.

    Parameters
    ----------
    y_true : array-like
        Binary labels (1 = positive, 0 = negative), one per sample.
    y_scores : array-like
        Model scores, same length as ``y_true``; higher means more likely
        positive.

    Returns
    -------
    tuple
        ``(alift, lift_top_10)``: the trapezoidal area under the lift curve
        over the targeted proportion, and the lift reached after targeting
        the top 10% of samples (at least one sample).

    Raises
    ------
    ValueError
        If the input is empty or ``y_true`` contains no positive labels
        (lift is undefined in both cases).
    """
    # Drop any pandas index so labels and scores are paired by position
    # instead of being aligned on (possibly different) index labels.
    labels = np.asarray(y_true)
    scores = np.asarray(y_scores)
    if labels.size == 0:
        raise ValueError("y_true is empty; cannot compute a lift curve")

    # Build a frame ranked by descending score; a stable sort keeps tied
    # scores in input order so the result is reproducible.
    data = pd.DataFrame({'y_true': labels, 'y_score': scores})
    data = data.sort_values(by='y_score', ascending=False, kind='stable')

    n_samples = len(data)
    data['n'] = np.arange(1, n_samples + 1)
    data['cumulative_positives'] = data['y_true'].cumsum()
    mean_positive_rate = data['y_true'].mean()
    if not mean_positive_rate > 0:
        raise ValueError("y_true contains no positive labels; lift is undefined")
    data['lift'] = data['cumulative_positives'] / (data['n'] * mean_positive_rate)
    data['percent_targeted'] = data['n'] / n_samples

    # Compute ALIFT. np.trapezoid exists only in NumPy >= 2.0; fall back to
    # its older name np.trapz on earlier versions.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    alift = trapezoid(data['lift'], data['percent_targeted'])

    # Lift at the top 10%: the lift after k contacts lives at position k - 1,
    # so index with k - 1 (using k itself reads the value after k + 1 contacts).
    top_10_count = max(1, int(n_samples * 0.10))
    lift_top_10 = data['lift'].iloc[top_10_count - 1]

    # Build the figure
    fig = go.Figure()

    # Lift curve
    fig.add_trace(go.Scatter(
        x=data['percent_targeted'],
        y=data['lift'],
        mode='lines',
        name=f'Lift Curve (ALIFT = {alift:.2f})',
        line=dict(color='blue')
    ))

    # Horizontal line at lift = 1 (random targeting)
    fig.add_trace(go.Scatter(
        x=[0, 1],
        y=[1, 1],
        mode='lines',
        name='Random Targeting',
        line=dict(dash='dash', color='gray')
    ))

    # Vertical marker at the 10% mark
    fig.add_trace(go.Scatter(
        x=[0.10, 0.10],
        y=[1, lift_top_10],
        mode='lines',
        name='Top 10%',
        line=dict(dash='dot', color='red')
    ))

    # Horizontal marker from x = 0 at the top-10% lift value
    fig.add_trace(go.Scatter(
        x=[0, 0.10],
        y=[lift_top_10, lift_top_10],
        mode='lines',
        name=f'Lift @ Top 10% = {lift_top_10:.2f}',
        line=dict(dash='dot', color='red')
    ))

    # Portrait-oriented layout. An earlier update_layout call whose settings
    # were all overwritten by this one has been removed; the rendered figure
    # is unchanged.
    fig.update_layout(
        title="Lift Curve with ALIFT and Top 10% Highlighted",
        xaxis_title="Proporción de Clientes Contactados",
        yaxis_title="Lift (vs Contacto Aleatorio)",
        width=600,   # narrower
        height=700,  # taller
        margin=dict(l=60, r=30, t=80, b=60),
        legend=dict(
            x=1,         # horizontal position (1 = right edge)
            y=1,         # vertical position (1 = top edge)
            xanchor='right',
            yanchor='top'
        )
    )

    fig.show()
    return alift, lift_top_10

# Usage: draw the lift curve for the held-out predictions and keep the returned
# ALIFT and top-10% lift values.
alift_value, lift_top_10_value = plot_lift_curve_plotly(y_test, y_proba)

In [16]:
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs('model', exist_ok=True)

# Persist the fitted scaler together with the model so the same preprocessing
# can be re-applied when the model is loaded later.
joblib.dump(scaler, 'model/scaler.joblib')
joblib.dump(final_model, 'model/final_model.joblib')

['model/final_model.joblib']