In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import shap
import matplotlib.pyplot as plt

# Carregar o dataset (certifique-se de que o arquivo esteja no mesmo diretório)
df = pd.read_csv('UCI_Credit_Card.csv')

# Ajustar o nome da coluna alvo, se necessário
if 'default.payment.next.month' in df.columns:
    df.rename(columns={'default.payment.next.month': 'default'}, inplace=True)

# Remover ID se existir
df.drop(columns=['ID'], errors='ignore', inplace=True)

# Separar features e target
X = df.drop(columns=['default'])
y = df['default']

# Tratar valores ausentes (não deve ter, mas é bom garantir)
X.fillna(X.median(), inplace=True)

# Normalizar os dados
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Balanceamento com SMOTE
smote = SMOTE(random_state=42)
X_bal, y_bal = smote.fit_resample(X_scaled, y)

# Divisão treino/teste
X_train, X_test, y_train, y_test = train_test_split(X_bal, y_bal, test_size=0.3, random_state=42)

# Modelo
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Avaliação
y_pred_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

print(f"\n🔍 ROC AUC: {roc_auc_score(y_test, y_pred_proba):.4f}")
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred))
print("\n🧩 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# SHAP summary plot (salvar ou mostrar)
#shap.summary_plot(shap_values[1], X_test, feature_names=X.columns)

# Explicabilidade com SHAP - calcula só numa amostra reduzida para acelerar
explainer = shap.TreeExplainer(model)

# Explicabilidade com SHAP
explainer = shap.TreeExplainer(model)

# Amostra reduzida para SHAP
X_test_sample = pd.DataFrame(X_test, columns=X.columns).sample(n=50, random_state=42)
shap_values = explainer.shap_values(X_test_sample)

# SHAP summary plot
#shap.summary_plot(shap_values[1], X_test_sample, feature_names=X.columns)

  from .autonotebook import tqdm as notebook_tqdm
[WinError 2] O sistema não pode encontrar o arquivo especificado
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^



🔍 ROC AUC: 0.9223

📊 Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85      7005
           1       0.87      0.82      0.84      7014

    accuracy                           0.85     14019
   macro avg       0.85      0.85      0.85     14019
weighted avg       0.85      0.85      0.85     14019


🧩 Confusion Matrix:
 [[6115  890]
 [1245 5769]]


APP v1 (Em Construção)

In [None]:
# ============================================================
# 0. Imports
# ============================================================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import shap, matplotlib.pyplot as plt, base64, io

import dash
from dash import dcc, html, Input, Output, State, dash_table
import plotly.express as px
import plotly.figure_factory as ff

# ============================================================
# 1. Dados, pré-processamento e modelo
# ============================================================
df = pd.read_csv("UCI_Credit_Card.csv")

if "default.payment.next.month" in df.columns:
    df.rename(columns={"default.payment.next.month": "default"}, inplace=True)

df.drop(columns=["ID"], errors="ignore", inplace=True)

X = df.drop(columns=["default"])
y = df["default"]

X.fillna(X.median(), inplace=True)

scaler   = StandardScaler()
X_scaled = scaler.fit_transform(X)

smote      = SMOTE(random_state=42)
X_bal, y_bal = smote.fit_resample(X_scaled, y)

X_bal_df = pd.DataFrame(X_bal, columns=X.columns)
y_bal_sr = pd.Series(y_bal, name="default")

X_train, X_test, y_train, y_test = train_test_split(
    X_bal_df, y_bal_sr, test_size=0.3, random_state=42, stratify=y_bal_sr
)

model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

y_prob = model.predict_proba(X_test)[:, 1]
y_pred = (y_prob >= 0.5).astype(int)

roc_fpr, roc_tpr, _ = roc_curve(y_test, y_prob)
roc_auc             = auc(roc_fpr, roc_tpr)
conf_mat            = confusion_matrix(y_test, y_pred)
report              = classification_report(y_test, y_pred, output_dict=True)

# ============================================================
# 2. SHAP  (amostra reduzida)
# ============================================================
explainer             = shap.TreeExplainer(model)
X_test_sample         = X_test.sample(n=50, random_state=42)
shap_values_class1    = explainer.shap_values(X_test_sample)[1]
base_value_class1     = explainer.expected_value[1]

# ============================================================
# 3. Funções auxiliares de plot
# ============================================================
def plot_roc():
    fig = px.area(
        x=roc_fpr, y=roc_tpr,
        title=f"ROC Curve (AUC = {roc_auc:.4f})",
        labels={"x": "False Positive Rate", "y": "True Positive Rate"}
    )
    fig.add_shape(type="line", line=dict(dash="dash"), x0=0, x1=1, y0=0, y1=1)
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    fig.update_xaxes(constrain="domain")
    return fig

def plot_score_hist():
    df_scores = pd.DataFrame({"score": y_prob, "true": y_test})
    fig = px.histogram(df_scores, x="score", color=df_scores["true"].astype(str),
                       nbins=50, barmode="overlay",
                       labels={"score": "Probabilidade", "color": "Classe verdadeira"},
                       title="Distribuição das probabilidades previstas")
    return fig

def plot_conf_matrix(cm):
    labs = ["Não Default", "Default"]
    fig = ff.create_annotated_heatmap(
        z=cm, x=labs, y=labs,
        colorscale="Viridis", showscale=True, hoverinfo="z"
    )
    fig.update_layout(title="Matriz de Confusão")
    return fig

def plot_shap_summary_bar():
    vals = np.abs(shap_values_class1).mean(axis=0)
    fig = px.bar(x=vals, y=X_test.columns, orientation="h",
                 labels={"x": "Mean |SHAP value|", "y": "Feature"},
                 title="Importância média das features (SHAP)")
    fig.update_layout(yaxis={"categoryorder": "total ascending"})
    return fig

def plot_shap_beeswarm():
    plt.clf()
    shap.plots.beeswarm(
        shap.Explanation(values=shap_values_class1,
                         base_values=base_value_class1,
                         data=X_test_sample),
        show=False
    )
    buf = io.BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight")
    plt.close()
    return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

def plot_shap_waterfall(idx):
    plt.clf()
    shap.plots.waterfall(
        shap.Explanation(values=shap_values_class1[idx],
                         base_values=base_value_class1,
                         data=X_test_sample.iloc[idx]),
        max_display=12, show=False
    )
    buf = io.BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight")
    plt.close()
    return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

# ============================================================
# 4. Dash App
# ============================================================
app = dash.Dash(__name__, suppress_callback_exceptions=True)
server = app.server

app.layout = html.Div([
    html.H1("Dashboard – Risco de Crédito (Default)", style={"textAlign": "center"}),
    dcc.Tabs(id="tabs", value="overview", children=[
        dcc.Tab(label="📊 Overview",           value="overview"),
        dcc.Tab(label="📈 SHAP Values",        value="shap"),
        dcc.Tab(label="📉 Matr. Confusão",     value="confusion"),
        dcc.Tab(label="📤 Upload & Predict",   value="upload"),
        dcc.Tab(label="🔎 Perfil (Idade)",     value="profile"),
    ]),
    html.Div(id="tabs-content")
])

# ------------------------------------------------------------
# 4.1 Conteúdo das abas
# ------------------------------------------------------------
@app.callback(Output("tabs-content", "children"),
              Input("tabs", "value"))
def render_tab(tab):
    if tab == "overview":
        return html.Div([
            html.H3("Métricas do Modelo"),
            html.Pre(f"AUC: {roc_auc:.4f}"),
            dcc.Graph(figure=plot_roc()),
            dcc.Graph(figure=plot_score_hist()),
            html.Pre(str(pd.DataFrame(report).T.round(3)))
        ])

    elif tab == "shap":
        return html.Div([
            html.H3("Feature Importance (SHAP)"),
            dcc.Graph(figure=plot_shap_summary_bar()),
            html.Img(src=plot_shap_beeswarm(), style={"width": "90%", "maxWidth": "900px"}),
            html.Hr(),
            html.Label("Índice do cliente na amostra SHAP (0-49):"),
            dcc.Input(id="shap-index", type="number", min=0, max=len(X_test_sample)-1, value=0),
            html.Div(id="shap-waterfall-output")
        ])

    elif tab == "confusion":
        return html.Div([
            html.H3("Matriz de Confusão"),
            dcc.Graph(figure=plot_conf_matrix(conf_mat))
        ])

    elif tab == "upload":
        return html.Div([
            html.H3("Upload de CSV para Predição"),
            dcc.Upload(
                id="upload-data",
                children=html.Div(["Arraste ou selecione arquivo CSV"]),
                style={"width": "100%", "height": "60px", "lineHeight": "60px",
                       "borderWidth": "1px", "borderStyle": "dashed",
                       "borderRadius": "5px", "textAlign": "center",
                       "margin": "10px 0"},
                multiple=False
            ),
            html.Div(id="upload-output")
        ])

    elif tab == "profile":
        return html.Div([
            html.H3("Análise por Faixa Etária"),
            dcc.RangeSlider(
                id="age-slider",
                min=int(X_test["AGE"].min()),
                max=int(X_test["AGE"].max()),
                value=[int(X_test["AGE"].min()), int(X_test["AGE"].max())],
                marks={i: str(i) for i in range(int(X_test["AGE"].min()),
                                                int(X_test["AGE"].max())+1, 5)},
                step=1
            ),
            html.Div(id="age-slider-output"),
            dcc.Graph(id="profile-score-dist"),
            dcc.Graph(id="profile-shap-bar")
        ])

# ------------------------------------------------------------
# 4.2 Callbacks auxiliares
# ------------------------------------------------------------
@app.callback(Output("shap-waterfall-output", "children"),
              Input("shap-index", "value"))
def update_waterfall(idx):
    if idx is None or idx < 0 or idx >= len(X_test_sample):
        return "Índice inválido."
    return html.Img(src=plot_shap_waterfall(idx),
                    style={"width": "90%", "maxWidth": "900px"})

@app.callback(Output("upload-output", "children"),
              Input("upload-data", "contents"),
              State("upload-data", "filename"))
def handle_upload(contents, filename):
    if contents is None:
        return ""
    content_type, content_string = contents.split(",")
    decoded = base64.b64decode(content_string)
    try:
        df_new = pd.read_csv(io.StringIO(decoded.decode("utf-8")))

        # Verifica se colunas batem
        if set(df_new.columns) != set(X.columns):
            return html.Div("⚠️ Colunas do CSV não correspondem ao esperado.")

        df_scaled = scaler.transform(df_new)
        preds     = model.predict(df_scaled)
        probs     = model.predict_proba(df_scaled)[:, 1]

        df_new["Prediction"]  = preds
        df_new["Probability"] = probs.round(3)

        return html.Div([
            html.H5(f"Resultado para {filename}"),
            dash_table.DataTable(
                data=df_new.to_dict("records"),
                columns=[{"name": i, "id": i} for i in df_new.columns],
                page_size=10, style_table={"overflowX": "auto"}
            )
        ])
    except Exception as e:
        return html.Div([html.Pre(f"Erro: {str(e)}")])

@app.callback(
    Output("age-slider-output",  "children"),
    Output("profile-score-dist", "figure"),
    Output("profile-shap-bar",   "figure"),
    Input("age-slider",          "value")
)
def update_profile(age_range):
    if not age_range or len(age_range) != 2:
        return "Faixa etária inválida", {}, {}

    min_age, max_age = age_range
    mask = (X_test["AGE"] >= min_age) & (X_test["AGE"] <= max_age)
    df_profile = X_test[mask]

    if df_profile.empty:
        empty_fig = px.histogram(title="Sem dados na faixa")
        empty_bar = px.bar(title="Sem dados na faixa")
        return "Nenhum dado na faixa selecionada", empty_fig, empty_bar

    y_prob_series = pd.Series(y_prob, index=X_test.index)
    probs_profile = y_prob_series.loc[df_profile.index]

    fig_dist = px.histogram(
        x=probs_profile,
        nbins=30,
        labels={"x": "Probabilidade de Default"},
        title=f"Distribuição de Probabilidades • {min_age}-{max_age} anos"
    )

    # Corrigindo índice SHAP:
    sample_idx_map = {idx: pos for pos, idx in enumerate(X_test_sample.index)}
    valid_positions = [sample_idx_map[i] for i in df_profile.index if i in sample_idx_map]

    if valid_positions:
        shap_profile_vals = np.abs(shap_values_class1[valid_positions]).mean(axis=0)
    else:
        shap_profile_vals = np.zeros(X_test.shape[1])

    fig_shap = px.bar(
        x=shap_profile_vals,
        y=X_test.columns,
        orientation="h",
        labels={"x": "Média |SHAP|", "y": "Feature"},
        title=f"Importância média das features • {min_age}-{max_age} anos"
    )
    fig_shap.update_layout(yaxis={"categoryorder": "total ascending"})

    msg = f"Faixa etária: {min_age} a {max_age} anos"
    return msg, fig_dist, fig_shap

# ============================================================
if __name__ == "__main__":
    app.run(debug=True)


[2025-07-17 16:59:53,130] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\dash\dash.py", line 1484, in dispatch
    response_data = ctx.run(partial_func)
                    ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\dash\_callback.py", line 698, in add_context
    raise err
  File "c:\Users\Yuri_\AppData

App v2 (Em Construção)

In [None]:
import base64
import io
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix, roc_curve
from imblearn.over_sampling import SMOTE
import shap
import matplotlib.pyplot as plt

import dash
from dash import dcc, html, Output, Input

# ======== PIPELINE ML ========
df = pd.read_csv('UCI_Credit_Card.csv')

if 'default.payment.next.month' in df.columns:
    df.rename(columns={'default.payment.next.month': 'default'}, inplace=True)

df.drop(columns=['ID'], errors='ignore', inplace=True)

X = df.drop(columns=['default'])
y = df['default']

X.fillna(X.median(), inplace=True)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

smote = SMOTE(random_state=42)
X_bal, y_bal = smote.fit_resample(X_scaled, y)

X_train, X_test, y_train, y_test = train_test_split(X_bal, y_bal, test_size=0.3, random_state=42)

model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

y_pred_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

roc_auc = roc_auc_score(y_test, y_pred_proba)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# SHAP explicabilidade
explainer = shap.TreeExplainer(model)
X_test_df = pd.DataFrame(X_test, columns=X.columns)
X_test_sample = X_test_df.sample(n=50, random_state=42)
shap_values = explainer.shap_values(X_test_sample)

# ======== FUNÇÕES DE GRÁFICOS ========
def plot_roc_curve():
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    plt.figure(figsize=(6,4))
    plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.4f})')
    plt.plot([0,1],[0,1],'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc='lower right')
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    img_bytes = buf.read()
    encoded = base64.b64encode(img_bytes).decode()
    return f"data:image/png;base64,{encoded}"

def plot_confusion_matrix():
    plt.figure(figsize=(4,4))
    plt.imshow(conf_matrix, cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(2)
    plt.xticks(tick_marks, ['Adimplente', 'Inadimplente'])
    plt.yticks(tick_marks, ['Adimplente', 'Inadimplente'])
    thresh = conf_matrix.max() / 2.
    for i, j in np.ndindex(conf_matrix.shape):
        plt.text(j, i, conf_matrix[i, j], 
                 horizontalalignment="center",
                 color="white" if conf_matrix[i, j] > thresh else "black",
                 fontsize=14)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    img_bytes = buf.read()
    encoded = base64.b64encode(img_bytes).decode()
    return f"data:image/png;base64,{encoded}"

def plot_shap_summary():
    plt.figure(figsize=(8,6))
    shap.summary_plot(shap_values[1], X_test_sample, show=False)
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    img_bytes = buf.read()
    encoded = base64.b64encode(img_bytes).decode()
    return f"data:image/png;base64,{encoded}"

# ======== DASH APP ========
app = dash.Dash(__name__)
app.title = 'Default Prediction Dashboard'

app.layout = html.Div([
    html.H2('Previsão de Inadimplência - Dashboard'),
    dcc.Tabs(id='tabs', value='tab-metrics', children=[
        dcc.Tab(label='Métricas e Avaliação', value='tab-metrics'),
        dcc.Tab(label='Importância das Variáveis', value='tab-shap'),
        dcc.Tab(label='Análise por Perfil', value='tab-profile'),
    ]),
    html.Div(id='tab-content')
])

@app.callback(
    Output('tab-content', 'children'),
    Input('tabs', 'value')
)
def render_tab(tab):
    if tab == 'tab-metrics':
        return html.Div([
            html.H3('Avaliação do Modelo'),
            html.P(f"ROC AUC: {roc_auc:.4f}"),
            html.Img(src=plot_roc_curve(), style={'max-width': '600px', 'height': 'auto'}),
            html.Br(),
            html.Img(src=plot_confusion_matrix(), style={'max-width': '400px', 'height': 'auto'}),
            html.Pre(class_report, style={'whiteSpace': 'pre-wrap', 'border': '1px solid #ccc', 'padding': '10px'})
        ], style={'padding': '10px'})
    
    elif tab == 'tab-shap':
        return html.Div([
            html.H3('Importância das Variáveis (SHAP)'),
            html.Img(src=plot_shap_summary(), style={'max-width': '800px', 'height': 'auto'})
        ], style={'padding': '10px'})
    
    elif tab == 'tab-profile':
        return html.Div([
            html.H3('Análise por Perfil'),
            html.P('🚧 Em construção...')
        ], style={'padding': '10px'})
    
    return html.Div('Tab inválida')

if __name__ == '__main__':
    app.run(debug=True)


[2025-07-17 16:40:36,861] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\flask\app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\dash\dash.py", line 1484, in dispatch
    response_data = ctx.run(partial_func)
                    ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Yuri_\AppData\Local\Programs\Python\Python312\Lib\site-packages\dash\_callback.py", line 698, in add_context
    raise err
  File "c:\Users\Yuri_\AppData