In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import os
# Project root used as the working directory so the relative data path below
# resolves. Set the CREDIT_RISK_PROJECT_DIR environment variable to run this
# notebook on another machine; when it is unset, the original location is used.
PROJECT_DIR = os.environ.get(
    'CREDIT_RISK_PROJECT_DIR',
    '/Users/fantini1999/analise-risco-credito-m1',
)
os.chdir(PROJECT_DIR)

# Load the pre-processed training table (path is relative to PROJECT_DIR).
df = pd.read_csv('data/processed/train_clean.csv')

In [5]:
# Target column on one side, every remaining column as features on the other.
y = df["SeriousDlqin2yrs"]
X = df.drop(columns=["SeriousDlqin2yrs"])

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Cast the labels of both splits to integers.
y_train, y_test = y_train.astype(int), y_test.astype(int)

# Learn the scaling statistics on the training split only, then apply them to
# both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Random forest is trained on the unscaled features.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Logistic regression is trained on the standardised features.
logreg = LogisticRegression().fit(X_train_scaled, y_train)

def avaliar_modelo(modelo, X_teste, y_teste):
    """Print a classification report, a confusion matrix and the ROC AUC.

    Parameters
    ----------
    modelo
        Fitted estimator exposing ``predict``. For the AUC, ``predict_proba``
        is used when present; otherwise ``decision_function`` is used when
        present; if the estimator has neither, the AUC line is skipped.
    X_teste
        Evaluation features, in the same representation the estimator was
        fitted on (e.g. scaled vs. unscaled).
    y_teste
        True labels for ``X_teste``.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    y_pred = modelo.predict(X_teste)

    # Pick a continuous score for the AUC. Column 1 of predict_proba is taken
    # as the positive-class score, which assumes a two-class target.
    if hasattr(modelo, "predict_proba"):
        y_score = modelo.predict_proba(X_teste)[:, 1]
    elif hasattr(modelo, "decision_function"):
        # Margin-based estimators without probabilities: roc_auc_score also
        # accepts non-thresholded decision values.
        y_score = modelo.decision_function(X_teste)
    else:
        y_score = None

    print("Relatório de Classificação:")
    print(classification_report(y_teste, y_pred))
    print("Matriz de Confusão:")
    print(confusion_matrix(y_teste, y_pred))
    if y_score is not None:
        print("AUC:", roc_auc_score(y_teste, y_score))

# Evaluate each fitted model on the hold-out split, pairing it with the feature
# representation it was trained on (scaled for logistic regression, raw for
# the random forest).
for titulo, modelo, X_avaliacao in (
    ("== Regressão Logística ==", logreg, X_test_scaled),
    ("\n== Random Forest ==", rf, X_test),
):
    print(titulo)
    avaliar_modelo(modelo, X_avaliacao, y_test)

== Regressão Logística ==
Relatório de Classificação:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97     19571
           3       0.56      0.04      0.07      1390

    accuracy                           0.93     20961
   macro avg       0.75      0.52      0.52     20961
weighted avg       0.91      0.93      0.91     20961

Matriz de Confusão:
[[19529    42]
 [ 1336    54]]
AUC: 0.6876796125819696

== Random Forest ==
Relatório de Classificação:
              precision    recall  f1-score   support

           0       0.94      0.99      0.97     19571
           3       0.54      0.17      0.26      1390

    accuracy                           0.94     20961
   macro avg       0.74      0.58      0.61     20961
weighted avg       0.92      0.94      0.92     20961

Matriz de Confusão:
[[19376   195]
 [ 1158   232]]
AUC: 0.8400202325493342
