# 03_modeling

## Importando bibliotecas

In [1]:
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_curve, auc

## Carregando os dados

In [2]:
# Restore the four train/test splits from the pickled tuple on disk.
# NOTE(review): joblib.load unpickles its input, so only point it at trusted files.
(X_train, X_test, y_train, y_test) = joblib.load(
    '../data/processed/train_test_data.pkl'
)

## Modelagem

In [3]:
# Train the logistic-regression classifier on the training split.
# max_iter is set explicitly to 1000 to give the solver more iterations;
# random_state is fixed so repeated runs use the same seed.
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train, y_train)

# Persist the fitted model. joblib is already imported in the notebook's
# import cell, so it is not re-imported here.
# Kept as the cell's last expression so the list of written paths is displayed.
joblib.dump(logreg, '../data/processed/model_logreg.pkl')

['../data/processed/model_logreg.pkl']

In [4]:
# Train the multinomial Naive Bayes classifier on the training split.
nb = MultinomialNB()
nb.fit(X_train, y_train)

# Predict test-set labels with BOTH models; the evaluation cells below
# read y_pred_log and y_pred_nb.
y_pred_log = logreg.predict(X_test)
y_pred_nb = nb.predict(X_test)

# Persist the fitted Naive Bayes model. joblib is already imported in the
# notebook's import cell, so it is not re-imported here.
# Kept as the cell's last expression so the list of written paths is displayed.
joblib.dump(nb, '../data/processed/model_nb.pkl')

['../data/processed/model_nb.pkl']

In [5]:
# Print the per-class precision / recall / F1 report for each model's
# test-set predictions, one model after the other.
reports = (
    ("📊 Regressão Logística:", y_pred_log),
    ("\n📊 Naive Bayes:", y_pred_nb),
)
for heading, predictions in reports:
    print(heading)
    print(classification_report(y_test, predictions))

📊 Regressão Logística:
              precision    recall  f1-score   support

           0       0.91      0.27      0.42       327
           1       0.87      0.99      0.93      1673

    accuracy                           0.88      2000
   macro avg       0.89      0.63      0.67      2000
weighted avg       0.88      0.88      0.85      2000


📊 Naive Bayes:
              precision    recall  f1-score   support

           0       1.00      0.05      0.09       327
           1       0.84      1.00      0.91      1673

    accuracy                           0.84      2000
   macro avg       0.92      0.52      0.50      2000
weighted avg       0.87      0.84      0.78      2000



In [7]:
# ROC curve and AUC (Area Under the Curve): measures how well each model
# separates the two classes across all decision thresholds.
# Only applicable to models that expose predict_proba.
#
# Results are kept per model in `roc_results` so that one model's curve is
# not lost when the next one is evaluated (e.g. for plotting later).
roc_results = {}
for model_name, model in (("Regressão Logística", logreg), ("Naive Bayes", nb)):
    # Column 1 of predict_proba is the probability of the second entry in
    # model.classes_ — label 1 here, given the 0/1 labels shown in the reports.
    y_proba = model.predict_proba(X_test)[:, 1]

    fpr, tpr, thresholds = roc_curve(y_test, y_proba)
    roc_auc = auc(fpr, tpr)
    roc_results[model_name] = {
        "fpr": fpr,
        "tpr": tpr,
        "thresholds": thresholds,
        "auc": roc_auc,
    }
    print(f'📌 AUC {model_name}: {roc_auc:.2f}')

# After the loop, y_proba / fpr / tpr / thresholds / roc_auc still hold the
# Naive Bayes values, exactly as the original copy-pasted version left them.

📌 AUC Regressão Logística: 0.93
📌 AUC Naive Bayes: 0.91
