# Etapa 1: Imports

In [2]:
pip install imbalanced-learn

Collecting imbalanced-learn
  Using cached imbalanced_learn-0.14.0-py3-none-any.whl.metadata (8.8 kB)
Using cached imbalanced_learn-0.14.0-py3-none-any.whl (239 kB)
Installing collected packages: imbalanced-learn
Successfully installed imbalanced-learn-0.14.0
Note: you may need to restart the kernel to use updated packages.


In [5]:
!pip install imbalanced-learn



In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report, roc_auc_score

# Etapa 2: Dados

In [8]:
from pathlib import Path

path = "data/BankChurners.csv"
candidates = [
    Path(path),
    Path.cwd() / path,
    Path.cwd() / "data" / Path(path).name,
    Path("..") / "data" / Path(path).name
]

found = None
for p in candidates:
    if p.exists():
        found = p
        break

if found is None:
    raise FileNotFoundError(
        f"Could not find '{path}'. Tried the following locations: {[str(p) for p in candidates]}"
    )

df = pd.read_csv(found)
df = df[df['Attrition_Flag'].isin(['Attrited Customer', 'Existing Customer'])]
df['Attrition'] = df['Attrition_Flag'].map({'Attrited Customer': 1, 'Existing Customer': 0})

features = [
    'Customer_Age', 'Dependent_count', 'Credit_Limit', 'Total_Trans_Amt', 'Total_Trans_Ct',
    'Avg_Utilization_Ratio', 'Total_Ct_Chng_Q4_Q1', 'Total_Amt_Chng_Q4_Q1'
]
X = df[features]
y = df['Attrition']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)

# Etapa 3: Balanceamento SMOTE

In [9]:
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Etapa 4: Tunagem de hiperparâmetros
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [4, 6, 8],
    'min_samples_split': [2, 5],
}

grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3, scoring='roc_auc')
grid.fit(X_res, y_res)

print("Melhores parâmetros:", grid.best_params_)

Melhores parâmetros: {'max_depth': 8, 'min_samples_split': 5, 'n_estimators': 100}


# Etapa 5: Avaliação final

In [10]:
modelo_final = grid.best_estimator_
y_pred = modelo_final.predict(X_test)
y_proba = modelo_final.predict_proba(X_test)[:,1]

print("\nRelatório pós-balanceamento e tunagem:")
print(classification_report(y_test, y_pred))
print("AUC Final:", roc_auc_score(y_test, y_proba))



Relatório pós-balanceamento e tunagem:
              precision    recall  f1-score   support

           0       0.98      0.92      0.95      2551
           1       0.69      0.92      0.79       488

    accuracy                           0.92      3039
   macro avg       0.84      0.92      0.87      3039
weighted avg       0.94      0.92      0.93      3039

AUC Final: 0.9776839362255881
