In [51]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, roc_curve
from imblearn.over_sampling import SMOTE


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import joblib

In [52]:
df_churn = pd.read_csv("../src/dataset_churn_balanceado.csv")

In [53]:
df_churn['Churn'].value_counts(normalize=True)

Churn
1    0.5
0    0.5
Name: proportion, dtype: float64

In [54]:
df_churn.shape

(12368, 29)

In [55]:
df_churn.columns

Index(['Unnamed: 0', 'CreditScore', 'Age', 'Tenure', 'Balance',
       'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary',
       'days_since_last_tx', 'txs_avg_amount', 'amount_std',
       'avg_cashout_amount', 'ratio_recent_vs_past_txs',
       'ratio_recent_vs_past_amount', 'ratio_cashouts', 'ratio_transfers',
       'inflation_pressure', 'days_since_last_ss', 'total_ss_past30d',
       'total_ss_past90d', 'avg_ss_per_wk', 'avg_ss_duration_min',
       'std_ss_duration_min', 'ratio_ss_time_recent_vs_past',
       'ratio_events_sessios', 'ratio_failed_ss', 'total_opened_push',
       'Churn'],
      dtype='object')

In [56]:
features = ['CreditScore', 'Age', 'Tenure',
       'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember',
       'EstimatedSalary', 'days_since_last_tx', 'txs_avg_amount', 'amount_std',
       'avg_cashout_amount', 'ratio_recent_vs_past_txs',
       'ratio_recent_vs_past_amount', 'ratio_cashouts', 'ratio_transfers',
       'inflation_pressure', 'days_since_last_ss', 'total_ss_past30d',
       'total_ss_past90d', 'avg_ss_per_wk', 'avg_ss_duration_min',
       'std_ss_duration_min', 'ratio_ss_time_recent_vs_past',
       'ratio_events_sessios', 'ratio_failed_ss', 'total_opened_push']

In [57]:
X = df_churn[features]

In [58]:
y = df_churn[['Churn']]

In [59]:
print(X.shape)
print(y.shape)

(12368, 27)
(12368, 1)


In [60]:
X = pd.get_dummies(X, drop_first=True)

In [61]:
scaler = StandardScaler()

In [62]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', scaler , features)
    ]
)

In [63]:
X_processed = preprocessor.fit_transform(X)

In [64]:
joblib.dump(preprocessor, "pipeline_churn.pkl") 

['pipeline_churn.pkl']

In [65]:
X_processed.shape

(12368, 27)

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X_processed ,y, test_size=0.2, random_state=42)

In [67]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  
    Dropout(0.3), 
    Dense(32, activation='relu'), 
    Dense(1, activation='sigmoid')  
])

In [68]:
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                1792      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 3905 (15.25 KB)
Trainable params: 3905 (15.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [69]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [70]:
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [71]:
loss, accuracy = model.evaluate(X_test, y_test)



In [72]:
print("Acurácia no teste:", accuracy)

Acurácia no teste: 0.9029911160469055


In [73]:
y_prob = model.predict(X_test).flatten()



In [74]:
threshold = 0.7
clientes_alvo = np.where(y_prob > threshold)[0]
print(f" Clientes: {len(y_prob)} | Clientes com probabilidade > {threshold*100}%:", len(clientes_alvo))

 Clientes: 2474 | Clientes com probabilidade > 70.0%: 1109


In [75]:
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, roc_curve

In [76]:
y_pred = (y_prob >= threshold).astype(int)

In [77]:
precision, recall, f1, _ = precision_recall_fscore_support(
        y_test, y_pred, average='binary'
)

In [78]:
roc_auc = roc_auc_score(y_test, y_prob)

In [79]:
results = []
results.append([precision, recall, f1, roc_auc])

In [80]:
results = np.array(results)
metrics_df = pd.DataFrame(results, columns=['Precision', 'Recall', 'F1', 'ROC AUC'])
print("\n📊 Média das métricas:")
print(metrics_df.mean())



📊 Média das métricas:
Precision    0.943192
Recall       0.838141
F1           0.887569
ROC AUC      0.962435
dtype: float64


In [81]:
model.save("rnn_churn_model.keras")

In [82]:
tf.keras.backend.clear_session()