In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from tensorflow.keras import layers, models, callbacks

In [None]:
# Single source of truth for every random seed used in this notebook.
SEED = 42

# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it
# here does not change hashing in the already-running kernel; it is only
# inherited by child processes started from this kernel.
os.environ["PYTHONHASHSEED"] = str(SEED)

# Seed Python's `random`, NumPy and TensorFlow in one call.
# Seeding only NumPy and TensorFlow is not enough with Keras 3 (the Keras that
# ships with recent TensorFlow releases): default initializer seeds are drawn
# from Python's `random` module, so weight initialisation would still vary
# between runs.
tf.keras.utils.set_random_seed(SEED)

print("Tensorflow:", tf.__version__)

Tensorflow: 2.19.0


In [None]:
# Read the raw table from the notebook's working directory.
dataset = pd.read_csv("Churn_Modelling.csv")

# Features: columns at positions 3 .. second-to-last (copied so later edits
# never touch `dataset`). Target: the last column, as a NumPy array.
feature_slice = slice(3, -1)
X = dataset.iloc[:, feature_slice].copy()
y = dataset.iloc[:, -1].to_numpy()

print("Shapes:", X.shape, y.shape)

Shapes: (10000, 10) (10000,)


In [None]:
# Integer-encode the feature at position 2 (the "Gender" column used by the
# single-customer example further down) on a COPY of X.
# Leaving X untouched keeps this cell idempotent: re-running it always fits
# the encoder on the raw string labels instead of on already-encoded integers,
# so `le_gender.transform(["Male"])` keeps working after any number of re-runs.
gender_col = X.columns[2]
le_gender = LabelEncoder()
X_encoded = X.copy()
# Whole-column assignment replaces the column, so it gets an integer dtype
# rather than staying `object` as it does with `.loc[:, col] = ...`.
X_encoded[gender_col] = le_gender.fit_transform(X.iloc[:, 2])

# One-hot encode the feature at position 1 and pass every other column through.
# sparse_threshold=0 forces a dense ndarray result regardless of how sparse the
# one-hot block is, so the float32 conversion below can never receive a
# scipy sparse matrix.
ct = ColumnTransformer(
    transformers=[("geo_ohe", OneHotEncoder(handle_unknown="ignore"), [1])],
    remainder="passthrough",
    sparse_threshold=0,
)
X_ohe = np.asarray(ct.fit_transform(X_encoded)).astype("float32")

# Sanity check: encoding must not add or drop rows.
assert X_ohe.shape[0] == X.shape[0], "row count changed during encoding"

In [None]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class
# ratio the same in both partitions, and SEED makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_ohe,
    y,
    stratify=y,
    random_state=SEED,
    test_size=0.2,
)

# Learn the scaling statistics from the training partition only, then apply
# them to both partitions. with_mean=False means features are divided by
# their standard deviation but are not centered.
sc = StandardScaler(with_mean=False)
sc.fit(X_train)
X_train = sc.transform(X_train).astype("float32")
X_test = sc.transform(X_test).astype("float32")

In [None]:
def build_model(input_dim, hidden_units=(16, 16)):
    """Build and compile a small fully connected binary classifier.

    Args:
        input_dim: Number of input features per sample.
        hidden_units: Width of each hidden ReLU layer, in order. The default
            reproduces the original two hidden layers of 16 units.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit,
        trained with Adam on binary cross-entropy and reporting accuracy and
        AUC (under the metric name ``"auc"``).
    """
    model = models.Sequential()
    # Declare the input with an explicit Input layer. Passing `input_shape=`
    # to the first Dense layer is discouraged for Sequential models and emits
    # a UserWarning under Keras 3.
    model.add(layers.Input(shape=(input_dim,)))
    for units in hidden_units:
        model.add(layers.Dense(units, activation="relu"))
    # One sigmoid unit -> probability of the positive class.
    model.add(layers.Dense(1, activation="sigmoid"))

    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        # The AUC metric is named explicitly so callbacks can monitor "val_auc".
        metrics=["accuracy", tf.keras.metrics.AUC(name="auc")],
    )
    return model
# The network's input width is the number of columns in the scaled
# training matrix.
n_features = X_train.shape[1]
ann = build_model(input_dim=n_features)
ann.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Stop when validation AUC has not improved for 10 epochs and roll the model
# back to its best weights.
early_stop = callbacks.EarlyStopping(
    monitor="val_auc",
    mode="max",
    patience=10,
    restore_best_weights=True,
)

# Persist the best-by-validation-AUC model to disk.
checkpoint = callbacks.ModelCheckpoint(
    "best_ann.keras",
    monitor="val_auc",
    mode="max",
    save_best_only=True,
)

# Halve the learning rate when validation loss stalls for 5 epochs.
lr_on_plateau = callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=5,
)

cb = [early_stop, checkpoint, lr_on_plateau]

In [None]:
# Training configuration: up to 200 epochs in mini-batches of 32, with 20% of
# the training data set aside for validation. The callbacks in `cb` decide
# when to stop, what to checkpoint and when to lower the learning rate.
fit_options = {
    "validation_split": 0.2,
    "epochs": 200,
    "batch_size": 32,
    "callbacks": cb,
    "verbose": 1,
}
hist = ann.fit(X_train, y_train, **fit_options)

Epoch 1/200
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5950 - auc: 0.4679 - loss: 0.7381 - val_accuracy: 0.8000 - val_auc: 0.6442 - val_loss: 0.4793 - learning_rate: 0.0010
Epoch 2/200
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7913 - auc: 0.6753 - loss: 0.4816 - val_accuracy: 0.8019 - val_auc: 0.7275 - val_loss: 0.4480 - learning_rate: 0.0010
Epoch 3/200
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7997 - auc: 0.7386 - loss: 0.4530 - val_accuracy: 0.8106 - val_auc: 0.7491 - val_loss: 0.4346 - learning_rate: 0.0010
Epoch 4/200
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8064 - auc: 0.7618 - loss: 0.4383 - val_accuracy: 0.8181 - val_auc: 0.7559 - val_loss: 0.4301 - learning_rate: 0.0010
Epoch 5/200
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8105 - auc: 0.7685 - lo

In [None]:
# Predicted positive-class probabilities for the held-out set, flattened to 1-D.
test_scores = ann.predict(X_test)
y_proba = test_scores.reshape(-1)

# Hard labels at the 0.5 decision threshold.
y_pred = np.greater_equal(y_proba, 0.5).astype(int)

# Threshold-dependent metrics use the hard labels; AUC uses the raw scores.
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)
auc = roc_auc_score(y_test, y_proba)

print("\nMatriz de confusión:\n", cm)
print("\nReporte de clasificación:\n", report)
print("AUC:", auc)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

Matriz de confusión:
 [[1525   68]
 [ 211  196]]

Reporte de clasificación:
               precision    recall  f1-score   support

           0     0.8785    0.9573    0.9162      1593
           1     0.7424    0.4816    0.5842       407

    accuracy                         0.8605      2000
   macro avg     0.8104    0.7194    0.7502      2000
weighted avg     0.8508    0.8605    0.8486      2000

AUC: 0.8523562082884117


In [None]:
# One hand-written customer record, with the same columns and order as X.
customer = {
    "CreditScore": 600,
    "Geography": "France",
    "Gender": "Male",
    "Age": 40,
    "Tenure": 3,
    "Balance": 60000,
    "NumOfProducts": 2,
    "HasCrCard": 1,
    "IsActiveMember": 1,
    "EstimatedSalary": 50000
}
sample_raw = pd.DataFrame([customer])

# Push the record through the already-fitted preprocessing objects, in the
# same order used for training: label-encode, one-hot encode, scale.
sample_raw.loc[:, "Gender"] = le_gender.transform(sample_raw["Gender"])
sample_ohe = ct.transform(sample_raw)
sample_scaled = sc.transform(sample_ohe).astype("float32")

# The model returns a single value for this one row; .item() unwraps it to a
# Python float.
proba = ann.predict(sample_scaled).item()
print(f"\nProbabilidad de churn para el cliente ejemplo: {proba:.4f}")

# Same 0.5 decision threshold as the test-set evaluation.
if proba >= 0.5:
    print("Salir del banco")
else:
    print("Permanecer en el banco")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step

Probabilidad de churn para el cliente ejemplo: 0.0488
Permanecer en el banco
