In [46]:
import tensorflow as tf
from tensorflow.keras import mixed_precision

# ---- GPU memory config (silent) ----
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

# ---- Mixed precision (Apple Silicon) ----
mixed_precision.set_global_policy("mixed_float16")


In [47]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pickle

In [58]:
df = pd.read_csv('data/Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [49]:
for col in df.columns:
    print(f"{col}: {df[col].nunique()} unique values")

RowNumber: 10000 unique values
CustomerId: 10000 unique values
Surname: 2932 unique values
CreditScore: 460 unique values
Geography: 3 unique values
Gender: 2 unique values
Age: 70 unique values
Tenure: 11 unique values
Balance: 6382 unique values
NumOfProducts: 4 unique values
HasCrCard: 2 unique values
IsActiveMember: 2 unique values
EstimatedSalary: 9999 unique values
Exited: 2 unique values


In [50]:
df.isna().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [51]:
# preprocessing 
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1) # dropping unnecessary columns


In [52]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [53]:
# convert categorical columns to numerical
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])


In [54]:
One_hot_Encoder = OneHotEncoder()
geo_encoded = One_hot_Encoder.fit_transform(df[['Geography']]).toarray()
geo_df = pd.DataFrame(data =geo_encoded, columns=One_hot_Encoder.get_feature_names_out(['Geography']))
geo_df.head()

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0


In [55]:
# drop geography column and concatenate one hot encoded columns
df = df.drop('Geography', axis=1)
df = pd.concat([df, geo_df], axis=1)
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [56]:
# split the dataset 
X = df.drop('Exited', axis=1)
y = df['Exited']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)   

In [None]:
# save scaler, label encoder, one hot encoder
with open('../models/OneHotEncoder_geo.pkl', 'wb') as f:
    pickle.dump(OneHotEncoder_geo, f)
with open('../models/LabelEncoder_gender.pkl', 'wb') as f:
    pickle.dump(LabelEncoder_gender, f)
with open('../models/StandardScaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# ANN Implementation 

In [14]:
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from datetime import datetime
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy


In [15]:
def build_model(hp):
    model = Sequential()

    # Limit depth (important for speed)
    num_layers = hp.Int('num_layers', 1, 3)

    for i in range(num_layers):
        model.add(
            Dense(
                units=hp.Choice(
                    f'units_{i}', [32, 64]
                ),
                activation='relu',
                input_shape=(X_train.shape[1],) if i == 0 else None
            )
        )

        # Dropout AFTER each hidden layer
        model.add(
            Dropout(
                hp.Choice(
                    f'dropout_{i}', [0.0, 0.2, 0.3]
                )
            )
        )

    # Output layer
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=Adam(
            learning_rate=hp.Choice(
                'learning_rate', [1e-3, 2e-3]
            )
        ),
        loss=BinaryCrossentropy(),
        metrics=['accuracy']
    )

    return model


In [16]:
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=50,        # reduced
    factor=3,
    directory='kt_logs',
    project_name='churn_modeling'
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-12-23 21:19:51.865720: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-12-23 21:19:51.865742: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-12-23 21:19:51.865746: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-12-23 21:19:51.865762: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-23 21:19:51.865771: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [17]:
EarlyStopping_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
TensorBoard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [18]:
tuner.search(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,          # FIXED batch size
    callbacks=[EarlyStopping_callback],
    verbose=1
)

Trial 90 Complete [00h 00m 14s]
val_loss: 0.41854190826416016

Best val_loss So Far: 0.4146338403224945
Total elapsed time: 00h 13m 20s


In [19]:
best_hp = tuner.get_best_hyperparameters(1)[0]

print("Best configuration:")
print("Layers:", best_hp.get('num_layers'))

for i in range(best_hp.get('num_layers')):
    print(
        f"Layer {i+1}: "
        f"units={best_hp.get(f'units_{i}')}, "
        f"dropout={best_hp.get(f'dropout_{i}')}"
    )

print("Learning rate:", best_hp.get('learning_rate'))


Best configuration:
Layers: 1
Layer 1: units=32, dropout=0.0
Learning rate: 0.001


In [20]:
final_model = tuner.get_best_models(1)[0]

history = final_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8111 - loss: 0.4338 - val_accuracy: 0.8145 - val_loss: 0.4155
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8104 - loss: 0.4335 - val_accuracy: 0.8120 - val_loss: 0.4159
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8094 - loss: 0.4336 - val_accuracy: 0.8150 - val_loss: 0.4181
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8080 - loss: 0.4347 - val_accuracy: 0.8130 - val_loss: 0.4155
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8083 - loss: 0.4342 - val_accuracy: 0.8120 - val_loss: 0.4147
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8106 - loss: 0.4337 - val_accuracy: 0.8115 - val_loss: 0.4183
Epoch 7/100
[1m250/25

In [None]:
# save model 
final_model.save('../models/churn_classification_model.h5')



In [None]:
# load tensorboard
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/fit