# Import

In [1]:
import keras
import tensorflow as tf
print("Keras Current Version:", keras.__version__, "Tensorflow Current Version:", tf.__version__)

Keras Current Version: 3.8.0 Tensorflow Current Version: 2.18.0


In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import random
from joblib import dump, load

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import ReLU, LeakyReLU, PReLU
from keras_tuner import RandomSearch


random.seed(46)
np.random.seed(46)
tf.random.set_seed(46)

pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

# Functions

In [3]:
def grab_col_names(dataframe, cat_th=10, car_th=20):
    """
    Identifies categorical, numerical, and high-cardinality categorical variables in the dataset.

    Parameters:
    dataframe : pd.DataFrame
        The dataframe to analyze.
    cat_th : int, optional (default=10)
        Threshold for a numeric column to be considered categorical.
    car_th : int, optional (default=20)
        Threshold for a categorical column to be considered high-cardinality.

    Returns:
    cat_cols : list
        List of categorical variables.
    num_cols : list
        List of numerical variables.
    cat_but_car : list
        List of categorical variables with high cardinality.
    """
    # Identify categorical columns (object type)
    cat_cols = [col for col in dataframe.columns if dataframe[col].dtypes == "O"]

    # Identify numerical columns that should be considered categorical
    num_but_cat = [col for col in dataframe.columns if dataframe[col].nunique() < cat_th and dataframe[col].dtypes != "O"]

    # Identify high-cardinality categorical columns
    cat_but_car = [col for col in dataframe.columns if dataframe[col].nunique() > car_th and dataframe[col].dtypes == "O"]

    # Combine categorical and numerical-but-categorical columns
    cat_cols = cat_cols + num_but_cat
    cat_cols = [col for col in cat_cols if col not in cat_but_car]

    # Identify numerical columns
    num_cols = [col for col in dataframe.columns if dataframe[col].dtypes != "O"]
    num_cols = [col for col in num_cols if col not in num_but_cat]

    return cat_cols, num_cols, cat_but_car

def prepare_datasets(X_train, X_val, y_train, y_val, batch_size=None):
    """
    Converts training and validation datasets into TensorFlow Dataset format.

    Parameters:
    X_train, X_val : np.array or pd.DataFrame
        Feature matrices for training and validation.
    y_train, y_val : np.array or pd.Series
        Labels for training and validation.
    batch_size : int, optional
        Batch size (default: entire dataset as one batch).

    Returns:
    train_dataset : tf.data.Dataset
        Training dataset.
    val_dataset : tf.data.Dataset
        Validation dataset.
    """
    if batch_size is None:
        batch_size = len(X_train)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(X_train)).batch(batch_size)
    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size)

    return train_dataset, val_dataset

def plot_training_history(history, train_loss='loss', train_metric='accuracy', val_loss='val_loss', val_metric='val_accuracy'):
    """
    Plots training and validation loss and metrics over epochs.

    Parameters:
    history : keras.callbacks.History
        Model training history.
    train_loss : str, optional
        Name of the training loss metric.
    train_metric : str, optional
        Name of the training accuracy metric.
    val_loss : str, optional
        Name of the validation loss metric.
    val_metric : str, optional
        Name of the validation accuracy metric.
    """
    # Plot training and validation loss
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[train_loss], label='Training Loss')
    plt.plot(history.history[val_loss], label='Validation Loss')
    plt.title('Training and Validation Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # Plot training and validation accuracy
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[train_metric], label=f"Training {train_metric}")
    plt.plot(history.history[val_metric], label=f"Validation {val_metric}")
    plt.title(f'Training and Validation {train_metric} Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel(f'{train_metric}')
    plt.legend()
    plt.show()

def get_best_epoch_details(history, metric="val_loss", mode=min):
    """
    Finds the best epoch based on a given metric and returns its details.

    Parameters:
    history : keras.callbacks.History
        Model training history.
    metric : str, optional
        Metric to evaluate (default: validation loss).
    mode : function, optional
        Function to determine the best epoch (min/max).

    Returns:
    pd.DataFrame
        Dataframe containing the best epoch's metric values.
    """
    metric_values = history.history[metric]
    best_epoch_index = metric_values.index(mode(metric_values))
    best_epoch = best_epoch_index + 1

    # Extract all metric values for the best epoch
    metrics = []
    values = []
    for key, value in history.history.items():
        metrics.append(key)
        values.append(value[best_epoch_index])

    df = pd.DataFrame({'Metric': metrics, 'Value': values})
    df['Value'] = df['Value'].map('{:.4f}'.format)

    # Append the best epoch number
    best_epoch_data = pd.DataFrame({'Metric': ['best_epoch'], 'Value': [str(best_epoch)]})
    df = pd.concat([df, best_epoch_data], ignore_index=True)

    return df

def print_hyperparameters(hyperparameters):
    """
    Displays model hyperparameters in a formatted table.

    Parameters:
    hyperparameters : dict
        Dictionary containing model hyperparameters.
    """
    hp_df = pd.DataFrame(list(hyperparameters.items()), columns=['Hyperparameter', 'Value'])
    print(hp_df)


# Feature Engineering

In [4]:
def dataproprocessing(dataframe):
    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe)

    dataframe["TotalCharges"] = pd.to_numeric(dataframe["TotalCharges"], errors='coerce')

    dataframe["TotalCharges"] = dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median())

    # feature engineering
    dataframe.loc[(dataframe["tenure"] >= 0) & (dataframe["tenure"] <= 12), "NEW_TENURE_YEAR"] = "0-1 Year"
    dataframe.loc[(dataframe["tenure"] > 12) & (dataframe["tenure"] <= 24), "NEW_TENURE_YEAR"] = "1-2 Year"
    dataframe.loc[(dataframe["tenure"] > 24) & (dataframe["tenure"] <= 36), "NEW_TENURE_YEAR"] = "2-3 Year"
    dataframe.loc[(dataframe["tenure"] > 36) & (dataframe["tenure"] <= 48), "NEW_TENURE_YEAR"] = "3-4 Year"
    dataframe.loc[(dataframe["tenure"] > 48) & (dataframe["tenure"] <= 60), "NEW_TENURE_YEAR"] = "4-5 Year"
    dataframe.loc[(dataframe["tenure"] > 60) & (dataframe["tenure"] <= 72), "NEW_TENURE_YEAR"] = "5-6 Year"

    dataframe["NEW_Engaged"] = dataframe["Contract"].apply(lambda x: 1 if x in ["One year", "Two year"] else 0)

    dataframe["NEW_noProt"] = dataframe.apply(lambda x: 1 if (x["OnlineBackup"] != "Yes") or (x["DeviceProtection"] != "Yes") or (
                x["TechSupport"] != "Yes") else 0, axis=1)

    dataframe["NEW_Young_Not_Engaged"] = dataframe.apply(lambda x: 1 if (x["NEW_Engaged"] == 0) and (x["SeniorCitizen"] == 0) else 0,
                                          axis=1)

    dataframe['NEW_TotalServices'] = (dataframe[['PhoneService', 'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection', 'TechSupport',
                                  'StreamingTV', 'StreamingMovies']] == 'Yes').sum(axis=1)

    dataframe["NEW_FLAG_ANY_STREAMING"] = dataframe.apply(
        lambda x: 1 if (x["StreamingTV"] == "Yes") or (x["StreamingMovies"] == "Yes") else 0, axis=1)

    dataframe["NEW_FLAG_AutoPayment"] = dataframe["PaymentMethod"].apply(
        lambda x: 1 if x in ["Bank transfer (automatic)", "Credit card (automatic)"] else 0)

    dataframe["NEW_AVG_Charges"] = dataframe["TotalCharges"] / (dataframe["tenure"] + 1)

    dataframe["NEW_Increase"] = dataframe["NEW_AVG_Charges"] / dataframe["MonthlyCharges"]

    dataframe["NEW_AVG_Service_Fee"] = dataframe["MonthlyCharges"] / (dataframe['NEW_TotalServices'] + 1)

    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe)

    cat_cols.remove("Churn")

    dataframe = pd.get_dummies(dataframe, columns=cat_cols, drop_first=True, dtype=int)

    scaler = MinMaxScaler()

    dataframe[num_cols] = scaler.fit_transform(dataframe[num_cols])

    dump(scaler, 'scaler.joblib')

    dataframe.columns = [col.replace(' ', '_').upper() for col in dataframe.columns]

    y = dataframe["CHURN"]
    X = dataframe.drop(["CHURN", "CUSTOMERID"], axis=1)

    return X, y

# Data Preprocessing


In [5]:
df = pd.read_csv("churn.csv")

In [6]:
df["Churn"].value_counts() * 100 / len(df)

Churn
0   73.46301
1   26.53699
Name: count, dtype: float64

In [7]:
X, y = dataproprocessing(df)

In [8]:
X.head()

Unnamed: 0,TENURE,MONTHLYCHARGES,TOTALCHARGES,NEW_AVG_CHARGES,NEW_INCREASE,NEW_AVG_SERVICE_FEE,GENDER_MALE,PARTNER_YES,DEPENDENTS_YES,PHONESERVICE_YES,MULTIPLELINES_NO_PHONE_SERVICE,MULTIPLELINES_YES,INTERNETSERVICE_FIBER_OPTIC,INTERNETSERVICE_NO,ONLINESECURITY_NO_INTERNET_SERVICE,ONLINESECURITY_YES,ONLINEBACKUP_NO_INTERNET_SERVICE,ONLINEBACKUP_YES,DEVICEPROTECTION_NO_INTERNET_SERVICE,DEVICEPROTECTION_YES,TECHSUPPORT_NO_INTERNET_SERVICE,TECHSUPPORT_YES,STREAMINGTV_NO_INTERNET_SERVICE,STREAMINGTV_YES,STREAMINGMOVIES_NO_INTERNET_SERVICE,STREAMINGMOVIES_YES,CONTRACT_ONE_YEAR,CONTRACT_TWO_YEAR,PAPERLESSBILLING_YES,PAYMENTMETHOD_CREDIT_CARD_(AUTOMATIC),PAYMENTMETHOD_ELECTRONIC_CHECK,PAYMENTMETHOD_MAILED_CHECK,NEW_TENURE_YEAR_1-2_YEAR,NEW_TENURE_YEAR_2-3_YEAR,NEW_TENURE_YEAR_3-4_YEAR,NEW_TENURE_YEAR_4-5_YEAR,NEW_TENURE_YEAR_5-6_YEAR,SENIORCITIZEN_1,NEW_ENGAGED_1,NEW_NOPROT_1,NEW_YOUNG_NOT_ENGAGED_1,NEW_TOTALSERVICES_1,NEW_TOTALSERVICES_2,NEW_TOTALSERVICES_3,NEW_TOTALSERVICES_4,NEW_TOTALSERVICES_5,NEW_TOTALSERVICES_6,NEW_TOTALSERVICES_7,NEW_FLAG_ANY_STREAMING_1,NEW_FLAG_AUTOPAYMENT_1
0,0.01389,0.11542,0.00128,0.00414,0.00041,0.2071,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0
1,0.47222,0.38507,0.21587,0.03227,0.00677,0.18441,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0
2,0.02778,0.35423,0.01031,0.01935,0.00282,0.15883,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0
3,0.625,0.2393,0.21024,0.02221,0.00674,0.06353,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1
4,0.02778,0.52189,0.01533,0.0298,0.00346,0.88119,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0


In [9]:
X.shape

(7043, 50)

# Train Val Split

In [10]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

train_ds, val_ds = prepare_datasets(X_train, X_val, y_train, y_val, batch_size=32)

# Base Model with Binary Log Loss


In [11]:
"""
### Neural Network Model Training

This section defines and trains a neural network model using TensorFlow and Keras. The model is designed for binary classification, utilizing a fully connected architecture with regularization techniques to enhance generalization. Early stopping is employed to prevent overfitting by monitoring validation loss and restoring the best model weights.
"""

# Define the base neural network model
base_model = Sequential([
    Input(shape=(train_ds.element_spec[0].shape[1],)),  # Input layer with dynamic shape
    Dense(50, activation='relu', kernel_regularizer=l2(0.001)),  # Hidden layer with L2 regularization
    BatchNormalization(),  # Normalization to stabilize training
    Dropout(0.5),  # Dropout to prevent overfitting
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Define the optimizer with a specific learning rate
optimizer = Adam(learning_rate=0.001)

# Compile the model with loss function and performance metrics
base_model.compile(optimizer=optimizer,
                   loss="binary_crossentropy",
                   metrics=["accuracy", "precision", "recall", "auc"])

# Define early stopping criteria to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss',  # Monitor validation loss
                               patience=20,  # Stop if no improvement for 20 epochs
                               verbose=1,  # Print messages when stopping
                               restore_best_weights=True)  # Restore best weights

# Train the model using the training dataset
base_model_history = base_model.fit(train_ds,
                                    epochs=1000,  # Maximum number of epochs
                                    validation_data=val_ds,  # Validation dataset
                                    verbose=1,  # Display training progress
                                    callbacks=early_stopping)  # Apply early stopping


Epoch 1/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5781 - auc: 0.5995 - loss: 0.8961 - precision: 0.3324 - recall: 0.5478 - val_accuracy: 0.7743 - val_auc: 0.8271 - val_loss: 0.5062 - val_precision: 0.6455 - val_recall: 0.3271
Epoch 2/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7369 - auc: 0.7555 - loss: 0.5829 - precision: 0.5106 - recall: 0.5446 - val_accuracy: 0.7977 - val_auc: 0.8448 - val_loss: 0.4606 - val_precision: 0.6594 - val_recall: 0.4879
Epoch 3/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7596 - auc: 0.7900 - loss: 0.5268 - precision: 0.5541 - recall: 0.5029 - val_accuracy: 0.8027 - val_auc: 0.8490 - val_loss: 0.4531 - val_precision: 0.6444 - val_recall: 0.5684
Epoch 4/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7806 - auc: 0.8068 - loss: 0.5065 - precision: 0.6175 - recall: 

In [12]:
get_best_epoch_details(base_model_history, metric="val_loss", mode=min)

Unnamed: 0,Metric,Value
0,accuracy,0.7993
1,auc,0.8493
2,loss,0.424
3,precision,0.6717
4,recall,0.4773
5,val_accuracy,0.8133
6,val_auc,0.8536
7,val_loss,0.4212
8,val_precision,0.6719
9,val_recall,0.5764


In [13]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = base_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.4211616814136505
Validation Accuracy: 0.813342809677124
Validation AUC: 0.8536428213119507
Validation Precision: 0.671875
Validation Recall: 0.5764074921607971
Validation F1-Score: 0.6204906120651559


# Weighted Cross-Entropy Loss


This is used to indicate that the classes are imbalanced.

In [14]:
len(df[df['Churn'] == 1])

1869

In [15]:
len(df[df['Churn'] == 0])

5174

In [16]:
class_weight_for_0 = 1.0 / len(df[df['Churn'] == 0])

class_weight_for_1 = 1.0 / len(df[df['Churn'] == 1])

We prevent the weights from being biased.  

In [17]:
class_weight_for_0

0.00019327406262079628

In [18]:
class_weights = {0: class_weight_for_0, 1: class_weight_for_1}

In [19]:
### Define the neural network model
base_model = Sequential([
    Input(shape=(train_ds.element_spec[0].shape[1],)),  # Input layer with dynamic shape
    Dense(50, activation='relu', kernel_regularizer=l2(0.001)),  # Hidden layer with L2 regularization to prevent overfitting
    BatchNormalization(),  # Normalization layer to stabilize training
    Dropout(0.5),  # Dropout to reduce overfitting
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Define the optimizer with a specified learning rate
optimizer = Adam(learning_rate=0.001)

# Compile the model with loss function and evaluation metrics
base_model.compile(optimizer=optimizer,
                   loss="binary_crossentropy",  # Loss function for binary classification
                   metrics=["accuracy", "precision", "recall", "auc"])  # Performance metrics

# Define early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss',  # Monitor validation loss
                               patience=20,  # Stop training if no improvement for 20 epochs
                               verbose=1,  # Print messages when early stopping is triggered
                               restore_best_weights=True,  # Restore the best model weights
                               mode='min')  # Ensure the monitored value is minimized

# Train the model with class weighting to handle imbalance
base_model_history = base_model.fit(train_ds,
                                    epochs=1000,  # Maximum number of epochs
                                    validation_data=val_ds,  # Validation dataset
                                    verbose=1,  # Display training progress
                                    callbacks=early_stopping,  # Apply early stopping
                                    class_weight=class_weights)  # Adjust training for imbalanced classes


Epoch 1/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5825 - auc: 0.6135 - loss: 0.0318 - precision: 0.3287 - recall: 0.6073 - val_accuracy: 0.6977 - val_auc: 0.8149 - val_loss: 0.6492 - val_precision: 0.4630 - val_recall: 0.8901
Epoch 2/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6802 - auc: 0.7701 - loss: 0.0024 - precision: 0.4460 - recall: 0.7639 - val_accuracy: 0.7779 - val_auc: 0.8475 - val_loss: 0.6011 - val_precision: 0.5595 - val_recall: 0.7560
Epoch 3/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7173 - auc: 0.8097 - loss: 2.3848e-04 - precision: 0.4779 - recall: 0.7849 - val_accuracy: 0.7594 - val_auc: 0.8513 - val_loss: 0.5219 - val_precision: 0.5305 - val_recall: 0.7936
Epoch 4/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7143 - auc: 0.8029 - loss: 1.6553e-04 - precision: 0.4736 - 

In [20]:
get_best_epoch_details(base_model_history, metric="val_loss", mode=min)

Unnamed: 0,Metric,Value
0,accuracy,0.7238
1,auc,0.805
2,loss,0.0002
3,precision,0.4874
4,recall,0.7781
5,val_accuracy,0.7821
6,val_auc,0.8512
7,val_loss,0.4394
8,val_precision,0.566
9,val_recall,0.7587


In [21]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = base_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.4394136965274811
Validation Accuracy: 0.7821149826049805
Validation AUC: 0.8512362241744995
Validation Precision: 0.5659999847412109
Validation Recall: 0.7587131261825562
Validation F1-Score: 0.6483390468489076


# Weighted Cross-Entropy Loss and Monitoring With AUC

In [22]:
class_weights = {0: class_weight_for_0, 1: class_weight_for_1}

base_model = Sequential([
    Input(shape=(train_ds.element_spec[0].shape[1],)),
    Dense(50, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')])

optimizer = Adam(learning_rate=0.001)

base_model.compile(optimizer=optimizer,
                   loss="binary_crossentropy",
                   metrics=["accuracy", "precision", "recall", "auc"])

early_stopping = EarlyStopping(monitor='val_auc', #*******
                               patience=20,
                               verbose=1,
                               restore_best_weights=True,
                               mode='max') #*******

base_model_history = base_model.fit(train_ds,
                                    epochs=1000,
                                    validation_data=val_ds,
                                    verbose=1,
                                    callbacks=early_stopping,
                                    class_weight=class_weights)

Epoch 1/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5057 - auc: 0.5211 - loss: 0.0312 - precision: 0.2775 - recall: 0.5288 - val_accuracy: 0.6636 - val_auc: 0.7965 - val_loss: 0.6646 - val_precision: 0.4302 - val_recall: 0.8338
Epoch 2/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6461 - auc: 0.7348 - loss: 0.0024 - precision: 0.4076 - recall: 0.7327 - val_accuracy: 0.7928 - val_auc: 0.8439 - val_loss: 0.5916 - val_precision: 0.5940 - val_recall: 0.6863
Epoch 3/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7267 - auc: 0.8105 - loss: 2.3716e-04 - precision: 0.4896 - recall: 0.7820 - val_accuracy: 0.7999 - val_auc: 0.8503 - val_loss: 0.5287 - val_precision: 0.6022 - val_recall: 0.7185
Epoch 4/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7144 - auc: 0.8017 - loss: 1.6788e-04 - precision: 0.4821 - 

In [23]:
get_best_epoch_details(base_model_history, metric="val_auc", mode=max)

Unnamed: 0,Metric,Value
0,accuracy,0.7226
1,auc,0.8109
2,loss,0.0002
3,precision,0.4859
4,recall,0.7734
5,val_accuracy,0.7899
6,val_auc,0.8533
7,val_loss,0.4262
8,val_precision,0.5842
9,val_recall,0.7158


In [24]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = base_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.42617955803871155
Validation Accuracy: 0.7899219393730164
Validation AUC: 0.8533284068107605
Validation Precision: 0.5842450857162476
Validation Recall: 0.7158176898956299
Validation F1-Score: 0.643373497704287


# Hyperparameter Optimization


## Search Space

In [25]:
def build_model(hp):
    # Initialize a sequential model
    model = Sequential()
    model.add(Input(shape=(train_ds.element_spec[0].shape[1],)))

    # Hidden layers with advanced activation functions, L2 regularization, and Dropout
    for i in range(hp.Int('num_layers', 1, 10)):
        # Add a Dense layer with tunable number of units and L2 regularization
        model.add(Dense(
            units=hp.Int('units_' + str(i + 1), min_value=32, max_value=512, step=16),
            kernel_regularizer=l2(hp.Float('l2_' + str(i + 1), min_value=0.0001, max_value=0.01, sampling='log'))
        ))

        # Choose an activation function dynamically
        activation_choice = hp.Choice('activation_' + str(i + 1), values=['relu', 'leaky_relu', 'prelu'])

        if activation_choice == 'relu':
            model.add(ReLU())  # ReLU activation function
        elif activation_choice == 'leaky_relu':
            model.add(LeakyReLU(negative_slope=0.01))  # Leaky ReLU to prevent dying neurons
        elif activation_choice == 'prelu':
            model.add(PReLU())  # Parametric ReLU for adaptive activation
        else:
            model.add(Activation(activation_choice))

        # Add Batch Normalization for stabilizing training
        model.add(BatchNormalization())

        # Apply Dropout for regularization
        model.add(Dropout(hp.Float('dropout_' + str(i + 1), min_value=0.0, max_value=0.5, step=0.1)))

    # Output layer with sigmoid activation for binary classification
    model.add(Dense(1, activation='sigmoid'))

    # Configure the optimizer (Adam) with tunable beta parameters
    optimizer = Adam(
        beta_1=hp.Float('beta1', min_value=0.85, max_value=0.99, step=0.01),
        beta_2=hp.Float('beta2', min_value=0.995, max_value=0.999, step=0.001)
    )

    # Compile the model with binary cross-entropy loss and key evaluation metrics
    model.compile(optimizer=optimizer,
                  loss="binary_crossentropy",
                  metrics=["accuracy", "precision", "recall", "auc"])

    return model


## Random Search

In [26]:
class_weights = {0: class_weight_for_0, 1: class_weight_for_1}

random_search_tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=30,
    executions_per_trial=1,
    overwrite=True)

early_stopping = EarlyStopping(
    monitor='val_auc',
    patience=10,
    verbose=1,
    restore_best_weights=True,
    mode='max')

model_checkpoint = ModelCheckpoint(
    'final_tuned_model.keras',
    monitor='val_auc',
    verbose=0,
    save_best_only=True)

random_search_tuner.search(train_ds,
                           epochs=250,

                           validation_data=val_ds,

                           callbacks=[early_stopping, model_checkpoint],

                           class_weight=class_weights)

Trial 30 Complete [00h 00m 15s]
val_loss: 0.4386385679244995

Best val_loss So Far: 0.41866570711135864
Total elapsed time: 00h 13m 01s


In [27]:
best_hps = random_search_tuner.get_best_hyperparameters(num_trials=1)[0]

print_hyperparameters(best_hps.values)

   Hyperparameter       Value
0      num_layers           2
1         units_1         288
2            l2_1     0.00055
3    activation_1       prelu
4       dropout_1     0.10000
5           beta1     0.94000
6           beta2     0.99700
7         units_2         480
8            l2_2     0.00034
9    activation_2       prelu
10      dropout_2     0.40000
11        units_3          64
12           l2_3     0.00018
13   activation_3       prelu
14      dropout_3     0.00000
15        units_4         128
16           l2_4     0.00570
17   activation_4        relu
18      dropout_4     0.10000
19        units_5         240
20           l2_5     0.00040
21   activation_5       prelu
22      dropout_5     0.40000
23        units_6         512
24           l2_6     0.00019
25   activation_6        relu
26      dropout_6     0.40000
27        units_7         464
28           l2_7     0.00011
29   activation_7       prelu
30      dropout_7     0.10000
31        units_8         400
32        

In [28]:
dump(best_hps, 'best_hps.joblib')

['best_hps.joblib']

In [29]:
best_model = random_search_tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [30]:
best_model.summary()

In [31]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = best_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.41866570711135864
Validation Accuracy: 0.7977288961410522
Validation AUC: 0.8529105186462402
Validation Precision: 0.6042653918266296
Validation Recall: 0.6836460828781128
Validation F1-Score: 0.6415094146680984


# Retrain for Entire Dataset

In [32]:
def build_model(hp):
    model = Sequential()
    model.add(Input(shape=(train_ds.element_spec[0].shape[1],)))

    # Hidden layers with advanced activation functions, l2, Dropout
    for i in range(hp.Int('num_layers', 1, 10)):
        # Add Dense layer
        model.add(Dense(
            units=hp.Int('units_' + str(i + 1), min_value=32, max_value=512, step=16),
            kernel_regularizer=l2(hp.Float('l2_' + str(i + 1), min_value=0.0001, max_value=0.01, sampling='log'))
        ))

        # Activation layer choice
        activation_choice = hp.Choice('activation_' + str(i + 1), values=['relu', 'leaky_relu', 'prelu'])

        if activation_choice == 'relu':
            model.add(ReLU())
        elif activation_choice == 'leaky_relu':
            model.add(LeakyReLU(negative_slope=0.01))
        elif activation_choice == 'prelu':
            model.add(PReLU())
        else:
            model.add(Activation(activation_choice))

        # Batch Normalization and Dropout
        model.add(BatchNormalization())
        model.add(Dropout(hp.Float('dropout_' + str(i + 1), min_value=0.0, max_value=0.5, step=0.1)))

    model.add(Dense(1, activation='sigmoid'))

    # Optimizer: Adam with tuning for beta1 and beta2
    optimizer = Adam(
        beta_1=hp.Float('beta1', min_value=0.85, max_value=0.99, step=0.01),
        beta_2=hp.Float('beta2', min_value=0.995, max_value=0.999, step=0.001)
    )

    model.compile(optimizer=optimizer,
                  loss="binary_crossentropy",
                  metrics=["accuracy", "precision", "recall", "auc"])

    return model

In [33]:
df = pd.read_csv("churn.csv")

In [34]:
X, y = dataproprocessing(df)

In [35]:
dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(buffer_size=len(X)).batch(len(X))

In [36]:
best_hps = load('best_hps.joblib')

final_tuned_model = build_model(best_hps)

In [37]:
early_stopping = EarlyStopping(
    monitor='loss',
    patience=5,
    verbose=1,
    restore_best_weights=True)

model_checkpoint = ModelCheckpoint(
    'final_tuned_all_data_model.keras',
    monitor='loss',
    verbose=0,
    save_best_only=True)

final_history = final_tuned_model.fit(dataset,
            epochs=100,
            verbose=1,
            callbacks=[early_stopping, model_checkpoint])

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5207 - auc: 0.5367 - loss: 1.0921 - precision: 0.2851 - recall: 0.5350
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step - accuracy: 0.6521 - auc: 0.7688 - loss: 0.9859 - precision: 0.4202 - recall: 0.8186
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step - accuracy: 0.6760 - auc: 0.7998 - loss: 1.0180 - precision: 0.4416 - recall: 0.8357
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step - accuracy: 0.6973 - auc: 0.8165 - loss: 0.9553 - precision: 0.4617 - recall: 0.8486
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step - accuracy: 0.7000 - auc: 0.8123 - loss: 0.8857 - precision: 0.4632 - recall: 0.8208
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step - accuracy: 0.6986 - auc: 0.7977 - loss: 0.8392 - precision: 0.4605 

In [38]:
get_best_epoch_details(final_history, metric="loss", mode=min)

Unnamed: 0,Metric,Value
0,accuracy,0.8542
1,auc,0.9136
2,loss,0.4594
3,precision,0.742
4,recall,0.6907
5,best_epoch,99.0


In [39]:
final_tuned_model.save('final_tuned_all_data_model.keras')