In [10]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
import mlflow
import dagshub
from dagshub import dagshub_logger
import keras_tuner
from keras_tuner import BayesianOptimization
from tensorflow import keras
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
import json

In [2]:
DF_PATH = "../data/dataframes/telecom_churn_scaled.csv"
df = pd.read_csv(DF_PATH)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3333 entries, 0 to 3332
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   AccountWeeks         3333 non-null   float64
 1   DataUsage            3333 non-null   float64
 2   CustServCalls        3333 non-null   float64
 3   DayMins              3333 non-null   float64
 4   DayCalls             3333 non-null   float64
 5   MonthlyCharge        3333 non-null   float64
 6   OverageFee           3333 non-null   float64
 7   RoamMins             3333 non-null   float64
 8   AvgMinPerCall        3333 non-null   float64
 9   AvgDataUsagePerWeek  3333 non-null   float64
 10  RoamMinsRatio        3333 non-null   float64
 11  ComplaintIndex       3333 non-null   float64
 12  Churn                3333 non-null   int64  
 13  ContractRenewal      3333 non-null   int64  
 14  DataPlan             3333 non-null   int64  
dtypes: float64(12), int64(3)
memory usage:

In [3]:
X = df.drop(columns=['Churn'])
y = df['Churn']

X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.15, shuffle=True, stratify=y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=(15/85), shuffle=True, stratify=y_temp, random_state=42)

In [4]:
# Defining MLP model
def build_model(hp):
    # Input layer
    input_layer = keras.layers.Input(shape=(14,), dtype=float)
    x = input_layer

    # Dense layers
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])
    dropout_rate = hp.Float('dropout_rate', 0.1, 0.5, step=0.1)
    batch_norm = hp.Boolean('batch_norm')

    # Dense Layers 
    x = keras.layers.Dense(units=256, activation=activation)(x)
    if batch_norm:
        x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate)(x)

    x = keras.layers.Dense(units=128, activation=activation)(x)
    if batch_norm:
        x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate)(x)

    x = keras.layers.Dense(units=64, activation=activation)(x)
    if batch_norm:
        x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate)(x)
    
    x = keras.layers.Dense(units=32, activation=activation)(x)
    if batch_norm:
        x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate)(x)

    # Output Layer
    output_layer = keras.layers.Dense(1, activation='sigmoid')(x)

    # Defyning the optimizer
    optimizer = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd'])

    if optimizer == 'adam':
        optimizer = keras.optimizers.Adam(
            learning_rate=hp.Float('adam_lr', 1e-5, 1e-2, sampling='log')
    )
    elif optimizer == 'rmsprop':
        optimizer = keras.optimizers.RMSprop(
            learning_rate=hp.Float('rmsprop_lr', 1e-5, 1e-2, sampling='log'),
            rho=hp.Float('rmsprop_rho', 0.8, 0.99)
    )
    elif optimizer == 'sgd':
        optimizer = keras.optimizers.SGD(
            learning_rate=hp.Float('sgd_lr', 1e-4, 1e-1, sampling='log'),
            momentum=hp.Float('sgd_momentum', 0.0, 0.99)
    )
        
    # Instantiating the model
    model = keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=[
            keras.metrics.AUC(name='auc_pr', curve='PR'), # best-practice for a rare positive class
            keras.metrics.Precision(name='precision'),
            keras.metrics.Recall(name='recall')
        ]
    )

    return model  

In [5]:
base_callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5, min_lr=1e-5)
]

In [None]:
# Instantiating keras tuner
tuner = BayesianOptimization(
    hypermodel= build_model,
    objective= keras_tuner.Objective('val_auc_pr', direction='max'), # Same objective as the model itself
    max_trials=10,
    seed=42,
    executions_per_trial=1,
    directory='tuner_trials',
    project_name='mlp_churn_prediction',
    overwrite=True
)

# Defining class weights to prevent unbalanced predictions
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))

# Starting the optimization
tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size = 64,
    callbacks = base_callbacks,
    class_weight=class_weight_dict
)

Trial 10 Complete [00h 00m 17s]
val_auc_pr: 0.42211705446243286

Best val_auc_pr So Far: 0.6892575025558472
Total elapsed time: 00h 03m 34s


In [7]:
best_model = tuner.get_best_models(num_models=1)[0]
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

for param, value in best_hps.values.items():
    print(f"{param} : {value}")

# Saving the model
model_path = "../models/mlp/mlp_param2.keras"
best_model.save(model_path)

activation : relu
dropout_rate : 0.1
batch_norm : True
optimizer : sgd
adam_lr : 0.005361764238699136
rmsprop_lr : 9.359659238480389e-05
rmsprop_rho : 0.813951064412443
sgd_lr : 0.0006909496810943752
sgd_momentum : 0.8511311607399468


  saveable.load_own_variables(weights_store.get(inner_path))


In [11]:
# Evaluating the model
results = best_model.evaluate(X_test, y_test)
print("Evaluation results:", results)

y_probs = best_model.predict(X_test)
y_pred = (y_probs > 0.5).astype(int) # Label assignement for a binary task

y_true = np.array(y_test)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - auc_pr: 0.6654 - loss: 0.3691 - precision: 0.5155 - recall: 0.6944
Evaluation results: [0.3691120743751526, 0.6653565168380737, 0.5154638886451721, 0.6944444179534912]
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [12]:
report = classification_report(y_true=y_true, y_pred=y_pred, output_dict=True, zero_division=0)
print("Classification Report", json.dumps(report, indent=4))

Classification Report {
    "0": {
        "precision": 0.9454094292803971,
        "recall": 0.8901869158878505,
        "f1-score": 0.9169675090252708,
        "support": 428.0
    },
    "1": {
        "precision": 0.5154639175257731,
        "recall": 0.6944444444444444,
        "f1-score": 0.591715976331361,
        "support": 72.0
    },
    "accuracy": 0.862,
    "macro avg": {
        "precision": 0.7304366734030852,
        "recall": 0.7923156801661475,
        "f1-score": 0.7543417426783159,
        "support": 500.0
    },
    "weighted avg": {
        "precision": 0.8834972755877313,
        "recall": 0.862,
        "f1-score": 0.8701312883173478,
        "support": 500.0
    }
}
