# Telco Customer Churn Prediction

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive




**Problem**: Development of a machine learning model that can predict customers who are likely to churn is required.

The Telco customer churn data contains information about 7043 customers who were provided with home telephone and Internet services by a telecom company in California during a specific quarter of a given year.

**21 Variables, 7043 Observations**

Each row represents a unique customer.

Variables include information about the services customers are subscribed to, details about their accounts, contracts, etc.

**Churn**: Whether the customer has churned (Yes or No) - customers who left in the last month or quarter

**MonthlyCharges**: The amount billed to the customer on a monthly basis

**TotalCharges**: The total amount billed to the customer

**CustomerId**: Customer Id

**Gender**: Gender

**SeniorCitizen**: Whether the customer is a senior citizen (1, 0)

**Partner**: Whether the customer has a partner (Yes, No)

**Dependents**: Whether the customer has dependents (Yes, No) (Children, mother, father, grandmother)

**Tenure**: The number of months the customer has been with the company

**PhoneService**: Whether the customer has phone service (Yes, No)

**MultipleLines**: Whether the customer has multiple lines (Yes, No, No phone service)

**InternetService**: The type of internet service provider the customer has (DSL, Fiber optic, None)

**OnlineSecurity**: Whether the customer has online security (Yes, No, No internet service)

**OnlineBackup**: Whether the customer has online backup (Yes, No, No internet service)

**DeviceProtection**: Whether the customer has device protection (Yes, No, No internet service)

**TechSupport**: Whether the customer receives technical support (Yes, No, No internet service)

**StreamingTV**: Whether the customer has TV streaming (Yes, No, No internet service) This indicates if the customer uses their Internet service to stream television programs from a third-party provider

**StreamingMovies**: Whether the customer has movie streaming (Yes, No, No internet service) This indicates if the customer uses their Internet service to stream movies from a third-party provider

**Contract**: The customer's contract duration (Month-to-month, One year, Two years)

**PaperlessBilling**: Whether the customer has paperless billing (Yes, No)

**PaymentMethod**: The customer's payment method (Electronic check, Mailed check, Bank transfer (automatic), Credit card (automatic))

In [2]:
import keras
import tensorflow as tf
print("Keras Current Version:", keras.__version__, "Tensorflow Current Version:", tf.__version__)

Keras Current Version: 3.5.0 Tensorflow Current Version: 2.17.1


In [1]:
#!pip uninstall tf-keras
#!pip install keras-tuner
#!pip install tensorflow==2.16.1

# Imports

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import random
from joblib import dump, load

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Activation
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import SGD, RMSprop, Adam
from keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import ReLU, LeakyReLU, PReLU
from keras_tuner import RandomSearch
from keras_tuner.engine.hyperparameters import HyperParameters


random.seed(46)
np.random.seed(46)
tf.random.set_seed(46)

pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)


# Functions


In [3]:
def grab_col_names(dataframe, cat_th=10, car_th=20):
    # cat_cols, cat_but_car
    cat_cols = [col for col in dataframe.columns if dataframe[col].dtypes == "O"]

    num_but_cat = [col for col in dataframe.columns if
                   dataframe[col].nunique() < cat_th and dataframe[col].dtypes != "O"]

    #cat_but_car
    cat_but_car = [col for col in dataframe.columns if
                   dataframe[col].nunique() > car_th and dataframe[col].dtypes == "O"]

    cat_cols = cat_cols + num_but_cat

    cat_cols = [col for col in cat_cols if col not in cat_but_car]

    # num_cols
    num_cols = [col for col in dataframe.columns if dataframe[col].dtypes != "O"]

    num_cols = [col for col in num_cols if col not in num_but_cat]

    return cat_cols, num_cols, cat_but_car


def prepare_datasets(X_train, X_val, y_train, y_val, batch_size=None):
    if batch_size is None:
        batch_size = len(X_train)
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)
    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
    val_dataset = val_dataset.batch(batch_size)
    return train_dataset, val_dataset

def plot_training_history(history, train_loss='loss', train_metric='accuracy', val_loss='val_loss', val_metric='val_accuracy'):

    #Loss
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[train_loss], label='Training Loss')
    plt.plot(history.history[val_loss], label='Validation Loss')
    plt.title('Training and Validation Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # Metrics
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[train_metric], label=f"Training: {train_metric}")
    plt.plot(history.history[val_metric], label=f"Validation: {val_metric}")
    plt.title(f'Training and Validation {train_metric} Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel(f'train_metric')
    plt.legend()
    plt.show()

def get_best_epoch_details(history, metric="val_loss", mode=min):
    metric_values = history.history[metric]
    min_metric_value_index = metric_values.index(mode(metric_values))
    best_epoch = min_metric_value_index + 1

    metrics = []
    values = []

    for key, value in history.history.items():
        metrics.append(key)
        values.append(value[min_metric_value_index])

    data = {'Metric': metrics, 'Value': values}
    df = pd.DataFrame(data)
    df['Value'] = df['Value'].map('{:.4f}'.format)
    best_epoch_data = pd.DataFrame({'Metric': ['best_epoch'], 'Value': [str(best_epoch)]})
    df = pd.concat([df, best_epoch_data], ignore_index=True)
    return df

def print_hyperparameters(hyperparameters):
    hp_df = pd.DataFrame(list(hyperparameters.items()), columns=['Hyperparameter', 'Value'])
    print(hp_df)


def dataproprocessing(dataframe):

    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe)


    dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median(), inplace=True)

    # feature engineering
    dataframe.loc[(dataframe["tenure"] >= 0) & (dataframe["tenure"] <= 12), "NEW_TENURE_YEAR"] = "0-1 Year"
    dataframe.loc[(dataframe["tenure"] > 12) & (dataframe["tenure"] <= 24), "NEW_TENURE_YEAR"] = "1-2 Year"
    dataframe.loc[(dataframe["tenure"] > 24) & (dataframe["tenure"] <= 36), "NEW_TENURE_YEAR"] = "2-3 Year"
    dataframe.loc[(dataframe["tenure"] > 36) & (dataframe["tenure"] <= 48), "NEW_TENURE_YEAR"] = "3-4 Year"
    dataframe.loc[(dataframe["tenure"] > 48) & (dataframe["tenure"] <= 60), "NEW_TENURE_YEAR"] = "4-5 Year"
    dataframe.loc[(dataframe["tenure"] > 60) & (dataframe["tenure"] <= 72), "NEW_TENURE_YEAR"] = "5-6 Year"

    dataframe["NEW_Engaged"] = dataframe["Contract"].apply(lambda x: 1 if x in ["One year", "Two year"] else 0)

    dataframe["NEW_noProt"] = dataframe.apply(lambda x: 1 if (x["OnlineBackup"] != "Yes") or (x["DeviceProtection"] != "Yes") or (
                x["TechSupport"] != "Yes") else 0, axis=1)

    dataframe["NEW_Young_Not_Engaged"] = dataframe.apply(lambda x: 1 if (x["NEW_Engaged"] == 0) and (x["SeniorCitizen"] == 0) else 0,
                                          axis=1)

    dataframe['NEW_TotalServices'] = (dataframe[['PhoneService', 'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection', 'TechSupport',
                                  'StreamingTV', 'StreamingMovies']] == 'Yes').sum(axis=1)

    dataframe["NEW_FLAG_ANY_STREAMING"] = dataframe.apply(
        lambda x: 1 if (x["StreamingTV"] == "Yes") or (x["StreamingMovies"] == "Yes") else 0, axis=1)

    dataframe["NEW_FLAG_AutoPayment"] = dataframe["PaymentMethod"].apply(
        lambda x: 1 if x in ["Bank transfer (automatic)", "Credit card (automatic)"] else 0)

    dataframe["NEW_AVG_Charges"] = dataframe["TotalCharges"] / (dataframe["tenure"] + 1)

    dataframe["NEW_Increase"] = dataframe["NEW_AVG_Charges"] / dataframe["MonthlyCharges"]

    dataframe["NEW_AVG_Service_Fee"] = dataframe["MonthlyCharges"] / (dataframe['NEW_TotalServices'] + 1)

    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe)

    cat_cols.remove("Churn")

    dataframe = pd.get_dummies(dataframe, columns=cat_cols, drop_first=True, dtype=int)

    scaler = MinMaxScaler()

    dataframe[num_cols] = scaler.fit_transform(dataframe[num_cols])

    dump(scaler, 'scaler.joblib')

    dataframe.columns = [col.replace(' ', '_').upper() for col in dataframe.columns]

    y = dataframe["CHURN"]
    X = dataframe.drop(["CHURN", "CUSTOMERID"], axis=1)

    return X, y

# Data Preprocessing

In [4]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/telco_customer_churn.csv")

In [5]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,0
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,1
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,1


In [6]:
df["Churn"].value_counts() * 100 / len(df)

Unnamed: 0_level_0,count
Churn,Unnamed: 1_level_1
0,73.46301
1,26.53699


In [7]:
X, y = dataproprocessing(df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median(), inplace=True)


In [8]:
X.head()

Unnamed: 0,TENURE,MONTHLYCHARGES,TOTALCHARGES,NEW_AVG_CHARGES,NEW_INCREASE,NEW_AVG_SERVICE_FEE,GENDER_MALE,PARTNER_YES,DEPENDENTS_YES,PHONESERVICE_YES,MULTIPLELINES_NO_PHONE_SERVICE,MULTIPLELINES_YES,INTERNETSERVICE_FIBER_OPTIC,INTERNETSERVICE_NO,ONLINESECURITY_NO_INTERNET_SERVICE,ONLINESECURITY_YES,ONLINEBACKUP_NO_INTERNET_SERVICE,ONLINEBACKUP_YES,DEVICEPROTECTION_NO_INTERNET_SERVICE,DEVICEPROTECTION_YES,TECHSUPPORT_NO_INTERNET_SERVICE,TECHSUPPORT_YES,STREAMINGTV_NO_INTERNET_SERVICE,STREAMINGTV_YES,STREAMINGMOVIES_NO_INTERNET_SERVICE,STREAMINGMOVIES_YES,CONTRACT_ONE_YEAR,CONTRACT_TWO_YEAR,PAPERLESSBILLING_YES,PAYMENTMETHOD_CREDIT_CARD_(AUTOMATIC),PAYMENTMETHOD_ELECTRONIC_CHECK,PAYMENTMETHOD_MAILED_CHECK,NEW_TENURE_YEAR_1-2_YEAR,NEW_TENURE_YEAR_2-3_YEAR,NEW_TENURE_YEAR_3-4_YEAR,NEW_TENURE_YEAR_4-5_YEAR,NEW_TENURE_YEAR_5-6_YEAR,SENIORCITIZEN_1,NEW_ENGAGED_1,NEW_NOPROT_1,NEW_YOUNG_NOT_ENGAGED_1,NEW_TOTALSERVICES_1,NEW_TOTALSERVICES_2,NEW_TOTALSERVICES_3,NEW_TOTALSERVICES_4,NEW_TOTALSERVICES_5,NEW_TOTALSERVICES_6,NEW_TOTALSERVICES_7,NEW_FLAG_ANY_STREAMING_1,NEW_FLAG_AUTOPAYMENT_1
0,0.01389,0.11542,0.00128,0.00414,0.00041,0.2071,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0
1,0.47222,0.38507,0.21587,0.03227,0.00677,0.18441,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0
2,0.02778,0.35423,0.01031,0.01935,0.00282,0.15883,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0
3,0.625,0.2393,0.21024,0.02221,0.00674,0.06353,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1
4,0.02778,0.52189,0.01533,0.0298,0.00346,0.88119,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0


In [9]:
X.shape

(7043, 50)

In [10]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

train_ds, val_ds = prepare_datasets(X_train, X_val, y_train, y_val, batch_size=32)

# Base Model with Binary Log Loss

In [11]:
X_train.shape[0]

5634

In [12]:
X_val.shape[0]

1409

In [13]:
base_model = Sequential([

    Input(shape=(train_ds.element_spec[0].shape[1],)),
    Dense(50, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')])

optimizer = Adam(learning_rate=0.001)

base_model.compile(optimizer=optimizer,
                   loss="binary_crossentropy",
                   metrics=["accuracy", "precision", "recall", "auc"])

early_stopping = EarlyStopping(monitor='val_loss',
                               patience=20,
                               verbose=1,
                               restore_best_weights=True)

base_model_history = base_model.fit(train_ds,
                                    epochs=1000,
                                    validation_data=val_ds,
                                    verbose=1,
                                    callbacks=early_stopping)

Epoch 1/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.5781 - auc: 0.5995 - loss: 0.8961 - precision: 0.3324 - recall: 0.5478 - val_accuracy: 0.7743 - val_auc: 0.8271 - val_loss: 0.5062 - val_precision: 0.6455 - val_recall: 0.3271
Epoch 2/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7420 - auc: 0.7729 - loss: 0.5657 - precision: 0.5251 - recall: 0.5455 - val_accuracy: 0.7935 - val_auc: 0.8448 - val_loss: 0.4640 - val_precision: 0.6708 - val_recall: 0.4316
Epoch 3/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7558 - auc: 0.7885 - loss: 0.5262 - precision: 0.5518 - recall: 0.4808 - val_accuracy: 0.8034 - val_auc: 0.8478 - val_loss: 0.4541 - val_precision: 0.6538 - val_recall: 0.5469
Epoch 4/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7839 - auc: 0.8129 - loss: 0.4931 - precision: 0.5989 - recall: 

In [14]:
get_best_epoch_details(base_model_history, metric="val_loss", mode=min)

Unnamed: 0,Metric,Value
0,accuracy,0.8055
1,auc,0.8519
2,loss,0.4233
3,precision,0.6733
4,recall,0.5194
5,val_accuracy,0.8119
6,val_auc,0.8555
7,val_loss,0.4191
8,val_precision,0.672
9,val_recall,0.5657


In [15]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = base_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.41914066672325134
Validation Accuracy: 0.8119233250617981
Validation AUC: 0.8555474877357483
Validation Precision: 0.6719745397567749
Validation Recall: 0.5656836628913879
Validation F1-Score: 0.6142649371305102


In [17]:
df["Churn"].value_counts() * 100 / len(df)

Unnamed: 0_level_0,count
Churn,Unnamed: 1_level_1
0,73.46301
1,26.53699


# Weighted Cross-Entropy Loss

In [18]:
len(df[df['Churn'] == 1])

1869

In [19]:
len(df[df['Churn'] == 0])

5174

In [20]:
class_weight_for_0 = 1.0 / len(df[df['Churn'] == 0])

class_weight_for_1 = 1.0 / len(df[df['Churn'] == 1])

In [21]:
class_weight_for_0

0.00019327406262079628

In [None]:
class_weight_for_1

0.0005350454788657035

In [23]:
class_weights = {0: class_weight_for_0, 1: class_weight_for_1}

In [24]:
base_model = Sequential([
    Input(shape=(train_ds.element_spec[0].shape[1],)),
    Dense(50, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')])

optimizer = Adam(learning_rate=0.001)

base_model.compile(optimizer=optimizer,
                   loss="binary_crossentropy",
                   metrics=["accuracy", "precision", "recall", "auc"])

early_stopping = EarlyStopping(monitor='val_loss',
                               patience=20,
                               verbose=1,
                               restore_best_weights=True,
                               mode='min')

base_model_history = base_model.fit(train_ds,
                                    epochs=1000,
                                    validation_data=val_ds,
                                    verbose=1,
                                    callbacks=early_stopping,
                                    class_weight=class_weights)

Epoch 1/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.5298 - auc: 0.5516 - loss: 0.0312 - precision: 0.2941 - recall: 0.5483 - val_accuracy: 0.5912 - val_auc: 0.7925 - val_loss: 0.6787 - val_precision: 0.3843 - val_recall: 0.9035
Epoch 2/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6478 - auc: 0.7549 - loss: 0.0024 - precision: 0.4147 - recall: 0.7716 - val_accuracy: 0.7828 - val_auc: 0.8448 - val_loss: 0.6080 - val_precision: 0.5743 - val_recall: 0.6944
Epoch 3/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7065 - auc: 0.7989 - loss: 2.4009e-04 - precision: 0.4666 - recall: 0.7909 - val_accuracy: 0.7324 - val_auc: 0.8425 - val_loss: 0.5428 - val_precision: 0.4967 - val_recall: 0.8043
Epoch 4/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7395 - auc: 0.8138 - loss: 1.6240e-04 - precision: 0.5058 - 

In [25]:
get_best_epoch_details(base_model_history, metric="val_loss", mode=min)

Unnamed: 0,Metric,Value
0,accuracy,0.7208
1,auc,0.8114
2,loss,0.0002
3,precision,0.4843
4,recall,0.7955
5,val_accuracy,0.7906
6,val_auc,0.8415
7,val_loss,0.441
8,val_precision,0.5882
9,val_recall,0.6971


In [26]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = base_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.44097182154655457
Validation Accuracy: 0.7906316518783569
Validation AUC: 0.8414620757102966
Validation Precision: 0.5882353186607361
Validation Recall: 0.697050929069519
Validation F1-Score: 0.6380368203706546


In [None]:
# weighted binary log loss
# Validation Loss: 0.4474751055240631
# Validation Accuracy: 0.7743080258369446
# Validation AUC: 0.8461032509803772
# Validation Precision: 0.5564681887626648
# Validation Recall: 0.7265415787696838
# Validation F1-Score: 0.6302325775373169

# binary log loss
# Validation Loss: 0.42540422081947327
# Validation Accuracy: 0.8019872307777405
# Validation AUC: 0.8505944013595581
# Validation Precision: 0.6577181220054626
# Validation Recall: 0.525469183921814
# Validation F1-Score: 0.5842026923200292

# Weighted Cross-Entropy Loss and Monitoring With AUC

In [27]:
class_weights = {0: class_weight_for_0, 1: class_weight_for_1}

base_model = Sequential([
    Input(shape=(train_ds.element_spec[0].shape[1],)),
    Dense(50, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')])

optimizer = Adam(learning_rate=0.001)

base_model.compile(optimizer=optimizer,
                   loss="binary_crossentropy",
                   metrics=["accuracy", "precision", "recall", "auc"])

early_stopping = EarlyStopping(monitor='val_auc',
                               patience=20,
                               verbose=1,
                               restore_best_weights=True,
                               mode='max')

base_model_history = base_model.fit(train_ds,
                                    epochs=1000,
                                    validation_data=val_ds,
                                    verbose=1,
                                    callbacks=early_stopping,
                                    class_weight=class_weights)

Epoch 1/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.5456 - auc: 0.5842 - loss: 0.0317 - precision: 0.3096 - recall: 0.5957 - val_accuracy: 0.6196 - val_auc: 0.8057 - val_loss: 0.6688 - val_precision: 0.4024 - val_recall: 0.9008
Epoch 2/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6836 - auc: 0.7663 - loss: 0.0025 - precision: 0.4393 - recall: 0.7477 - val_accuracy: 0.7381 - val_auc: 0.8485 - val_loss: 0.6054 - val_precision: 0.5033 - val_recall: 0.8097
Epoch 3/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7205 - auc: 0.8160 - loss: 2.3894e-04 - precision: 0.4713 - recall: 0.7833 - val_accuracy: 0.7239 - val_auc: 0.8514 - val_loss: 0.5656 - val_precision: 0.4877 - val_recall: 0.8499
Epoch 4/1000
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7183 - auc: 0.8088 - loss: 1.6265e-04 - precision: 0.4718 - 

In [28]:
get_best_epoch_details(base_model_history, metric="val_auc", mode=max)

Unnamed: 0,Metric,Value
0,accuracy,0.7213
1,auc,0.8036
2,loss,0.0002
3,precision,0.4849
4,recall,0.7928
5,val_accuracy,0.7147
6,val_auc,0.8555
7,val_loss,0.5412
8,val_precision,0.4785
9,val_recall,0.8633


In [29]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = base_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.5411725044250488
Validation Accuracy: 0.7146912813186646
Validation AUC: 0.855521559715271
Validation Precision: 0.47845467925071716
Validation Recall: 0.8632707595825195
Validation F1-Score: 0.6156787706756951


In [None]:
# weighted binary log loss
# Validation Loss: 0.4474751055240631
# Validation Accuracy: 0.7743080258369446
# Validation AUC: 0.8461032509803772
# Validation Precision: 0.5564681887626648
# Validation Recall: 0.7265415787696838
# Validation F1-Score: 0.6302325775373169

# binary log loss
# Validation Loss: 0.42540422081947327
# Validation Accuracy: 0.8019872307777405
# Validation AUC: 0.8505944013595581
# Validation Precision: 0.6577181220054626
# Validation Recall: 0.525469183921814
# Validation F1-Score: 0.5842026923200292

# Hyperparameter Optimization


## Search Space

In [30]:
def build_model(hp):
    model = Sequential()
    model.add(Input(shape=(train_ds.element_spec[0].shape[1],)))

    # Hidden layers with advanced activation functions, l2, Dropout
    for i in range(hp.Int('num_layers', 1, 10)):
        # Add Dense layer
        model.add(Dense(
            units=hp.Int('units_' + str(i + 1), min_value=32, max_value=512, step=16),
            kernel_regularizer=l2(hp.Float('l2_' + str(i + 1), min_value=0.0001, max_value=0.01, sampling='log'))
        ))

        # Activation layer choice
        activation_choice = hp.Choice('activation_' + str(i + 1), values=['relu', 'leaky_relu', 'prelu'])

        if activation_choice == 'relu':
            model.add(ReLU())
        elif activation_choice == 'leaky_relu':
            model.add(LeakyReLU(negative_slope=0.01))
        elif activation_choice == 'prelu':
            model.add(PReLU())
        else:
            model.add(Activation(activation_choice))

        # Batch Normalization and Dropout
        model.add(BatchNormalization())
        model.add(Dropout(hp.Float('dropout_' + str(i + 1), min_value=0.0, max_value=0.5, step=0.1)))

    model.add(Dense(1, activation='sigmoid'))

    # Optimizer: Adam with tuning for beta1 and beta2
    optimizer = Adam(
        beta_1=hp.Float('beta1', min_value=0.85, max_value=0.99, step=0.01),
        beta_2=hp.Float('beta2', min_value=0.995, max_value=0.999, step=0.001)
    )

    model.compile(optimizer=optimizer,
                  loss="binary_crossentropy",
                  metrics=["accuracy", "precision", "recall", "auc"])

    return model


## Random Search

In [31]:
class_weights = {0: class_weight_for_0, 1: class_weight_for_1}

random_search_tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=30,
    executions_per_trial=1,
    overwrite=True)

early_stopping = EarlyStopping(
    monitor='val_auc',
    patience=10,
    verbose=1,
    restore_best_weights=True,
    mode='max')

model_checkpoint = ModelCheckpoint(
    'final_tuned_model.keras',
    monitor='val_auc',
    verbose=0,
    save_best_only=True)

random_search_tuner.search(train_ds,
                           epochs=250,

                           validation_data=val_ds,

                           callbacks=[early_stopping, model_checkpoint],

                           class_weight=class_weights)


Trial 30 Complete [00h 04m 34s]
val_loss: 0.43919867277145386

Best val_loss So Far: 0.41981950402259827
Total elapsed time: 00h 59m 51s


In [32]:
best_hps = random_search_tuner.get_best_hyperparameters(num_trials=1)[0]

print_hyperparameters(best_hps.values)

   Hyperparameter       Value
0      num_layers          10
1         units_1         464
2            l2_1     0.00036
3    activation_1  leaky_relu
4       dropout_1     0.10000
5           beta1     0.97000
6           beta2     0.99700
7         units_2         368
8            l2_2     0.00022
9    activation_2        relu
10      dropout_2     0.00000
11        units_3         112
12           l2_3     0.00021
13   activation_3       prelu
14      dropout_3     0.00000
15        units_4         240
16           l2_4     0.00063
17   activation_4       prelu
18      dropout_4     0.10000
19        units_5         160
20           l2_5     0.00771
21   activation_5  leaky_relu
22      dropout_5     0.40000
23        units_6         256
24           l2_6     0.00299
25   activation_6       prelu
26      dropout_6     0.00000
27        units_7         112
28           l2_7     0.00046
29   activation_7        relu
30      dropout_7     0.10000
31        units_8         192
32        

In [33]:
dump(best_hps, 'best_hps.joblib')

['best_hps.joblib']

In [34]:
best_model = random_search_tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [35]:
best_model.summary()

In [36]:
val_loss, val_accuracy, val_precision, val_recall, val_auc = best_model.evaluate(val_ds, verbose=0)
f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation AUC: {val_auc}")
print(f"Validation Precision: {val_precision}")
print(f"Validation Recall: {val_recall}")
print(f"Validation F1-Score: {f1_score}")

Validation Loss: 0.41981950402259827
Validation Accuracy: 0.8019872307777405
Validation AUC: 0.8501247763633728
Validation Precision: 0.6230366230010986
Validation Recall: 0.6380696892738342
Validation F1-Score: 0.6304635550152768


In [None]:
# weighted binary log loss monitor with auc
# Validation Loss: 0.45272842049598694
# Validation Accuracy: 0.7750177383422852
# Validation AUC: 0.8536428213119507
# Validation Precision: 0.5534350872039795
# Validation Recall: 0.777479887008667
# Validation F1-Score: 0.6465997564127386

# weighted binary log loss
# Validation Loss: 0.4474751055240631
# Validation Accuracy: 0.7743080258369446
# Validation AUC: 0.8461032509803772
# Validation Precision: 0.5564681887626648
# Validation Recall: 0.7265415787696838
# Validation F1-Score: 0.6302325775373169

# binary log loss
# Validation Loss: 0.42540422081947327
# Validation Accuracy: 0.8019872307777405
# Validation AUC: 0.8505944013595581
# Validation Precision: 0.6577181220054626
# Validation Recall: 0.525469183921814
# Validation F1-Score: 0.5842026923200292

# Retrain for Entire Dataset

In [37]:
def build_model(hp):
    model = Sequential()
    model.add(Input(shape=(train_ds.element_spec[0].shape[1],)))

    # Hidden layers with advanced activation functions, l2, Dropout
    for i in range(hp.Int('num_layers', 1, 10)):
        # Add Dense layer
        model.add(Dense(
            units=hp.Int('units_' + str(i + 1), min_value=32, max_value=512, step=16),
            kernel_regularizer=l2(hp.Float('l2_' + str(i + 1), min_value=0.0001, max_value=0.01, sampling='log'))
        ))

        # Activation layer choice
        activation_choice = hp.Choice('activation_' + str(i + 1), values=['relu', 'leaky_relu', 'prelu'])

        if activation_choice == 'relu':
            model.add(ReLU())
        elif activation_choice == 'leaky_relu':
            model.add(LeakyReLU(negative_slope=0.01))
        elif activation_choice == 'prelu':
            model.add(PReLU())
        else:
            model.add(Activation(activation_choice))

        # Batch Normalization and Dropout
        model.add(BatchNormalization())
        model.add(Dropout(hp.Float('dropout_' + str(i + 1), min_value=0.0, max_value=0.5, step=0.1)))

    model.add(Dense(1, activation='sigmoid'))

    # Optimizer: Adam with tuning for beta1 and beta2
    optimizer = Adam(
        beta_1=hp.Float('beta1', min_value=0.85, max_value=0.99, step=0.01),
        beta_2=hp.Float('beta2', min_value=0.995, max_value=0.999, step=0.001)
    )

    model.compile(optimizer=optimizer,
                  loss="binary_crossentropy",
                  metrics=["accuracy", "precision", "recall", "auc"])

    return model

In [38]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/telco_customer_churn.csv")

In [39]:
X, y = dataproprocessing(df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median(), inplace=True)


In [40]:
dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(buffer_size=len(X)).batch(len(X))

In [41]:
best_hps = load('best_hps.joblib')

final_tuned_model = build_model(best_hps)

In [42]:
early_stopping = EarlyStopping(
    monitor='loss',
    patience=5,
    verbose=1,
    restore_best_weights=True)

model_checkpoint = ModelCheckpoint(
    'final_tuned_all_data_model.keras',
    monitor='loss',
    verbose=0,
    save_best_only=True)

final_history = final_tuned_model.fit(dataset,
            epochs=100,
            verbose=1,
            callbacks=[early_stopping, model_checkpoint])

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step - accuracy: 0.5019 - auc: 0.4924 - loss: 6.7621 - precision: 0.2569 - recall: 0.4633
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5344 - auc: 0.5741 - loss: 6.5962 - precision: 0.3064 - recall: 0.5971
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5596 - auc: 0.6176 - loss: 6.4639 - precision: 0.3269 - recall: 0.6228
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.5865 - auc: 0.6520 - loss: 6.3328 - precision: 0.3484 - recall: 0.6410
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.6053 - auc: 0.6652 - loss: 6.2187 - precision: 0.3633 - recall: 0.6479
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.6269 - auc: 0.6928 - loss: 6.1017 - precision: 0.3834 - recall: 0.6

In [43]:
get_best_epoch_details(final_history, metric="loss", mode=min)

Unnamed: 0,Metric,Value
0,accuracy,0.842
1,auc,0.8906
2,loss,1.2452
3,precision,0.7234
4,recall,0.6549
5,best_epoch,100.0


In [44]:
final_tuned_model.save('final_tuned_all_data_model.keras')

# Prediction

## Imports

In [None]:
# !pip uninstall tf-keras
# !pip install keras-tuner
# !pip install tensorflow==2.16.1

Found existing installation: tf_keras 2.15.1
Uninstalling tf_keras-2.15.1:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/tf_keras-2.15.1.dist-info/*
    /usr/local/lib/python3.10/dist-packages/tf_keras/*
Proceed (Y/n)? y
  Successfully uninstalled tf_keras-2.15.1
Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Collecting tensorflow==2.16.1
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting h5py>=3.10.0 (from tensorflow==2.16.1)
  Downlo

In [45]:
import keras
import tensorflow as tf
print("Keras Current Version:", keras.__version__, "Tensorflow Current Version:", tf.__version__)

Keras Current Version: 3.5.0 Tensorflow Current Version: 2.16.1


In [46]:
import numpy as np
import pandas as pd

import random
from joblib import dump, load

from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model

def grab_col_names(dataframe, cat_th=10, car_th=20):
    cat_cols = [col for col in dataframe.columns if dataframe[col].dtypes == "O"]
    num_but_cat = [col for col in dataframe.columns if dataframe[col].nunique() < cat_th and dataframe[col].dtypes != "O"]
    cat_but_car = [col for col in dataframe.columns if dataframe[col].nunique() > car_th and dataframe[col].dtypes == "O"]
    cat_cols = cat_cols + num_but_cat
    cat_cols = [col for col in cat_cols if col not in cat_but_car]
    num_cols = [col for col in dataframe.columns if dataframe[col].dtypes != "O"]
    num_cols = [col for col in num_cols if col not in num_but_cat]
    return cat_cols, num_cols, cat_but_car

## New Customers

In [48]:
new_customers_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/new_customers.csv")

In [49]:
new_customers_df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65


In [50]:
new_customers_df.shape

(10, 20)

## Load Scaler & Final Model

In [51]:
scaler = load('scaler.joblib')

loaded_final_tuned_model = load_model("/content/final_tuned_all_data_model.keras", compile=False)

In [None]:
loaded_final_tuned_model.predict(new_customers_df)

In [53]:
def data_proprocess_prediction(dataframe, scaler):

    # feature engineering
    dataframe.loc[(dataframe["tenure"] >= 0) & (dataframe["tenure"] <= 12), "NEW_TENURE_YEAR"] = "0-1 Year"
    dataframe.loc[(dataframe["tenure"] > 12) & (dataframe["tenure"] <= 24), "NEW_TENURE_YEAR"] = "1-2 Year"
    dataframe.loc[(dataframe["tenure"] > 24) & (dataframe["tenure"] <= 36), "NEW_TENURE_YEAR"] = "2-3 Year"
    dataframe.loc[(dataframe["tenure"] > 36) & (dataframe["tenure"] <= 48), "NEW_TENURE_YEAR"] = "3-4 Year"
    dataframe.loc[(dataframe["tenure"] > 48) & (dataframe["tenure"] <= 60), "NEW_TENURE_YEAR"] = "4-5 Year"
    dataframe.loc[(dataframe["tenure"] > 60) & (dataframe["tenure"] <= 72), "NEW_TENURE_YEAR"] = "5-6 Year"

    dataframe["NEW_Engaged"] = dataframe["Contract"].apply(lambda x: 1 if x in ["One year", "Two year"] else 0)

    dataframe["NEW_noProt"] = dataframe.apply(lambda x: 1 if (x["OnlineBackup"] != "Yes") or (x["DeviceProtection"] != "Yes") or (
                x["TechSupport"] != "Yes") else 0, axis=1)

    dataframe["NEW_Young_Not_Engaged"] = dataframe.apply(lambda x: 1 if (x["NEW_Engaged"] == 0) and (x["SeniorCitizen"] == 0) else 0,
                                          axis=1)

    dataframe['NEW_TotalServices'] = (dataframe[['PhoneService', 'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection', 'TechSupport',
                                  'StreamingTV', 'StreamingMovies']] == 'Yes').sum(axis=1)

    dataframe["NEW_FLAG_ANY_STREAMING"] = dataframe.apply(
        lambda x: 1 if (x["StreamingTV"] == "Yes") or (x["StreamingMovies"] == "Yes") else 0, axis=1)

    dataframe["NEW_FLAG_AutoPayment"] = dataframe["PaymentMethod"].apply(
        lambda x: 1 if x in ["Bank transfer (automatic)", "Credit card (automatic)"] else 0)

    dataframe["NEW_AVG_Charges"] = dataframe["TotalCharges"] / (dataframe["tenure"] + 1)

    dataframe["NEW_Increase"] = dataframe["NEW_AVG_Charges"] / dataframe["MonthlyCharges"]

    dataframe["NEW_AVG_Service_Fee"] = dataframe["MonthlyCharges"] / (dataframe['NEW_TotalServices'] + 1)

    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe, cat_th=5)

    cat_cols.remove("customerID")

    dataframe = pd.get_dummies(dataframe, columns=cat_cols, drop_first=True, dtype=int)

    dataframe[num_cols] = scaler.fit_transform(dataframe[num_cols])

    dataframe.columns = [col.replace(' ', '_').upper() for col in dataframe.columns]

    X = dataframe.drop(["CUSTOMERID"], axis=1)

    return X

In [54]:
new_customers_processed = data_proprocess_prediction(new_customers_df, scaler)

In [55]:
new_customers_processed

Unnamed: 0,TENURE,MONTHLYCHARGES,TOTALCHARGES,NEW_AVG_CHARGES,NEW_INCREASE,NEW_AVG_SERVICE_FEE,GENDER_MALE,PARTNER_YES,DEPENDENTS_YES,PHONESERVICE_YES,MULTIPLELINES_NO_PHONE_SERVICE,MULTIPLELINES_YES,INTERNETSERVICE_FIBER_OPTIC,ONLINESECURITY_YES,ONLINEBACKUP_YES,DEVICEPROTECTION_YES,TECHSUPPORT_YES,STREAMINGTV_YES,STREAMINGMOVIES_YES,CONTRACT_ONE_YEAR,PAPERLESSBILLING_YES,PAYMENTMETHOD_CREDIT_CARD_(AUTOMATIC),PAYMENTMETHOD_ELECTRONIC_CHECK,PAYMENTMETHOD_MAILED_CHECK,NEW_TENURE_YEAR_1-2_YEAR,NEW_TENURE_YEAR_2-3_YEAR,NEW_TENURE_YEAR_3-4_YEAR,NEW_TENURE_YEAR_5-6_YEAR,NEW_ENGAGED_1,NEW_YOUNG_NOT_ENGAGED_1,NEW_TOTALSERVICES_3,NEW_TOTALSERVICES_4,NEW_TOTALSERVICES_5,NEW_FLAG_ANY_STREAMING_1,NEW_FLAG_AUTOPAYMENT_1
0,0.0,0.00133,0.0,0.0,0.0,0.17558,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0
1,0.54098,0.36243,0.53777,0.43347,0.89188,0.14783,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0
2,0.01639,0.32112,0.02264,0.23443,0.33738,0.11655,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0
3,0.72131,0.16722,0.52367,0.27845,0.88802,0.0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1
4,0.01639,0.54564,0.03522,0.39534,0.42806,1.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0
5,0.11475,0.93138,0.22864,0.84608,0.82601,0.3776,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0
6,0.34426,0.79081,0.55509,0.77495,0.89845,0.47225,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1
7,0.14754,0.0,0.07867,0.13894,0.84128,0.17356,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
8,0.44262,1.0,0.87221,1.0,1.0,0.27817,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0
9,1.0,0.35177,1.0,0.44877,0.96765,0.13976,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1


In [56]:
loaded_final_tuned_model.predict(new_customers_processed)

ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_11" is incompatible with the layer: expected axis -1 of input shape to have value 50, but received input with shape (10, 35)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(10, 35), dtype=float32)
  • training=False
  • mask=None

In [57]:
def dataproprocessing(dataframe):

    # cat_cols, num_cols, cat_but_car = grab_col_names(dataframe)

    dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median(), inplace=True)

    # feature engineering
    dataframe.loc[(dataframe["tenure"] >= 0) & (dataframe["tenure"] <= 12), "NEW_TENURE_YEAR"] = "0-1 Year"
    dataframe.loc[(dataframe["tenure"] > 12) & (dataframe["tenure"] <= 24), "NEW_TENURE_YEAR"] = "1-2 Year"
    dataframe.loc[(dataframe["tenure"] > 24) & (dataframe["tenure"] <= 36), "NEW_TENURE_YEAR"] = "2-3 Year"
    dataframe.loc[(dataframe["tenure"] > 36) & (dataframe["tenure"] <= 48), "NEW_TENURE_YEAR"] = "3-4 Year"
    dataframe.loc[(dataframe["tenure"] > 48) & (dataframe["tenure"] <= 60), "NEW_TENURE_YEAR"] = "4-5 Year"
    dataframe.loc[(dataframe["tenure"] > 60) & (dataframe["tenure"] <= 72), "NEW_TENURE_YEAR"] = "5-6 Year"

    dataframe["NEW_Engaged"] = dataframe["Contract"].apply(lambda x: 1 if x in ["One year", "Two year"] else 0)

    dataframe["NEW_noProt"] = dataframe.apply(lambda x: 1 if (x["OnlineBackup"] != "Yes") or (x["DeviceProtection"] != "Yes") or (
                x["TechSupport"] != "Yes") else 0, axis=1)

    dataframe["NEW_Young_Not_Engaged"] = dataframe.apply(lambda x: 1 if (x["NEW_Engaged"] == 0) and (x["SeniorCitizen"] == 0) else 0,
                                          axis=1)

    dataframe['NEW_TotalServices'] = (dataframe[['PhoneService', 'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection', 'TechSupport',
                                  'StreamingTV', 'StreamingMovies']] == 'Yes').sum(axis=1)

    dataframe["NEW_FLAG_ANY_STREAMING"] = dataframe.apply(
        lambda x: 1 if (x["StreamingTV"] == "Yes") or (x["StreamingMovies"] == "Yes") else 0, axis=1)

    dataframe["NEW_FLAG_AutoPayment"] = dataframe["PaymentMethod"].apply(
        lambda x: 1 if x in ["Bank transfer (automatic)", "Credit card (automatic)"] else 0)

    dataframe["NEW_AVG_Charges"] = dataframe["TotalCharges"] / (dataframe["tenure"] + 1)

    dataframe["NEW_Increase"] = dataframe["NEW_AVG_Charges"] / dataframe["MonthlyCharges"]

    dataframe["NEW_AVG_Service_Fee"] = dataframe["MonthlyCharges"] / (dataframe['NEW_TotalServices'] + 1)

    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe)

    cat_cols.remove("Churn")

    dataframe = pd.get_dummies(dataframe, columns=cat_cols, drop_first=True, dtype=int)

    scaler = MinMaxScaler()

    dataframe[num_cols] = scaler.fit_transform(dataframe[num_cols])

    # dump(scaler, 'scaler.joblib')

    dataframe.columns = [col.replace(' ', '_').upper() for col in dataframe.columns]

    y = dataframe["CHURN"]
    X = dataframe.drop(["CHURN", "CUSTOMERID"], axis=1)

    return X, y

In [58]:
original_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/telco_customer_churn.csv")

In [59]:
original_X, y = dataproprocessing(original_df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median(), inplace=True)


In [60]:
original_X.shape

(7043, 50)

In [61]:
new_customers_processed.shape

(10, 35)

In [62]:
def compare_columns(orijinal_df, new_df):
    columns_original_df = set(orijinal_df.columns)
    columns_new_df = set(new_df.columns)
    only_in_original_df = columns_original_df - columns_new_df
    only_in_new_df = columns_new_df - columns_original_df
    return list(only_in_original_df), list(only_in_new_df)

In [63]:
only_in_original, only_in_new_df = compare_columns(original_X, new_customers_processed)

In [64]:
only_in_original

['SENIORCITIZEN_1',
 'NEW_TOTALSERVICES_2',
 'NEW_TENURE_YEAR_4-5_YEAR',
 'ONLINEBACKUP_NO_INTERNET_SERVICE',
 'STREAMINGTV_NO_INTERNET_SERVICE',
 'TECHSUPPORT_NO_INTERNET_SERVICE',
 'CONTRACT_TWO_YEAR',
 'INTERNETSERVICE_NO',
 'NEW_TOTALSERVICES_7',
 'ONLINESECURITY_NO_INTERNET_SERVICE',
 'DEVICEPROTECTION_NO_INTERNET_SERVICE',
 'STREAMINGMOVIES_NO_INTERNET_SERVICE',
 'NEW_TOTALSERVICES_6',
 'NEW_TOTALSERVICES_1',
 'NEW_NOPROT_1']

In [65]:
only_in_new_df

[]

In [66]:
len(only_in_original)

15

In [67]:
original_X[only_in_original].head()

Unnamed: 0,SENIORCITIZEN_1,NEW_TOTALSERVICES_2,NEW_TENURE_YEAR_4-5_YEAR,ONLINEBACKUP_NO_INTERNET_SERVICE,STREAMINGTV_NO_INTERNET_SERVICE,TECHSUPPORT_NO_INTERNET_SERVICE,CONTRACT_TWO_YEAR,INTERNETSERVICE_NO,NEW_TOTALSERVICES_7,ONLINESECURITY_NO_INTERNET_SERVICE,DEVICEPROTECTION_NO_INTERNET_SERVICE,STREAMINGMOVIES_NO_INTERNET_SERVICE,NEW_TOTALSERVICES_6,NEW_TOTALSERVICES_1,NEW_NOPROT_1
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1


In [68]:
new_customers_processed

Unnamed: 0,TENURE,MONTHLYCHARGES,TOTALCHARGES,NEW_AVG_CHARGES,NEW_INCREASE,NEW_AVG_SERVICE_FEE,GENDER_MALE,PARTNER_YES,DEPENDENTS_YES,PHONESERVICE_YES,MULTIPLELINES_NO_PHONE_SERVICE,MULTIPLELINES_YES,INTERNETSERVICE_FIBER_OPTIC,ONLINESECURITY_YES,ONLINEBACKUP_YES,DEVICEPROTECTION_YES,TECHSUPPORT_YES,STREAMINGTV_YES,STREAMINGMOVIES_YES,CONTRACT_ONE_YEAR,PAPERLESSBILLING_YES,PAYMENTMETHOD_CREDIT_CARD_(AUTOMATIC),PAYMENTMETHOD_ELECTRONIC_CHECK,PAYMENTMETHOD_MAILED_CHECK,NEW_TENURE_YEAR_1-2_YEAR,NEW_TENURE_YEAR_2-3_YEAR,NEW_TENURE_YEAR_3-4_YEAR,NEW_TENURE_YEAR_5-6_YEAR,NEW_ENGAGED_1,NEW_YOUNG_NOT_ENGAGED_1,NEW_TOTALSERVICES_3,NEW_TOTALSERVICES_4,NEW_TOTALSERVICES_5,NEW_FLAG_ANY_STREAMING_1,NEW_FLAG_AUTOPAYMENT_1
0,0.0,0.00133,0.0,0.0,0.0,0.17558,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0
1,0.54098,0.36243,0.53777,0.43347,0.89188,0.14783,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0
2,0.01639,0.32112,0.02264,0.23443,0.33738,0.11655,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0
3,0.72131,0.16722,0.52367,0.27845,0.88802,0.0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1
4,0.01639,0.54564,0.03522,0.39534,0.42806,1.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0
5,0.11475,0.93138,0.22864,0.84608,0.82601,0.3776,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0
6,0.34426,0.79081,0.55509,0.77495,0.89845,0.47225,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1
7,0.14754,0.0,0.07867,0.13894,0.84128,0.17356,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
8,0.44262,1.0,0.87221,1.0,1.0,0.27817,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0
9,1.0,0.35177,1.0,0.44877,0.96765,0.13976,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1


In [69]:
for col in only_in_original:
    if col not in new_customers_processed.columns:
        new_customers_processed[col] = 0

In [70]:
pd.set_option('display.max_columns', None)

In [71]:
new_customers_processed

Unnamed: 0,TENURE,MONTHLYCHARGES,TOTALCHARGES,NEW_AVG_CHARGES,NEW_INCREASE,NEW_AVG_SERVICE_FEE,GENDER_MALE,PARTNER_YES,DEPENDENTS_YES,PHONESERVICE_YES,MULTIPLELINES_NO_PHONE_SERVICE,MULTIPLELINES_YES,INTERNETSERVICE_FIBER_OPTIC,ONLINESECURITY_YES,ONLINEBACKUP_YES,DEVICEPROTECTION_YES,TECHSUPPORT_YES,STREAMINGTV_YES,STREAMINGMOVIES_YES,CONTRACT_ONE_YEAR,PAPERLESSBILLING_YES,PAYMENTMETHOD_CREDIT_CARD_(AUTOMATIC),PAYMENTMETHOD_ELECTRONIC_CHECK,PAYMENTMETHOD_MAILED_CHECK,NEW_TENURE_YEAR_1-2_YEAR,NEW_TENURE_YEAR_2-3_YEAR,NEW_TENURE_YEAR_3-4_YEAR,NEW_TENURE_YEAR_5-6_YEAR,NEW_ENGAGED_1,NEW_YOUNG_NOT_ENGAGED_1,NEW_TOTALSERVICES_3,NEW_TOTALSERVICES_4,NEW_TOTALSERVICES_5,NEW_FLAG_ANY_STREAMING_1,NEW_FLAG_AUTOPAYMENT_1,SENIORCITIZEN_1,NEW_TOTALSERVICES_2,NEW_TENURE_YEAR_4-5_YEAR,ONLINEBACKUP_NO_INTERNET_SERVICE,STREAMINGTV_NO_INTERNET_SERVICE,TECHSUPPORT_NO_INTERNET_SERVICE,CONTRACT_TWO_YEAR,INTERNETSERVICE_NO,NEW_TOTALSERVICES_7,ONLINESECURITY_NO_INTERNET_SERVICE,DEVICEPROTECTION_NO_INTERNET_SERVICE,STREAMINGMOVIES_NO_INTERNET_SERVICE,NEW_TOTALSERVICES_6,NEW_TOTALSERVICES_1,NEW_NOPROT_1
0,0.0,0.00133,0.0,0.0,0.0,0.17558,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0.54098,0.36243,0.53777,0.43347,0.89188,0.14783,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0.01639,0.32112,0.02264,0.23443,0.33738,0.11655,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.72131,0.16722,0.52367,0.27845,0.88802,0.0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.01639,0.54564,0.03522,0.39534,0.42806,1.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,0.11475,0.93138,0.22864,0.84608,0.82601,0.3776,0,0,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,0.34426,0.79081,0.55509,0.77495,0.89845,0.47225,1,0,1,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0.14754,0.0,0.07867,0.13894,0.84128,0.17356,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,0.44262,1.0,0.87221,1.0,1.0,0.27817,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,1.0,0.35177,1.0,0.44877,0.96765,0.13976,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [72]:
new_customers_processed.shape

(10, 50)

In [73]:
original_X.shape

(7043, 50)

In [74]:
loaded_final_tuned_model.predict(new_customers_processed)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 501ms/step


array([[0.17687239],
       [0.17617084],
       [0.17692102],
       [0.17558615],
       [0.17720248],
       [0.17508233],
       [0.17621544],
       [0.17686291],
       [0.17560539],
       [0.17525332]], dtype=float32)

# Let's wrap it up

In [None]:
# !pip uninstall tf-keras
# !pip install keras-tuner
# !pip install tensorflow==2.16.1

In [75]:
import keras
import tensorflow as tf
print("Keras Current Version:", keras.__version__, "Tensorflow Current Version:", tf.__version__)

Keras Current Version: 3.5.0 Tensorflow Current Version: 2.16.1


In [76]:
import numpy as np
import pandas as pd

import random
from joblib import dump, load

from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model

def grab_col_names(dataframe, cat_th=10, car_th=20):
    cat_cols = [col for col in dataframe.columns if dataframe[col].dtypes == "O"]
    num_but_cat = [col for col in dataframe.columns if dataframe[col].nunique() < cat_th and dataframe[col].dtypes != "O"]
    cat_but_car = [col for col in dataframe.columns if dataframe[col].nunique() > car_th and dataframe[col].dtypes == "O"]
    cat_cols = cat_cols + num_but_cat
    cat_cols = [col for col in cat_cols if col not in cat_but_car]
    num_cols = [col for col in dataframe.columns if dataframe[col].dtypes != "O"]
    num_cols = [col for col in num_cols if col not in num_but_cat]
    return cat_cols, num_cols, cat_but_car

In [77]:
def data_proprocessing_new(dataframe):

    dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median(), inplace=True)

    # feature engineering
    dataframe.loc[(dataframe["tenure"] >= 0) & (dataframe["tenure"] <= 12), "NEW_TENURE_YEAR"] = "0-1 Year"
    dataframe.loc[(dataframe["tenure"] > 12) & (dataframe["tenure"] <= 24), "NEW_TENURE_YEAR"] = "1-2 Year"
    dataframe.loc[(dataframe["tenure"] > 24) & (dataframe["tenure"] <= 36), "NEW_TENURE_YEAR"] = "2-3 Year"
    dataframe.loc[(dataframe["tenure"] > 36) & (dataframe["tenure"] <= 48), "NEW_TENURE_YEAR"] = "3-4 Year"
    dataframe.loc[(dataframe["tenure"] > 48) & (dataframe["tenure"] <= 60), "NEW_TENURE_YEAR"] = "4-5 Year"
    dataframe.loc[(dataframe["tenure"] > 60) & (dataframe["tenure"] <= 72), "NEW_TENURE_YEAR"] = "5-6 Year"

    dataframe["NEW_Engaged"] = dataframe["Contract"].apply(lambda x: 1 if x in ["One year", "Two year"] else 0)

    dataframe["NEW_noProt"] = dataframe.apply(lambda x: 1 if (x["OnlineBackup"] != "Yes") or (x["DeviceProtection"] != "Yes") or (
                x["TechSupport"] != "Yes") else 0, axis=1)

    dataframe["NEW_Young_Not_Engaged"] = dataframe.apply(lambda x: 1 if (x["NEW_Engaged"] == 0) and (x["SeniorCitizen"] == 0) else 0,
                                          axis=1)

    dataframe['NEW_TotalServices'] = (dataframe[['PhoneService', 'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection', 'TechSupport',
                                  'StreamingTV', 'StreamingMovies']] == 'Yes').sum(axis=1)

    dataframe["NEW_FLAG_ANY_STREAMING"] = dataframe.apply(
        lambda x: 1 if (x["StreamingTV"] == "Yes") or (x["StreamingMovies"] == "Yes") else 0, axis=1)

    dataframe["NEW_FLAG_AutoPayment"] = dataframe["PaymentMethod"].apply(
        lambda x: 1 if x in ["Bank transfer (automatic)", "Credit card (automatic)"] else 0)

    dataframe["NEW_AVG_Charges"] = dataframe["TotalCharges"] / (dataframe["tenure"] + 1)

    dataframe["NEW_Increase"] = dataframe["NEW_AVG_Charges"] / dataframe["MonthlyCharges"]

    dataframe["NEW_AVG_Service_Fee"] = dataframe["MonthlyCharges"] / (dataframe['NEW_TotalServices'] + 1)

    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe)

    cat_cols.remove("Churn")

    dataframe = pd.get_dummies(dataframe, columns=cat_cols, drop_first=True, dtype=int)

    scaler = MinMaxScaler()

    dataframe[num_cols] = scaler.fit_transform(dataframe[num_cols])


    dataframe.columns = [col.replace(' ', '_').upper() for col in dataframe.columns]

    y = dataframe["CHURN"]
    X = dataframe.drop(["CHURN", "CUSTOMERID"], axis=1)

    dump(scaler, 'scaler.joblib')
    dump(X.columns, 'original_col_names.joblib')

    return X, y

In [78]:
original_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/telco_customer_churn.csv")

original_X, y = data_proprocessing_new(original_df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataframe["TotalCharges"].fillna(dataframe["TotalCharges"].median(), inplace=True)


In [79]:
original_X.head()

Unnamed: 0,TENURE,MONTHLYCHARGES,TOTALCHARGES,NEW_AVG_CHARGES,NEW_INCREASE,NEW_AVG_SERVICE_FEE,GENDER_MALE,PARTNER_YES,DEPENDENTS_YES,PHONESERVICE_YES,MULTIPLELINES_NO_PHONE_SERVICE,MULTIPLELINES_YES,INTERNETSERVICE_FIBER_OPTIC,INTERNETSERVICE_NO,ONLINESECURITY_NO_INTERNET_SERVICE,ONLINESECURITY_YES,ONLINEBACKUP_NO_INTERNET_SERVICE,ONLINEBACKUP_YES,DEVICEPROTECTION_NO_INTERNET_SERVICE,DEVICEPROTECTION_YES,TECHSUPPORT_NO_INTERNET_SERVICE,TECHSUPPORT_YES,STREAMINGTV_NO_INTERNET_SERVICE,STREAMINGTV_YES,STREAMINGMOVIES_NO_INTERNET_SERVICE,STREAMINGMOVIES_YES,CONTRACT_ONE_YEAR,CONTRACT_TWO_YEAR,PAPERLESSBILLING_YES,PAYMENTMETHOD_CREDIT_CARD_(AUTOMATIC),PAYMENTMETHOD_ELECTRONIC_CHECK,PAYMENTMETHOD_MAILED_CHECK,NEW_TENURE_YEAR_1-2_YEAR,NEW_TENURE_YEAR_2-3_YEAR,NEW_TENURE_YEAR_3-4_YEAR,NEW_TENURE_YEAR_4-5_YEAR,NEW_TENURE_YEAR_5-6_YEAR,SENIORCITIZEN_1,NEW_ENGAGED_1,NEW_NOPROT_1,NEW_YOUNG_NOT_ENGAGED_1,NEW_TOTALSERVICES_1,NEW_TOTALSERVICES_2,NEW_TOTALSERVICES_3,NEW_TOTALSERVICES_4,NEW_TOTALSERVICES_5,NEW_TOTALSERVICES_6,NEW_TOTALSERVICES_7,NEW_FLAG_ANY_STREAMING_1,NEW_FLAG_AUTOPAYMENT_1
0,0.01389,0.11542,0.00128,0.00414,0.00041,0.2071,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0
1,0.47222,0.38507,0.21587,0.03227,0.00677,0.18441,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0
2,0.02778,0.35423,0.01031,0.01935,0.00282,0.15883,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0
3,0.625,0.2393,0.21024,0.02221,0.00674,0.06353,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,1
4,0.02778,0.52189,0.01533,0.0298,0.00346,0.88119,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0


In [80]:
scaler = load('scaler.joblib')

original_col_names = load('/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/original_col_names.joblib')

loaded_final_tuned_model = load_model("/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/final_tuned_all_data_model.keras", compile=False)


In [81]:
len(original_col_names)

50

In [82]:
def data_proprocess_prediction_new(dataframe, col_names, scaler):

    # feature engineering
    dataframe.loc[(dataframe["tenure"] >= 0) & (dataframe["tenure"] <= 12), "NEW_TENURE_YEAR"] = "0-1 Year"
    dataframe.loc[(dataframe["tenure"] > 12) & (dataframe["tenure"] <= 24), "NEW_TENURE_YEAR"] = "1-2 Year"
    dataframe.loc[(dataframe["tenure"] > 24) & (dataframe["tenure"] <= 36), "NEW_TENURE_YEAR"] = "2-3 Year"
    dataframe.loc[(dataframe["tenure"] > 36) & (dataframe["tenure"] <= 48), "NEW_TENURE_YEAR"] = "3-4 Year"
    dataframe.loc[(dataframe["tenure"] > 48) & (dataframe["tenure"] <= 60), "NEW_TENURE_YEAR"] = "4-5 Year"
    dataframe.loc[(dataframe["tenure"] > 60) & (dataframe["tenure"] <= 72), "NEW_TENURE_YEAR"] = "5-6 Year"

    dataframe["NEW_Engaged"] = dataframe["Contract"].apply(lambda x: 1 if x in ["One year", "Two year"] else 0)

    dataframe["NEW_noProt"] = dataframe.apply(lambda x: 1 if (x["OnlineBackup"] != "Yes") or (x["DeviceProtection"] != "Yes") or (
                x["TechSupport"] != "Yes") else 0, axis=1)

    dataframe["NEW_Young_Not_Engaged"] = dataframe.apply(lambda x: 1 if (x["NEW_Engaged"] == 0) and (x["SeniorCitizen"] == 0) else 0,
                                          axis=1)

    dataframe['NEW_TotalServices'] = (dataframe[['PhoneService', 'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection', 'TechSupport',
                                  'StreamingTV', 'StreamingMovies']] == 'Yes').sum(axis=1)

    dataframe["NEW_FLAG_ANY_STREAMING"] = dataframe.apply(
        lambda x: 1 if (x["StreamingTV"] == "Yes") or (x["StreamingMovies"] == "Yes") else 0, axis=1)

    dataframe["NEW_FLAG_AutoPayment"] = dataframe["PaymentMethod"].apply(
        lambda x: 1 if x in ["Bank transfer (automatic)", "Credit card (automatic)"] else 0)

    dataframe["NEW_AVG_Charges"] = dataframe["TotalCharges"] / (dataframe["tenure"] + 1)

    dataframe["NEW_Increase"] = dataframe["NEW_AVG_Charges"] / dataframe["MonthlyCharges"]

    dataframe["NEW_AVG_Service_Fee"] = dataframe["MonthlyCharges"] / (dataframe['NEW_TotalServices'] + 1)

    cat_cols, num_cols, cat_but_car = grab_col_names(dataframe, cat_th=5)

    cat_cols.remove("customerID")

    dataframe = pd.get_dummies(dataframe, columns=cat_cols, drop_first=True, dtype=int)

    dataframe[num_cols] = scaler.fit_transform(dataframe[num_cols])

    dataframe.columns = [col.replace(' ', '_').upper() for col in dataframe.columns]

    X = dataframe.drop(["CUSTOMERID"], axis=1)

    if scaler.n_features_in_ != dataframe.shape[1]:
        print("sizes are different")
        for col in col_names:
            if col not in X.columns:
                X[col] = 0

    return X

In [84]:
new_customers_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Deep Learning/03_neural_network_course_materials/03_neural_network_course_materials/telco_files/new_customers.csv")

In [85]:
new_customers_processed = data_proprocess_prediction_new(new_customers_df, original_col_names, scaler)

sizes are different


In [86]:
new_customers_processed.head()

Unnamed: 0,TENURE,MONTHLYCHARGES,TOTALCHARGES,NEW_AVG_CHARGES,NEW_INCREASE,NEW_AVG_SERVICE_FEE,GENDER_MALE,PARTNER_YES,DEPENDENTS_YES,PHONESERVICE_YES,MULTIPLELINES_NO_PHONE_SERVICE,MULTIPLELINES_YES,INTERNETSERVICE_FIBER_OPTIC,ONLINESECURITY_YES,ONLINEBACKUP_YES,DEVICEPROTECTION_YES,TECHSUPPORT_YES,STREAMINGTV_YES,STREAMINGMOVIES_YES,CONTRACT_ONE_YEAR,PAPERLESSBILLING_YES,PAYMENTMETHOD_CREDIT_CARD_(AUTOMATIC),PAYMENTMETHOD_ELECTRONIC_CHECK,PAYMENTMETHOD_MAILED_CHECK,NEW_TENURE_YEAR_1-2_YEAR,NEW_TENURE_YEAR_2-3_YEAR,NEW_TENURE_YEAR_3-4_YEAR,NEW_TENURE_YEAR_5-6_YEAR,NEW_ENGAGED_1,NEW_YOUNG_NOT_ENGAGED_1,NEW_TOTALSERVICES_3,NEW_TOTALSERVICES_4,NEW_TOTALSERVICES_5,NEW_FLAG_ANY_STREAMING_1,NEW_FLAG_AUTOPAYMENT_1,INTERNETSERVICE_NO,ONLINESECURITY_NO_INTERNET_SERVICE,ONLINEBACKUP_NO_INTERNET_SERVICE,DEVICEPROTECTION_NO_INTERNET_SERVICE,TECHSUPPORT_NO_INTERNET_SERVICE,STREAMINGTV_NO_INTERNET_SERVICE,STREAMINGMOVIES_NO_INTERNET_SERVICE,CONTRACT_TWO_YEAR,NEW_TENURE_YEAR_4-5_YEAR,SENIORCITIZEN_1,NEW_NOPROT_1,NEW_TOTALSERVICES_1,NEW_TOTALSERVICES_2,NEW_TOTALSERVICES_6,NEW_TOTALSERVICES_7
0,0.0,0.00133,0.0,0.0,0.0,0.17558,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0.54098,0.36243,0.53777,0.43347,0.89188,0.14783,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0.01639,0.32112,0.02264,0.23443,0.33738,0.11655,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.72131,0.16722,0.52367,0.27845,0.88802,0.0,1,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.01639,0.54564,0.03522,0.39534,0.42806,1.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [87]:
new_customers_processed.shape

(10, 50)

In [88]:
original_X.shape

(7043, 50)

In [89]:
loaded_final_tuned_model.predict(new_customers_processed)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step


array([[0.38229764],
       [0.40741757],
       [0.3631357 ],
       [0.36696312],
       [0.40483224],
       [0.39115614],
       [0.32096267],
       [0.37224132],
       [0.38878006],
       [0.3204678 ]], dtype=float32)

In [None]:
# 0, 0, 1, 0, 1, 1, 0, 0, 1, 0