In [12]:
from numpy.random import seed
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow
from sklearn.preprocessing import StandardScaler

# Fix the global random state of NumPy and TensorFlow up front so repeated
# runs of the notebook start from the same pseudo-random sequences.
np.random.seed(1907)
tensorflow.random.set_seed(2)

In [13]:
# Load the Telco churn export (path is relative to the working directory).
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

## Convert TotalCharges to numeric; values that cannot be parsed become NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

## Encode the Churn column: 1 when the value is 'Yes' (the event), 0 otherwise
## (the censored observations).
df['Churn'] = (df['Churn'] == 'Yes').astype('int64')

## Impute the null values with the median value.
## Assign the result back explicitly: calling fillna(inplace=True) on a column
## selected from the frame is chained assignment, which newer pandas versions
## warn about and which does not update `df` under Copy-on-Write.
## NOTE(review): TotalCharges is used as the regression target below, so the
## imputed rows carry a synthetic label -- consider dropping them instead.
total_charges_median = df['TotalCharges'].median()
df['TotalCharges'] = df['TotalCharges'].fillna(total_charges_median)

# customerID is removed before one-hot encoding so it does not become
# dummy columns.
df = df.drop('customerID', axis=1)

# One-hot encode the remaining categorical columns.
df = pd.get_dummies(df)
print("The data size:", df.shape)

# Regression target: TotalCharges. Both charge columns are removed from the
# feature matrix.
labels = np.array(df['TotalCharges'])
df = df.drop(['TotalCharges', 'MonthlyCharges'], axis=1)

# shuffle=False makes the split deterministic: rows are kept in file order,
# with the trailing 25% used as the test set.
train_x, test_x, train_y, test_y = train_test_split(df, labels, test_size=0.25, shuffle=False)

The data size: (7043, 46)


In [14]:
# Learn the standardisation parameters from the training features only, so
# the test set is scaled with statistics it did not contribute to.
scaler = StandardScaler().fit(train_x)

# Apply the fitted scaler to the training features ...
train_x = scaler.transform(train_x)
# ... and reuse the very same parameters for the test features.
test_x = scaler.transform(test_x)

In [15]:
from keras import models
from keras import layers

# Baseline regressor: two 64-unit ReLU hidden layers followed by a single
# linear output unit.
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(train_x.shape[1],)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

# The loss is already MSE, so track MAE as the additional metric. With
# metrics=['mse'] the second value returned by evaluate() was just the MSE
# again, although it is unpacked into `test_mae_score` below.
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

# validation_split=0.2 holds out a fifth of the training data for validation.
history = model.fit(train_x, train_y, epochs=20, batch_size=10, verbose=1, validation_split=0.2)

# evaluate() returns the loss first, then the compiled metrics: [MSE, MAE].
test_mse_score, test_mae_score = model.evaluate(test_x, test_y)

Train on 4225 samples, validate on 1057 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Display the current value of test_mse_score, i.e. the first value returned
# by model.evaluate(test_x, test_y) (the loss; the model uses loss='mse').
test_mse_score

14831.8769520159

In [17]:
# Wider/deeper variant: 128 -> 64 -> 32 ReLU hidden units followed by a
# single linear output unit.
model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(train_x.shape[1],)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))

# The loss is already MSE, so track MAE as the additional metric. With
# metrics=['mse'] the second value returned by evaluate() was just the MSE
# again, although it is unpacked into `test_mae_score` below.
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

# validation_split=0.2 holds out a fifth of the training data for validation.
history = model.fit(train_x, train_y, epochs=20, batch_size=10, verbose=1, validation_split=0.2)

# evaluate() returns the loss first, then the compiled metrics: [MSE, MAE].
test_mse_score, test_mae_score = model.evaluate(test_x, test_y)

Train on 4225 samples, validate on 1057 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
# Display the current value of test_mse_score, i.e. the first value returned
# by model.evaluate(test_x, test_y) (the loss; the model uses loss='mse').
test_mse_score

11717.304741014072

In [19]:
# Deepest plain variant: 128 -> 128 -> 64 -> 64 -> 32 ReLU hidden units
# followed by a single linear output unit.
model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(train_x.shape[1],)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))

# The loss is already MSE, so track MAE as the additional metric. With
# metrics=['mse'] the second value returned by evaluate() was just the MSE
# again, although it is unpacked into `test_mae_score` below.
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

# validation_split=0.2 holds out a fifth of the training data for validation.
history = model.fit(train_x, train_y, epochs=20, batch_size=10, verbose=1, validation_split=0.2)

# evaluate() returns the loss first, then the compiled metrics: [MSE, MAE].
test_mse_score, test_mae_score = model.evaluate(test_x, test_y)

Train on 4225 samples, validate on 1057 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [20]:
# Display the current value of test_mse_score, i.e. the first value returned
# by model.evaluate(test_x, test_y) (the loss; the model uses loss='mse').
test_mse_score

15443.508473523567

In [29]:
# Same 128 -> 128 -> 64 -> 64 -> 32 stack as the deepest plain variant, with
# Dropout(0.1) inserted after each of the first four hidden layers.
model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(train_x.shape[1],)))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))

# The loss is already MSE, so track MAE as the additional metric. With
# metrics=['mse'] the second value returned by evaluate() was just the MSE
# again, although it is unpacked into `test_mae_score` below.
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

# validation_split=0.2 holds out a fifth of the training data for validation.
history = model.fit(train_x, train_y, epochs=20, batch_size=10, verbose=1, validation_split=0.2)

# evaluate() returns the loss first, then the compiled metrics: [MSE, MAE].
test_mse_score, test_mae_score = model.evaluate(test_x, test_y)

Train on 4225 samples, validate on 1057 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
# Display the current value of test_mse_score, i.e. the first value returned
# by model.evaluate(test_x, test_y) (the loss; the model uses loss='mse').
test_mse_score

27245.142931927883