# Artificial Neural Network for Customer Churn Prediction

In [1]:
# run to install python libraries
!pip install pandas
!pip install numpy
!pip install keras
!pip install scikit-learn
!pip install tensorflow
!pip install  --upgrade keras tensorflow



In [2]:
import sys
import types
import pandas as pd
import io
import requests

url = 'https://github.com/jacquesroy/byte-size-data-science/raw/master/data/customer_churn.csv'
content = requests.get(url).content
dataset = pd.read_csv(io.StringIO(content.decode('utf-8')))
dataset.head()

Unnamed: 0,ID,CHURN,Gender,Status,Children,Est Income,Car Owner,Age,LongDistance,International,Local,Dropped,Paymethod,LocalBilltype,LongDistanceBilltype,Usage,RatePlan
0,1,T,F,S,1.0,38000.0,N,24.393333,23.56,0.0,206.08,0.0,CC,Budget,Intnl_discount,229.64,3.0
1,6,F,M,M,2.0,29616.0,N,49.426667,29.78,0.0,45.5,0.0,CH,FreeLocal,Standard,75.29,2.0
2,8,F,M,M,0.0,19732.8,N,50.673333,24.81,0.0,22.44,0.0,CC,FreeLocal,Standard,47.25,3.0
3,11,F,M,S,2.0,96.33,N,56.473333,26.13,0.0,32.88,1.0,CC,Budget,Standard,59.01,1.0
4,14,F,F,M,2.0,52004.8,N,25.14,5.03,0.0,23.11,0.0,CH,Budget,Intnl_discount,28.14,1.0


# Encoding
- Categorical: Gender, Status, Car Owner, Paymetho, LocalBilltype, LongDistanceBilltype

Other encoding could be used for some attribute. For example `onehotencoder` for Gender and Status

In [3]:
import numpy as np

X = dataset.iloc[:,2:17].values  # Columns from Gender on
Y = dataset.iloc[:,1].values  # CHURN column

In [4]:
# Encoding categoriacal data before split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# columns: 0 (Gender), 1 (Status), 4 (Car owner), 10 (Payment method), 11 (LocalBillType, 112 (LongDistanceBillType

labelencoder_X_0 = LabelEncoder()
X[:,0] = labelencoder_X_0.fit_transform(X[:,0])
labelencoder_X_1 = LabelEncoder()
X[:,1] = labelencoder_X_1.fit_transform(X[:,1])
labelencoder_X_4 = LabelEncoder()
X[:,4] = labelencoder_X_4.fit_transform(X[:,4])
labelencoder_X_10 = LabelEncoder()
X[:,10] = labelencoder_X_10.fit_transform(X[:,10])
labelencoder_X_11 = LabelEncoder()
X[:,11] = labelencoder_X_11.fit_transform(X[:,11])
labelencoder_X_12 = LabelEncoder()
X[:,12] = labelencoder_X_12.fit_transform(X[:,12])

In [5]:
# Since the answer column is "T" or "F", we have to encode it
labelencoder_y = LabelEncoder()
Y=labelencoder_y.fit_transform(Y)

# Output transformation
print(Y)

[1 0 0 ... 1 0 0]


In [6]:
# Split dataset
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)
X_train[0]

array([0, 1, 2.0, 91861.0, 1, 41.98, 15.32, 0.0, 6.74, 0.0, 1, 0, 1,
       22.06, 4.0], dtype=object)

In [7]:
# Feature scaling - standarized the value ranges
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# output values 
X_train[0]

array([-0.73531301, -0.76831717,  1.02139443,  1.31226699,  1.34352828,
       -0.03339284, -0.09856166, -0.46305472, -0.91231284, -0.25442323,
        0.08934784, -0.9217453 ,  0.67672254, -0.90584898,  1.32496973])

In [8]:
import keras
from keras.models import Sequential
from keras.layers import Dense

2024-06-16 19:28:26.132251: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-16 19:28:26.133777: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-16 19:28:26.165378: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-16 19:28:26.318533: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Create a sequential model (most common in keras)
classifier = Sequential()

# Create the first hidden layer
classifier.add(Dense(8, activation='relu', input_shape=(15,))) # there are 14 attributes

# Create the second hidden layer
classifier.add(Dense(8, activation='relu'))

# Create the output layer
classifier.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Compile the neural network
classifier.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

In [11]:
# Train the model (fit)
# batch size: number of records used in each epoch
classifier.fit(X_train, Y_train, batch_size=18, epochs=100)

Epoch 1/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4149 - loss: 0.8322
Epoch 2/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6186 - loss: 0.6469
Epoch 3/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6932 - loss: 0.5996
Epoch 4/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7053 - loss: 0.5483
Epoch 5/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7417 - loss: 0.5155
Epoch 6/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7670 - loss: 0.4927
Epoch 7/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7431 - loss: 0.5082
Epoch 8/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7802 - loss: 0.4502
Epoch 9/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f35e436faa0>

In [12]:
# Train the model (fit)
# batch size: number of records used in each epoch
classifier.fit(X_train, Y_train, batch_size=18, epochs=100)

Epoch 1/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9457 - loss: 0.1650
Epoch 2/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9506 - loss: 0.1462  
Epoch 3/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9417 - loss: 0.1673
Epoch 4/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9542 - loss: 0.1428
Epoch 5/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9451 - loss: 0.1508
Epoch 6/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9595 - loss: 0.1390
Epoch 7/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9471 - loss: 0.1717
Epoch 8/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9382 - loss: 0.1798
Epoch 9/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f35e029cf20>

In [16]:
# Testing
Y_pred = classifier.predict(X_test)
Y_pred = (Y_pred > 0.5)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 914us/step


In [19]:
# confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, Y_pred)
print(cm)

[[199  10]
 [ 10 141]]


# Meaning
- True - True (top left) : Prediction of True correct
- True - False (top right): Predicted True while False
- False - True (bottom left) : Predicted False while True
- False - False (bottom right) : Prediction of False correct

In [33]:
# accuracy : number of correct predictions divided by the total number of predictions
accuracy = (cm[0,0] + cm[1,1]) / (cm[0,0] + cm[0,1] + cm[1,0] + cm [1,1])
print("Accuracy =", accuracy)

Accuracy = 0.9444444444444444


In [34]:
# precision
precision = cm[0,0] / (cm[0,0] + cm[0,1])
print("Precision =", precision)

Precision = 0.9521531100478469


In [35]:
# recall
recall = cm[0,0] / (cm[0,0] + cm[0,1])
print("Recall =", recall)

Recall = 0.9521531100478469
