In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score

In [2]:
employee = pd.read_csv('D:/Code/Python Projects/Employee Churn TensorFlow/Churn.csv')

In [3]:
employee.head

<bound method NDFrame.head of      Customer ID  Gender  Senior Citizen Partner Dependents  tenure  \
0     7590-VHVEA  Female               0     Yes         No       1   
1     7590-VHVEG  Female               0     Yes         No       1   
2     5575-GNVDE    Male               0      No         No      34   
3     3668-QPYBK    Male               0      No         No       2   
4     7795-CFOCW    Male               0      No         No      45   
...          ...     ...             ...     ...        ...     ...   
7039  6840-RESVB    Male               0     Yes        Yes      24   
7040  2234-XADUH  Female               0     Yes        Yes      72   
7041  4801-JZAZL  Female               0     Yes        Yes      11   
7042  8361-LTMKD    Male               1     Yes         No       4   
7043  3186-AJIEK    Male               0      No         No      66   

     Phone Service    Multiple Lines Internet Service Online Security  ...  \
0               No  No phone service   

In [4]:
# We drop 2 columns that are not relavant to the dataset to prevent overfitting
# ['Churn'] is dropped because it is the target variable
# ['Customer ID'] is dropped because it does not contribute any value to the dataset and therefore might cause mis-labeling
# get_dummies is a way of "one-hot encoding" used to convert categorical variables to binary encoded variables

In [5]:
employee.columns

Index(['Customer ID', 'Gender', 'Senior Citizen', 'Partner', 'Dependents',
       'tenure', 'Phone Service', 'Multiple Lines', 'Internet Service',
       'Online Security', 'Online Backup', 'Device Protection', 'Tech Support',
       'Streaming TV', 'Streaming Movies', 'Contract', 'Paperless Billing',
       'Payment Method', 'Monthly Charges', 'Total Charges', 'Churn'],
      dtype='object')

In [6]:
X = pd.get_dummies(employee.drop(['Churn', 'Customer ID'], axis=1))
y = employee['Churn'].apply(lambda x: 1 if x=='Yes' else 0)

In [7]:
# Notable increase in the number of total columns. This is due to the one-hot encoding performed on the categorical variables
X.columns

Index(['Senior Citizen', 'tenure', 'Monthly Charges', 'Gender_Female',
       'Gender_Male', 'Partner_No', 'Partner_Yes', 'Dependents_No',
       'Dependents_Yes', 'Phone Service_No',
       ...
       'Total Charges_995.35', 'Total Charges_996.45', 'Total Charges_996.85',
       'Total Charges_996.95', 'Total Charges_997.65', 'Total Charges_997.75',
       'Total Charges_998.1', 'Total Charges_999.45', 'Total Charges_999.8',
       'Total Charges_999.9'],
      dtype='object', length=6575)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)

In [9]:
X_train.head

<bound method NDFrame.head of       Senior Citizen  tenure  Monthly Charges  Gender_Female  Gender_Male  \
1161               1      23            95.10           True        False   
3780               1      66            79.40          False         True   
3044               1       2            30.40           True        False   
4085               0       1            78.05           True        False   
1362               0       4            78.45          False         True   
...              ...     ...              ...            ...          ...   
3772               0       1            79.05          False         True   
5191               1      29           103.95          False         True   
5226               0      72           104.45          False         True   
5390               0      51            69.15           True        False   
860                0      72            64.80          False         True   

      Partner_No  Partner_Yes  Dependents_No 

In [10]:
y_train.head

<bound method NDFrame.head of 1161    0
3780    1
3044    1
4085    1
1362    1
       ..
3772    1
5191    0
5226    0
5390    0
860     0
Name: Churn, Length: 5635, dtype: int64>

**Build the layers**

In [11]:
#Creates a new sequential object. It is a linear stack of layers, each layer having exactly one tensor i/p and o/p each
model = Sequential()

**ReLu - Rectified Linear Unit. It is an activation function used to introduce non-linearity in the data.**

**It works by holding the minimum value at zero and max value as input. Thus removing linearity in the Neural Network. value=value.max("-ve value", 0)**

In [12]:
#Consists of 3 fully connected ("Dense") layers
#model.add is used to add a dense layer to the model stack
model.add(Dense(units=32, activation='relu', input_dim=len(X_train.columns))) #consists of 32 neurons; input_dim specifies input size
model.add(Dense(units=64, activation='relu')) #consists of 64 neurons
model.add(Dense(units=1, activation='sigmoid')) #units = 1 limits op to one value. Sigmoid fn. used in binary classification for binary o/p. 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
#Warning us that mentioning "input_dim" value is unnecessary because keras can figure out shape based on input data.
#Explicity mentioning size may result in unwanted errors and confusion.

**Compile the model**

**loss: Specifies the loss function. Basically the sum of distances between the actual and predicted values. Binary crossentropy is commonly used for binary classification examples**

**optimizer: Stochastic Gradient Descent (sgd). specifies the optimizer algortihm used for updating weights of the network during training. Updates weights by updating gradient of the loss fn wrt weights and adjusting weights to minimize loss.**

**metrics: allows to evaluate model performance**

In [18]:
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

**Fit the model**

**1 epoch translates to one forward and one backward pass to compute prediction and updates weights based on loss fn respectively. Increasing the number of epochs will allow multiple passes to fine tune weight assignment and minimize loss**

**batch_size specifies the number of elements processed in each iteration. Batch processing is faster and uses less memory.**

In [14]:
model.fit(X_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7327 - loss: 0.5338
Epoch 2/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7704 - loss: 0.4937
Epoch 3/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7671 - loss: 0.4860
Epoch 4/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7660 - loss: 0.4911
Epoch 5/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7722 - loss: 0.4955
Epoch 6/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7811 - loss: 0.4767
Epoch 7/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7737 - loss: 0.4854
Epoch 8/100
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7732 - loss: 0.4740
Epoch 9/100
[1m177/177[0m [32

<keras.src.callbacks.history.History at 0x258a6fa2de0>

**Prediction**

In [19]:
y_prediction = model.predict(X_test)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [20]:
#tensorflow will give output as continuous value bw 0 and 1.
#Standardize values to 0 and 1
y_prediction = [0 if val < 0.5 else 1 for val in y_prediction]

**Accuracy Score**

In [24]:
accuracy = accuracy_score(y_test, y_prediction)
print("Accuracy of this network:", round(accuracy,5)*100,"%")

Accuracy of this network: 74.521 %


***Currently ran 100 epochs because observed approx 80% accuracy in a few cases. Running the model for more epochs can result in higher accuracy.**