In [102]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [103]:
df = pd.read_csv('Churn.csv') # our data frame is the churn csv file
print(df.columns)

Index(['Customer ID', 'Gender', 'Senior Citizen', 'Partner', 'Dependents',
       'tenure', 'Phone Service', 'Multiple Lines', 'Internet Service',
       'Online Security', 'Online Backup', 'Device Protection', 'Tech Support',
       'Streaming TV', 'Streaming Movies', 'Contract', 'Paperless Billing',
       'Payment Method', 'Monthly Charges', 'Total Charges', 'Churn'],
      dtype='object')


Normalize Data

In [104]:

df['tenure'] = df['tenure'].apply(lambda x: (x-df['tenure'].min()) / (df['tenure'].max()-df['tenure'].min()))

df['Monthly Charges'] = df['Monthly Charges'].apply(lambda x: (x-df['Monthly Charges'].min()) / (df['Monthly Charges'].max() - df['Monthly Charges'].min()))


# Step 1: Replace empty strings or strings with whitespace with NaN
df['Total Charges'] = df['Total Charges'].replace(r'^\s*$', pd.NA, regex=True)

# Step 2: Convert the 'Total Charges' column to float, coercing errors to NaN
df['Total Charges'] = pd.to_numeric(df['Total Charges'], errors='coerce')

df.dropna(subset=['Total Charges'], inplace=True)
# Step 3: Fill NaN values with average
# df['Total Charges'].fillna(df['Total Charges'].mean(), inplace=True)

df['Total Charges'] = df['Total Charges'].apply(lambda x: (x-df['Total Charges'].min()) / (df['Total Charges'].max() - df['Total Charges'].min()))


In [105]:
X = pd.get_dummies(df.drop(['Churn', 'Customer ID'], axis = 1)) # this is our training data, so we dont need the target value (customer ID is an irrelevant feature)
y = df['Churn'].apply(lambda x: 1 if x=="Yes" else 0) # convert yes and no's into 1's and 0's for the Churn feature
print (X.columns)

Index(['Senior Citizen', 'tenure', 'Monthly Charges', 'Total Charges',
       'Gender_Female', 'Gender_Male', 'Partner_No', 'Partner_Yes',
       'Dependents_No', 'Dependents_Yes', 'Phone Service_No',
       'Phone Service_Yes', 'Multiple Lines_No',
       'Multiple Lines_No phone service', 'Multiple Lines_Yes',
       'Internet Service_DSL', 'Internet Service_Fiber optic',
       'Internet Service_No', 'Online Security_No',
       'Online Security_No internet service', 'Online Security_Yes',
       'Online Backup_No', 'Online Backup_No internet service',
       'Online Backup_Yes', 'Device Protection_No',
       'Device Protection_No internet service', 'Device Protection_Yes',
       'Tech Support_No', 'Tech Support_No internet service',
       'Tech Support_Yes', 'Streaming TV_No',
       'Streaming TV_No internet service', 'Streaming TV_Yes',
       'Streaming Movies_No', 'Streaming Movies_No internet service',
       'Streaming Movies_Yes', 'Contract_Month-to-month', 'Contract_One 

In [106]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2) # splits our data into 80% train 20% test

In [107]:
X_train.head()

Unnamed: 0,Senior Citizen,tenure,Monthly Charges,Total Charges,Gender_Female,Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,Streaming Movies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,Paperless Billing_No,Paperless Billing_Yes,Payment Method_Bank transfer (automatic),Payment Method_Credit card (automatic),Payment Method_Electronic check,Payment Method_Mailed check
130,1,0.986111,0.3801,0.457714,False,True,True,False,True,False,...,True,False,True,False,False,True,False,False,True,False
2737,1,0.833333,0.708458,0.608793,True,False,False,True,True,False,...,True,True,False,False,True,False,False,False,True,False
4213,0,0.013889,0.7199,0.008285,True,False,True,False,True,False,...,True,True,False,False,True,False,False,False,True,False
4008,0,0.763889,0.074129,0.164418,False,True,False,True,True,False,...,False,False,False,True,True,False,True,False,False,False
192,0,0.083333,0.723881,0.059105,False,True,False,True,True,False,...,True,True,False,False,False,True,False,False,True,False


In [108]:
y_train.head()

130     0
2737    0
4213    1
4008    0
192     1
Name: Churn, dtype: int64

Import Dependencies

In [109]:
from tensorflow.keras.models import Sequential, load_model # Sequential is the core model class, load allows us to reload later on
from tensorflow.keras.layers import Dense # Dense means a fully connected layer in our NN
from sklearn.metrics import accuracy_score # Model evaluation

Build and Compile Model

In [110]:
model = Sequential()
# Sequential is a simple stack of layers that allows to build neural networks layers at a time.
# key characteristics:
# Linear stack of layers - suitable for architecutres where layers are added one after another, each layer has a single input and isngle output, not branching
# Easy to use
# Limited to linear architectures
model.add(Dense(units=32, activation='relu', input_dim = len(X_train.columns))) # first hidden layer with 32 neurons activated by relu, number of inputs per neuron is the number of features
model.add(Dense(units=64, activation='relu')) # second hidden layer 64 neurons with relu
model.add(Dense(units=1, activation='sigmoid')) # final output layer with a single neuron with sigmoid activation function

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [111]:
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
# binary cross entropy is used where target values are 0 or 1, measures the dissimilarity betweent true labels and predicted probabilities

# sgd -> stochastic gradient descent is a basic optimization that updates the weights using the gradient of the loss function
# weights = weights - learning rate * the gradient of loss with respect to weights
# stochastic means the gradient is computed using only one sample -> faster but noisy

Fit, Predict and Evaluate

In [112]:
model.fit(X_train, y_train, epochs = 100, batch_size = 32)

Epoch 1/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 777us/step - accuracy: 0.6860 - loss: 0.6014
Epoch 2/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 748us/step - accuracy: 0.7575 - loss: 0.4714
Epoch 3/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 734us/step - accuracy: 0.7927 - loss: 0.4446
Epoch 4/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 742us/step - accuracy: 0.7890 - loss: 0.4439
Epoch 5/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 819us/step - accuracy: 0.8026 - loss: 0.4246
Epoch 6/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 751us/step - accuracy: 0.7956 - loss: 0.4387
Epoch 7/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 760us/step - accuracy: 0.7947 - loss: 0.4320
Epoch 8/100
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 760us/step - accuracy: 0.7937 - loss: 0.4378
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x245b58b1550>

In [113]:
y_hat = model.predict(X_test)
y_hat = [0 if val < 0.5 else 1 for val in y_hat]

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [114]:
y_hat

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,


In [115]:
accuracy_score(y_test, y_hat)


0.7995735607675906