In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

url = "Churn_Modelling.csv"
data = pd.read_csv(url)

print(data.head())
print(data.columns)

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

In [None]:
# Drop CustomerId, Surname, and RowNumber columns
X = data.drop(columns=['CustomerId', 'Surname', 'RowNumber', 'Exited'])
y = data['Exited']

# Convert categorical variables into dummy/indicator variables
X = pd.get_dummies(X, drop_first=True)

# Feature Engineering: Adding polynomial features
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

In [None]:
# Normalize the train and test data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Initialize and build the model with improvements
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model with increased epochs and validation split
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7414 - loss: 0.5616 - val_accuracy: 0.8112 - val_loss: 0.4325
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7885 - loss: 0.4727 - val_accuracy: 0.8356 - val_loss: 0.3982
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8185 - loss: 0.4261 - val_accuracy: 0.8450 - val_loss: 0.3833
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8248 - loss: 0.4098 - val_accuracy: 0.8481 - val_loss: 0.3664
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8370 - loss: 0.3953 - val_accuracy: 0.8487 - val_loss: 0.3620
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8365 - loss: 0.3913 - val_accuracy: 0.8462 - val_loss: 0.3603
Epoch 7/100
[1m200/20

<keras.src.callbacks.history.History at 0x2786af151f0>

In [None]:
# Print the accuracy score and confusion matrix
y_pred = (model.predict(X_test) > 0.5).astype("int32")

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy: 0.8565
Confusion Matrix:
[[1542   65]
 [ 222  171]]


In [13]:
# 6. Print the predicted results for each customer
def interpret_prediction(prediction):
    return "Exit" if prediction == 1 else "Stay"

# Get predictions for all customers in the test set once
predicted_probabilities = model.predict(X_test)

# Print the results for the first 10 customers in the test set
for i in range(25):
    print(f"Customer {i + 1}: Probability of exit = {predicted_probabilities[i][0]:.2f}, Predicted: {interpret_prediction(y_pred[i][0])}")


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Customer 1: Probability of exit = 0.02, Predicted: Stay
Customer 2: Probability of exit = 0.00, Predicted: Stay
Customer 3: Probability of exit = 0.11, Predicted: Stay
Customer 4: Probability of exit = 0.04, Predicted: Stay
Customer 5: Probability of exit = 0.20, Predicted: Stay
Customer 6: Probability of exit = 0.00, Predicted: Stay
Customer 7: Probability of exit = 0.21, Predicted: Stay
Customer 8: Probability of exit = 0.18, Predicted: Stay
Customer 9: Probability of exit = 0.26, Predicted: Stay
Customer 10: Probability of exit = 0.29, Predicted: Stay
Customer 11: Probability of exit = 0.94, Predicted: Exit
Customer 12: Probability of exit = 0.99, Predicted: Exit
Customer 13: Probability of exit = 0.92, Predicted: Exit
Customer 14: Probability of exit = 0.31, Predicted: Stay
Customer 15: Probability of exit = 0.04, Predicted: Stay
Customer 16: Probability of exit = 0.32, Predicted: Stay
Customer 17: Probability