In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [2]:
dataset = pd.read_csv('ML_dataset_Churn_Modelling.csv')


In [3]:
print("===== Dataset Information =====")
print("Information of Dataset:\n", dataset.info())
print("\nShape of Dataset (rows x columns): ", dataset.shape)
print("\nColumn Names: ", list(dataset.columns))
print("\nTotal Elements in Dataset:", dataset.size)
print("\nData Types of Each Column:\n", dataset.dtypes)
print("\nFirst 5 Rows:\n", dataset.head().T)
print("\nLast 5 Rows:\n", dataset.tail().T)
print("\nRandom 5 Rows:\n", dataset.sample(5).T)

===== Dataset Information =====
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB
Information of Dataset:
 None

Shape of Dataset (rows x column

In [4]:
print("\n===== Statistical Information =====")
print(dataset.describe())



===== Statistical Information =====
         RowNumber    CustomerId   CreditScore           Age        Tenure  \
count  10000.00000  1.000000e+04  10000.000000  10000.000000  10000.000000   
mean    5000.50000  1.569094e+07    650.528800     38.921800      5.012800   
std     2886.89568  7.193619e+04     96.653299     10.487806      2.892174   
min        1.00000  1.556570e+07    350.000000     18.000000      0.000000   
25%     2500.75000  1.562853e+07    584.000000     32.000000      3.000000   
50%     5000.50000  1.569074e+07    652.000000     37.000000      5.000000   
75%     7500.25000  1.575323e+07    718.000000     44.000000      7.000000   
max    10000.00000  1.581569e+07    850.000000     92.000000     10.000000   

             Balance  NumOfProducts    HasCrCard  IsActiveMember  \
count   10000.000000   10000.000000  10000.00000    10000.000000   
mean    76485.889288       1.530200      0.70550        0.515100   
std     62397.405202       0.581654      0.45584        

In [5]:
X = dataset.drop(columns=['RowNumber', 'CustomerId', 'Surname', 'Exited'])
y = dataset['Exited']


In [11]:
categorical_cols = ['Geography', 'Gender']

In [13]:
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(drop='first'), categorical_cols)],
                       remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [17]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [19]:
classifier = Sequential()

In [21]:
classifier.add(Dense(units=6, activation='relu', input_dim=X_train.shape[1]))
classifier.add(Dropout(0.1)) 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [23]:
classifier.add(Dense(units=6, activation='relu'))
classifier.add(Dropout(0.1))

In [25]:
classifier.add(Dense(units=1, activation='sigmoid'))

In [27]:
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [29]:
history = classifier.fit(X_train, y_train, batch_size=32, epochs=100, verbose=0)

In [30]:
y_pred = (classifier.predict(X_test) > 0.5)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


In [31]:
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

In [32]:
print("\n===== Model Evaluation =====")
print("Confusion Matrix:\n", cm)
print("\nAccuracy Score:", round(acc * 100, 2), "%")


===== Model Evaluation =====
Confusion Matrix:
 [[1556   51]
 [ 254  139]]

Accuracy Score: 84.75 %
