In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, confusion_matrix

In [3]:
# Step 1: Read the dataset
data = pd.read_csv('Churn_Modelling.csv')  # Assuming you have downloaded the dataset

In [4]:
# Step 2: Distinguish the feature and target set and divide the data set into training and test sets.
X = data.drop(['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)
y = data['Exited']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Step 3: Normalize the train and test data.
scaler = StandardScaler()
X_train_numeric = X_train.drop(['Geography', 'Gender'], axis=1)
X_test_numeric = X_test.drop(['Geography', 'Gender'], axis=1)
X_train_numeric = scaler.fit_transform(X_train_numeric)
X_test_numeric = scaler.transform(X_test_numeric)

In [7]:
# One-hot encoding for categorical variables
encoder = OneHotEncoder(sparse=False, drop='first')
X_train_categorical = encoder.fit_transform(X_train[['Geography', 'Gender']])
X_test_categorical = encoder.transform(X_test[['Geography', 'Gender']])




In [8]:
# Concatenate the numeric and encoded categorical features
X_train_processed = pd.concat([pd.DataFrame(X_train_numeric), pd.DataFrame(X_train_categorical)], axis=1)
X_test_processed = pd.concat([pd.DataFrame(X_test_numeric), pd.DataFrame(X_test_categorical)], axis=1)

In [9]:
# Convert labels to numpy arrays
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [10]:
# Step 4: Initialize and build the model.
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train_processed.shape[1]))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_processed, y_train, epochs=10, batch_size=32, validation_data=(X_test_processed, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1c49eeb6bc0>

In [11]:
# Step 5: Print the accuracy score and confusion matrix
y_pred = (model.predict(X_test_processed) > 0.5).astype("int32") 
accuracy = accuracy_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)



In [12]:
print(f"Accuracy Score: {accuracy}")
print("Confusion Matrix:")
print(confusion)

Accuracy Score: 0.864
Confusion Matrix:
[[1534   73]
 [ 199  194]]
