In [1]:
 import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import keras
from keras.models import Sequential
from keras.layers import Dense

In [3]:
data = pd.read_csv("churn.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
data.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [5]:
# Step 2: Distinguish feature and target set
X = data.drop(columns=['Exited'])  # Features (exclude the 'Exited' column)
y = data['Exited']  # Target variable

# Split the data into training and test sets (e.g., 80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Step 3: Define column transformers for numeric and non-numeric features
numeric_features = X_train.select_dtypes(include=['float64', 'int64']).columns
non_numeric_features = X_train.select_dtypes(include=['object']).columns

numeric_transformer = StandardScaler()
non_numeric_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Combine transformers using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('non_num', non_numeric_transformer, non_numeric_features)
    ])

# Apply the preprocessing to training and test data
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

# Convert the CSR matrices to dense NumPy arrays
X_train_preprocessed = X_train_preprocessed.toarray()
X_test_preprocessed = X_test_preprocessed.toarray()




In [7]:
# Step 4: Build a neural network model
model = Sequential()

# Input layer (number of features should match the output shape of the preprocessor)
model.add(Dense(units=64, activation='relu', input_dim=X_train_preprocessed.shape[1]))

# Hidden layers (you can add more as needed)
model.add(Dense(units=32, activation='relu'))

# Output layer for binary classification with sigmoid activation
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 5: Train the model
model.fit(X_train_preprocessed, y_train, epochs=5, batch_size=32, validation_split=0.3)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x286eede4d60>

In [8]:
# Step 6: Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test_preprocessed, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")


Test Loss: 0.4579346179962158, Test Accuracy: 0.8220000267028809
