In [15]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dropout, BatchNormalization
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.callbacks import LearningRateScheduler
from keras.initializers import RandomNormal
from imblearn.over_sampling import SMOTE
from keras.optimizers import Adam

In [16]:
dataset = pd.read_csv("Churn_Modelling.csv")
X = dataset.iloc[:, 3:-1].values 
y = dataset.iloc[:, -1].values   

In [17]:
geography_encoder = LabelEncoder()
geography_encoder.fit(dataset['Geography']) 
gender_encoder = LabelEncoder()
gender_encoder.fit(dataset['Gender'])

In [18]:
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(handle_unknown='ignore'), [1, 2])], remainder='passthrough')  
X = np.array(ct.fit_transform(X))

In [19]:
sc = StandardScaler()
X = sc.fit_transform(X) 

In [20]:
#X, y = SMOTE(random_state=0).fit_resample(X, y)

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [9]:
ann_model = Sequential()

ann_model.add(Dense(units=12, kernel_initializer=RandomNormal(mean=0.0, stddev=0.05, seed=None), activation='relu'))
ann_model.add(BatchNormalization())
ann_model.add(Dropout(0.1))  

ann_model.add(Dense(units=12, kernel_initializer=RandomNormal(mean=0.0, stddev=0.05, seed=None), activation='relu'))
ann_model.add(BatchNormalization())
ann_model.add(Dropout(0.1))  

ann_model.add(Dense(units=1, activation='sigmoid'))
ann_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
ann_model.build(input_shape=(None, X_train.shape[1]))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

def lr_schedule(epoch):
    lr = 0.01
    if epoch > 75:
        lr = 0.001
    elif epoch > 50:
       lr = 0.005
    print('Learning rate: ', lr)
    return lr
lr_scheduler = LearningRateScheduler(lr_schedule)

ann_model.fit(X_train, y_train, batch_size=32, epochs=200, callbacks=[es, lr_scheduler], validation_split=0.2)

Learning rate:  0.01
Epoch 1/200
Learning rate:  0.01
Epoch 2/200
Learning rate:  0.01
Epoch 3/200
Learning rate:  0.01
Epoch 4/200
Learning rate:  0.01
Epoch 5/200
Learning rate:  0.01
Epoch 6/200
Learning rate:  0.01
Epoch 7/200
Learning rate:  0.01
Epoch 8/200
Learning rate:  0.01
Epoch 9/200
Learning rate:  0.01
Epoch 10/200
Learning rate:  0.01
Epoch 11/200
Learning rate:  0.01
Epoch 12/200
Learning rate:  0.01
Epoch 13/200
Learning rate:  0.01
Epoch 14/200
Learning rate:  0.01
Epoch 15/200
Learning rate:  0.01
Epoch 16/200
Learning rate:  0.01
Epoch 17/200
Learning rate:  0.01
Epoch 18/200
Learning rate:  0.01
Epoch 19/200
Learning rate:  0.01
Epoch 20/200
Learning rate:  0.01
Epoch 21/200
Learning rate:  0.01
Epoch 22/200
Learning rate:  0.01
Epoch 23/200
Learning rate:  0.01
Epoch 24/200
Learning rate:  0.01
Epoch 25/200
Learning rate:  0.01
Epoch 26/200
Learning rate:  0.01
Epoch 27/200
Learning rate:  0.01
Epoch 28/200
Learning rate:  0.01
Epoch 29/200
Learning rate:  0.01
Ep

<keras.callbacks.History at 0x2a56208ae00>

In [10]:
new_observation = np.array([[600, 'Spain', 'Female', 40, 3, 60000, 2, 1, 1, 50000]]) 

new_observation[:, 1] = geography_encoder.transform(new_observation[:, 1].reshape(-1, 1)) 
new_observation[:, 2] = gender_encoder.transform(new_observation[:, 2].reshape(-1, 1)) 

new_observation = ct.transform(new_observation)

new_observation = sc.transform(new_observation)


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [11]:
new_prediction = ann_model.predict(new_observation)[0]
probability = new_prediction
new_prediction = (new_prediction > 0.5)


prediction_result = "Will Leave" if new_prediction else "Will Stay"
print(f"Prediction: {prediction_result}")
print(f"Probability of leaving: {probability}") 


Prediction: Will Stay
Probability of leaving: [0.05230134]


In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

y_pred = ann_model.predict(X_test)
y_pred = (y_pred > 0.5)

cm = confusion_matrix(y_test, y_pred)

accuracy = accuracy_score(y_test, y_pred)

precision = precision_score(y_test, y_pred)

recall = recall_score(y_test, y_pred)

f1 = f1_score(y_test, y_pred)

auc_roc = roc_auc_score(y_test, y_pred)

print(f"Confusion Matrix:\n{cm}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"AUC-ROC: {auc_roc}")


Confusion Matrix:
[[1538   57]
 [ 216  189]]
Accuracy: 0.8635
Precision: 0.7682926829268293
Recall: 0.4666666666666667
F1 Score: 0.5806451612903226
AUC-ROC: 0.7154649947753396


In [14]:
import joblib

ann_model.save('ann_model.h5')

joblib.dump(geography_encoder, 'geography_encoder.pkl')
joblib.dump(gender_encoder, 'gender_encoder.pkl')
joblib.dump(ct, 'column_transformer.pkl')
joblib.dump(sc, 'standard_scaler.pkl')


['standard_scaler.pkl']