## Step 1 — Data Pre-processing

[How To Build a Deep Learning Model to Predict Employee Retention Using Keras and TensorFlow](https://www.digitalocean.com/community/tutorials/how-to-build-a-deep-learning-model-to-predict-employee-retention-using-keras-and-tensorflow)

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the HR dataset from the CSV next to this notebook and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer="./hr.csv")
df.head(5)

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,work_accident,left,promotion_last_5years,department,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [3]:
# One-hot encode the two text columns. drop_first=True emits k-1 indicator
# columns for a column with k distinct levels.
feats = ["department", "salary"]
df_final = pd.get_dummies(data=df, columns=feats, drop_first=True)

## Step 2 — Separating Your Training and Testing Datasets

In [4]:
from sklearn.model_selection import train_test_split
# Feature matrix: every column except the target `left`.
x = df_final.drop(columns=["left"]).to_numpy()
# Target vector: the `left` column.
y = df_final["left"].to_numpy()

In [5]:
# Hold out 30% of the rows for testing. A fixed random_state makes the split,
# and every metric computed from it, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

## Step 3 — Transforming the Data

In [6]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both the training and the test split.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

## Step 4 — Building the Artificial Neural Network

In [7]:
from keras.models import Sequential
from keras.layers import Dense

In [8]:
# Two-layer feed-forward network: a 9-unit ReLU hidden layer that takes
# 18 input features, followed by a single sigmoid output unit.
classifier = Sequential([
    Dense(9, kernel_initializer="uniform", activation="relu", input_dim=18),
    Dense(1, kernel_initializer="uniform", activation="sigmoid"),
])
classifier.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# Train for a single epoch in mini-batches of 10 samples.
classifier.fit(x_train, y_train, batch_size=10, epochs=1)



<keras.src.callbacks.History at 0x28efd7650>

## Step 5 — Running Predictions on the Test Set

In [9]:
# Threshold the model's outputs at 0.5 to get boolean class predictions.
y_pred = classifier.predict(x_test) > 0.5



## Step 6 — Checking the Confusion Matrix

In [10]:
from sklearn.metrics import confusion_matrix
# Compare the held-out labels with the thresholded predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[3332,  129],
       [ 650,  389]])

## Step 7 — Making a Single Prediction

In [11]:
# Score one hand-written sample of 18 feature values: scale it with the
# already-fitted scaler, predict, and threshold the output at 0.5.
new_pred = classifier.predict(
    sc.transform(
        np.array([[
            0.26, 0.7, 3., 238., 6., 0.,
            0., 0., 0., 0., 0., 0.,
            0., 0., 1., 0., 0., 1.,
        ]])
    )
) > 0.5
print(new_pred)

[[False]]


## Step 8 — Improving the Model Accuracy

In [12]:
# NOTE(review): this import fails in the environment that produced the output
# below (ModuleNotFoundError: No module named 'keras.wrappers'). The Step 8 and
# Step 10 cells that use KerasClassifier cannot run until a compatible wrapper
# is available — presumably from a separate package such as scikeras; confirm.
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

ModuleNotFoundError: No module named 'keras.wrappers'

In [None]:
def make_classifier():
    """Build and compile the baseline binary classifier.

    The network has a 9-unit ReLU hidden layer that takes 18 input features
    and a single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss, and accuracy as the reported metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    hidden_layer = Dense(9, kernel_initializer="uniform", activation="relu", input_dim=18)
    output_layer = Dense(1, kernel_initializer="uniform", activation="sigmoid")
    model = Sequential([hidden_layer, output_layer])
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

In [None]:
# Wrap the Keras model builder so scikit-learn can cross-validate it.
# `epochs` replaces the legacy `nb_epoch` keyword so this setting uses the
# same argument name as the other training cells in this notebook.
classifier = KerasClassifier(build_fn=make_classifier, batch_size=10, epochs=1)
# 10-fold cross-validation on the training split; n_jobs=-1 asks for all
# available CPU cores.
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=10, n_jobs=-1)


  classifier = KerasClassifier(build_fn = make_classifier, batch_size=10, nb_epoch=1)




In [None]:
# Average accuracy across the cross-validation folds.
mean = np.mean(accuracies)
mean

0.8354122400283813

In [None]:
# Spread of the per-fold accuracies (population variance).
variance = np.var(accuracies)
variance

0.0010205988129607135

## Step 9 — Adding Dropout Regularization to Fight Over-Fitting

In [None]:
from keras.layers import Dropout

# Same two-layer network as before, with a Dropout layer (rate 0.1) inserted
# between the hidden layer and the output layer.
classifier = Sequential([
    Dense(9, kernel_initializer="uniform", activation="relu", input_dim=18),
    Dropout(rate=0.1),
    Dense(1, kernel_initializer="uniform", activation="sigmoid"),
])
classifier.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

## Step 10 — Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV
def make_classifier(optimizer="adam"):
    """Build and compile the binary classifier with a configurable optimizer.

    This definition replaces the zero-argument ``make_classifier`` defined
    earlier in the notebook. ``optimizer`` defaults to ``"adam"`` (the value
    the earlier definition hard-coded) so cells that call the builder with no
    arguments keep working after this cell has run.

    Args:
        optimizer: Optimizer passed straight to ``compile``.

    Returns:
        The compiled ``Sequential`` model: a 9-unit ReLU hidden layer taking
        18 input features, followed by a single sigmoid output unit.
    """
    classifier = Sequential()
    classifier.add(Dense(9, kernel_initializer="uniform", activation="relu", input_dim=18))
    classifier.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
    classifier.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
    return classifier
# Wrap the builder without fixed training settings; the grid supplies them.
classifier = KerasClassifier(build_fn=make_classifier)
# 2 batch sizes x 2 epoch counts x 2 optimizers = 8 candidate configurations.
params = dict(
    batch_size=[20, 35],
    epochs=[2, 3],
    optimizer=["adam", "rmsprop"],
)
# Score every candidate by accuracy using 2-fold cross-validation.
grid_search = GridSearchCV(classifier, params, scoring="accuracy", cv=2)
grid_search = grid_search.fit(x_train, y_train)


Epoch 1/2


  classifier = KerasClassifier(build_fn = make_classifier)


Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
best_param = grid_search.best_params_
best_accuracy = grid_search.best_score_
print(best_param)
print(best_accuracy)

{'batch_size': 20, 'epochs': 3, 'optimizer': 'adam'}
0.8436022825209337
