### Deep Learning Model to Predict Employee Retention Using Keras and TensorFlow

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the HR CSV straight from its raw GitHub URL into a DataFrame.
df = pd.read_csv(
    "https://raw.githubusercontent.com/mwitiderrick/kerasDO/master/HR_comma_sep.csv"
)

In [3]:
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,department,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [5]:
df.shape

(14999, 10)

In [6]:
# One-hot encode the two categorical columns. drop_first=True drops one level
# per column so the dummies are not perfectly collinear.
# dtype=int keeps the dummies as 0/1 integers on every pandas version; without
# it pandas >= 2.0 emits booleans, which turns the later `.values` feature
# matrix into an object-dtype array.
feats = ['department','salary']
df_final=pd.get_dummies(df,columns=feats,drop_first=True,dtype=int)

In [7]:
df_final.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,department_RandD,department_accounting,department_hr,department_management,department_marketing,department_product_mng,department_sales,department_support,department_technical,salary_low,salary_medium
0,0.38,0.53,2,157,3,0,1,0,0,0,0,0,0,0,1,0,0,1,0
1,0.8,0.86,5,262,6,0,1,0,0,0,0,0,0,0,1,0,0,0,1
2,0.11,0.88,7,272,4,0,1,0,0,0,0,0,0,0,1,0,0,0,1
3,0.72,0.87,5,223,5,0,1,0,0,0,0,0,0,0,1,0,0,1,0
4,0.37,0.52,2,159,3,0,1,0,0,0,0,0,0,0,1,0,0,1,0


In [8]:
df_final.shape

(14999, 19)

In [17]:
df_final.isnull().sum()

satisfaction_level        0
last_evaluation           0
number_project            0
average_montly_hours      0
time_spend_company        0
Work_accident             0
left                      0
promotion_last_5years     0
department_RandD          0
department_accounting     0
department_hr             0
department_management     0
department_marketing      0
department_product_mng    0
department_sales          0
department_support        0
department_technical      0
salary_low                0
salary_medium             0
dtype: int64

In [10]:
from sklearn.model_selection import train_test_split

In [14]:
# Separate the label from the predictors, both as plain NumPy arrays:
# `y` is the 'left' column, `X` is every other column of df_final.
y = df_final['left'].to_numpy()
X = df_final.drop('left', axis=1).to_numpy()

In [24]:
X_train.shape

(10499, 18)

In [16]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)

In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
sc = StandardScaler()

In [20]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so no test-set statistics influence the scaling.
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [22]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [29]:
# Empty sequential model; layers are appended one at a time in the cells below.
classifier = Sequential()

In [30]:
# Hidden layer: 9 ReLU units. input_shape=(18,) matches the 18 feature columns of X_train.
classifier.add(Dense(9,kernel_initializer='uniform',activation='relu',input_shape=(18,)))

In [31]:
# Output layer: a single sigmoid unit, so each prediction is a value between 0 and 1.
classifier.add(Dense(1,kernel_initializer='uniform',activation='sigmoid'))

In [32]:
# Binary cross-entropy loss for the 0/1 `left` target, optimised with Adam; accuracy is tracked as a metric.
classifier.compile(optimizer="adam",loss='binary_crossentropy',metrics=['accuracy'])

In [33]:
# Train in mini-batches of 10 rows for a single pass (epochs=1) over the training split.
classifier.fit(X_train,y_train,batch_size=10,epochs=1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/1


<keras.callbacks.History at 0x20a9ba59708>

In [38]:
y_pred = classifier.predict(X_test)

In [39]:
y_pred

array([[0.09183484],
       [0.0776526 ],
       [0.04289815],
       ...,
       [0.14750811],
       [0.5193461 ],
       [0.42508936]], dtype=float32)

In [40]:
# Turn the predicted scores into hard labels using a 0.5 cut-off.
# Note: this rebinds y_pred from float scores to a boolean array.
y_pred = (y_pred > 0.5)

In [41]:
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [ True],
       [False]])

In [42]:
from sklearn.metrics import confusion_matrix

In [43]:
cm = confusion_matrix(y_test,y_pred)

In [44]:
cm

array([[3294,  145],
       [ 651,  410]], dtype=int64)

In [45]:
# Accuracy = correct predictions (diagonal of the confusion matrix) / all predictions.
# Derived from `cm` itself rather than from numbers copied out of the previous
# cell's output, so it stays correct whenever the split or the model changes.
float(cm.trace() / cm.sum())

0.8231111111111111

In [49]:
# One hand-written employee record, laid out in the same column order as X
# (the columns of df_final with 'left' removed).
single_employee = np.array([[
    0.26,   # satisfaction_level
    0.7,    # last_evaluation
    3.,     # number_project
    238.,   # average_montly_hours
    6.,     # time_spend_company
    0.,     # Work_accident
    0.,     # promotion_last_5years
    0., 0., 0., 0., 0., 0., 0.,   # department_RandD ... department_sales
    1.,     # department_support
    0.,     # department_technical
    0.,     # salary_low
    1.,     # salary_medium
]])

In [50]:
single_employee.shape

(1, 18)

In [51]:
single_employee_scaled = sc.transform(single_employee)
single_employee_scaled

array([[-1.42328837, -0.09614083, -0.64809305,  0.73993608,  1.7173031 ,
        -0.4100159 , -0.14999026, -0.23376464, -0.23262946, -0.22896686,
        -0.20988878, -0.25180398, -0.25073288, -0.61570155,  2.3839053 ,
        -0.46900194, -0.97823471,  1.15437981]])

In [55]:
new_pred = classifier.predict(single_employee_scaled)

In [56]:
new_pred

array([[0.4448171]], dtype=float32)

In [57]:
new_pred_50 = (new_pred>0.5)
new_pred_50

array([[False]])

In [58]:
new_pred_60 = (new_pred > 0.6)
new_pred_60

array([[False]])

### Cross Validation

In [59]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

In [60]:
def make_classifier():
    """Build and compile a fresh one-hidden-layer binary classifier.

    Architecture: 18 inputs -> Dense(9, relu) -> Dense(1, sigmoid), compiled
    with the Adam optimizer, binary cross-entropy loss and the accuracy metric.

    Returns:
        A newly constructed, compiled (untrained) Sequential model.
    """
    model = Sequential()
    hidden = Dense(9, kernel_initializer="uniform", activation="relu", input_dim=18)
    output = Dense(1, kernel_initializer="uniform", activation="sigmoid")
    for layer in (hidden, output):
        model.add(layer)
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

In [62]:
# Wrap the model factory in a scikit-learn style estimator, configured with batch_size=10 and epochs=1.
# Note: this rebinds `classifier`, replacing the Sequential model trained earlier.
classifier = KerasClassifier(build_fn=make_classifier,batch_size=10,epochs=1)

In [63]:
# 10-fold cross-validation on the training split only; n_jobs=-1 asks for all available cores.
accuracies = cross_val_score(estimator=classifier,X=X_train,y=y_train,cv=10,n_jobs=-1)

In [64]:
mean = accuracies.mean()

In [65]:
mean

0.8446488665541148

In [67]:
variance = accuracies.var()
variance

0.002677662020901506

### Dropout

In [68]:
from keras.layers import Dropout

In [69]:
# 18 -> 9 -> 1 network with a Dropout layer (rate 0.1) between the hidden
# and output layers. The model is only built and compiled here.
classifier = Sequential()
classifier.add(
    Dense(9, kernel_initializer="uniform", activation="relu", input_dim=18)
)
classifier.add(Dropout(rate=0.1))
classifier.add(
    Dense(1, kernel_initializer="uniform", activation="sigmoid")
)
classifier.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


### Hyperparameter Tuning

In [70]:
from sklearn.model_selection import GridSearchCV

In [71]:
def make_classifier(optimizer="adam"):
    """Build and compile the one-hidden-layer classifier with a chosen optimizer.

    This definition replaces the zero-argument ``make_classifier`` defined in
    the cross-validation section. The ``"adam"`` default reproduces that earlier
    behaviour, so code that calls ``make_classifier()`` with no arguments keeps
    working regardless of which definition was executed last.

    Args:
        optimizer: Optimizer name or instance passed to ``compile``;
            the grid search supplies this value (e.g. ``'adam'``, ``'rmsprop'``).

    Returns:
        A newly constructed, compiled (untrained) Sequential model:
        18 inputs -> Dense(9, relu) -> Dense(1, sigmoid).
    """
    classifier = Sequential()
    classifier.add(Dense(9, kernel_initializer = "uniform", activation = "relu", input_dim=18))
    classifier.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))
    classifier.compile(optimizer= optimizer,loss = "binary_crossentropy",metrics = ["accuracy"])
    return classifier

In [72]:
classifier = KerasClassifier(build_fn=make_classifier)

In [73]:
# Search space for the grid search: 2 batch sizes x 2 epoch counts x 2 optimizers.
params = dict(
    batch_size=[20, 35],
    epochs=[2, 3],
    optimizer=['adam', 'rmsprop'],
)

In [74]:
# Exhaustive search over `params`, scoring each combination by accuracy with 2-fold cross-validation.
grid_search =GridSearchCV(estimator=classifier,param_grid=params,scoring='accuracy',cv=2)

In [75]:
# Run the search on the training split only; the result of fit() is bound back to the same name.
grid_search = grid_search.fit(X_train,y_train)

Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [77]:
best_params = grid_search.best_params_
best_params

{'batch_size': 35, 'epochs': 3, 'optimizer': 'adam'}

In [78]:
best_accuracy = grid_search.best_score_
best_accuracy

0.8576054862367845