# Retention Prediction with Tensorflow
Ref : https://www.digitalocean.com/community/tutorials/how-to-build-a-deep-learning-model-to-predict-employee-retention-using-keras-and-tensorflow

In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("https://raw.githubusercontent.com/mwitiderrick/kerasDO/master/HR_comma_sep.csv")

In [2]:
df.rename(columns={"Work_accident": "work_accident", "salary": "salary_level"}, inplace=True)
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,work_accident,left,promotion_last_5years,department,salary_level
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [3]:
feats = ['department','salary_level']
df_final = pd.get_dummies(df,columns=feats,drop_first=True)

## Separating Train and Test Dataset

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X = df_final.drop(['left'],axis=1).values
y = df_final['left'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [6]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train Model

In [22]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

def make_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(9, kernel_initializer = "uniform", activation = "relu", input_dim=18))
    classifier.add(Dropout(rate = 0.1))
    classifier.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))
    classifier.compile(optimizer= optimizer,loss = "binary_crossentropy",metrics = ["accuracy"])
    return classifier

In [10]:
classifier = KerasClassifier(build_fn = make_classifier)

In [11]:
params = {
    'batch_size':[20,35],
    'epochs':[2,3],
    'optimizer':['adam','rmsprop']
}

In [12]:
grid_search = GridSearchCV(estimator=classifier,
                           param_grid=params,
                           scoring="accuracy",
                           cv=10)

In [13]:
grid_search = grid_search.fit(X_train,y_train)

2022-06-05 14:50:07.550851: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/2
Epoch 2/2




Epoch 1/2
Epoch 2/2




Epoch 1/2
Epoch 2/2




Epoch 1/2
Epoch 2/2




Epoch 1/3
Epoch 2/3
Epoch 3/3




Epoch 1/3
Epoch 2/3
Epoch 3/3




Epoch 1/3
Epoch 2/3
Epoch 3/3




Epoch 1/3
Epoch 2/3
Epoch 3/3




Epoch 1/2
Epoch 2/2
Epoch 1/2




Epoch 2/2
Epoch 1/2




Epoch 2/2
Epoch 1/2




Epoch 2/2




Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3




Epoch 2/3
Epoch 3/3
Epoch 1/3




Epoch 2/3
Epoch 3/3
Epoch 1/3




Epoch 2/3
Epoch 3/3




Epoch 1/3
Epoch 2/3
Epoch 3/3


In [14]:
best_param = grid_search.best_params_
best_accuracy = grid_search.best_score_
print(best_param)
print(best_accuracy)

{'batch_size': 35, 'epochs': 3, 'optimizer': 'adam'}
0.8295085140933873


In [28]:
best_classifier = KerasClassifier(
    build_fn = make_classifier,
    batch_size=best_param.get('batch_size'),
    nb_epoch=best_param.get('epochs'),
    optimizer=best_param.get('optimizer')
    )

In [29]:
accuracies = cross_val_score(estimator = best_classifier,X = X_train,y = y_train,cv = 10,n_jobs = -1)

 - 0s 1ms/step - loss: 0.4358 - accuracy: 0.7762


In [30]:
mean = accuracies.mean()
variance = accuracies.var()
print(f"mean: {mean}; variance: {variance}")

mean: 0.7876938462257386; variance: 0.0005344077568081218


In [31]:
best_classifier.fit(X_train, y_train, batch_size = best_param.get('batch_size'), epochs = best_param.get('epochs'))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x158060460>

In [39]:
y_pred = (best_classifier.predict(X_test) > 0.5).astype("bool")
y_pred

array([[False],
       [False],
       [False],
       ...,
       [ True],
       [False],
       [False]])

# Confusion Matrix

In [36]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[3279,  171],
       [ 170,  880]])

In [44]:
new_pred = best_classifier.predict(sc.transform(np.array([[0.26,0.7 ,3., 238., 6., 0.,0.,0.,0., 0.,0.,0.,0.,0.,1.,0., 0.,1.]])))
new_pred = (new_pred > 0.5)
print(new_pred)

[[False]]




# Save and Load model

In [41]:
# Create model folder
!mkdir model/
# saving model
json_model = best_classifier.model.to_json()
open('model/retention_prediction_model.json', 'w').write(json_model)
# saving weights
best_classifier.model.save_weights('model/retention_prediction_weights.h5', overwrite=True)

mkdir: model/: File exists


In [42]:
# loading model
from tensorflow.python.keras.models import model_from_json

model = model_from_json(open('model/retention_prediction_model.json').read())
model.load_weights('model/retention_prediction_weights.h5')

# dont forget to compile your model
model.compile(loss='binary_crossentropy', optimizer='adam')

In [43]:
model_pred = model.predict(sc.transform(np.array([[0.26,0.7 ,3., 238., 6., 0.,0.,0.,0., 0.,0.,0.,0.,0.,1.,0., 0.,1.]])))
print(model_pred)

[[0.24392584]]
