In [10]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Keep every checkpoint in its own folder and name each file after the epoch that produced it
# (the {epoch:02d} placeholder is filled in with a zero-padded epoch number)
checkpoint_dir = "checkpoints/"
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = checkpoint_dir + "weights.{epoch:02d}.hdf5"

In [2]:
# Load the HR employee attrition records from the CSV file into a DataFrame and preview it
attrition_csv = "Resources/HR-Employee-Attrition.csv"
attrition_df = pd.read_csv(attrition_csv)
attrition_df


Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,4,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,1,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,36,No,Travel_Frequently,884,Research & Development,23,2,Medical,1,2061,...,3,80,1,17,3,3,5,2,0,3
1466,39,No,Travel_Rarely,613,Research & Development,6,1,Medical,1,2062,...,1,80,1,9,5,3,7,7,1,7
1467,27,No,Travel_Rarely,155,Research & Development,4,3,Life Sciences,1,2064,...,2,80,1,6,0,3,6,2,0,3
1468,49,No,Travel_Frequently,1023,Sales,2,3,Medical,1,2065,...,4,80,0,17,3,2,9,6,0,8


In [3]:
# Collect the names of all columns stored with the "object" dtype (the text/categorical variables)
categorical_list = [
    column_name
    for column_name, column_dtype in attrition_df.dtypes.items()
    if column_dtype == "object"
]
categorical_list

['Attrition',
 'BusinessTravel',
 'Department',
 'EducationField',
 'Gender',
 'JobRole',
 'MaritalStatus',
 'Over18',
 'OverTime']

In [4]:
# Before encoding the categorical variables with Scikit-learn's OneHotEncoder, confirm that
# none of them has more than 10 unique values (anything above that would require bucketing first).
for categorical_column in categorical_list:
    # Column name, its per-category row counts, then a separator line
    print(categorical_column, attrition_df[categorical_column].value_counts(), "----", sep="\n")

# Number of distinct values in each categorical column
attrition_df[categorical_list].nunique()

# Result: no column exceeds 10 unique values, so every one of them can be one-hot encoded without bucketing


Attrition
No     1233
Yes     237
Name: Attrition, dtype: int64
----
BusinessTravel
Travel_Rarely        1043
Travel_Frequently     277
Non-Travel            150
Name: BusinessTravel, dtype: int64
----
Department
Research & Development    961
Sales                     446
Human Resources            63
Name: Department, dtype: int64
----
EducationField
Life Sciences       606
Medical             464
Marketing           159
Technical Degree    132
Other                82
Human Resources      27
Name: EducationField, dtype: int64
----
Gender
Male      882
Female    588
Name: Gender, dtype: int64
----
JobRole
Sales Executive              326
Research Scientist           292
Laboratory Technician        259
Manufacturing Director       145
Healthcare Representative    131
Manager                      102
Sales Representative          83
Research Director             80
Human Resources               52
Name: JobRole, dtype: int64
----
MaritalStatus
Married     673
Single      470
Divorced   

Attrition         2
BusinessTravel    3
Department        3
EducationField    6
Gender            2
JobRole           9
MaritalStatus     3
Over18            1
OverTime          2
dtype: int64

In [5]:
# Create the one-hot encoder with dense (ndarray) output.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4 removed the old
# name, so try the new keyword first and fall back to the old one on earlier installations.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the categorical columns, labelling the result as it is built:
#   columns - the generated "<variable>_<value>" names reported by the fitted encoder
#   index   - the source frame's own index, so the later join lines up row-for-row even if
#             attrition_df is ever filtered or re-indexed before this cell
encode_df = pd.DataFrame(
    enc.fit_transform(attrition_df[categorical_list]),
    columns=enc.get_feature_names_out(categorical_list),
    index=attrition_df.index,
)
encode_df

Unnamed: 0,Attrition_No,Attrition_Yes,BusinessTravel_Non-Travel,BusinessTravel_Travel_Frequently,BusinessTravel_Travel_Rarely,Department_Human Resources,Department_Research & Development,Department_Sales,EducationField_Human Resources,EducationField_Life Sciences,...,JobRole_Research Director,JobRole_Research Scientist,JobRole_Sales Executive,JobRole_Sales Representative,MaritalStatus_Divorced,MaritalStatus_Married,MaritalStatus_Single,Over18_Y,OverTime_No,OverTime_Yes
0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
1,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
2,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
3,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
4,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
1466,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
1467,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
1468,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0


In [6]:
# Attach the encoded indicator columns to the original frame, then remove the raw
# categorical columns they replace
joined_df = attrition_df.join(encode_df)
combined_df = joined_df.drop(columns=categorical_list)

# Column count after the merge
len(combined_df.columns)

57

In [7]:
# The model predicts whether a person is at risk for attrition, so the "Attrition" columns
# have to be separated from the rest of the input data.
# Attrition is dichotomous (one of two values): "Attrition_Yes" alone is the target, and the
# redundant "Attrition_No" column is dropped from the features along with it.
attrition_columns = ["Attrition_Yes", "Attrition_No"]
y = combined_df["Attrition_Yes"].values
X = combined_df.drop(columns=attrition_columns).values

# Split into training and testing groups BEFORE scaling, so the testing data cannot
# influence the standardization function
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)



In [8]:
# Fit a StandardScaler on the training rows only.
# Note: despite its name, X_scaled holds the fitted scaler object, not scaled data.
X_scaled = StandardScaler()
X_scaled.fit(X_train)

# Standardize both splits with the statistics learned from the training rows
X_train_scaled, X_test_scaled = (X_scaled.transform(split) for split in (X_train, X_test))

# Row counts of the training and testing sets, one per line
print(len(X_train_scaled), len(X_test_scaled), sep="\n")


1102
368


In [9]:
# Build the deep neural network
#
# Input:         one input per feature column of the scaled training data
# Hidden layers: two small Dense layers (8 neurons, then 5), both using relu so the network
#                can pick up nonlinear characteristics of the input values
# Output layer:  a single sigmoid neuron, because attrition / no attrition is a two-outcome
#                problem; its output is the predicted probability that an employee is at
#                risk for attrition

number_input_features = len(X_train_scaled[0])
hidden_nodes_layer_1 = 8
hidden_nodes_layer_2 = 5

nn = tf.keras.models.Sequential(
    [
        # First hidden layer (also declares the input size)
        tf.keras.layers.Dense(units=hidden_nodes_layer_1, input_dim=number_input_features, activation="relu"),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer_2, activation="relu"),
        # Output layer
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Check structure of model
nn.summary()

# How the parameter counts in the summary come about:
#   layer 1: 55 input variables * 8 neurons = 440 weights + 8 bias terms = 448
#   layer 2:  8 incoming values * 5 neurons =  40 weights + 5 bias terms =  45
#   output:   5 incoming values * 1 neuron  =   5 weights + 1 bias term  =   6
#   total:  448 + 45 + 6 = 499


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 8)                 448       
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 45        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 6         
Total params: 499
Trainable params: 499
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Compile the model: binary cross-entropy is the loss for a binary classifier, and accuracy is
# a reasonable metric for one (a model making continuous predictions would need another metric)
nn.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# A Keras callback defines a set of functions that are applied at specific stages of training.
# Callbacks can create log files, force training to stop, send status messages or, as here,
# save model checkpoints.
#
# Checkpoint callback settings:
#   filepath          - the checkpoint directory and file naming pattern defined earlier
#   verbose=1         - print a message each time a checkpoint is written
#   save_weights_only - store only the weights; saving the full model every time can fill up
#                       a hard drive very quickly
#   save_freq="epoch" - write a checkpoint after every epoch
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq="epoch",
)

# Train the model
# NOTE(review): fit() continues from the model's current weights, so re-running this cell
# without first re-creating `nn` trains further instead of starting from scratch.
training_callbacks = [cp_callback]
fit_model = nn.fit(X_train_scaled, y_train, epochs=100, callbacks=training_callbacks)

# Evaluate the model on the held-out test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Accuracy is the share of ALL test employees whose attrition label was predicted correctly
# (about 85% for the epoch-100 weights when they are evaluated further down).
# Caveat: the value counts above show 1233 "No" vs 237 "Yes" (~84% "No"), so accuracy alone
# says little about how well the at-risk employees themselves are identified.



Train on 1102 samples
Epoch 1/100
  32/1102 [..............................] - ETA: 8s - loss: 0.0895 - acc: 0.9688
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 2/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0847 - acc: 1.0000
Epoch 00002: saving model to checkpoints/weights.02.hdf5
Epoch 3/100
Epoch 00003: saving model to checkpoints/weights.03.hdf5
Epoch 4/100
  32/1102 [..............................] - ETA: 0s - loss: 0.1282 - acc: 0.9688
Epoch 00004: saving model to checkpoints/weights.04.hdf5
Epoch 5/100
  32/1102 [..............................] - ETA: 0s - loss: 0.1307 - acc: 0.9375
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/100
  32/1102 [..............................] - ETA: 0s - loss: 0.1046 - acc: 0.9688
Epoch 00006: saving model to checkpoints/weights.06.hdf5
Epoch 7/100
  32/1102 [..............................] - ETA: 0s - loss: 0.1610 - acc: 0.9688
Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epo

Epoch 35/100
  32/1102 [..............................] - ETA: 0s - loss: 0.1405 - acc: 0.9375
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/100
  32/1102 [..............................] - ETA: 0s - loss: 0.1419 - acc: 0.9375
Epoch 00036: saving model to checkpoints/weights.36.hdf5
Epoch 37/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0759 - acc: 0.9688
Epoch 00037: saving model to checkpoints/weights.37.hdf5
Epoch 38/100
  32/1102 [..............................] - ETA: 0s - loss: 0.1293 - acc: 0.9688
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 39/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0502 - acc: 1.0000
Epoch 00039: saving model to checkpoints/weights.39.hdf5
Epoch 40/100
  32/1102 [..............................] - ETA: 0s - loss: 0.2695 - acc: 0.9375
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/100
  32/1102 [..............................] - ETA: 0s - loss: 0.2651 - acc: 

Epoch 69/100
  32/1102 [..............................] - ETA: 0s - loss: 0.2175 - acc: 0.9375
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 70/100
  32/1102 [..............................] - ETA: 0s - loss: 0.2334 - acc: 0.8750
Epoch 00070: saving model to checkpoints/weights.70.hdf5
Epoch 71/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0280 - acc: 1.0000
Epoch 00071: saving model to checkpoints/weights.71.hdf5
Epoch 72/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0645 - acc: 0.9688
Epoch 00072: saving model to checkpoints/weights.72.hdf5
Epoch 73/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0584 - acc: 0.9688
Epoch 00073: saving model to checkpoints/weights.73.hdf5
Epoch 74/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0981 - acc: 0.9688
Epoch 00074: saving model to checkpoints/weights.74.hdf5
Epoch 75/100
  32/1102 [..............................] - ETA: 0s - loss: 0.0917 - acc: 

In [14]:
# Saved weights can be restored with the Keras Sequential model's load_weights method, so the
# model does not have to be retrained.
# To test this, define a second deep learning model with the same layer structure and restore
# its weights from a checkpoint instead of training it.

number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5

# Same architecture as the trained network: two relu hidden layers and a sigmoid output
nn_new = tf.keras.models.Sequential(
    [
        # First hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
        # Output layer
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Compile the model
nn_new.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Restore the weights saved after epoch 100 rather than training
nn_new.load_weights("checkpoints/weights.100.hdf5")

# Evaluate the restored model using the test data
restored_scores = nn_new.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = restored_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


368/368 - 0s - loss: 0.7946 - acc: 0.8505
Loss: 0.7946283027691685, Accuracy: 0.8505434989929199


### Using the checkpoints, we were able to regenerate the model instantaneously and confirm the model was able to produce the exact same results.

### Checkpoints are a great way to save model weights during training, but they fall short when it comes to sharing a trained model. In data science, trained models are published in scientific papers, deployed in software, open-sourced on GitHub, not to mention passed along to colleagues. In these cases, it is not practical to pass along only the model weights: the recipient would first have to rebuild the exact same model structure before the weights could be loaded, which can cause frustration and confusion. Instead, we can use the Keras Sequential model's save method to export the entire model (weights, structure, and configuration settings) to a Hierarchical Data Format (HDF5) file. Once saved, anyone can import the exact same trained model into their environment by using the Keras load_model method and use it for analysis.

### Even though we can also save full neural network and deep learning models using Keras checkpoints, each full model file is almost ten times the size of a weights-only file. For those with limited hard drive space, saving a full model at every checkpoint is not feasible.


In [15]:
# Write the restored model to a single HDF5 file (saves complete model configuration)
exported_model_file = "trained_attrition.h5"
nn_new.save(exported_model_file)

In [17]:
# Load the exported HDF5 file into a brand-new model object
nn_imported = tf.keras.models.load_model("trained_attrition.h5")

# Score the imported model on the test data
imported_scores = nn_imported.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = imported_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# The imported model still has the same .79 loss and .85 accuracy

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


368/368 - 0s - loss: 0.7946 - acc: 0.8505
Loss: 0.7946283027691685, Accuracy: 0.8505434989929199
