In [None]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
# Load the HR attrition dataset. The path is relative, so the CSV has to sit in
# the notebook's working directory.
hr_df = pd.read_csv("HR_Employee_Attrition_Data.csv")

In [None]:
# Preview the first rows to sanity-check the load.
hr_df.head()

In [None]:
# Summary statistics (pandas restricts these to numeric columns by default when any exist).
hr_df.describe()

In [None]:
# Column dtypes and non-null counts.
hr_df.info()

In [None]:
# Remove the two columns that are not used as model features.
# Rebinding (instead of inplace=True) combined with errors='ignore' keeps this
# cell idempotent: re-running it once the columns are already gone no longer
# raises KeyError.
hr_df = hr_df.drop(columns=['EmployeeCount', 'EmployeeNumber'], errors='ignore')

In [None]:
# Re-check the frame after the column drop.
hr_df.head()

In [None]:
# Total of the Attrition column.
# NOTE(review): this equals the number of attrition cases only if the column is
# coded 0/1 — the 0 and 1 labels in the classification reports further down
# suggest so; confirm against the data.
hr_df['Attrition'].sum()

In [None]:
# Feature matrix: every column except the target.
X = hr_df.drop(columns="Attrition")

# Target vector. Uses plain column selection rather than DataFrame.pop(): pop()
# removed "Attrition" from hr_df as a side effect, so re-running this cell
# raised KeyError on the drop above. hr_df is now left untouched.
y = hr_df["Attrition"]

In [None]:
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, train_labels, test_labels = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=1,
)

## Decision Tree Classifier

In [None]:
# Unconstrained decision tree using Gini impurity.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn permutes the features at every split, so an unseeded tree can
# differ from run to run when candidate splits tie.
dt_model = DecisionTreeClassifier(criterion='gini', random_state=1)

In [None]:
# Fit the tree on the training split; the fitted estimator is echoed as the cell output.
dt_model.fit(X_train, train_labels)

In [None]:
from sklearn import tree

# Display names for the target classes. export_graphviz expects them in
# ascending class order, so 'No' labels the lower class value.
train_char_label = ['No', 'Yes']

# Write the fitted tree to a Graphviz .dot file.
# - Raw string: '\h' is not a valid escape sequence, so the non-raw literal
#   triggered an invalid-escape warning; the resulting path is unchanged.
# - with-block: the handle is closed even if export_graphviz raises.
# - export_graphviz returns None when out_file is supplied, so there is no
#   return value worth keeping.
# NOTE(review): hard-coded Windows drive path; consider a configurable,
# relative output location.
with open(r'd:\hr_tree.dot', 'w') as HR_Tree_File:
    tree.export_graphviz(
        dt_model,
        out_file=HR_Tree_File,
        feature_names=list(X_train),
        class_names=train_char_label,
    )

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate pruning settings for the tree (4 x 3 x 3 = 36 combinations).
param_grid = {
    'max_depth': [7, 8, 9, 10],
    'min_samples_leaf': [15, 20, 25],
    'min_samples_split': [45, 60, 75]
}

# Fresh, unfitted tree used as the search template. Note that this rebinds
# dt_model. random_state is pinned so that best_params_ and the refit estimator
# are reproducible between runs.
dt_model = DecisionTreeClassifier(random_state=1)

# Exhaustive search over param_grid with 3-fold cross-validation.
grid_search = GridSearchCV(estimator=dt_model, param_grid=param_grid, cv=3)

In [None]:
# Run the cross-validated search on the training split.
grid_search.fit(X_train, train_labels)

In [None]:
# Parameter combination that scored best in cross-validation.
grid_search.best_params_

In [None]:
# Estimator refit on the training data with the best parameters
# (GridSearchCV refits by default).
best_grid = grid_search.best_estimator_

In [None]:
# Predict on both splits so train and test metrics can be compared.
ytrain_predict = best_grid.predict(X_train)
ytest_predict = best_grid.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision / recall / F1 on the training split.
print(classification_report(train_labels,ytrain_predict))

In [None]:
# Same report on the held-out test split; a large gap versus the training
# report indicates overfitting.
print(classification_report(test_labels,ytest_predict))

## Random Forest Classifier

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space for the forest (2 x 3 x 2 x 2 x 2 = 48 combinations).
# The integer max_features values are absolute feature counts per split.
param_grid = {
    'max_depth': [7, 8],
    'max_features': [11, 12, 13],
    'min_samples_leaf': [20, 25],
    'min_samples_split': [60, 75],
    'n_estimators': [101, 301]
}

# random_state is pinned because bootstrap sampling and per-split feature
# subsampling are random; without a seed the selected parameters and the
# reported metrics change on every run.
rfcl = RandomForestClassifier(random_state=1)

# Exhaustive search over param_grid with 3-fold cross-validation.
# This rebinds grid_search, replacing the decision-tree search object.
grid_search = GridSearchCV(estimator=rfcl, param_grid=param_grid, cv=3)



In [None]:
# Run the random-forest search on the training split.
grid_search.fit(X_train, train_labels)

In [None]:
# Parameter combination that scored best in cross-validation.
grid_search.best_params_

In [None]:
# Forest refit with the best parameters; rebinds best_grid, replacing the tuned tree.
best_grid = grid_search.best_estimator_

In [None]:
# Predict on both splits; these overwrite the decision-tree predictions.
ytrain_predict = best_grid.predict(X_train)
ytest_predict = best_grid.predict(X_test)

In [None]:
# Per-class precision / recall / F1 on the training split.
print(classification_report(train_labels,ytrain_predict))

In [None]:
# Same report on the held-out test split.
print(classification_report(test_labels,ytest_predict))

## MLP Classifier (Artificial Neural Network)

In [29]:
from sklearn.preprocessing import StandardScaler

# Standardise the features: the scaler is fitted on the training split only and
# the same fitted scaler is then applied to the test split.
sc = StandardScaler()
X_trains, X_tests = sc.fit_transform(X_train), sc.transform(X_test)

In [30]:
# Search space for the network: a single architecture (three hidden layers of
# 100 units) with 2 activations x 2 solvers x 2 tolerances = 8 combinations.
param_grid = {
    'hidden_layer_sizes': [(100,100,100)],
    'activation': ['logistic', 'relu'],
    'solver': ['sgd', 'adam'],
    'tol': [0.1,0.01],
    'max_iter' : [10000]
}

# NOTE: the name `rfcl` is carried over from the random-forest section; here it
# holds the MLP template estimator.
# random_state is pinned because weight initialisation and mini-batch sampling
# are random; without a seed the chosen parameters and the reported metrics are
# not reproducible.
rfcl = MLPClassifier(random_state=1)

# Exhaustive search over param_grid with 3-fold cross-validation.
grid_search = GridSearchCV(estimator=rfcl, param_grid=param_grid, cv=3)

In [31]:
# Run the MLP search on the standardised training features.
grid_search.fit(X_trains, train_labels)

GridSearchCV(cv=3, estimator=MLPClassifier(),
             param_grid={'activation': ['logistic', 'relu'],
                         'hidden_layer_sizes': [(100, 100, 100)],
                         'max_iter': [10000], 'solver': ['sgd', 'adam'],
                         'tol': [0.1, 0.01]})

In [32]:
# Parameter combination that scored best in cross-validation.
grid_search.best_params_

{'activation': 'relu',
 'hidden_layer_sizes': (100, 100, 100),
 'max_iter': 10000,
 'solver': 'adam',
 'tol': 0.01}

In [33]:
# Network refit with the best parameters; rebinds best_grid, replacing the tuned forest.
best_grid = grid_search.best_estimator_

In [34]:
# Predict on the standardised splits (the network was trained on scaled features,
# so the unscaled X_train / X_test must not be used here).
ytrain_predict = best_grid.predict(X_trains)
ytest_predict = best_grid.predict(X_tests)

In [35]:
# Per-class precision / recall / F1 on the training split.
print(classification_report(train_labels,ytrain_predict))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1737
           1       1.00      1.00      1.00       321

    accuracy                           1.00      2058
   macro avg       1.00      1.00      1.00      2058
weighted avg       1.00      1.00      1.00      2058



In [36]:
# Same report on the held-out test split; compare with the training report
# above to judge how well the network generalises.
print(classification_report(test_labels,ytest_predict))

              precision    recall  f1-score   support

           0       0.95      0.98      0.96       729
           1       0.87      0.75      0.81       153

    accuracy                           0.94       882
   macro avg       0.91      0.86      0.88       882
weighted avg       0.94      0.94      0.94       882

