In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

In [5]:
# Download the HR attrition dataset and one-hot encode its categorical
# (object/category) columns; numeric columns pass through unchanged.
hr = pd.read_csv(
    'https://raw.githubusercontent.com/skathirmani/datasets/master/HR%20Analytics.csv'
)
hr_dummies = pd.get_dummies(hr)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
train, test = train_test_split(hr_dummies, random_state=100, test_size=0.3)

In [48]:
# Separate the feature matrix from the 'Attrition' target in both partitions.
train_x, train_y = train.drop(columns='Attrition'), train['Attrition']
test_x, test_y = test.drop(columns='Attrition'), test['Attrition']

# Boost 300 weak learners (scikit-learn's default base estimator), seeded so
# the fitted ensemble is repeatable. The fitted model is the cell's output.
model = AdaBoostClassifier(n_estimators=300, random_state=100)
model.fit(train_x, train_y)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=300, random_state=100)

In [49]:
# Score the held-out rows, then print overall accuracy followed by the
# per-class precision / recall / F1 table (one item per line).
pred_test = model.predict(test_x)
print(accuracy_score(test_y, pred_test),
      classification_report(test_y, pred_test),
      sep='\n')

0.8594104308390023
             precision    recall  f1-score   support

          0       0.90      0.94      0.92       371
          1       0.57      0.44      0.50        70

avg / total       0.85      0.86      0.85       441



In [15]:
def draw_tree(model, columns):
    import pydotplus
    from sklearn.externals.six import StringIO
    from IPython.display import Image
    import os
    from sklearn import tree
    
    graphviz_path = 'C:\Program Files (x86)\Graphviz2.38/bin/'
    os.environ["PATH"] += os.pathsep + graphviz_path

    dot_data = StringIO()
    tree.export_graphviz(model,
                         out_file=dot_data,
                         feature_names=columns)
    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())  
    return Image(graph.create_png())

In [43]:
from IPython.display import display

# Show one of the boosted trees. A cell only auto-renders its final
# expression, so the image returned by draw_tree must be displayed explicitly;
# as a bare call ahead of another expression it is silently discarded.
display(draw_tree(model.estimators_[250], train_x.columns))

# Classification error recorded for each of the first five boosting rounds.
model.estimator_errors_[:5]

array([0.16229349, 0.34244608, 0.36434203, 0.40214249, 0.40601838])

## Sensitivity & Specificity

In [53]:
from sklearn.metrics import confusion_matrix

# Class probabilities for every test row. predict_proba orders its columns by
# model.classes_ (presumably [0, 1], matching the labels in the report above),
# hence the 'Neg' / 'Pos' names.
pred_probs = pd.DataFrame(model.predict_proba(test_x),
                          columns=['Neg', 'Pos'])

# Label a row positive when its score exceeds the 0.5 cutoff; a vectorised
# comparison replaces the per-row Python lambda.
pred_test = pred_probs['Pos'].gt(0.5).astype('int64')

# labels=[0, 1] pins the matrix to 2x2, so the four-way unpack cannot fail
# when one of the classes happens to be absent.
tn, fp, fn, tp = confusion_matrix(test_y, pred_test, labels=[0, 1]).ravel()
sensitivity = tp / (tp + fn)  # true-positive rate (recall of class 1)
specificity = tn / (tn + fp)  # true-negative rate (recall of class 0)
print(sensitivity, specificity)

0.44285714285714284 0.9380053908355795


In [73]:
# Repeat the evaluation with a lower cutoff of 0.4.
# NOTE(review): the recorded output (1.0 0.0) means every test row is labelled
# positive at this cutoff. Presumably the AdaBoost scores sit in a narrow band
# around 0.5 -- check pred_probs['Pos'].describe() before choosing a cutoff.
pred_test = pred_probs['Pos'].gt(0.4).astype('int64')

# labels=[0, 1] pins the matrix to 2x2, so the four-way unpack cannot fail
# when one of the classes happens to be absent.
tn, fp, fn, tp = confusion_matrix(test_y, pred_test, labels=[0, 1]).ravel()
sensitivity = tp / (tp + fn)  # true-positive rate (recall of class 1)
specificity = tn / (tn + fp)  # true-negative rate (recall of class 0)
print(sensitivity, specificity)

1.0 0.0
