In [None]:
import numpy as np
from scipy.stats import mode

def get_baseline(y):
    """Return the fraction of entries in `y` equal to its most frequent value.

    This is the accuracy a classifier would reach by always predicting the
    majority class.

    Parameters
    ----------
    y : array-like
        Labels. The input is flattened, so a column vector is treated the
        same as a 1-D array.

    Returns
    -------
    float
        Share of the most frequent value, in the interval (0, 1].

    Raises
    ------
    ValueError
        If `y` is empty.
    """
    labels = np.asarray(y).ravel()
    if labels.size == 0:
        raise ValueError("get_baseline() requires a non-empty array of labels")
    # Counting with np.unique avoids scipy.stats.mode, whose return shape for
    # 1-D input changed between SciPy releases (array vs. scalar), which made
    # the former `mode(y)[0][0]` indexing version-dependent.
    _, counts = np.unique(labels, return_counts=True)
    return counts.max() / labels.size

# Majority-class share of each label set, ordered train / test / validation.
baseline = [get_baseline(labels) for labels in (y_train, y_test, y_val)]

def evaluate_classifier(clf, X_train, X_test, X_val):
    """Score a fitted classifier on the train, test and validation splits.

    For each split the function computes accuracy, macro-averaged F1 and
    ROC AUC, and draws the ROC curve of all three splits on one figure.

    The true labels are NOT passed in: they are read from the module-level
    names `y_train`, `y_test` and `y_val`, and the 'Baseline' row comes from
    the module-level `baseline` list. Those must be defined, and must match
    the feature matrices given here, before this function is called.

    Parameters
    ----------
    clf : estimator
        Fitted classifier exposing `predict` and `predict_proba`. Column 1 of
        `predict_proba` is used as the score for the ROC curve and AUC.
    X_train, X_test, X_val : array-like
        Feature matrices for the three splits.

    Returns
    -------
    pandas.DataFrame
        Rows 'Baseline', 'Accuracy', 'F1', 'AUC'; columns 'Train', 'Test',
        'Validation'.
    """
    # (legend label, features, true labels) for each split, in column order.
    splits = [
        ('train', X_train, y_train),
        ('test', X_test, y_test),
        ('validation', X_val, y_val),
    ]

    accuracy = []
    f1 = []
    auc_scores = []  # not named `auc`, to avoid shadowing sklearn.metrics.auc
    roc_curves = []

    for label, features, y_true in splits:
        y_pred = clf.predict(features)
        # Probability assigned to the class in column 1 of predict_proba.
        y_prob = clf.predict_proba(features)[:, 1]

        accuracy.append(accuracy_score(y_true, y_pred))
        f1.append(f1_score(y_true, y_pred, average='macro'))
        auc_scores.append(roc_auc_score(y_true, y_prob))

        fpr, tpr, _ = roc_curve(y_true, y_prob)
        roc_curves.append((label, fpr, tpr))

    # ROC curve. NOTE: figure number 1 is reused, so calling this function
    # repeatedly while that figure is still open draws onto the same axes.
    plt.figure(1)
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    for label, fpr, tpr in roc_curves:
        plt.plot(fpr, tpr, label=label)
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    # Use the bare class name; str(clf.__class__) is "<class 'pkg.Name'>",
    # which previously leaked a trailing "'>" into the title.
    plt.title('ROC curve for ' + clf.__class__.__name__)
    plt.legend(loc='best')

    return pd.DataFrame(
        [baseline, accuracy, f1, auc_scores],
        index=['Baseline', 'Accuracy', 'F1', 'AUC'],
        columns=['Train', 'Test', 'Validation'],
    )
