In [4]:
import numpy as np
from numba import njit
from scipy.stats import rankdata
from roc_auc import roc_auc_score as fast_roc_auc_score
from sklearn.metrics import roc_auc_score as skl_roc_auc_score


@njit
def _auc(y_true, pred_ranks):
    """Compute ROC AUC from labels and prediction ranks (numba-jitted).

    Rank-sum formulation: take the sum of the ranks held by the positive
    samples, subtract the smallest value that sum could have
    (1 + 2 + ... + n_pos), and divide by the number of
    positive/negative pairs.

    The positive count is obtained as ``np.sum(y_true)``, so labels are
    expected to be 0/1; entries equal to 1 are the positives. When only
    one class is present the pair count is zero and the ratio is undefined.

    Args:
        y_true (np.array): ground-truth labels, shape=(N, )
        pred_ranks (np.array): ranks of the predictions, shape=(N, )

    Returns:
        float: roc auc value
    """
    positives = np.sum(y_true)
    negatives = len(y_true) - positives
    # Rank mass actually held by the positive samples.
    positive_rank_sum = np.sum(pred_ranks[y_true == 1])
    # Lowest rank mass that many positives could possibly hold.
    lowest_rank_sum = positives * (positives + 1) / 2
    pair_count = positives * negatives
    return (positive_rank_sum - lowest_rank_sum) / pair_count


def numba_roc_auc_score(y_true, y_pred):
    """ROC AUC function wrapper.

    Ranks the predictions first, because numba cannot jit the rank
    computation, then passes the labels and ranks to the jitted ``_auc``.

    Args:
        y_true (array-like): ground-truth labels, shape=(N, )
        y_pred (array-like): predicted values, shape=(N, )

    Returns:
        float: roc auc value

    Raises:
        ValueError: if ``y_true`` and ``y_pred`` do not contain the same
            number of elements.
    """
    # Coerce to ndarrays so lists and other array-likes reach the jitted
    # kernel as arrays; inputs that are already ndarrays pass through as-is.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # A length mismatch would otherwise only surface inside jitted code,
    # where the labels are used as a mask over the ranks.
    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements, "
            f"got {y_true.size} and {y_pred.size}"
        )

    pred_ranks = rankdata(y_pred)
    return _auc(y_true, pred_ranks)

In [2]:
# Synthetic benchmark data shared by the timing cells below.
# NOTE(review): no random seed is set in this cell, so the arrays (and the
# AUC values computed from them) differ on every fresh run.
num_data = 100000
# Random binary labels in {0, 1} (the upper bound 2 is exclusive), as float64.
labels = np.random.randint(0, 2, (num_data, )).astype(np.float64)
# Random scores, generated independently of the labels.
preds = np.random.rand(num_data)

In [14]:
# Time scikit-learn's roc_auc_score, then display its value so it can be
# compared with the other implementations.
%timeit skl_roc_auc_score(labels, preds)
skl_roc_auc_score(labels, preds)

31 ms ± 337 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


0.4989452229489488

In [15]:
# Time the scipy-rank + numba implementation defined in this notebook, then
# display its value for comparison.
%timeit numba_roc_auc_score(labels, preds)
numba_roc_auc_score(labels, preds)

12.7 ms ± 185 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


0.4989452229489488

In [13]:
# Time the roc_auc_score imported from the `roc_auc` module, then display its
# value for comparison.
%timeit fast_roc_auc_score(labels, preds)
fast_roc_auc_score(labels, preds)

4.76 ms ± 55.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


0.4989452229489488