In [1]:
import numpy as np
from sklearn import metrics

In [5]:
def compute_auc(v_truth, v_preds):
    """Compute the area under the ROC curve from the ground truth `v_truth`
    and the predictions `v_preds`.

    Parameters
    ----------
    v_truth : array-like
        Ground-truth labels; entries equal to 1 are the positive class
        (`pos_label=1` is passed to `roc_curve`). Any shape; it is
        flattened before scoring.
    v_preds : array-like
        Predicted scores, one per entry of `v_truth`. Flattened as well.

    Returns
    -------
    float
        The AUC, or NaN when the labels sum to 0 (no positives, which
        includes empty input) or to the number of labels (no negatives),
        because the ROC curve is undefined in those cases.
    """
    # Coerce up front so plain lists/tuples work as well as ndarrays; the
    # code below relies on the ndarray methods `.sum()` and `.flatten()`.
    v_truth = np.asarray(v_truth)
    v_preds = np.asarray(v_preds)

    def _reject_auc(ground_truth):
        # Reject if all positives (1) or all negatives (0).
        n_pos = ground_truth.sum()
        return (n_pos == 0 or n_pos == np.size(ground_truth))

    if _reject_auc(v_truth):
        return float('nan')
    fpr, tpr, _ = metrics.roc_curve(v_truth.flatten(),
                                    v_preds.flatten(),
                                    pos_label=1)
    return metrics.auc(fpr, tpr)



def compute_auc2(y_true, y_prob):
    """Compute the ROC AUC directly from ranks (Mann-Whitney U statistic).

    The AUC is the fraction of (positive, negative) pairs in which the
    positive sample has the higher score, with tied pairs counted as 1/2.

    Parameters
    ----------
    y_true : array-like
        Binary labels; entries equal to 1 are positives, every other
        entry is treated as a negative. Flattened before use.
    y_prob : array-like
        Scores, one per label. Flattened before use.

    Returns
    -------
    float
        The AUC in [0, 1], or NaN when there are no positives or no
        negatives (including empty input), where the AUC is undefined.
    """
    is_pos = (np.asarray(y_true) == 1).ravel()
    scores = np.asarray(y_prob).ravel()

    n_pos = int(np.count_nonzero(is_pos))
    n_neg = int(is_pos.size) - n_pos
    if n_pos == 0 or n_neg == 0:
        # No (positive, negative) pair exists; avoid dividing by zero.
        return float('nan')

    # Group equal scores so that ties share one rank. `group_idx[i]` is the
    # index of sample i's score among the sorted distinct scores.
    _, group_idx, group_sizes = np.unique(scores,
                                          return_inverse=True,
                                          return_counts=True)
    group_sizes = group_sizes.astype(np.int64)

    # A tie group occupying 1-based ranks (end - size + 1) .. end has the
    # midrank end - (size - 1) / 2. Work with twice the midrank so that all
    # arithmetic stays in exact integers.
    doubled_midranks = 2 * np.cumsum(group_sizes) - group_sizes + 1
    doubled_rank_sum = int(doubled_midranks[group_idx][is_pos].sum())

    # U = (rank sum of positives) - n_pos * (n_pos + 1) / 2, here doubled.
    doubled_u = doubled_rank_sum - n_pos * (n_pos + 1)
    return doubled_u / (2 * n_pos * n_neg)

def nan_compute_auc(v_truth, v_preds):
    """NaN-tolerant wrapper around `compute_auc`.

    Pairs in which either the label or the prediction is NaN are dropped
    before scoring.

    Returns a tuple `((n_pos, n_neg), auc)`: `n_pos` is the sum of the
    surviving labels, `n_neg` is the number of surviving labels minus
    `n_pos`, and `auc` is `compute_auc` evaluated on the surviving pairs.
    """
    # Keep a pair only when neither of its two entries is NaN.
    keep = ~(np.isnan(v_truth) | np.isnan(v_preds))
    truth_kept, preds_kept = v_truth[keep], v_preds[keep]

    n_pos = truth_kept.sum()
    n_neg = np.size(truth_kept) - n_pos
    class_counts = (n_pos, n_neg)

    return class_counts, compute_auc(truth_kept, preds_kept)

def nan_compute_auc2(v_truth, v_preds):
    """NaN-tolerant wrapper around `compute_auc2`.

    Pairs in which either the label or the prediction is NaN are dropped
    before scoring.

    Returns a tuple `((n_pos, n_neg), auc)`: `n_pos` is the sum of the
    surviving labels, `n_neg` is the number of surviving labels minus
    `n_pos`, and `auc` is `compute_auc2` evaluated on the surviving pairs.
    """
    # Mark every pair with a NaN on either side, then invert to select.
    has_nan = np.isnan(v_truth) | np.isnan(v_preds)
    valid = ~has_nan
    labels = v_truth[valid]
    scores = v_preds[valid]

    positives = labels.sum()
    negatives = np.size(labels) - positives

    return (positives, negatives), compute_auc2(labels, scores)



In [11]:
# Synthetic benchmark data. Seed the global NumPy RNG so the draws below are
# repeatable; the values depend on the order of the three draws that follow.
np.random.seed(0)
# Ground truth: one 0/1 draw per sample with P(1) = 0.5, for 10**8 samples.
gt = np.random.binomial(1, p=0.5, size=10**8)
# Candidate "low" scores, uniform on [0.2, 0.7); used where gt == 0.
pr_l = np.random.uniform(low=0.2, high=0.7, size=10**8)
# Candidate "high" scores, uniform on [0.4, 0.9); used where gt == 1.
pr_h = np.random.uniform(low=0.4, high=0.9, size=10**8)
# Final score per sample: the high draw for positives, the low draw for
# negatives. The two ranges overlap only on [0.4, 0.7), so the expected AUC
# is 1 - 0.5 * 0.6**2 = 0.82.
pr = np.where(gt, pr_h, pr_l)

In [9]:
# Score the synthetic data with the scikit-learn based implementation;
# the result is ((n_pos, n_neg), auc).
auc1 = nan_compute_auc(gt, pr)

In [10]:
# NOTE(review): the output recorded for this cell totals 10**7 samples
# (5001790 + 4998210), while the data cell draws 10**8 and the recorded
# nan_compute_auc2 output totals 10**8. This output appears to predate the
# current data cell -- restart the kernel and run all cells to refresh it
# before comparing the two implementations.
auc1

((5001790, 4998210), 0.8198338020120195)

In [12]:
# Score the same data with the hand-written implementation;
# the result is ((n_pos, n_neg), auc).
auc2 = nan_compute_auc2(gt, pr)

In [13]:
# Display ((n_pos, n_neg), auc) from the hand-written implementation.
auc2

((49999058, 50000942), 0.8200049047264956)