In [108]:
import numpy as np
import pandas as pd

def GAP_vector(pred, conf, true, return_x=False):
    '''
    Compute Global Average Precision (GAP, a.k.a. micro average precision),
    the metric used in the Google Landmark Recognition competition.

    This function takes predictions, labels and confidence scores as vectors.
    In both predictions and ground-truth, use None/np.nan for "no label".

    All predictions are ranked together by descending confidence. For each
    correct prediction, the precision at its rank is accumulated, and the
    total is divided by the number of samples that have a ground-truth label.
    Rows with no prediction (abstentions) do not occupy a rank, so they never
    dilute the precision of the predictions that were actually made.

    Args:
        pred: vector of integer-coded predictions
        conf: vector of probability or confidence scores for pred
        true: vector of integer-coded labels for ground truth
        return_x: also return the data frame used in the calculation

    Returns:
        GAP score, or the tuple (GAP score, data frame) when return_x is True.
        The score is NaN when no sample has a ground-truth label.
    '''
    x = pd.DataFrame({'pred': pred, 'conf': conf, 'true': true})
    # A stable sort keeps the result deterministic when confidences tie.
    x = x.sort_values('conf', ascending=False, na_position='last',
                      kind='mergesort')
    # A missing value never compares equal, so abstentions are never correct.
    x['correct'] = (x.true == x.pred).astype(int)
    # Rank among the predictions actually made; clip avoids 0/0 on leading
    # abstentions (their numerator is 0 anyway).
    n_predicted = x.pred.notna().cumsum().clip(lower=1)
    x['prec_k'] = x.correct.cumsum() / n_predicted
    x['term'] = x.prec_k * x.correct
    n_labelled = x.true.count()  # count() ignores missing ground truth
    if n_labelled > 0:
        gap = x.term.sum() / n_labelled
    else:
        # The metric is undefined without any labelled sample.
        gap = float('nan')
    if return_x:
        return gap, x
    return gap

In [109]:
# Generate uniformly random predictions and labels over n_classes classes,
# with random confidence scores, as a chance-level baseline for GAP.
np.random.seed(1234)  # fixed seed so the draws below are reproducible
n_classes = 100
n_samples = 10000

# Earlier 3-class variant, kept for reference:
#ypred = np.random.choice([1,2,3], 1000)
#ytrue = np.random.choice([1,2,3], 1000)
ypred = np.random.randint(0,n_classes,n_samples)
ytrue = np.random.randint(0,n_classes,n_samples)
conf = np.random.random(n_samples)

# return_x=True also returns the sorted working frame `x`
gap, x = GAP_vector(ypred, conf, ytrue, True)
gap

0.00011478827440959902

In [112]:
# Plain accuracy: fraction of samples whose prediction equals the label
(ytrue == ypred).mean()

0.0103

## GAP and Accuracy

Let us consider a model with 95% accuracy: out of every 100 predictions, 95 are correct. Let us further assume that every correct prediction is made with confidence 0.95 and every incorrect prediction with confidence 0.5.

Let us run some simulations to see how GAP behaves under these assumptions.

In [102]:
# Display the current confidence-score array (NumPy abbreviates long arrays)
conf 

array([0.57213483, 0.55061015, 0.86021271, ..., 0.39534697, 0.35008443,
       0.29125641])

In [113]:
# Zero-based row positions of x; GAP_vector adds 1 to these to get ranks
np.arange(x.shape[0])

array([   0,    1,    2, ..., 9997, 9998, 9999])

## Do we benefit from calibrated models?