In [4]:
from scipy.stats import percentileofscore
import numpy as np

In [5]:
def gen_cdf(mean, var, size=10000):
    """
    Draw a random sample from a gaussian distribution.

    Despite the name, this returns raw draws, not CDF values.

    Inputs:
        mean (float): the mean of the gaussian distribution
        var (float): the variance of the gaussian distribution (must be >= 0)
        size (int or tuple of ints): shape of the returned sample
    Return:
        (np.ndarray): a sample from the gaussian distribution
    Raises:
        ValueError: if var is negative

    """
    if np.any(np.less(var, 0)):
        raise ValueError("var must be non-negative")
    # np.random.normal takes the standard deviation as its scale argument,
    # so the variance has to be converted before sampling.
    return np.random.normal(mean, np.sqrt(var), size)


def get_xi(f0, f1, mu_0, mu_1, fpr=0.8):
    """
    Compute xi by explicitly shifting both samples.

    Inputs:
        f0, f1 (np.ndarray): samples from gaussian distributions
        mu_0, mu_1 (float): the magnitude of the shift added to f0 and f1
        fpr (float): the predefined false positive rate; used as the
            quantile level at which the threshold is read off the shifted
            f0 sample

    Return:
        xi (float): percentile rank (rescaled to [0, 1]) of the threshold
            within f1, minus its percentile rank within the shifted f1
    """
    # Set up distribution
    f0_ = f0 + mu_0
    f1_ = f1 + mu_1

    # Threshold taken from the shifted f0 sample only; the unshifted
    # threshold is not needed for xi.
    t_tilde = np.quantile(f0_, fpr)

    # Rank of the same threshold in the unshifted and the shifted f1 sample
    q = percentileofscore(f1, t_tilde) / 100
    q_star = percentileofscore(f1_, t_tilde) / 100

    # Get xi
    return q - q_star


def get_xi_approx(f0, f1, mu_0, mu_1, fpr=0.8):
    """
    Compute xi without building the shifted samples.

    Inputs:
        f0, f1 (np.ndarray): samples from gaussian distributions
        mu_0, mu_1 (float): the magnitude of the shift for cdf
        fpr (float): the predefined false positive rate; used as the
            quantile level at which the threshold is read off f0

    Return:
        xi_approx (float): difference between the percentile ranks of
            (t_q + mu_0) and (t_q + mu_0 - mu_1) within f1, rescaled
            to [0, 1]
    """
    # Get threshold on the unshifted f0 sample
    t_q = np.quantile(f0, fpr)

    # The shifts are applied to the threshold instead of to the samples,
    # so no shifted copies of f0 / f1 are allocated.
    t_shifted = t_q + mu_0

    # Get recall change
    xi_approx = (percentileofscore(f1, t_shifted) - percentileofscore(f1, t_shifted - mu_1)) / 100

    return xi_approx

In [18]:
# Draw the two samples (f0 first, then f1, so the RNG is consumed in the
# same order as before).
# NOTE(review): no seed is set anywhere visible, so the printed values
# vary from run to run.
f0, f1 = gen_cdf(0.2, 1), gen_cdf(0.4, 1.4)

# Shifts for f0 and f1 respectively.
mu_0, mu_1 = 0.1, 0.2

# Evaluate get_xi and get_xi_approx on the same inputs and print both.
xi, xi_approx = get_xi(f0, f1, mu_0, mu_1), get_xi_approx(f0, f1, mu_0, mu_1)
print(f'xi: {round(xi, 3)}\nxi_approx: {round(xi_approx, 3)}')

xi: 0.049
xi_approx: 0.049


In [19]:
# Fresh samples, drawn in the order f0 then f1.
f0, f1 = gen_cdf(0.2, 1), gen_cdf(0.4, 1.4)

# Both samples are shifted by the same amount in this run.
mu_0, mu_1 = 0.1, 0.1

# Evaluate get_xi and get_xi_approx on the same inputs and print both.
xi, xi_approx = get_xi(f0, f1, mu_0, mu_1), get_xi_approx(f0, f1, mu_0, mu_1)
print(f'xi: {round(xi, 3)}\nxi_approx: {round(xi_approx, 3)}')

xi: 0.021
xi_approx: 0.021


In [21]:
# Fresh samples, drawn in the order f0 then f1.
f0, f1 = gen_cdf(0.2, 1), gen_cdf(0.4, 1.4)

# Only f0 is shifted here; f1 is left where it is.
mu_0, mu_1 = 0.1, 0

# Evaluate get_xi and get_xi_approx on the same inputs and print both.
xi, xi_approx = get_xi(f0, f1, mu_0, mu_1), get_xi_approx(f0, f1, mu_0, mu_1)
print(f'xi: {round(xi, 3)}\nxi_approx: {round(xi_approx, 3)}')
print('xi will be 0 when mu_1 = 0!')

xi: 0.0
xi_approx: 0.0
xi will be 0 when mu_1 = 0!
