In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's legacy global RNG so the np.random draws below are repeatable
# on a fresh Restart & Run All.
np.random.seed(seed=666)

# Create some example data

In [3]:
# Number of synthetic observations; used as the length of every generated column.
size = 1000

In [4]:
# Ground-truth labels: one Bernoulli(0.5) draw per observation.
df = pd.DataFrame({'actual': np.random.binomial(n=1, p=0.5, size=size)})

### Random predictions

In [5]:
# Scores drawn uniformly on [0, 1), independent of the label.
df['p_rand'] = np.random.uniform(low=0, high=1, size=size)

### "Good" predictions

In [6]:
def goodify_pred(actual, p_rand, factor=1.5):
    """Pull random scores toward the true label to mimic a decent classifier.

    Scores of positives (``actual == 1``) are multiplied by ``factor`` and
    capped at 1; every other score is divided by ``factor``.

    Parameters
    ----------
    actual : scalar or array-like
        True labels; only the value 1 is treated as the positive class.
    p_rand : scalar or array-like
        Base scores, broadcastable against ``actual``.
    factor : float, default 1.5
        Strength of the shift.

    Returns
    -------
    numpy.ndarray
        Adjusted scores with the broadcast shape of the inputs.
    """
    actual = np.asarray(actual)
    p_rand = np.asarray(p_rand, dtype=float)
    # Whole-array operations replace the former np.vectorize wrapper, which
    # loops in Python and rejects empty input when otypes is not set.
    return np.where(actual == 1, np.minimum(p_rand * factor, 1.0), p_rand / factor)

In [7]:
# Mildly informative scores derived from the random ones.
df['p_good'] = goodify_pred(df['actual'], df['p_rand'])

### "Super" predictions

In [8]:
def superfy_pred(actual, p_rand, factor=5.0):
    """Pull random scores strongly toward the true label to mimic a near-perfect classifier.

    Scores of positives (``actual == 1``) are multiplied by ``factor`` and
    capped at 1; every other score is divided by ``factor``.

    Parameters
    ----------
    actual : scalar or array-like
        True labels; only the value 1 is treated as the positive class.
    p_rand : scalar or array-like
        Base scores, broadcastable against ``actual``.
    factor : float, default 5.0
        Strength of the shift.

    Returns
    -------
    numpy.ndarray
        Adjusted scores with the broadcast shape of the inputs.
    """
    actual = np.asarray(actual)
    p_rand = np.asarray(p_rand, dtype=float)
    # Whole-array operations replace the former np.vectorize wrapper, which
    # loops in Python and rejects empty input when otypes is not set.
    return np.where(actual == 1, np.minimum(p_rand * factor, 1.0), p_rand / factor)

In [9]:
# Strongly informative scores derived from the random ones.
df['p_super'] = superfy_pred(df['actual'], df['p_rand'])

In [10]:
# Preview the first ten rows: labels next to the three score columns.
df.head(10)

Unnamed: 0,actual,p_rand,p_good,p_super
0,1,0.645437,0.968155,1.0
1,1,0.564611,0.846916,1.0
2,1,0.753532,1.0,1.0
3,1,0.009851,0.014777,0.049257
4,1,0.66263,0.993945,1.0
5,0,0.227465,0.151643,0.045493
6,0,0.259312,0.172874,0.051862
7,0,0.968816,0.645877,0.193763
8,0,0.486736,0.324491,0.097347
9,1,0.649122,0.973682,1.0


In [11]:
# Inspect the dtype of each column of the assembled frame.
df.dtypes

actual       int32
p_rand     float64
p_good     float64
p_super    float64
dtype: object

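# Check AUC interpretation

AUC can be read as the probability that a randomly chosen positive example (`actual == 1`) receives a higher score than a randomly chosen negative example (`actual == 0`). For each set of predictions below, this probability is estimated by drawing random positive/negative pairs and is then compared with the value returned by `roc_auc_score`.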

In [12]:
# Number of random (positive, negative) pairs drawn in each simulation below.
n_simuls = 1000

### Random predictions

In [13]:
# One entry per simulated pair: 1 if the positive's p_rand beat the negative's, else 0.
s_rand = []

In [14]:
# The class-wise score pools never change between draws, so build them once
# instead of re-filtering df on every iteration.
rand_pos = df.loc[df.actual == 1, 'p_rand']
rand_neg = df.loc[df.actual == 0, 'p_rand']

# Start from a fresh list so that re-running this cell does not pile new
# outcomes on top of old ones (the ratio below divides by a fixed count).
s_rand = []
for _ in range(n_simuls):
    # Draw one positive first, then one negative.
    random_1 = rand_pos.sample(n=1).iloc[0]
    random_0 = rand_neg.sample(n=1).iloc[0]

    # Record 1 when the positive is ranked strictly above the negative, else 0.
    s_rand.append(int(random_1 > random_0))

In [15]:
# Share of simulated pairs in which the positive outscored the negative.
# Dividing by the number of recorded outcomes keeps this a valid proportion
# even if the list and n_simuls ever get out of step.
sum(s_rand) / len(s_rand)

0.521

In [16]:
# Reference value: AUC of the random scores against the true labels.
roc_auc_score(y_true=df['actual'], y_score=df['p_rand'])

0.5034980437385619

### "Good" predictions

In [17]:
# One entry per simulated pair: 1 if the positive's p_good beat the negative's, else 0.
s_good = []

In [18]:
# The class-wise score pools never change between draws, so build them once
# instead of re-filtering df on every iteration.
good_pos = df.loc[df.actual == 1, 'p_good']
good_neg = df.loc[df.actual == 0, 'p_good']

# Start from a fresh list so that re-running this cell does not pile new
# outcomes on top of old ones (the ratio below divides by a fixed count).
s_good = []
for _ in range(n_simuls):
    # Draw one positive first, then one negative.
    random_1 = good_pos.sample(n=1).iloc[0]
    random_0 = good_neg.sample(n=1).iloc[0]

    # Record 1 when the positive is ranked strictly above the negative, else 0.
    s_good.append(int(random_1 > random_0))

In [19]:
# Share of simulated pairs in which the positive outscored the negative.
# Dividing by the number of recorded outcomes keeps this a valid proportion
# even if the list and n_simuls ever get out of step.
sum(s_good) / len(s_good)

0.762

In [20]:
# Reference value: AUC of the "good" scores against the true labels.
roc_auc_score(y_true=df['actual'], y_score=df['p_good'])

0.7776469598856278

### "Super" predictions

In [21]:
# One entry per simulated pair: 1 if the positive's p_super beat the negative's, else 0.
s_super = []

In [22]:
# The class-wise score pools never change between draws, so build them once
# instead of re-filtering df on every iteration.
super_pos = df.loc[df.actual == 1, 'p_super']
super_neg = df.loc[df.actual == 0, 'p_super']

# Start from a fresh list so that re-running this cell does not pile new
# outcomes on top of old ones (the ratio below divides by a fixed count).
s_super = []
for _ in range(n_simuls):
    # Draw one positive first, then one negative.
    random_1 = super_pos.sample(n=1).iloc[0]
    random_0 = super_neg.sample(n=1).iloc[0]

    # Record 1 when the positive is ranked strictly above the negative, else 0.
    s_super.append(int(random_1 > random_0))

In [23]:
# Share of simulated pairs in which the positive outscored the negative.
# Dividing by the number of recorded outcomes keeps this a valid proportion
# even if the list and n_simuls ever get out of step.
sum(s_super) / len(s_super)

0.974

In [24]:
# Reference value: AUC of the "super" scores against the true labels.
roc_auc_score(y_true=df['actual'], y_score=df['p_super'])

0.9771375710321132