# Understanding the amex metric

The negative class has been subsampled for the dataset, so each negative case is assigned a 20x weight in the metric.

In [2]:
import pandas as pd

In [3]:
def amex_metric(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
    """Score predictions with the amex metric: the mean of two rank-based measures.

    The result is ``0.5 * (g + d)``, where ``g`` is the normalized weighted
    Gini and ``d`` is the share of positives captured within the top 4% of
    the total weight. Rows with ``target == 0`` carry a weight of 20; every
    other row carries a weight of 1.

    Args:
        y_true: Frame with a ``target`` column.
        y_pred: Frame with a ``prediction`` column. The two frames are
            concatenated column-wise, so pandas aligns them on their index.

    Returns:
        The average of the two components.
    """

    def ranked_with_weights(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> pd.DataFrame:
        """Join labels and predictions, order by prediction (highest first), add weights."""
        ranked = (pd.concat([y_true, y_pred], axis='columns')
                  .sort_values('prediction', ascending=False))
        # Vectorized equivalent of `20 if x == 0 else 1` evaluated row by row.
        ranked['weight'] = ranked['target'].eq(0).map({True: 20, False: 1})
        return ranked

    def top_four_percent_captured(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
        """Fraction of all positives found before the cumulative weight passes 4% of the total."""
        ranked = ranked_with_weights(y_true, y_pred)
        # int() truncates, so the cutoff is 4% of the total weight rounded down.
        four_pct_cutoff = int(0.04 * ranked['weight'].sum())
        within_cutoff = ranked['weight'].cumsum() <= four_pct_cutoff
        is_positive = ranked['target'] == 1
        return (within_cutoff & is_positive).sum() / is_positive.sum()

    def weighted_gini(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
        """Weighted sum of the gap between the share of positives found and the share of weight seen."""
        ranked = ranked_with_weights(y_true, y_pred)
        weight = ranked['weight']
        weighted_target = ranked['target'] * weight
        # Cumulative share of total weight, in descending-prediction order.
        random = (weight / weight.sum()).cumsum()
        # Cumulative share of the weighted positives found so far.
        lorentz = weighted_target.cumsum() / weighted_target.sum()
        return ((lorentz - random) * weight).sum()

    def normalized_weighted_gini(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
        """Weighted Gini of the predictions divided by the weighted Gini of the labels themselves."""
        # weighted_gini ranks on a 'prediction' column, so pass the labels in under that name.
        y_true_pred = y_true.rename(columns={'target': 'prediction'})
        return weighted_gini(y_true, y_pred) / weighted_gini(y_true, y_true_pred)

    g = normalized_weighted_gini(y_true, y_pred)
    d = top_four_percent_captured(y_true, y_pred)

    return 0.5 * (g + d)

# Using simulation

I will set up two dataframes: true and prediction.  
The true dataframe will consist of 20 values: the first ten will be 0, and the second 10 will be 1.  
The prediction dataframe will vary based on the question I am seeking to answer.

In [7]:
# Ground truth: ten '0' labels followed by ten '1' labels, stored as strings.
y_true = pd.DataFrame({'target': ['0'] * 10 + ['1'] * 10})
y_true.head()

Unnamed: 0,target
0,0
1,0
2,0
3,0
4,0


Testing if the evaluation metric is more punishing of false negatives or false positives.

In [8]:
# Two constant prediction frames, stored as strings: every row '0' and every row '1'.
y_pred_neg = pd.DataFrame({'prediction': ['0'] * 20})
y_pred_pos = pd.DataFrame({'prediction': ['1'] * 20})

In [12]:
# The frames were built from strings; cast all three to float before scoring.
y_true, y_pred_neg, y_pred_pos = (
    frame.astype(float) for frame in (y_true, y_pred_neg, y_pred_pos)
)

In [13]:
# Both prediction frames are constant, so every row ties when ranked by prediction.
print(
    "The amex score for 50% false negatives is {}.".format(amex_metric(y_true, y_pred_neg)),
    "The amex score for 50% false positives is {}.".format(amex_metric(y_true, y_pred_pos)),
    sep="\n",
)

The amex score for 50% false negatives is 0.19214659685863866.
The amex score for 50% false positives is 0.19214659685863866.


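The evaluation metric scores these two cases identically. Note, however, that the metric only uses the predictions to rank the rows, and both prediction frames here are constant, so every row ties and the two frames produce the same ranking. Identical scores are therefore guaranteed, and this experiment on its own cannot show whether the metric punishes false negatives and false positives equally.  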

Walking through the metric one step at a time to see how it's computed

In [20]:
# Make sure the label column is called 'target'. y_true is built with a 'target'
# column earlier in this notebook, in which case this rename changes nothing.
# Rebinding the result (instead of inplace=True) avoids mutating a frame that
# was already displayed and keeps the cell safe to re-run.
y_true = y_true.rename(columns={'prediction': 'target'})
y_true.head()

Unnamed: 0,target
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [27]:
#Using a new "prediction" dataframe with predictions from 0-1

# Build the predictions as floats so that sort_values ranks them numerically;
# string values would be ordered lexicographically instead.
y_pred_scale = pd.DataFrame(
    {'prediction': [0, 0, 0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8,
                    1, 1, 0, 0, 0, 0, 0, 0, 0, 0]},
    dtype=float,
)
# Labels and predictions side by side, ordered from the highest prediction down.
df_gini = (pd.concat([y_true, y_pred_scale], axis='columns')
             .sort_values('prediction', ascending=False))

df_gini.head()

Unnamed: 0,target,prediction
10,1.0,1.0
11,1.0,1.0
8,0.0,0.8
9,0.0,0.8
6,0.0,0.6


In [28]:
# Rows with target == 0 (negatives) get weight 20; every other row gets weight 1
df_gini['weight'] = df_gini['target'].eq(0).map({True: 20, False: 1})

df_gini.head()

Unnamed: 0,target,prediction,weight
10,1.0,1.0,1
11,1.0,1.0,1
8,0.0,0.8,20
9,0.0,0.8,20
6,0.0,0.6,20


In [29]:
# Cumulative share of the total weight seen so far, walking down the rows in their
# current order (df_gini is sorted from the highest prediction to the lowest)
df_gini['random'] = df_gini['weight'].div(df_gini['weight'].sum()).cumsum()

df_gini

Unnamed: 0,target,prediction,weight,random
10,1.0,1.0,1,0.004762
11,1.0,1.0,1,0.009524
8,0.0,0.8,20,0.104762
9,0.0,0.8,20,0.2
6,0.0,0.6,20,0.295238
7,0.0,0.6,20,0.390476
4,0.0,0.4,20,0.485714
5,0.0,0.4,20,0.580952
2,0.0,0.2,20,0.67619
3,0.0,0.2,20,0.771429


In [30]:
# Total of target * weight over all rows; with 0/1 targets only positive rows contribute
total_pos = df_gini['target'].mul(df_gini['weight']).sum()

total_pos

10.0

In [31]:
# Running total of target * weight, i.e. how much positive weight has been found so far
df_gini['cum_pos_found'] = df_gini['target'].mul(df_gini['weight']).cumsum()

df_gini

Unnamed: 0,target,prediction,weight,random,cum_pos_found
10,1.0,1.0,1,0.004762,1.0
11,1.0,1.0,1,0.009524,2.0
8,0.0,0.8,20,0.104762,2.0
9,0.0,0.8,20,0.2,2.0
6,0.0,0.6,20,0.295238,2.0
7,0.0,0.6,20,0.390476,2.0
4,0.0,0.4,20,0.485714,2.0
5,0.0,0.4,20,0.580952,2.0
2,0.0,0.2,20,0.67619,2.0
3,0.0,0.2,20,0.771429,2.0


In [33]:
# Turn the running total into a proportion of total_pos: the share of positives found so far
df_gini['lorentz'] = df_gini['cum_pos_found'].div(total_pos)

df_gini

Unnamed: 0,target,prediction,weight,random,cum_pos_found,lorentz
10,1.0,1.0,1,0.004762,1.0,0.1
11,1.0,1.0,1,0.009524,2.0,0.2
8,0.0,0.8,20,0.104762,2.0,0.2
9,0.0,0.8,20,0.2,2.0,0.2
6,0.0,0.6,20,0.295238,2.0,0.2
7,0.0,0.6,20,0.390476,2.0,0.2
4,0.0,0.4,20,0.485714,2.0,0.2
5,0.0,0.4,20,0.580952,2.0,0.2
2,0.0,0.2,20,0.67619,2.0,0.2
3,0.0,0.2,20,0.771429,2.0,0.2


In [34]:
# Per-row contribution: the gap between the share of positives found ('lorentz')
# and the share of weight seen ('random'), scaled by the row's weight
df_gini['gini'] = df_gini['lorentz'].sub(df_gini['random']).mul(df_gini['weight'])

df_gini

Unnamed: 0,target,prediction,weight,random,cum_pos_found,lorentz,gini
10,1.0,1.0,1,0.004762,1.0,0.1,0.0952381
11,1.0,1.0,1,0.009524,2.0,0.2,0.1904762
8,0.0,0.8,20,0.104762,2.0,0.2,1.904762
9,0.0,0.8,20,0.2,2.0,0.2,5.551115e-16
6,0.0,0.6,20,0.295238,2.0,0.2,-1.904762
7,0.0,0.6,20,0.390476,2.0,0.2,-3.809524
4,0.0,0.4,20,0.485714,2.0,0.2,-5.714286
5,0.0,0.4,20,0.580952,2.0,0.2,-7.619048
2,0.0,0.2,20,0.67619,2.0,0.2,-9.52381
3,0.0,0.2,20,0.771429,2.0,0.2,-11.42857


In [36]:
# Add up the per-row values in the 'gini' column to get the weighted Gini before normalization
gini_sum = df_gini['gini'].sum()

gini_sum

-47.04761904761903

In [38]:
#Check my work with the weighted_gini function
def weighted_gini(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
    """Standalone copy of the weighted_gini helper nested inside amex_metric.

    Ranks the rows by 'prediction' (highest first), weights rows with
    target == 0 by 20 and all others by 1, and returns the weighted sum of
    the gap between the cumulative share of positives found and the
    cumulative share of weight seen.
    """
    ranked = pd.concat([y_true, y_pred], axis='columns')
    ranked = ranked.sort_values(by='prediction', ascending=False)
    weight = ranked['target'].eq(0).map({True: 20, False: 1})
    weight_share_seen = weight.div(weight.sum()).cumsum()
    weighted_target = ranked['target'].mul(weight)
    positive_share_found = weighted_target.cumsum().div(weighted_target.sum())
    return positive_share_found.sub(weight_share_seen).mul(weight).sum()
    
pred_gini = weighted_gini(y_true, y_pred_scale)
# Enforce the check: the function must reproduce the step-by-step total in gini_sum.
assert abs(pred_gini - gini_sum) < 1e-9, "weighted_gini disagrees with the manual walkthrough"
pred_gini

-47.04761904761903

In [40]:
# weighted_gini sorts on a 'prediction' column, so make a copy of the labels
# with 'target' renamed to 'prediction' to use the labels themselves as predictions
y_true_pred = y_true.rename(columns={'target': 'prediction'})

In [41]:
# Score the labels against themselves (passed in under the 'prediction' name) to get
# the weighted Gini of that ranking, then divide the model's Gini by it to normalize
true_gini = weighted_gini(y_true, y_true_pred)
print(true_gini)

normalized_gini = pred_gini / true_gini
normalized_gini

90.95238095238098


-0.5172774869109944

This marks the end of the normalized gini calculation, with a final result of -0.52.

In [42]:
# Top-four-percent component: put labels and predictions side by side again,
# ordered from the highest prediction to the lowest
df_top4 = (
    pd.concat([y_true, y_pred_scale], axis='columns')
    .sort_values(by='prediction', ascending=False)
)

In [43]:
# Rows with target == 0 (no default) get weight 20; every other row gets weight 1
df_top4['weight'] = df_top4['target'].eq(0).map({True: 20, False: 1})

df_top4

Unnamed: 0,target,prediction,weight
10,1.0,1.0,1
11,1.0,1.0,1
8,0.0,0.8,20
9,0.0,0.8,20
6,0.0,0.6,20
7,0.0,0.6,20
4,0.0,0.4,20
5,0.0,0.4,20
2,0.0,0.2,20
3,0.0,0.2,20


In [44]:
# Cutoff = 4% of the summed 'weight' column; int() truncates, so the value is rounded down
four_pct_cutoff = int(0.04 * df_top4['weight'].sum())

four_pct_cutoff

8

In [47]:
# Running total of the weights down the rows (in the frame's current, prediction-sorted order)
df_top4['weight_cumsum'] = df_top4['weight'].cumsum()

df_top4

Unnamed: 0,target,prediction,weight,weight_cumsum
10,1.0,1.0,1,1
11,1.0,1.0,1,2
8,0.0,0.8,20,22
9,0.0,0.8,20,42
6,0.0,0.6,20,62
7,0.0,0.6,20,82
4,0.0,0.4,20,102
5,0.0,0.4,20,122
2,0.0,0.2,20,142
3,0.0,0.2,20,162


In [48]:
# Keep only the rows whose running weight is still within the cutoff
# (less than or equal to 8 total weight in this example)
df_cutoff = df_top4[df_top4['weight_cumsum'].le(four_pct_cutoff)]

df_cutoff

Unnamed: 0,target,prediction,weight,weight_cumsum
10,1.0,1,1,1
11,1.0,1,1,2


In [50]:
# Positives inside the cutoff divided by positives in the whole frame
top_four = df_cutoff['target'].eq(1).sum() / df_top4['target'].eq(1).sum()

top_four

0.2

This marks the final value for the top four percent captured. Of all positive cases present in my data, 20% of them were captured in the top 4% of my predictions (according to the cumulative weight).

In [51]:
# Final score: the mean of the normalized Gini and the top-four-percent capture rate
final_metric = 0.5 * (normalized_gini + top_four)

final_metric

-0.1586387434554972