# Setup

In [81]:
# imports
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
from fairlearn.metrics import (
    MetricFrame, false_positive_rate, false_negative_rate, false_negative_rate_difference,
    false_positive_rate_difference, demographic_parity_ratio, equalized_odds_ratio, selection_rate_difference)

In [47]:
# Load the train, songs and artists tables from the local data/ directory
df_train = pd.read_csv("data/train.csv")
df_songs = pd.read_csv("data/songs.csv")

# Give the artist attributes an "artist_" prefix in a single rename;
# downstream cells refer to the gender column as "artist_gender".
df_artists = pd.read_csv("data/artists.csv").rename(
    columns={"gender": "artist_gender", "country": "artist_country"}
)

In [48]:
# Build one merged frame per seed: predictions joined with train rows,
# song metadata and artist metadata.
seeds = [85, 25, 121, 61, 73, 37, 97, 49, 109, 13]
merged_dfs = []

for seed in seeds:
    predictions = pd.read_csv(f"data/predictions/final/final_seed{seed}.csv")
    # NOTE(review): left merges duplicate prediction rows when the right-hand
    # keys are not unique — confirm song_id / artist_name are unique keys.
    df_merged = (
        predictions
        .merge(df_train, on=["song_id", "msno"], how="left")
        .merge(df_songs, on="song_id", how="left")
        .merge(df_artists, on="artist_name", how="left")
    )
    # Binarise the score: a probability of at least 0.5 is a positive prediction
    df_merged['prediction'] = df_merged['probability'] >= 0.5
    merged_dfs.append(df_merged)

# Overall Performance Metrics

In [49]:
# Display name -> metric callable taking (y_true, y_pred)
performance_metrics = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "FNR": false_negative_rate,
    "FPR": false_positive_rate,
}

# One dict of scores per merged frame (i.e. per seed), in seed order
rows = [
    {
        label: metric(seed_df["target"], seed_df["prediction"])
        for label, metric in performance_metrics.items()
    }
    for seed_df in merged_dfs
]

df_performance_metrics = pd.DataFrame(rows)


In [50]:
# Per-seed scores: one row per frame in merged_dfs, one column per metric
df_performance_metrics

Unnamed: 0,Accuracy,Precision,Recall,FNR,FPR
0,0.765222,0.775116,0.754591,0.245409,0.223906
1,0.766371,0.776813,0.754797,0.245203,0.221792
2,0.765735,0.776692,0.753244,0.246756,0.22149
3,0.765873,0.776507,0.753952,0.246048,0.221934
4,0.765625,0.776225,0.753759,0.246241,0.222238
5,0.766197,0.777379,0.753323,0.246677,0.220637
6,0.765919,0.776709,0.753725,0.246275,0.22161
7,0.766405,0.776757,0.754989,0.245011,0.22192
8,0.766265,0.777363,0.75354,0.24646,0.22072
9,0.767076,0.777753,0.755104,0.244896,0.22068


In [79]:
# Column-wise mean, i.e. each metric averaged over the per-seed rows
df_performance_metrics.mean()

Accuracy     0.766069
Precision    0.776731
Recall       0.754102
FNR          0.245898
FPR          0.221693
dtype: float64

# Artist Gender Metrics

In [65]:
# Restrict the analysis to these two genders; rows with a missing gender or
# with low-count categories (non-binary, not applicable, etc.) are dropped.
analysis_genders = ['Male', 'Female']

# Independent copy per seed so later edits cannot touch merged_dfs
filtered_dfs = [
    seed_df.loc[seed_df['artist_gender'].isin(analysis_genders)].copy()
    for seed_df in merged_dfs
]

In [None]:
# Share of all positive predictions that falls on each gender:
# count positives per gender for every seed, average the counts across
# seeds, then express each gender's average as a percentage of the total.
counts_list = [
    seed_df[seed_df['prediction'] == 1].groupby('artist_gender').size()
    for seed_df in filtered_dfs
]

per_seed_counts = pd.concat(counts_list, axis=1)
average = per_seed_counts.mean(axis=1)

percent = (average / average.sum()) * 100
print(percent)

artist_gender
Female    44.65228
Male      55.34772
dtype: float64


In [105]:
# For each gender, the fraction of its rows predicted True vs. False,
# computed per seed and then averaged across seeds.
value_counts_list = [
    seed_df.groupby('artist_gender')['prediction'].value_counts(normalize=True)
    for seed_df in filtered_dfs
]

per_seed_proportions = pd.concat(value_counts_list, axis=1)
average_value_counts = per_seed_proportions.mean(axis=1)
average_value_counts

artist_gender  prediction
Female         False         0.513169
               True          0.486831
Male           True          0.506054
               False         0.493946
dtype: float64

In [None]:
# Per-gender performance: one MetricFrame per seed, evaluating every entry of
# performance_metrics with artist_gender as the sensitive feature.
metric_frames = [
    MetricFrame(
        metrics=performance_metrics,
        y_true=seed_df['target'],
        y_pred=seed_df['prediction'],
        sensitive_features=seed_df['artist_gender'],
    )
    for seed_df in filtered_dfs
]

In [78]:
# Stack the per-seed by-group tables, then average rows sharing the same
# index label (the gender) to get one averaged row per gender.
stacked_by_group = pd.concat([frame.by_group for frame in metric_frames])
by_group_averages = stacked_by_group.groupby(level=0).mean()
by_group_averages

Unnamed: 0_level_0,Accuracy,Precision,Recall,FNR,FPR
artist_gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,0.762354,0.771622,0.748136,0.251864,0.223308
Male,0.768591,0.782634,0.765376,0.234624,0.227962


In [85]:
def get_fairness_metrics(y_true, y_pred, sensitive_features):
    """Compute a fixed set of fairlearn group-fairness metrics.

    Parameters
    ----------
    y_true
        Ground-truth labels, forwarded unchanged to every metric.
    y_pred
        Predicted labels, forwarded unchanged to every metric.
    sensitive_features
        Group membership per sample, passed as the ``sensitive_features``
        keyword of every metric.

    Returns
    -------
    dict
        Maps 'FNR Difference', 'FPR Difference', 'Demographic Parity Ratio',
        'Equalized Odds Ratio' and 'Selection Rate Difference' to the value
        returned by the corresponding fairlearn metric.
    """
    group_kwargs = {'sensitive_features': sensitive_features}

    # Entries are evaluated top to bottom, matching the order of the keys.
    return {
        'FNR Difference': false_negative_rate_difference(y_true, y_pred, **group_kwargs),
        'FPR Difference': false_positive_rate_difference(y_true, y_pred, **group_kwargs),
        'Demographic Parity Ratio': demographic_parity_ratio(y_true, y_pred, **group_kwargs),
        'Equalized Odds Ratio': equalized_odds_ratio(y_true, y_pred, **group_kwargs),
        'Selection Rate Difference': selection_rate_difference(y_true, y_pred, **group_kwargs),
    }

In [None]:
# Fairness metrics: one dict of gender-disparity measures per seed
fairness_metrics = []

for df in filtered_dfs:
    per_seed_fairness = get_fairness_metrics(
        y_true=df['target'],
        y_pred=df['prediction'],
        sensitive_features=df['artist_gender'],
    )
    fairness_metrics.append(per_seed_fairness)

In [91]:
# Tabulate the collected dicts: one row per entry of fairness_metrics,
# one column per fairness metric name.
df_fairness_metrics = pd.DataFrame(fairness_metrics)
df_fairness_metrics


Unnamed: 0,FNR Difference,FPR Difference,Demographic Parity Ratio,Equalized Odds Ratio,Selection Rate Difference
0,0.017565,0.005379,0.960614,0.976609,0.019998
1,0.017474,0.005619,0.961341,0.975448,0.019593
2,0.018751,0.005246,0.958778,0.975473,0.020848
3,0.017508,0.003998,0.962172,0.977116,0.019142
4,0.016158,0.003126,0.964314,0.978873,0.018044
5,0.016273,0.004278,0.963813,0.978711,0.018276
6,0.016942,0.006481,0.96043,0.971592,0.02001
7,0.016199,0.004188,0.962764,0.978853,0.018854
8,0.018277,0.004368,0.962384,0.976129,0.019023
9,0.017257,0.003856,0.963539,0.977492,0.018441


In [92]:
# Column-wise mean, i.e. each fairness metric averaged over the per-seed rows
df_fairness_metrics.mean()

FNR Difference               0.017241
FPR Difference               0.004654
Demographic Parity Ratio     0.962015
Equalized Odds Ratio         0.976629
Selection Rate Difference    0.019223
dtype: float64