In [15]:
import pandas as pd
from fairlearn.metrics import MetricFrame, selection_rate, false_negative_rate, false_positive_rate
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [16]:
# Load the preprocessed table; the relative path is resolved against the
# notebook's working directory.
csv_path = 'adult_preprocessed.csv'
df = pd.read_csv(csv_path)
# Echo the (rows, columns) tuple as a quick check that the load succeeded.
print(df.shape)

(48842, 43)


In [17]:
# Separate the target from the predictors. Only `income` is removed, so the
# `sex`, `race_White` and `race_Black` columns remain part of X.
y = df['income']
X = df.drop(columns='income')
sex_col = df['sex']

# Hold out 20% of the rows. The split is stratified on the target and uses a
# fixed seed so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

# Look up the sensitive attributes for the held-out rows by index label so
# that they stay aligned with X_test / y_test.
test_rows = X_test.index
verify_sex = sex_col.loc[test_rows]

verify_race_white = df['race_White'].loc[test_rows]
verify_race_black = df['race_Black'].loc[test_rows]

In [18]:
# Train the random forest whose held-out predictions are evaluated afterwards.
#
# Hyper-parameters:
#   n_estimators=100         number of trees in the forest
#   max_depth=15             upper bound on the depth of each tree
#   min_samples_leaf=5       minimum number of samples required at a leaf
#   class_weight='balanced'  weight classes inversely to their frequency in y_train
#   random_state=42          fixed seed so the fitted forest is reproducible
#   n_jobs=-1                use all available processors
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    min_samples_leaf=5,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)

rf_model.fit(X_train, y_train)
# Class-label predictions for the held-out rows, in the same order as X_test.
y_pred = rf_model.predict(X_test)


In [19]:
# Metrics reported for every group; the keys become the column labels of the
# resulting per-group tables.
metrics = dict(
    accuracy=accuracy_score,
    selection_rate=selection_rate,
    FNR=false_negative_rate,
    FPR=false_positive_rate,
)

# Both audits share the same metrics, labels and predictions; only the
# sensitive feature used for grouping differs.
shared_inputs = {'metrics': metrics, 'y_true': y_test, 'y_pred': y_pred}

mf_sex = MetricFrame(sensitive_features=verify_sex, **shared_inputs)

mf_black = MetricFrame(sensitive_features=verify_race_black, **shared_inputs)

# Print one table per sensitive feature, one row per group value.
for frame in (mf_sex, mf_black):
    print(frame.by_group)


     accuracy  selection_rate       FNR       FPR
sex                                              
0    0.912427        0.131668  0.300836  0.061026
1    0.761416        0.487282  0.089944  0.303277
            accuracy  selection_rate       FNR       FPR
race_Black                                              
False       0.803565        0.389009  0.117329  0.223031
True        0.884615        0.188150  0.213115  0.101190
