In [None]:
import pandas as pd

df = pd.read_csv('/Users/charlessanthakumar/fairness-awareness-ML/data/processed/adult/adult_model_ready.csv')


Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income,split
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K,train
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K,train
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K,train
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K,train
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45189,33,Private,245211,Bachelors,13,Never-married,Prof-specialty,Own-child,White,Male,0,0,40,United-States,<=50K,test
45190,39,Private,215419,Bachelors,13,Divorced,Prof-specialty,Not-in-family,White,Female,0,0,36,United-States,<=50K,test
45191,38,Private,374983,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,50,United-States,<=50K,test
45192,44,Private,83891,Bachelors,13,Divorced,Adm-clerical,Own-child,Asian-Pac-Islander,Male,5455,0,40,United-States,<=50K,test


In [6]:
df['sex'].value_counts()
df.groupby('sex')['income'].value_counts(normalize=True)

sex     income
Female  <=50K     0.886347
        >50K      0.113653
Male    <=50K     0.687404
        >50K      0.312596
Name: proportion, dtype: float64

In [8]:
df['race'].value_counts()
df.groupby('race')['income'].value_counts(normalize=True)

race                income
Amer-Indian-Eskimo  <=50K     0.878161
                    >50K      0.121839
Asian-Pac-Islander  <=50K     0.716590
                    >50K      0.283410
Black               <=50K     0.873669
                    >50K      0.126331
Other               <=50K     0.872521
                    >50K      0.127479
White               <=50K     0.737505
                    >50K      0.262495
Name: proportion, dtype: float64

In [None]:
import numpy as np
import pandas as pd

def demographic_parity(y, sensitive):
    """
    Computes P(Y=1 | group) for each group.
    """
    df = pd.DataFrame({"y": y, "sensitive": sensitive})
    return df.groupby("sensitive")["y"].mean().to_dict()


def statistical_parity_difference(y, sensitive, privileged_group):
    """
    SPD = P(Y=1 | privileged) - P(Y=1 | unprivileged)
    """
    dp = demographic_parity(y, sensitive)
    
    privileged_rate = dp[privileged_group]
    # unprivileged = the other group
    unprivileged_group = [g for g in dp.keys() if g != privileged_group][0]
    unprivileged_rate = dp[unprivileged_group]
    
    return privileged_rate - unprivileged_rate


def disparate_impact(y, sensitive, privileged_group):
    """
    DI = P(Y=1 | unprivileged) / P(Y=1 | privileged)
    """
    dp = demographic_parity(y, sensitive)

    privileged_rate = dp[privileged_group]
    unprivileged_group = [g for g in dp.keys() if g != privileged_group][0]
    unprivaged_rate = dp[unprivileged_group]

    # avoid division-by-zero
    if privileged_rate == 0:
        return np.nan

    return unprivaged_rate / privileged_rate


def compute_fairness_metrics(y_true, y_pred, sensitive, privileged_group):
    
    results = {}

    # DP (based on predictions)
    results["DP"] = demographic_parity(y_pred, sensitive)

    # SPD (predictions vs sensitive attribute)
    results["SPD"] = statistical_parity_difference(y_pred, sensitive, privileged_group)

    # DI
    results["DI"] = disparate_impact(y_pred, sensitive, privileged_group)

    return results

In [11]:
y_true = (df['income'] == '>50K').astype(int)   # convert to 0/1
sensitive_sex = df['sex']                       # “Male” / “Female”

dp_sex = demographic_parity(y_true, sensitive_sex)
spd_sex = statistical_parity_difference(y_true, sensitive_sex, privileged_group="Male")
di_sex = disparate_impact(y_true, sensitive_sex, privileged_group="Male")

print("=== Raw fairness (SEX) ===")
print("DP:", dp_sex)
print("SPD:", spd_sex)
print("DI:", di_sex)

=== Raw fairness (SEX) ===
DP: {'Female': 0.11365338781069118, 'Male': 0.3125962830640139}
SPD: 0.1989428952533227
DI: 0.3635788202491745


In [12]:
df['race_binary'] = df['race'].apply(lambda r: "White" if r == "White" else "Non-White")

In [13]:
sensitive_race = df['race_binary']

dp_race = demographic_parity(y_true, sensitive_race)
spd_race = statistical_parity_difference(y_true, sensitive_race, privileged_group="White")
di_race = disparate_impact(y_true, sensitive_race, privileged_group="White")

print("=== Raw fairness (RACE) ===")
print("DP:", dp_race)
print("SPD:", spd_race)
print("DI:", di_race)

=== Raw fairness (RACE) ===
DP: {'Non-White': 0.15846129491847397, 'White': 0.26249453404326467}
SPD: 0.1040332391247907
DI: 0.6036746460113191


In [14]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Target
y = (df['income'] == '>50K').astype(int)

# Features (exclude target and sensitive attributes)
X = df.drop(columns=['income', 'sex', 'race', 'race_binary'])

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Preprocessing: separate numeric and categorical columns
numeric_cols = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_cols = X_train.select_dtypes(include=['object']).columns.tolist()

preprocess = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

In [15]:
from sklearn.linear_model import LogisticRegression

lr_clf = Pipeline(steps=[
    ("preprocess", preprocess),
    ("model", LogisticRegression(max_iter=500))
])

lr_clf.fit(X_train, y_train)
y_pred_lr = lr_clf.predict(X_test)

In [16]:
from sklearn.ensemble import RandomForestClassifier

rf_clf = Pipeline(steps=[
    ("preprocess", preprocess),
    ("model", RandomForestClassifier(
        n_estimators=300, 
        max_depth=None, 
        random_state=42,
        n_jobs=-1
    ))
])

rf_clf.fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_test)

In [17]:
sex_test = df.loc[X_test.index, 'sex']
race_test = df.loc[X_test.index, 'race_binary']

In [22]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def print_model_results(model_name, y_true, y_pred, sex_test, race_test):
    print(f"\n===== {model_name} PERFORMANCE =====")
    print("Accuracy :", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred))
    print("Recall   :", recall_score(y_true, y_pred))
    print("F1-score :", f1_score(y_true, y_pred))

    print(f"\n===== {model_name} FAIRNESS: SEX =====")
    print(compute_fairness_metrics(
        y_true=y_true,
        y_pred=y_pred,
        sensitive=sex_test,
        privileged_group="Male"
    ))

    print(f"\n===== {model_name} FAIRNESS: RACE =====")
    print(compute_fairness_metrics(
        y_true=y_true,
        y_pred=y_pred,
        sensitive=race_test,
        privileged_group="White"
    ))


# Now print everything
print_model_results("Logistic Regression", y_test, y_pred_lr, sex_test, race_test)
print_model_results("Random Forest", y_test, y_pred_rf, sex_test, race_test)


===== Logistic Regression PERFORMANCE =====
Accuracy : 0.8474201256748385
Precision: 0.7404103479036575
Recall   : 0.5924339757316203
F1-score : 0.6582077716098335

===== Logistic Regression FAIRNESS: SEX =====
{'DP': {'Female': 0.08263114976895895, 'Male': 0.2543307086614173}, 'SPD': 0.17169955889245836, 'DI': 0.32489647122779525}

===== Logistic Regression FAIRNESS: RACE =====
{'DP': {'Non-White': 0.12538040170419965, 'White': 0.2108533554266777}, 'SPD': 0.08547295372247807, 'DI': 0.5946331821491905}

===== Random Forest PERFORMANCE =====
Accuracy : 0.8502522347110364
Precision: 0.7355687606112055
Recall   : 0.6184867951463241
F1-score : 0.6719658782473827

===== Random Forest FAIRNESS: SEX =====
{'DP': {'Female': 0.08833922261484099, 'Male': 0.26653543307086613}, 'SPD': 0.17819621045602513, 'DI': 0.3314351926760652}

===== Random Forest FAIRNESS: RACE =====
{'DP': {'Non-White': 0.1363359707851491, 'White': 0.2207953603976802}, 'SPD': 0.08445938961253108, 'DI': 0.6174766106479361}
