In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to local project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import jax
import jax.numpy as jnp

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from Temis.LogisticRegression import LogisticRegression
from sklearn.linear_model import LogisticRegression as SklearnLR

# ---------------------------------- #
# These metrics will be replaced with the ones from sklearn.
# from Temis.metrics.accuracy import compute_accuracy
# from Temis.metrics.precision import compute_precision
# from Temis.metrics.recall import compute_recall
# from Temis.metrics.f1_score import compute_f1
# ---------------------------------- #
from Temis.metrics.brier_score import compute_brier
from Temis.fairness_metrics.spd import compute_spd
from Temis.fairness_metrics.dir import compute_dir
from Temis.fairness_metrics.aod import compute_aod
from Temis.fairness_metrics.aaod import compute_aaod

from Temis.comparison_utils.cmp_fairness import compare_fairness 

In [3]:
import importlib
import Temis.LogisticRegression

# Re-execute the model module so that source edits made since the first import
# are picked up.
reloaded_module = importlib.reload(Temis.LogisticRegression)

# importlib.reload() does not rebind names that were created with
# `from module import name`, so the `LogisticRegression` name used by the
# cells below would otherwise keep pointing at the pre-reload class object.
LogisticRegression = reloaded_module.LogisticRegression

# Keep the module as the final expression so the cell still displays it.
reloaded_module

<module 'Temis.LogisticRegression' from 'E:\\Faculdade\\2025_2s\\machine-learning-algorithms\\Temis\\LogisticRegression.py'>

In [4]:
# Read the whitespace-separated, header-less `german.data-numeric` file into a
# DataFrame with 24 feature columns (`feature_1` .. `feature_24`) and a
# trailing `label` column.
path_dataset = os.path.join('..', 'datasets', 'german', 'german.data-numeric')
col_names = [f'feature_{i}' for i in range(1, 25)] + ['label']

print(f'Importing data from: {path_dataset}')
try:
    df = pd.read_csv(
        path_dataset,
        header=None,
        sep=r'\s+',  # raw string: '\s' is not a valid escape in a normal literal
        engine='python',
        names=col_names,
    )
except FileNotFoundError:
    print('Failed importing data')
    # Re-raise: without `df` every following cell would fail with a
    # misleading NameError instead of pointing at the missing file.
    raise
print('Data imported successfully')

Importing data from: ..\datasets\german\german.data-numeric
Data imported successfully


In [5]:
# Split features and labels into train (80%) and test (20%) partitions with a
# fixed seed so the split is reproducible.
#
# The dataset labels are encoded as {1, 2}; subtracting 1 maps them to {0, 1}.
# The shift is applied before splitting, which yields the same y_train / y_test
# as shifting each partition afterwards.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['label']).to_numpy(),
    df['label'].to_numpy() - 1,
    test_size=0.2,
    random_state=42,
)

In [6]:
# L2-penalised logistic regression with fair_penalty='Rpr' at weight 10.
model_fair = LogisticRegression(
    lr=0.001,
    epochs=100,
    penalty='l2',
    penalty_weight=1.0,
    fair_penalty='Rpr',
    fair_penalty_weight=10.0,
)

# Feature column 15 is passed to fit() as the sensitive attribute S.
model_fair.fit(X_train, y_train, S=X_train[:, 15], debug=True)

# NOTE(review): despite the `_test` suffix, both arrays below are computed on
# X_train (and are later compared against y_train). X_test / y_test are not
# used here - confirm whether evaluation on the held-out split was intended.
fair_pred_test = model_fair.predict(X_train)
fair_prob_test = model_fair.predict_probability(X_train)

----------------------------------------------------------------------
Epoch: 0
Gradient Magnitude - dw: 0.1811642348766327
Gradient Magnitude - db: 0.02542928233742714
Epoch: 10
Gradient Magnitude - dw: 0.4015483260154724
Gradient Magnitude - db: 0.06287898123264313
Epoch: 20
Gradient Magnitude - dw: 0.24536924064159393
Gradient Magnitude - db: 0.0889991819858551
Epoch: 30
Gradient Magnitude - dw: 0.1859787106513977
Gradient Magnitude - db: 0.028526149690151215
Epoch: 40
Gradient Magnitude - dw: 0.41344237327575684
Gradient Magnitude - db: 0.08597181737422943
Epoch: 50
Gradient Magnitude - dw: 0.6627362370491028
Gradient Magnitude - db: 0.1577656865119934
Epoch: 60
Gradient Magnitude - dw: 0.3165484666824341
Gradient Magnitude - db: 0.04999595135450363
Epoch: 70
Gradient Magnitude - dw: 0.11234809458255768
Gradient Magnitude - db: 0.008686643093824387
Epoch: 80
Gradient Magnitude - dw: 0.5107420682907104
Gradient Magnitude - db: 0.08645756542682648
Epoch: 90
Gradient Magnitude - dw: 0

In [7]:
# Baseline: same hyper-parameters as the other runs, but with the fairness
# penalty weight set to 0.
model_not_fair = LogisticRegression(
    lr=0.001,
    epochs=100,
    penalty='l2',
    penalty_weight=1.0,
    fair_penalty='Rpr',
    fair_penalty_weight=0.0,
)

# Feature column 15 is passed to fit() as the sensitive attribute S.
model_not_fair.fit(X_train, y_train, S=X_train[:, 15], debug=True)

# NOTE(review): despite the `_test` suffix, both arrays below are computed on
# X_train - confirm whether evaluation on the held-out split was intended.
not_fair_pred_test = model_not_fair.predict(X_train)
not_fair_prob_test = model_not_fair.predict_probability(X_train)

----------------------------------------------------------------------
Epoch: 0
Gradient Magnitude - dw: 0.5779361128807068
Gradient Magnitude - db: 0.11387259513139725
Epoch: 10
Gradient Magnitude - dw: 0.30837875604629517
Gradient Magnitude - db: 0.05851151794195175
Epoch: 20
Gradient Magnitude - dw: 0.46353089809417725
Gradient Magnitude - db: 0.05814595893025398
Epoch: 30
Gradient Magnitude - dw: 0.2165900319814682
Gradient Magnitude - db: 0.02102816477417946
Epoch: 40
Gradient Magnitude - dw: 0.8895630836486816
Gradient Magnitude - db: 0.14116643369197845
Epoch: 50
Gradient Magnitude - dw: 0.5347052812576294
Gradient Magnitude - db: 0.040967077016830444
Epoch: 60
Gradient Magnitude - dw: 0.9602672457695007
Gradient Magnitude - db: 0.1775478720664978
Epoch: 70
Gradient Magnitude - dw: 0.4313814342021942
Gradient Magnitude - db: 0.05994608253240585
Epoch: 80
Gradient Magnitude - dw: 0.2351866066455841
Gradient Magnitude - db: 0.0010108258575201035
Epoch: 90
Gradient Magnitude - dw: 

In [8]:
# Strongly fairness-regularised run: fair_penalty='Rpr' at weight 100.
model_ultra_fair = LogisticRegression(
    lr=0.001,
    epochs=100,
    penalty='l2',
    penalty_weight=1.0,
    fair_penalty='Rpr',
    fair_penalty_weight=100.0,
)

# Feature column 15 is passed to fit() as the sensitive attribute S.
model_ultra_fair.fit(X_train, y_train, S=X_train[:, 15], debug=True)

# NOTE(review): despite the `_test` suffix, both arrays below are computed on
# X_train - confirm whether evaluation on the held-out split was intended.
ultra_fair_pred_test = model_ultra_fair.predict(X_train)
ultra_fair_prob_test = model_ultra_fair.predict_probability(X_train)

----------------------------------------------------------------------
Epoch: 0
Gradient Magnitude - dw: 0.4447218179702759
Gradient Magnitude - db: 0.11646933853626251
Epoch: 10
Gradient Magnitude - dw: 1.9002001285552979
Gradient Magnitude - db: 0.22531665861606598
Epoch: 20
Gradient Magnitude - dw: 1.1322040557861328
Gradient Magnitude - db: 0.22229507565498352
Epoch: 30
Gradient Magnitude - dw: 0.1727854609489441
Gradient Magnitude - db: 0.011097218841314316
Epoch: 40
Gradient Magnitude - dw: 0.16672305762767792
Gradient Magnitude - db: 0.042769309133291245
Epoch: 50
Gradient Magnitude - dw: 1.8357433080673218
Gradient Magnitude - db: 0.10701902210712433
Epoch: 60
Gradient Magnitude - dw: 0.5382842421531677
Gradient Magnitude - db: 0.11099079251289368
Epoch: 70
Gradient Magnitude - dw: 1.668519377708435
Gradient Magnitude - db: 0.060682713985443115
Epoch: 80
Gradient Magnitude - dw: 0.9916301965713501
Gradient Magnitude - db: 0.17534077167510986
Epoch: 90
Gradient Magnitude - dw: 0

In [16]:
# Experiment: fairness penalty weight 1000. The recorded run of this cell shows
# the gradient magnitudes turning into NaN by epoch 20, i.e. training diverged.
#
# The results are stored under their own names so this run does not overwrite
# `model_ultra_fair` / `ultra_fair_*_test`, which the AUC, fairness-comparison
# and count cells further down read. Overwriting them would make a top-to-bottom
# run evaluate a diverged model.
model_ultra_fair_w1000 = LogisticRegression(
    lr=0.001,
    epochs=100,
    penalty='l2',
    penalty_weight=1.0,
    fair_penalty='Rpr',
    fair_penalty_weight=1000.0,
)

# Feature column 15 is passed to fit() as the sensitive attribute S.
model_ultra_fair_w1000.fit(X_train, y_train, S=X_train[:, 15], debug=True)

# NOTE(review): predictions are computed on X_train, as in the other model
# cells, even though the variable names end in `_test`.
ultra_fair_w1000_pred_test = model_ultra_fair_w1000.predict(X_train)
ultra_fair_w1000_prob_test = model_ultra_fair_w1000.predict_probability(X_train)

----------------------------------------------------------------------
Epoch: 0
Gradient Magnitude - dw: 1.8475825786590576
Gradient Magnitude - db: 0.4973689317703247
Epoch: 10
Gradient Magnitude - dw: 0.3995175063610077
Gradient Magnitude - db: 0.020100586116313934
Epoch: 20
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 30
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 40
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 50
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 60
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 70
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 80
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 90
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan


In [41]:
# Experiment: fairness penalty weight 500. The recorded run of this cell shows
# the gradient magnitudes turning into NaN by epoch 50, i.e. training diverged.
#
# The results are stored under their own names so this run does not overwrite
# `model_ultra_fair` / `ultra_fair_*_test`, which the AUC, fairness-comparison
# and count cells further down read. Overwriting them would make a top-to-bottom
# run evaluate a diverged model.
model_ultra_fair_w500 = LogisticRegression(
    lr=0.001,
    epochs=100,
    penalty='l2',
    penalty_weight=1.0,
    fair_penalty='Rpr',
    fair_penalty_weight=500.0,
)

# Feature column 15 is passed to fit() as the sensitive attribute S.
model_ultra_fair_w500.fit(X_train, y_train, S=X_train[:, 15], debug=True)

# NOTE(review): predictions are computed on X_train, as in the other model
# cells, even though the variable names end in `_test`.
ultra_fair_w500_pred_test = model_ultra_fair_w500.predict(X_train)
ultra_fair_w500_prob_test = model_ultra_fair_w500.predict_probability(X_train)

----------------------------------------------------------------------
Epoch: 0
Gradient Magnitude - dw: 2.604874610900879
Gradient Magnitude - db: 0.5941992998123169
Epoch: 10
Gradient Magnitude - dw: 1.3174494504928589
Gradient Magnitude - db: 0.2949525713920593
Epoch: 20
Gradient Magnitude - dw: 1.7541764974594116
Gradient Magnitude - db: 0.08083391189575195
Epoch: 30
Gradient Magnitude - dw: 1.4695098400115967
Gradient Magnitude - db: 0.21579609811306
Epoch: 40
Gradient Magnitude - dw: 2.26045560836792
Gradient Magnitude - db: 0.43825024366378784
Epoch: 50
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 60
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 70
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 80
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan
Epoch: 90
Gradient Magnitude - dw: nan
Gradient Magnitude - db: nan


In [15]:
# ROC AUC of each model's predicted probabilities against the training labels.
# Lines are printed in this order: fair, not fair, ultra fair.
# NOTE(review): the `_test` arrays hold predictions for X_train, which is why
# they are scored against y_train.
for model_probabilities in (fair_prob_test, not_fair_prob_test, ultra_fair_prob_test):
    print(f'AUC: {roc_auc_score(y_train, model_probabilities)}')

AUC: 0.7348926283597711
AUC: 0.7128022031042391
AUC: 0.6843578114445625


In [10]:
# Fitted models keyed by the label that appears in the comparison table.
models = dict([
    ("Rpr Fairness", model_fair),
    ("Rpr No Fairness", model_not_fair),
    ("Rpr Ultra Fairness", model_ultra_fair),
])

# The trailing 15 is presumably the index of the sensitive-attribute column
# (the same column passed as S to fit()) - confirm against compare_fairness.
# NOTE(review): a model that predicts a single class for every row would score
# 0 on both metrics trivially; read the table alongside the prediction counts.
results = compare_fairness(models, X_train, y_train, 15)
print(f'Feature {15}:\n{results}\n')

Feature 15:
                Model Demographic Parity Equalized Odds
0        Rpr Fairness                0.0            0.0
1     Rpr No Fairness        0.057463318     0.09862956
2  Rpr Ultra Fairness        0.066781685     0.10872246



In [11]:
# Joint counts of the true training label (Y) and feature column 15 (S).
S = X_train[:, 15]
y = y_train

# Build each boolean mask once and combine them pairwise.
s_is_0, s_is_1 = (S == 0), (S == 1)
y_is_0, y_is_1 = (y == 0), (y == 1)

count_y0_s0 = jnp.sum(y_is_0 & s_is_0)
count_y0_s1 = jnp.sum(y_is_0 & s_is_1)
count_y1_s0 = jnp.sum(y_is_1 & s_is_0)
count_y1_s1 = jnp.sum(y_is_1 & s_is_1)

print(
    'Counts:\n'
    f'Y=0,S=0: {count_y0_s0}\n'
    f'Y=0,S=1: {count_y0_s1}\n'
    f'Y=1,S=0: {count_y1_s0}\n'
    f'Y=1,S=1: {count_y1_s1}\n'
)

Counts:
Y=0,S=0: 443
Y=0,S=1: 116
Y=1,S=0: 167
Y=1,S=1: 74



In [12]:
# Joint counts of the ultra-fair model's predicted class and feature column 15
# (S). In the printed labels, "Y" is the predicted class, not the true label.
S = X_train[:, 15]

# Build each boolean mask once and combine them pairwise.
s_is_0, s_is_1 = (S == 0), (S == 1)
pred_is_0, pred_is_1 = (ultra_fair_pred_test == 0), (ultra_fair_pred_test == 1)

count_y0_s0 = jnp.sum(pred_is_0 & s_is_0)
count_y0_s1 = jnp.sum(pred_is_0 & s_is_1)
count_y1_s0 = jnp.sum(pred_is_1 & s_is_0)
count_y1_s1 = jnp.sum(pred_is_1 & s_is_1)

print(
    'Counts:\n'
    f'Y=0,S=0: {count_y0_s0}\n'
    f'Y=0,S=1: {count_y0_s1}\n'
    f'Y=1,S=0: {count_y1_s0}\n'
    f'Y=1,S=1: {count_y1_s1}\n'
)

Counts:
Y=0,S=0: 428
Y=0,S=1: 146
Y=1,S=0: 182
Y=1,S=1: 44



In [13]:
# Joint counts of the fair model's predicted class and feature column 15 (S).
# In the printed labels, "Y" is the predicted class, not the true label.
S = X_train[:, 15]

# Build each boolean mask once and combine them pairwise.
s_is_0, s_is_1 = (S == 0), (S == 1)
pred_is_0, pred_is_1 = (fair_pred_test == 0), (fair_pred_test == 1)

count_y0_s0 = jnp.sum(pred_is_0 & s_is_0)
count_y0_s1 = jnp.sum(pred_is_0 & s_is_1)
count_y1_s0 = jnp.sum(pred_is_1 & s_is_0)
count_y1_s1 = jnp.sum(pred_is_1 & s_is_1)

print(
    'Counts:\n'
    f'Y=0,S=0: {count_y0_s0}\n'
    f'Y=0,S=1: {count_y0_s1}\n'
    f'Y=1,S=0: {count_y1_s0}\n'
    f'Y=1,S=1: {count_y1_s1}\n'
)

Counts:
Y=0,S=0: 610
Y=0,S=1: 190
Y=1,S=0: 0
Y=1,S=1: 0



In [14]:
# Joint counts of the baseline (no fairness penalty) model's predicted class
# and feature column 15 (S). In the printed labels, "Y" is the predicted class,
# not the true label.
S = X_train[:, 15]

# Build each boolean mask once and combine them pairwise.
s_is_0, s_is_1 = (S == 0), (S == 1)
pred_is_0, pred_is_1 = (not_fair_pred_test == 0), (not_fair_pred_test == 1)

count_y0_s0 = jnp.sum(pred_is_0 & s_is_0)
count_y0_s1 = jnp.sum(pred_is_0 & s_is_1)
count_y1_s0 = jnp.sum(pred_is_1 & s_is_0)
count_y1_s1 = jnp.sum(pred_is_1 & s_is_1)

print(
    'Counts:\n'
    f'Y=0,S=0: {count_y0_s0}\n'
    f'Y=0,S=1: {count_y0_s1}\n'
    f'Y=1,S=0: {count_y1_s0}\n'
    f'Y=1,S=1: {count_y1_s1}\n'
)

Counts:
Y=0,S=0: 530
Y=0,S=1: 176
Y=1,S=0: 80
Y=1,S=1: 14

