In [3]:
import sys
sys.path.append("../scripts")

import data
import experiments
import models


from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

%load_ext autoreload
%autoreload 2

# load code profiling
%load_ext line_profiler

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
dataset = "compas"
fold = 0
X_train, Y_train, X_val, Y_val, X_test, Y_test =  data.get_fold(dataset, fold, 10, 0)
A_train, A_val, A_test = experiments.get_subgroup_feature(dataset, X_train, X_val, X_test, 8)
col_trans = ColumnTransformer(
    [
        ("numeric", StandardScaler(), data.NUM_FEATURES[dataset]),
        (
            "categorical",
            OneHotEncoder(
                drop="if_binary", sparse_output=False, handle_unknown="ignore"
            ),
            data.CAT_FEATURES[dataset],
        ),
    ],
    verbose_feature_names_out=False,
)
col_trans.set_output(transform="pandas")

preprocess = Pipeline([("preprocess", col_trans)])
preprocess.fit(X_train)
X_train = preprocess.transform(X_train)
X_val = preprocess.transform(X_val)
X_test = preprocess.transform(X_test)

{'1_False': 0, '1_True': 1, '2_False': 2, '2_True': 3, '4_True': 4, '4_False': 5, '3_False': 6, '3_True': 7}


In [45]:
model = models.M2FGB(fair_weight = 0.4, n_estimators=100)
model.fit(X_train, Y_train, A_train)

In [13]:
import lightgbm as lgb
import numpy as np

In [36]:
# fit

X = X_train.values
y = Y_train.values
sensitive_attribute = A_train.values

# sort X and y based on sensitive_attribute
idx = np.argsort(sensitive_attribute)
X = X[idx]
y = y[idx]
sensitive_attribute = sensitive_attribute[idx]


I = models.get_subgroup_indicator_test(sensitive_attribute)
subgroup = sensitive_attribute
fairness_constraint = "equalized_loss"
dual_learning = "gradient_norm"
fair_weight = 0.5
multiplier_learning_rate = 0.1


group_losses = []
mu_opt_list = [None]
dtrain = lgb.Dataset(X, label=y)


def custom_obj(predt, dtrain):
    loss_group = models.logloss_group(predt, dtrain, subgroup, fairness_constraint)
    group_losses.append(loss_group)

    if dual_learning == "optim":
        # dual problem solved analytically
        idx_biggest_loss = np.where(loss_group == np.max(loss_group))[0]
        # if is more than one, randomly choose one
        idx_biggest_loss = np.random.choice(idx_biggest_loss)
        mu_opt = np.zeros(loss_group.shape[0])
        mu_opt[idx_biggest_loss] = fair_weight

    elif dual_learning == "gradient":
        if mu_opt_list[0] is None:
            mu_opt = np.zeros(loss_group.shape[0])
        else:
            mu_opt = mu_opt_list[-1].copy()
        mu_opt += multiplier_learning_rate * fair_weight * loss_group

    elif dual_learning == "gradient_norm":
        if mu_opt_list[0] is None:
            mu_opt = np.ones(loss_group.shape[0])
        else:
            mu_opt = mu_opt_list[-1].copy()

        mu_opt += multiplier_learning_rate * loss_group
        mu_opt = models.projection_to_simplex(mu_opt, z=fair_weight)

    elif dual_learning == "gradient_norm2":
        if mu_opt_list[0] is None:
            mu_opt = np.ones(loss_group.shape[0])
        else:
            mu_opt = mu_opt_list[-1].copy()

        mu_opt += multiplier_learning_rate * loss_group
        mu_opt = mu_opt / np.sum(mu_opt) * fair_weight

    if mu_opt_list[0] is None:
        mu_opt_list[0] = mu_opt
    else:
        mu_opt_list.append(mu_opt)

    grad_fair = models.logloss_group_grad(predt, dtrain, fairness_constraint)
    grad_fair = I * grad_fair.reshape(-1, 1) @ mu_opt

    hess_fair = models.logloss_group_hess(predt, dtrain, fairness_constraint)
    hess_fair = I * hess_fair.reshape(-1, 1) @ mu_opt

    grad = models.logloss_grad(predt, dtrain)
    hess = models.logloss_hessian(predt, dtrain)

    # It is not necessary to multiply fairness gradient by fair_weight because it is already included on mu
    # grad = (1 - fair_weight) * grad + fair_weight * grad_fair
    # hess = (1 - fair_weight) * hess + fair_weight * hess_fair

    grad = (1 - fair_weight) * grad + grad_fair
    hess = (1 - fair_weight) * hess + hess_fair

    return grad, hess



params = {
    "objective": custom_obj,
    "learning_rate": 0.1,
    "max_depth": 6,
    "min_child_weight": 1,
    "reg_lambda": 1,
    "verbose": -1,
    "random_seed" : 0
}

def prof_function():
    model_ = lgb.train(
        params,
        dtrain,
        num_boost_round=100,
    )


In [37]:
%lprun -f custom_obj prof_function()

Timer unit: 1e-09 s

Total time: 23.6848 s
File: /tmp/ipykernel_31/2126739085.py
Function: custom_obj at line 27

Line #      Hits         Time  Per Hit   % Time  Line Contents
    27                                           def custom_obj(predt, dtrain):
    28       100 4692345359.0    5e+07     19.8      loss_group = models.logloss_group(predt, dtrain, subgroup, fairness_constraint)
    29       100     212172.0   2121.7      0.0      group_losses.append(loss_group)
    30                                           
    31       100      94666.0    946.7      0.0      if dual_learning == "optim":
    32                                                   # dual problem solved analytically
    33                                                   idx_biggest_loss = np.where(loss_group == np.max(loss_group))[0]
    34                                                   # if is more than one, randomly choose one
    35                                                   idx_biggest_loss = np.

In [38]:
%lprun -f custom_obj prof_function()

Timer unit: 1e-09 s

Total time: 24.8406 s
File: /tmp/ipykernel_31/2126739085.py
Function: custom_obj at line 27

Line #      Hits         Time  Per Hit   % Time  Line Contents
    27                                           def custom_obj(predt, dtrain):
    28       100 5844837464.0    6e+07     23.5      loss_group = models.logloss_group(predt, dtrain, subgroup, fairness_constraint)
    29       100     290911.0   2909.1      0.0      group_losses.append(loss_group)
    30                                           
    31       100      98822.0    988.2      0.0      if dual_learning == "optim":
    32                                                   # dual problem solved analytically
    33                                                   idx_biggest_loss = np.where(loss_group == np.max(loss_group))[0]
    34                                                   # if is more than one, randomly choose one
    35                                                   idx_biggest_loss = np.

In [28]:
%lprun -f models.logloss_group prof_function()

Timer unit: 1e-09 s

Total time: 12.9627 s
File: /work/giovani.valdrighi/dual_fair_boost/notebooks/../scripts/models.py
Function: logloss_group at line 163

Line #      Hits         Time  Per Hit   % Time  Line Contents
   163                                           def logloss_group(predt, dtrain, subgroup, fairness_constraint):
   164                                               """For each subgroup, calculates the mean log loss of the samples."""
   165       100     447631.0   4476.3      0.0      y = dtrain.get_label()
   166       100 1378007250.0    1e+07     10.6      predt = 1 / (1 + np.exp(-predt))
   167       100  230973195.0    2e+06      1.8      predt = np.clip(predt, 1e-7, 1 - 1e-7)  # avoid log(0)
   168       100     156036.0   1560.4      0.0      if fairness_constraint == "equalized_loss":
   169       100 2544025647.0    3e+07     19.6          loss = -(y * np.log(predt) + (1 - y) * np.log(1 - predt))
   170       100     218875.0   2188.8      0.0      if fairn

In [27]:
%lprun -f models.logloss_group prof_function()

Timer unit: 1e-09 s

Total time: 9.59288 s
File: /work/giovani.valdrighi/dual_fair_boost/notebooks/../scripts/models.py
Function: logloss_group at line 163

Line #      Hits         Time  Per Hit   % Time  Line Contents
   163                                           def logloss_group(predt, dtrain, subgroup, fairness_constraint):
   164                                               """For each subgroup, calculates the mean log loss of the samples."""
   165       100     415054.0   4150.5      0.0      y = dtrain.get_label()
   166       100 1440874801.0    1e+07     15.0      predt = 1 / (1 + np.exp(-predt))
   167       100  233058839.0    2e+06      2.4      predt = np.clip(predt, 1e-7, 1 - 1e-7)  # avoid log(0)
   168       100     149005.0   1490.0      0.0      if fairness_constraint == "equalized_loss":
   169       100 2562866798.0    3e+07     26.7          loss = -(y * np.log(predt) + (1 - y) * np.log(1 - predt))
   170       100     233676.0   2336.8      0.0      if fairn