In [1]:
from sklearn.model_selection import StratifiedShuffleSplit as ss

from DataPreprocessing.my_utils import get_fairness, get_score, vis
from Maximus_optimized_non_dominated import Multi_Fair as maximus



In [2]:
def train_classifier1(
    X_train, X_test, y_train, y_test, sa_index, p_Group, base_learners, preference
):
    """Fit one ``maximus`` ensemble and score it on the held-out split.

    Args:
        X_train: Training features passed to ``fit``.
        X_test: Held-out features passed to ``predict_proba`` and ``predict``.
        y_train: Training labels passed to ``fit``.
        y_test: Not used by this function; kept for interface compatibility.
        sa_index: Forwarded to the classifier as ``saIndex``.
        p_Group: Forwarded to the classifier as ``saValue``.
        base_learners: Forwarded to the classifier as ``n_estimators``.
        preference: Forwarded to the classifier as ``preference``.

    Returns:
        A tuple ``(probs, labels, model)`` where ``probs`` is column 1 of
        ``predict_proba(X_test)`` (presumably the positive class -- confirm
        against the classifier), ``labels`` is ``predict(X_test)`` and
        ``model`` is the fitted classifier itself.
    """
    model_options = {
        "n_estimators": base_learners,
        "saIndex": sa_index,
        "saValue": p_Group,
        "preference": preference,
    }
    model = maximus(**model_options)
    model.fit(X_train, y_train)

    scores = model.predict_proba(X_test)
    positive_probs = scores[:, 1]
    hard_labels = model.predict(X_test)

    # The value is discarded, but the attribute is still read in case the
    # classifier computes it lazily -- NOTE(review): drop if it has no side effects.
    _unused_importances = model.feature_importances_

    return positive_probs, hard_labels, model

In [3]:
def train(
    X,
    y,
    sa_index=None,
    p_Group=None,
    n_splits=5,
    test_size=0.4,
    base_learners=499,
    preference=None,
    random_state=None,
):
    """Train one classifier per stratified shuffle split and collect the results.

    Calling ``train(X, y)`` behaves as before: the sensitive-attribute settings
    are read from the notebook globals ``sa_index`` and ``p_Group``, and the
    split/ensemble settings use the previously hard-coded values.

    Args:
        X: Feature matrix; must support indexing with an index array.
        y: Label vector; must support indexing with an index array.
        sa_index: Passed to ``train_classifier1``. When ``None`` the
            module-level ``sa_index`` is used.
        p_Group: Passed to ``train_classifier1``. When ``None`` the
            module-level ``p_Group`` is used.
        n_splits: Number of shuffle splits.
        test_size: Test-set size of each split.
        base_learners: Number of base learners handed to ``train_classifier1``.
        preference: Preference weights handed to ``train_classifier1``;
            defaults to ``[0.33, 0.34, 0.33]``.
        random_state: Seed forwarded to the splitter. The default ``None``
            keeps the original unseeded behaviour; pass an int for
            reproducible splits.

    Returns:
        A tuple ``(test_indices, predictions, classifiers)`` of three lists
        with one entry per split: the test index array, the predicted labels
        for that test set, and the fitted classifier.

    Raises:
        NameError: If ``sa_index`` / ``p_Group`` is omitted and no global of
            that name exists.
    """
    namespace = globals()
    if sa_index is None:
        if "sa_index" not in namespace:
            raise NameError("name 'sa_index' is not defined; pass sa_index=...")
        sa_index = namespace["sa_index"]
    if p_Group is None:
        if "p_Group" not in namespace:
            raise NameError("name 'p_Group' is not defined; pass p_Group=...")
        p_Group = namespace["p_Group"]
    if preference is None:
        preference = [0.33, 0.34, 0.33]

    splitter = ss(n_splits=n_splits, test_size=test_size, random_state=random_state)

    test_indices, predictions, classifiers = [], [], []
    for train_index, test_index in splitter.split(X, y):
        test_indices.append(test_index)
        _, fold_labels, fold_classifier = train_classifier1(
            X[train_index],
            X[test_index],
            y[train_index],
            y[test_index],
            sa_index,
            p_Group,
            base_learners,
            # Fresh list per fold, as in the original call, so classifiers
            # never share one mutable preference object.
            list(preference),
        )
        predictions.append(fold_labels)
        classifiers.append(fold_classifier)
        # Progress line: index ``theta - 1`` and the matching entry of ``ob``.
        print(
            fold_classifier.theta - 1,
            " : ",
            fold_classifier.ob[fold_classifier.theta - 1],
        )
    return test_indices, predictions, classifiers

In [4]:
# Train on every requested dataset and collect, per dataset, the fairness
# metrics (`results`), the performance scores (`performance`) and the fitted
# classifiers of all splits (`Hx`).
results, performance, Hx = [], [], []
for dt in ["Adult"]:
    if dt == "Adult":
        from DataPreprocessing.load_adult import load_adult

        X, y, sa_index, p_Group, x_control, F = load_adult()
        # Keep only the first `how_many` rows so the run stays short.
        how_many = 1000
        X = X[:how_many, :]
        y = y[:how_many]
        x_control["race"] = x_control["race"][:how_many]
        x_control["sex"] = x_control["sex"][:how_many]
        # v='Adult_2_sensi_Mari_Sex'
        saf = sa_index[1]
    elif dt == "Bank":
        from DataPreprocessing.load_bank import load_bank

        X, y, sa_index, p_Group, x_control, F = load_bank()
        saf = sa_index[0]
        print(saf)
    elif dt == "Credit":
        from DataPreprocessing.load_credit import load_credit

        X, y, sa_index, p_Group, x_control, F = load_credit()
        saf = sa_index[0]
    elif dt == "Compas":
        from DataPreprocessing.load_compas_data import load_compas

        X, y, sa, p_G, x_control, F = load_compas()
        # Use the last and the first sensitive attribute, in that order.
        sa_index = [sa[-1], sa[0]]
        p_Group = [p_G[-1], p_G[0]]
        # NOTE(review): unlike the other branches, `saf` is not assigned here.
    else:
        # Fail loudly instead of silently reusing data from an earlier iteration.
        raise ValueError(f"Unknown dataset name: {dt!r}")
    # Entries of F selected by the sensitive-attribute indices (not used
    # further in this cell).
    sensitives = [F[v] for v in sa_index]
    # `train` reads `sa_index` and `p_Group` from the notebook globals set above.
    in_ts, pred1, f1 = train(X, y)
    results.append(list(get_fairness(sa_index, p_Group, in_ts, pred1, X, y).values()))
    performance.append(get_score(pred1, in_ts, X, y))
    Hx.append(f1)

45175
Features we will be using for classification are: ["workclass_' Federal-gov'", "workclass_' Local-gov'", "workclass_' Private'", "workclass_' Self-emp-inc'", "workclass_' Self-emp-not-inc'", "workclass_' State-gov'", "workclass_' Without-pay'", "education_' 10th'", "education_' 11th'", "education_' 12th'", "education_' 1st-4th'", "education_' 5th-6th'", "education_' 7th-8th'", "education_' 9th'", "education_' Assoc-acdm'", "education_' Assoc-voc'", "education_' Bachelors'", "education_' Doctorate'", "education_' HS-grad'", "education_' Masters'", "education_' Preschool'", "education_' Prof-school'", "education_' Some-college'", "Maritial-status_' Divorced'", "Maritial-status_' Married-AF-spouse'", "Maritial-status_' Married-civ-spouse'", "Maritial-status_' Married-spouse-absent'", "Maritial-status_' Never-married'", "Maritial-status_' Separated'", "Maritial-status_' Widowed'", "occupation_' Adm-clerical'", "occupation_' Armed-Forces'", "occupation_' Craft-repair'", "occupation_' 

In [6]:
# Preference weights stored on the classifier of the first split of the first dataset.
Hx[0][0].preference

[0.33, 0.34, 0.33]

In [7]:
from selection import PreferenceSurvival
import numpy as np

In [35]:
# Stack the values of the first classifier's `PF` mapping into one 2-D array.
# `np.vstack` replaces the deprecated `np.row_stack` alias, and the values view
# is turned into a list because NumPy's stack functions require a real sequence.
objective_values = np.vstack(list(Hx[0][0].PF.values()))
preference_vectors = np.array(Hx[0][0].preference).reshape(1, -1)
# Add more for testing
preference_vectors = np.vstack(
    [[0, 0.8, 0.2], [1.0, 0, 0], [0, 0.5, 0.5], preference_vectors]
)
solutions = Hx[0][0].ob

max_solutions = len(solutions)
# Run the preference-based selection over all candidate solutions.
optimal_solution_set = PreferenceSurvival(preference_vectors).do(solutions, objective_values)

In [42]:
# Report how many of the candidate solutions the selection kept.
print(
    "{}/{} final solutions with respect to the {} reference vector".format(
        optimal_solution_set.shape[0], max_solutions, len(preference_vectors)
    )
)

4/500 final solutions with respect to the 4 reference vector


In [37]:
# Three hand-written test vectors followed by the classifier's own preference.
preference_vectors

array([[0.  , 0.8 , 0.2 ],
       [1.  , 0.  , 0.  ],
       [0.  , 0.5 , 0.5 ],
       [0.33, 0.34, 0.33]])

In [39]:
# Result of PreferenceSurvival(preference_vectors).do(solutions, objective_values).
optimal_solution_set

array([[0.23833333, 0.85628743, 0.06180556],
       [0.23333333, 0.83832335, 0.07575758],
       [0.23333333, 0.83832335, 0.07575758],
       [0.19      , 0.64114727, 0.11157407]])

In [41]:
# Collapse identical rows: the selection can return the same solution more than once.
np.unique(optimal_solution_set, axis=0)

array([[0.19      , 0.64114727, 0.11157407],
       [0.23333333, 0.83832335, 0.07575758],
       [0.23833333, 0.85628743, 0.06180556]])