# Grid Search (Mitigation Algorithm) with Adult/Census Data

Census dataset is used to predict if an individual's income is below or above 50k per year.

In [1]:
from fairlearn.widget import FairlearnDashboard
from sklearn.model_selection import train_test_split
from fairlearn.reductions import GridSearch
from fairlearn.reductions import DemographicParity, ErrorRate

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.datasets import fetch_openml

In [2]:
data = fetch_openml(data_id=1590, as_frame=True)
X_raw = data.data
Y = (data.target == '>50K') * 1
X_raw

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country
0,25.0,Private,226802.0,11th,7.0,Never-married,Machine-op-inspct,Own-child,Black,Male,0.0,0.0,40.0,United-States
1,38.0,Private,89814.0,HS-grad,9.0,Married-civ-spouse,Farming-fishing,Husband,White,Male,0.0,0.0,50.0,United-States
2,28.0,Local-gov,336951.0,Assoc-acdm,12.0,Married-civ-spouse,Protective-serv,Husband,White,Male,0.0,0.0,40.0,United-States
3,44.0,Private,160323.0,Some-college,10.0,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688.0,0.0,40.0,United-States
4,18.0,,103497.0,Some-college,10.0,Never-married,,Own-child,White,Female,0.0,0.0,30.0,United-States
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27.0,Private,257302.0,Assoc-acdm,12.0,Married-civ-spouse,Tech-support,Wife,White,Female,0.0,0.0,38.0,United-States
48838,40.0,Private,154374.0,HS-grad,9.0,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0.0,0.0,40.0,United-States
48839,58.0,Private,151910.0,HS-grad,9.0,Widowed,Adm-clerical,Unmarried,White,Female,0.0,0.0,40.0,United-States
48840,22.0,Private,201490.0,HS-grad,9.0,Never-married,Adm-clerical,Own-child,White,Male,0.0,0.0,20.0,United-States


In [3]:
Y

0        0
1        0
2        1
3        1
4        0
        ..
48837    0
48838    1
48839    0
48840    0
48841    1
Name: class, Length: 48842, dtype: int32

In [5]:
#in this setup, gender is the sensitive data and we are dropping that column from the dataset.
A = X_raw["sex"]
X = X_raw.drop(labels=['sex'], axis=1)
X = pd.get_dummies(X)

sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

le = LabelEncoder()
Y = le.fit_transform(Y)
Y

array([0, 0, 1, ..., 0, 0, 1], dtype=int64)

In [6]:
#train-test split
X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split(X_scaled,
                                                                     Y,
                                                                     A,
                                                                     test_size=0.2,
                                                                     random_state=0,
                                                                     stratify=Y)

# Work around indexing bug
X_train = X_train.reset_index(drop=True)
A_train = A_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
A_test = A_test.reset_index(drop=True)


### Training a fairness-unaware predictor

In [7]:
#logistic regression
unmitigated_predictor = LogisticRegression(solver='liblinear', fit_intercept=True)
unmitigated_predictor.fit(X_train, Y_train)

LogisticRegression(solver='liblinear')

In [8]:
#Display of dashboard to investigate disparity
FairlearnDashboard(sensitive_features=A_test, sensitive_feature_names=['sex'],
                   y_true=Y_test,
                   y_pred={"unmitigated": unmitigated_predictor.predict(X_test)})

  warn("The FairlearnDashboard will move from Fairlearn to the "


FairlearnWidget(value={'true_y': [0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x23668ad83d0>

__Result:__ Despite the fact that we removed the feature from the training data, our predictor still discriminates based on sex. This demonstrates that simply ignoring a sensitive feature when fitting a predictor rarely eliminates unfairness. There will generally be enough other features correlated with the removed feature to lead to disparate impact.


### Disparity mitigation with GridSearch algorithm

The user supplies a standard ML estimator to this algorithm, which is treated as a blackbox. GridSearch works by generating a sequence of relabellings and reweightings, and trains a predictor for each. Fairness metric is chosen as _demographic parity_.

In [12]:
#takes ~5 mins to run
sweep = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),
                   constraints=DemographicParity(),
                   grid_size=71)
#grid size gives the number of predictors calculated (The number of Lagrange multipliers to generate in the grid)
sweep.fit(X_train, Y_train,
          sensitive_features=A_train)

predictors = sweep.predictors_
predictors

[LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegress

We could load these predictors into the Fairness dashboard now. However, the plot would be somewhat confusing due to their number. In this case, we are going to remove the predictors which are dominated in the error-disparity space by others from the sweep (note that the disparity will only be calculated for the sensitive feature; other potentially sensitive features will not be mitigated). In general, one might not want to do this, since there may be other considerations beyond the strict optimization of error and disparity (of the given sensitive feature). After eliminating the _dominated_ models, we can put the _dominant_ models into the Fairness dashboard, along with the unmitigated model.

In [10]:
errors, disparities = [], []
for m in predictors:
    def classifier(X): return m.predict(X)
    
    error = ErrorRate()
    #load_data loads the specified data into the object.
    error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train)
    disparity = DemographicParity()
    disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train)

    errors.append(error.gamma(classifier)[0])
    disparities.append(disparity.gamma(classifier).max())

all_results = pd.DataFrame({"predictor": predictors, "error": errors, "disparity": disparities})

all_results

Unnamed: 0,predictor,error,disparity
0,LogisticRegression(solver='liblinear'),0.529496,0.458619
1,LogisticRegression(solver='liblinear'),0.525657,0.455768
2,LogisticRegression(solver='liblinear'),0.521357,0.453586
3,LogisticRegression(solver='liblinear'),0.517263,0.451892
4,LogisticRegression(solver='liblinear'),0.512733,0.449115
...,...,...,...
66,LogisticRegression(solver='liblinear'),0.354439,0.464455
67,LogisticRegression(solver='liblinear'),0.356410,0.465320
68,LogisticRegression(solver='liblinear'),0.357869,0.466213
69,LogisticRegression(solver='liblinear'),0.359302,0.466976


In [13]:
non_dominated = []
#itertuples() method will return an iterator yielding a named tuple for each row in the DataFrame. The first element of the tuple will be the row’s corresponding index value, while the remaining values are the row values.
for row in all_results.itertuples():
    errors_for_lower_or_eq_disparity = all_results["error"][all_results["disparity"] <= row.disparity]
    if row.error <= errors_for_lower_or_eq_disparity.min():
        non_dominated.append(row.predictor)
non_dominated

[LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear'),
 LogisticRegression(solver='liblinear')]

In [28]:
dashboard_predicted = {"unmitigated": unmitigated_predictor.predict(X_test)}
for i in range(len(non_dominated)):
    key = "dominant_model_{0}".format(i)
    value = non_dominated[i].predict(X_test)
    dashboard_predicted[key] = value


FairlearnDashboard(sensitive_features=A_test, sensitive_feature_names=['sex'],
                   y_true=Y_test,
                   y_pred=dashboard_predicted)

  warn("The FairlearnDashboard will move from Fairlearn to the "


FairlearnWidget(value={'true_y': [0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x2366ee22a90>

In [42]:
#check the difference between the feature coefficients assigned by the GridSearch algorithm
print(non_dominated[0].coef_)

print(non_dominated[2].coef_)

[[ 3.43014056e-01  6.35720604e-02  3.65265978e-01  2.39166162e+00
   2.61677659e-01  3.40802299e-01  2.57827735e-01 -1.60706732e-02
   1.14264676e-01  1.59363139e-01  1.17218860e-01  5.63478395e-02
  -2.38846959e-02 -7.28564296e-02  1.48404021e-01  3.00457624e-02
  -1.13295810e-01 -7.24179634e-02  1.16840154e-01  4.65145301e-03
   1.47020107e-03 -8.32172035e-02 -1.19561131e-01 -6.02615097e-02
   1.39117118e-01 -3.93332502e-02 -1.05010664e-01  1.02154497e-01
  -3.25104525e-02 -3.37592118e-01  8.14181346e-01 -2.41227819e-01
  -6.05837911e-01 -1.68868540e-01 -6.35956913e-03 -7.86656853e-02
   4.45737100e-02  1.34171021e-01  4.72833726e-02 -1.59154391e-01
   1.24821602e-01  3.21985213e-01  2.59885176e-01 -1.22663270e-01
  -3.39335132e-02  9.68055098e-02 -1.32258367e-01 -6.03028401e-03
  -1.09010767e-01  8.69631268e-02  2.74683156e-02  5.57524765e-01
  -2.12959950e-01 -4.15130229e-01  2.26144285e-01 -9.81704263e-02
   2.61065567e-01  2.16458117e-02  4.53509262e-02 -3.78409849e-02
  -1.71427