In [2]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from aif360.algorithms.inprocessing import GerryFairClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import sklearn

import aif360
from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions\
            import get_distortion_adult, get_distortion_german, get_distortion_compas

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas

from aif360.datasets import AdultDataset, BankDataset, CompasDataset, GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms import preprocessing, inprocessing, postprocessing
from aif360.algorithms.preprocessing.optim_preproc import OptimPreproc
from aif360.algorithms.preprocessing.lfr import LFR
from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools
import copy

from IPython.display import Markdown, display
import warnings
warnings.filterwarnings('ignore')
%load_ext jupyternotify
np.random.seed(1)

<IPython.core.display.Javascript object>

In [3]:
def run_classification_metrics(CM:ClassificationMetric):
    def f1_score(priv=None):
            numer = CM.num_true_positives(privileged=priv)
            denom = CM.num_true_positives(privileged=priv) + 0.5*float(CM.num_false_positives(privileged=priv) + CM.num_false_negatives(privileged=priv))
            return float(numer/denom)
    return np.array([
        round(CM.accuracy(), 4),
        round(f1_score(), 4),
        round(CM.theil_index(), 4),
        round(CM.false_positive_rate(privileged=False), 4),
        round(CM.false_positive_rate(privileged=True), 4),
        round(CM.false_negative_rate(privileged=False), 4),
        round(CM.false_negative_rate(privileged=True), 4),
        round(1-CM.error_rate(privileged=False), 4),
        round(1-CM.error_rate(privileged=True), 4),
        round(CM.false_discovery_rate(privileged=False), 4),
        round(CM.false_discovery_rate(privileged=True), 4),
        round(CM.false_omission_rate(privileged=False), 4),
        round(CM.false_omission_rate(privileged=True), 4),
        
        #all results
        CM.num_true_positives(),
        CM.num_true_negatives(),
        CM.num_false_positives(),
        CM.num_false_negatives(),
        
        #privileged
        CM.num_true_positives(privileged=True),
        CM.num_true_negatives(privileged=True),
        CM.num_false_positives(privileged=True),
        CM.num_false_negatives(privileged=True),
        
        #unprivileged
        CM.num_true_positives(privileged=False),
        CM.num_true_negatives(privileged=False),
        CM.num_false_positives(privileged=False),
        CM.num_false_negatives(privileged=False),
        
        round(f1_score(True), 4),
        round(f1_score(False), 4),
    ])

In [4]:
def run_binary_dataset_metrics(BLDM:BinaryLabelDatasetMetric):
    #print("Consistency: ", BLDM.consistency())
    return np.array([
        round(BLDM.base_rate(privileged=True), 4), # 1 means privileged bias
        round(BLDM.base_rate(privileged=False), 4), # 1 means unprivileged bias
        round(BLDM.consistency()[0], 4)
    ])

In [5]:
def get_model_name(model):
    if isinstance(model, sklearn.linear_model.LogisticRegression):
        return "Logistic Regression"
    if isinstance(model, sklearn.linear_model.LinearRegression):
        return "Linear Regression"
    if isinstance(model, sklearn.ensemble.BaggingClassifier):
        return "Meta Classifier"
    
    if isinstance(model, preprocessing.DisparateImpactRemover):
        return "DIR"
    if isinstance(model, preprocessing.LFR):
        return "LFR"
    if isinstance(model, preprocessing.OptimPreproc):
        return "OP"
    if isinstance(model, preprocessing.Reweighing):
        return "RW"
    
    if isinstance(model, inprocessing.PrejudiceRemover):
        return "PR"
    if isinstance(model, inprocessing.AdversarialDebiasing):
        return "AD"
    if isinstance(model, inprocessing.ARTClassifier):
        return "ARTC"
    if isinstance(model, inprocessing.ExponentiatedGradientReduction):
        return "EGR"
    if isinstance(model, inprocessing.GerryFairClassifier):
        return "GFC"
    if isinstance(model, inprocessing.GridSearchReduction):
        return "GSR"
    if isinstance(model, inprocessing.MetaFairClassifier):
        return "MFC"
    
    if isinstance(model, postprocessing.EqOddsPostprocessing):
        return "EOP"
    if isinstance(model, postprocessing.CalibratedEqOddsPostprocessing):
        return "CEOP"
    if isinstance(model, postprocessing.RejectOptionClassification):
        return "ROC"
    
    return "None"

In [6]:
def get_dataset_name(dataset):
    if isinstance(dataset, aif360.datasets.german_dataset.GermanDataset):
        return "German Dataset"
    if isinstance(dataset, aif360.datasets.adult_dataset.AdultDataset):
        return "Adult Dataset"
    if isinstance(dataset, aif360.datasets.bank_dataset.BankDataset):
        return "Bank Dataset"
    if isinstance(dataset, aif360.datasets.compas_dataset.CompasDataset):
        return "Compas Dataset"

In [43]:
# get specialized dataset for Optimized Preprocessing
def get_OP_dataset(dataset_name):
    dataset = None
    if dataset_name=="bank":
        return
    if dataset_name=="adult":
        dataset = load_preproc_data_adult(['sex'])
    elif dataset_name=="compas":
        dataset = load_preproc_data_compas(['race'])
    elif dataset_name=="german":
        dataset = load_preproc_data_german(['age'])
        dataset.labels = (dataset.labels-1.0).astype('float64')
        dataset.favorable_label = 1.0
        dataset.unfavorable_label = 0.0
        dataset.metadata['label_maps'] = [{1.0: 'Good Credit', 0.0: 'Bad Credit'}]

    np.random.seed(0)
    dataset_train, dataset_test = dataset.split([0.7], shuffle = True)
    return dataset_train, dataset_test

In [44]:
def get_dataset_options(dataset_name):
    optim_options = None
    if dataset_name=="adult":
        optim_options = {
        "distortion_fun": get_distortion_adult,
        "epsilon": 0.05,
        "clist": [0.99, 1.99, 2.99],
        "dlist": [.1, 0.05, 0]
        }
        pro_attr = 'sex'
        return (AdultDataset(), pro_attr, [{'sex': 1}], [{'sex': 0}], optim_options)
    elif dataset_name=="compas":
        optim_options = {
        "distortion_fun": get_distortion_compas,
        "epsilon": 0.05,
        "clist": [0.99, 1.99, 2.99],
        "dlist": [.1, 0.05, 0]
        }
        pro_attr = 'race'
        return (CompasDataset(), pro_attr, [{'race': 1}], [{'race': 0}], optim_options)
    elif dataset_name=="bank":
        pro_attr = 'age'
        return (BankDataset(protected_attribute_names=['age'],
            privileged_classes=[lambda x: x >= 25], 
            features_to_drop=['day_of_week']), pro_attr, [{'age': 1}], [{'age': 0}], None)
    elif dataset_name=="german":
        optim_options = {
            "distortion_fun": get_distortion_german,
            "epsilon": 0.1,
            "clist": [0.99, 1.99, 2.99],
            "dlist": [.1, 0.05, 0]
        }   
        pro_attr = 'age'
        label_map = {1.0: 'Good Credit', 0.0: 'Bad Credit'}
        g = GermanDataset(metadata={'label_maps': [label_map]})
        g.labels = (g.labels-1.0).astype('float64')
        g.favorable_label = 1.0
        g.unfavorable_label = 0.0
        #g.metadata['label_maps'] = [label_map]

        # load_preproc_data_german(['age'])
        return (g, pro_attr, 
                    [{'age': 1}], [{'age': 0}], optim_options)

In [58]:
def analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, classifier=None, 
                 preprocessing_algo=None, inprocessing_algo=None, postprocessing_algo=None):
    print(get_model_name(preprocessing_algo), get_model_name(inprocessing_algo), get_model_name(postprocessing_algo))
    model = sklearn.linear_model.LogisticRegression() # solver='liblinear', class_weight='balanced', 
    
    dataset_train_pred = dataset_train.copy(deepcopy=True)
    dataset_test_pred = dataset_test.copy(deepcopy=True)
            
    if preprocessing_algo is not None:
        dataset_train_pred = preprocessing_algo.fit_transform(dataset_train_pred)
        dataset_train_pred = dataset_train.align_datasets(dataset_train_pred)
    
    if inprocessing_algo is not None:
        inp = inprocessing_algo
        inp.fit(dataset_train_pred)
        dataset_train_pred = inp.predict(dataset_train_pred)
        dataset_test_pred = inp.predict(dataset_test_pred) 
    else:
        model.fit(dataset_train_pred.features, dataset_train_pred.labels)   # .ravel()
        fav_idx = np.where(model.classes_ == dataset_train.favorable_label)[0][0]
        dataset_train_pred.scores = model.predict_proba(dataset_train_pred.features)[:,fav_idx].reshape(-1,1) 
        dataset_train_pred.labels = model.predict(dataset_train_pred.features).reshape(-1,1) 
        dataset_test_pred.scores = model.predict_proba(dataset_test_pred.features)[:,fav_idx].reshape(-1,1) 
        dataset_test_pred.labels = model.predict(dataset_test_pred.features).reshape(-1,1)  
            
    if postprocessing_algo is not None:
        dataset_train_pred.features = dataset_train.features
        pp = postprocessing_algo
        pp = pp.fit(dataset_train, dataset_train_pred)
        dataset_test_pred = pp.predict(dataset_test_pred)

    CM = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    
    BLDM = BinaryLabelDatasetMetric(dataset_test_pred,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)
    name = ""
    if preprocessing_algo is not None:
        name += get_model_name(preprocessing_algo) + " + "
    if inprocessing_algo is not None:
        name += get_model_name(inprocessing_algo) + " + "
    if postprocessing_algo is not None:
        name += get_model_name(postprocessing_algo)
    if name == "":
        name = get_model_name(model)
        
    if name.endswith(" + "):
        lastIndex = name.rindex(" + ")
        name = name[:lastIndex]
    
    #print(run_classification_metrics(CM))
    #print(run_binary_dataset_metrics(BLDM))
    metrics = np.concatenate((run_classification_metrics(CM), run_binary_dataset_metrics(BLDM)))
       
    return {"key":name, "val":metrics}

In [60]:
#datasets = ["compas", "german", "bank", "adult"]
datasets = ["adult"]
for dataset_name in datasets:
    dataset, pro_attr, privileged_groups, unprivileged_groups, optim_options = get_dataset_options(dataset_name)
    
    print("DATASET NAME: ", dataset_name)
    np.random.seed(0)
    dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

    preprocessing_algos = [preprocessing.DisparateImpactRemover(sensitive_attribute=pro_attr),
                           None,
                           #LFR(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, k=10, Ax=0.1, Ay=1.0, Az=2.0, verbose=0),
                          OptimPreproc(OptTools, optim_options, unprivileged_groups = unprivileged_groups, privileged_groups = privileged_groups),
                          #preprocessing.Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups),
                           ]
                          
    inprocessing_algos = [None,
                           #inprocessing.AdversarialDebiasing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=0),
                          GerryFairClassifier(),
                          inprocessing.PrejudiceRemover(sensitive_attr=pro_attr),
                          inprocessing.ExponentiatedGradientReduction(LogisticRegression(), constraints="DemographicParity", drop_prot_attr=False),
                          inprocessing.GridSearchReduction(LogisticRegression(), prot_attr=pro_attr, constraints="DemographicParity", drop_prot_attr=False),
                          ]

    postprocessing_algos = [None,
                            postprocessing.EqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=0),
                            postprocessing.CalibratedEqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=0),
                            postprocessing.RejectOptionClassification(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups),  
                            ]

    df = {}

    for pre in preprocessing_algos:
        for inproc in inprocessing_algos:
            for post in postprocessing_algos:    
                
                # get specialized dataset for Optimized Preprocessing technique
                if get_model_name(pre)=="OP" and dataset_name!="bank":
                    print("Specialized function: ", dataset_name)
                    dataset_train, dataset_test = get_OP_dataset(dataset_name) 
                
                try:
                    res = analyze_algo(dataset_train, dataset_test, privileged_groups, unprivileged_groups, 
                             preprocessing_algo=copy.deepcopy(pre), 
                             inprocessing_algo=copy.deepcopy(inproc),
                             postprocessing_algo=copy.deepcopy(post))
                    df[res["key"]] = res["val"]

                except KeyboardInterrupt:
                    raise KeyboardInterrupt()
                except Exception as e:
                    print("FAILED: " + get_model_name(pre) + ", " + get_model_name(inproc) + ", " + get_model_name(post) + " on dataset " + get_dataset_name(dataset), e)

    df = pd.DataFrame.from_dict(df)                
    df.index = ["Accuracy", "F1 Score", "Theil Index",
                    "False Positive Rate - Unprivileged", "False Positive Rate - Privileged",
                    "False Negative Rate - Unprivileged", "False Negative Rate - Privileged",
                    "Accuracy - Unprivileged", "Accuracy - Privileged",
                    "False Discovery Rate - Unprivileged", "False Discovery Rate - Privileged",
                    "False Omission Rate - Unprivileged", "False Omission Rate - Privileged",
                    "Num True Pos", "Num True Neg", "Num False Pos", "Num False Neg",
                    "Num True Pos - Privileged", "Num True Neg - Privileged", "Num False Pos - Privileged", "Num False Neg - Privileged",
                    "Num True Pos - Unprivileged", "Num True Neg - Unprivileged", "Num False Pos - Unprivileged", "Num False Neg - Unprivileged",
                    "F1 Score - Privileged", "F1 Score - Unprivileged",
                    "Privileged base Rate", "Unprivileged base Rate", "Consistency"]

    df = df.T
    df.to_csv("Data -"+ dataset_name +".csv", sep=',', encoding='utf-8')
    display(df)        



DATASET NAME:  adult
DIR None None
DIR None EOP
DIR None CEOP
DIR None ROC
DIR GFC None
DIR GFC EOP
DIR GFC CEOP
DIR GFC ROC
DIR PR None
DIR PR EOP
DIR PR CEOP
DIR PR ROC
DIR EGR None
DIR EGR EOP
DIR EGR CEOP
DIR EGR ROC
DIR GSR None
DIR GSR EOP
DIR GSR CEOP
DIR GSR ROC
None None None
None None EOP
None None CEOP
None None ROC
None GFC None
None GFC EOP
None GFC CEOP
None GFC ROC
None PR None
None PR EOP
None PR CEOP
None PR ROC
None EGR None
None EGR EOP
None EGR CEOP
None EGR ROC
None GSR None
None GSR EOP
None GSR CEOP
None GSR ROC
Specialized function:  adult
OP None None
Optimized Preprocessing: Objective converged to 0.011652
Specialized function:  adult
OP None EOP
Optimized Preprocessing: Objective converged to 0.011652
Specialized function:  adult
OP None CEOP
Optimized Preprocessing: Objective converged to 0.011652
Specialized function:  adult
OP None ROC
Optimized Preprocessing: Objective converged to 0.011652
Specialized function:  adult
OP GFC None
Optimized Preprocessing:

Unnamed: 0,Accuracy,F1 Score,Theil Index,False Positive Rate - Unprivileged,False Positive Rate - Privileged,False Negative Rate - Unprivileged,False Negative Rate - Privileged,Accuracy - Unprivileged,Accuracy - Privileged,False Discovery Rate - Unprivileged,...,Num False Neg - Privileged,Num True Pos - Unprivileged,Num True Neg - Unprivileged,Num False Pos - Unprivileged,Num False Neg - Unprivileged,F1 Score - Privileged,F1 Score - Unprivileged,Privileged base Rate,Unprivileged base Rate,Consistency
DIR,0.8432,0.6549,0.1238,0.0297,0.1105,0.4487,0.3844,0.9231,0.8048,0.2974,...,1089.0,274.0,3795.0,116.0,223.0,0.6611,0.6178,0.2667,0.0885,0.9253
DIR + EOP,0.8161,0.6023,0.1398,0.1048,0.1018,0.4125,0.4363,0.8605,0.7947,0.584,...,1236.0,292.0,3501.0,410.0,205.0,0.6295,0.4871,0.2447,0.1593,0.8795
DIR + CEOP,0.8316,0.6042,0.144,0.0,0.1105,1.0,0.3844,0.8873,0.8048,0.0,...,1089.0,0.0,3911.0,0.0,497.0,0.6611,0.0,0.2667,0.0,0.9218
DIR + ROC,0.7873,0.6302,0.1089,0.2373,0.1718,0.0905,0.2916,0.7793,0.7911,0.6725,...,826.0,452.0,2983.0,928.0,45.0,0.6772,0.4816,0.3378,0.3131,0.8788
DIR + GFC,0.2454,0.3942,0.034,1.0,1.0,0.0,0.0,0.1127,0.3093,0.8873,...,0.0,497.0,0.0,3911.0,0.0,0.4725,0.2027,1.0,1.0,1.0
DIR + GFC + EOP,0.5053,0.3365,0.1871,0.5004,0.4942,0.501,0.4868,0.4995,0.5081,0.8875,...,1379.0,248.0,1954.0,1957.0,249.0,0.3923,0.1836,0.5001,0.5002,0.5988
DIR + GFC + CEOP,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,497.0,3911.0,0.0,0.0,1.0,1.0,0.3093,0.1127,0.8464
DIR + GFC + ROC,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,497.0,3911.0,0.0,0.0,1.0,1.0,0.3093,0.1127,0.8464
DIR + PR,0.8448,0.6524,0.1258,0.0215,0.1053,0.5151,0.3879,0.9229,0.8073,0.2585,...,1099.0,241.0,3827.0,84.0,256.0,0.6627,0.5864,0.262,0.0737,0.9204
DIR + PR + EOP,0.8104,0.5647,0.1555,0.0903,0.088,0.495,0.4998,0.8641,0.7846,0.5844,...,1416.0,251.0,3558.0,353.0,246.0,0.5896,0.4559,0.2155,0.137,0.8698


<h3>EXTRA</h3>

In [57]:
dataset_name = "german"
dataset, pro_attr, privileged_groups, unprivileged_groups, optim_options = get_dataset_options(dataset_name)

np.random.seed(0)
dataset_train, dataset_test = dataset.split([0.7], shuffle = True)

#print("Specialized function: ", dataset_name)
#dataset_train, dataset_test = get_OP_dataset(dataset_name)

dataset_train_pred = dataset_train.copy(deepcopy=True)
dataset_test_pred = dataset_test.copy(deepcopy=True)

#inp = LFR(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, k=10, Ax=0.1, Ay=1.0, Az=2.0, verbose=0)
inp = preprocessing.DisparateImpactRemover(sensitive_attribute=pro_attr)
#inp = OptimPreproc(OptTools, optim_options, unprivileged_groups = unprivileged_groups, privileged_groups = privileged_groups)
dataset_train_pred = inp.fit_transform(dataset_train_pred)

'''
X_train = dataset_train_pred.features
y_train = dataset_train_pred.labels #.ravel()
model = LogisticRegression()  
model.fit(X_train, y_train)
fav_idx = np.where(model.classes_ == dataset_train.favorable_label)[0][0]
dataset_train_pred.scores = model.predict_proba(dataset_train_pred.features)[:,fav_idx].reshape(-1,1) 
dataset_train_pred.labels = model.predict(dataset_train_pred.features).reshape(-1,1)
dataset_test_pred.scores = model.predict_proba(dataset_test_pred.features)[:,fav_idx].reshape(-1,1) 
dataset_test_pred.labels = model.predict(dataset_test_pred.features).reshape(-1,1)     
'''

inp = inprocessing.GridSearchReduction(LogisticRegression(), prot_attr=pro_attr, constraints="DemographicParity", drop_prot_attr=False)
#inp = inprocessing.PrejudiceRemover(sensitive_attr=pro_attr)
#inp = GerryFairClassifier()
inp.fit(dataset_train_pred)
dataset_train_pred = inp.predict(dataset_train_pred)
dataset_test_pred = inp.predict(dataset_test_pred) 
#dataset_train_pred.features = dataset_train.features

#pp = postprocessing.RejectOptionClassification(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
#pp = postprocessing.EqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=0)
#pp = pp.fit(dataset_train, dataset_train_pred)
#dataset_test_pred = pp.predict(dataset_test_pred)


CM = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)
    
BLDM = BinaryLabelDatasetMetric(dataset_test_pred,
                                unprivileged_groups=unprivileged_groups,
                                privileged_groups=privileged_groups)

print(run_classification_metrics(CM))
print(run_binary_dataset_metrics(BLDM))

[7.700e-01 5.548e-01 1.979e-01 1.034e-01 1.050e-01 6.923e-01 4.935e-01
 7.143e-01 7.791e-01 4.286e-01 3.276e-01 2.571e-01 1.900e-01 4.300e+01
 1.880e+02 2.200e+01 4.700e+01 3.900e+01 1.620e+02 1.900e+01 3.800e+01
 4.000e+00 2.600e+01 3.000e+00 9.000e+00 5.778e-01 4.000e-01]
[0.2248 0.1667 0.7327]


In [52]:
#dataset_train.metadata  #.labels.ravel()
CM.binary_confusion_matrix()

{'TP': 0.0, 'FP': 0.0, 'TN': 210.0, 'FN': 90.0}

In [30]:
dataset = GermanDataset()
dataset.metadata

{'label_maps': [{1.0: 'Good Credit', 2.0: 'Bad Credit'}],
 'protected_attribute_maps': [{1.0: 'Male', 0.0: 'Female'},
  {1.0: 'Old', 0.0: 'Young'}],
 'transformer': 'GermanDataset.__init__',
 'params': {'df':      month  credit_amount  investment_as_income_percentage  residence_since  \
  0      6.0         1169.0                              4.0              4.0   
  1     48.0         5951.0                              2.0              2.0   
  2     12.0         2096.0                              2.0              3.0   
  3     42.0         7882.0                              2.0              4.0   
  4     24.0         4870.0                              3.0              4.0   
  ..     ...            ...                              ...              ...   
  995   12.0         1736.0                              3.0              4.0   
  996   30.0         3857.0                              4.0              4.0   
  997   12.0          804.0                              4.0    

In [108]:
from sklearn.metrics import accuracy_score
accuracy_score(dataset_test.labels, dataset_test_pred.labels)

0.7566666666666667

In [79]:
dataset_test_pred.labels = dataset_test_pred.labels.astype('float64') #.ravel()

In [86]:
dataset_test_pred.labels

array([1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0.

In [87]:
dataset_test.labels

array([1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1.,
       0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0.,
       0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0.,
       1., 1., 0., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0.,
       0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0.,
       1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1.,
       0., 0., 0., 0., 0.

In [88]:
#type(dataset_test_pred.labels[0][0])
type(dataset_test)

aif360.datasets.german_dataset.GermanDataset

In [54]:
dataset = GermanDataset()
dataset.labels = dataset.labels-1
inp = inprocessing.GridSearchReduction(LogisticRegression(), prot_attr=pro_attr, constraints="DemographicParity", drop_prot_attr=False)
inp.fit(dataset)

<aif360.algorithms.inprocessing.grid_search_reduction.GridSearchReduction at 0x24a2689c850>