In [1]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from aif360.algorithms.inprocessing import GerryFairClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import sklearn

import aif360
from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions\
            import get_distortion_adult, get_distortion_german, get_distortion_compas

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas

from aif360.datasets import AdultDataset, BankDataset, CompasDataset, GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms import preprocessing, inprocessing, postprocessing
from aif360.algorithms.preprocessing.optim_preproc import OptimPreproc
from aif360.algorithms.preprocessing.lfr import LFR
from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools
import copy

from IPython.display import Markdown, display
import warnings
warnings.filterwarnings('ignore')
%load_ext jupyternotify
np.random.seed(1)

<IPython.core.display.Javascript object>

In [2]:
def run_classification_metrics(CM:ClassificationMetric):
    def f1_score(priv=None):
            numer = CM.num_true_positives(privileged=priv)
            denom = CM.num_true_positives(privileged=priv) + 0.5*float(CM.num_false_positives(privileged=priv) + CM.num_false_negatives(privileged=priv))
            return float(numer/denom)
    return np.array([
        round(CM.accuracy(), 4),
        round(f1_score(), 4),
        round(CM.theil_index(), 4),
        round(CM.false_positive_rate(privileged=False), 4),
        round(CM.false_positive_rate(privileged=True), 4),
        round(CM.false_negative_rate(privileged=False), 4),
        round(CM.false_negative_rate(privileged=True), 4),
        round(1-CM.error_rate(privileged=False), 4),
        round(1-CM.error_rate(privileged=True), 4),
        round(CM.false_discovery_rate(privileged=False), 4),
        round(CM.false_discovery_rate(privileged=True), 4),
        round(CM.false_omission_rate(privileged=False), 4),
        round(CM.false_omission_rate(privileged=True), 4),
        
        #all results
        CM.num_true_positives(),
        CM.num_true_negatives(),
        CM.num_false_positives(),
        CM.num_false_negatives(),
        
        #privileged
        CM.num_true_positives(privileged=True),
        CM.num_true_negatives(privileged=True),
        CM.num_false_positives(privileged=True),
        CM.num_false_negatives(privileged=True),
        
        #unprivileged
        CM.num_true_positives(privileged=False),
        CM.num_true_negatives(privileged=False),
        CM.num_false_positives(privileged=False),
        CM.num_false_negatives(privileged=False),
        
        round(f1_score(True), 4),
        round(f1_score(False), 4),
    ])

In [3]:
def run_binary_dataset_metrics(BLDM:BinaryLabelDatasetMetric):
    #print("Consistency: ", BLDM.consistency())
    return np.array([
        round(BLDM.base_rate(privileged=True), 4), # 1 means privileged bias
        round(BLDM.base_rate(privileged=False), 4), # 1 means unprivileged bias
        round(BLDM.consistency()[0], 4)
    ])

In [4]:
def get_model_name(model):
    if isinstance(model, sklearn.linear_model.LogisticRegression):
        return "Logistic Regression"
    if isinstance(model, sklearn.linear_model.LinearRegression):
        return "Linear Regression"
    if isinstance(model, sklearn.ensemble.BaggingClassifier):
        return "Meta Classifier"
    
    if isinstance(model, preprocessing.DisparateImpactRemover):
        return "DIR"
    if isinstance(model, preprocessing.LFR):
        return "LFR"
    if isinstance(model, preprocessing.OptimPreproc):
        return "OP"
    if isinstance(model, preprocessing.Reweighing):
        return "RW"
    
    if isinstance(model, inprocessing.PrejudiceRemover):
        return "PR"
    if isinstance(model, inprocessing.AdversarialDebiasing):
        return "AD"
    if isinstance(model, inprocessing.ARTClassifier):
        return "ARTC"
    if isinstance(model, inprocessing.ExponentiatedGradientReduction):
        return "EGR"
    if isinstance(model, inprocessing.GerryFairClassifier):
        return "GFC"
    if isinstance(model, inprocessing.GridSearchReduction):
        return "GSR"
    if isinstance(model, inprocessing.MetaFairClassifier):
        return "MFC"
    
    if isinstance(model, postprocessing.EqOddsPostprocessing):
        return "EOP"
    if isinstance(model, postprocessing.CalibratedEqOddsPostprocessing):
        return "CEOP"
    if isinstance(model, postprocessing.RejectOptionClassification):
        return "ROC"
    
    return "None"

In [5]:
def get_dataset_name(dataset):
    if isinstance(dataset, aif360.datasets.german_dataset.GermanDataset):
        return "German Dataset"
    if isinstance(dataset, aif360.datasets.adult_dataset.AdultDataset):
        return "Adult Dataset"
    if isinstance(dataset, aif360.datasets.bank_dataset.BankDataset):
        return "Bank Dataset"
    if isinstance(dataset, aif360.datasets.compas_dataset.CompasDataset):
        return "Compas Dataset"

In [32]:
# get specialized dataset for Optimized Preprocessing
def get_OP_dataset(dataset_name):
    dataset = None
    if dataset_name=="bank":
        return
    if dataset_name=="adult":
        dataset = load_preproc_data_adult(['sex'])
    elif dataset_name=="compas":
        dataset = load_preproc_data_compas(['race'])
    elif dataset_name=="german":
        dataset = load_preproc_data_german(['age'])
        dataset.labels = (dataset.labels-1.0).astype('float64')
        dataset.favorable_label = 1.0
        dataset.unfavorable_label = 0.0
        dataset.metadata['label_maps'] = [{1.0: 'Good Credit', 0.0: 'Bad Credit'}]
    return dataset

In [30]:
def get_dataset_options(dataset_name):
    optim_options = None
    if dataset_name=="adult":
        optim_options = {
        "distortion_fun": get_distortion_adult,
        "epsilon": 0.05,
        "clist": [0.99, 1.99, 2.99],
        "dlist": [.1, 0.05, 0]
        }
        pro_attr = 'sex'
        return (AdultDataset(), pro_attr, [{'sex': 1}], [{'sex': 0}], optim_options)
    elif dataset_name=="compas":
        optim_options = {
        "distortion_fun": get_distortion_compas,
        "epsilon": 0.05,
        "clist": [0.99, 1.99, 2.99],
        "dlist": [.1, 0.05, 0]
        }
        pro_attr = 'race'
        return (CompasDataset(), pro_attr, [{'race': 1}], [{'race': 0}], optim_options)
    elif dataset_name=="bank":
        pro_attr = 'age'
        return (BankDataset(protected_attribute_names=['age'],
            privileged_classes=[lambda x: x >= 25], 
            features_to_drop=['day_of_week']), pro_attr, [{'age': 1}], [{'age': 0}], None)
    elif dataset_name=="german":
        optim_options = {
            "distortion_fun": get_distortion_german,
            "epsilon": 0.1,
            "clist": [0.99, 1.99, 2.99],
            "dlist": [.1, 0.05, 0]
        }   
        pro_attr = 'age'
        label_map = {1.0: 'Good Credit', 0.0: 'Bad Credit'}
        g = GermanDataset(metadata={'label_maps': [label_map]})
        g.labels = (g.labels-1.0).astype('float64')
        g.favorable_label = 1.0
        g.unfavorable_label = 0.0
        #g.metadata['label_maps'] = [label_map]

        # load_preproc_data_german(['age'])
        return (g, pro_attr, 
                    [{'age': 1}], [{'age': 0}], optim_options)

In [166]:
def execute_intervention(dataset, dataset_name, privileged_groups, unprivileged_groups, 
                 preprocessing_algo=None, inprocessing_algo=None, postprocessing_algo=None, seed=123):
    print(seed, get_model_name(preprocessing_algo), get_model_name(inprocessing_algo), get_model_name(postprocessing_algo))
    
    # get specialized dataset for Optimized Preprocessing technique
    if get_model_name(preprocessing_algo)=="OP" and dataset_name!="bank":
        print("Specialized function: ", dataset_name)
        dataset = get_OP_dataset(dataset_name) 

    np.random.seed(seed)
    dataset_train, dataset_test = dataset.split([0.7], shuffle = True, seed=seed)

    model = sklearn.linear_model.LogisticRegression() # solver='liblinear', class_weight='balanced', 
    
    dataset_train_pred = dataset_train.copy(deepcopy=True)
    dataset_test_pred = dataset_test.copy(deepcopy=True)
            
    if preprocessing_algo is not None:
        dataset_train_pred = preprocessing_algo.fit_transform(dataset_train_pred)
        dataset_train_pred = dataset_train.align_datasets(dataset_train_pred)
    
    if inprocessing_algo is not None:
        inp = inprocessing_algo
        inp.fit(dataset_train_pred)
        dataset_train_pred = inp.predict(dataset_train_pred)
        dataset_test_pred = inp.predict(dataset_test_pred) 
        
        # exception for GFC 
        if get_model_name(inprocessing_algo)=="GFC":
            dataset_train_pred.scores = inp.predict(dataset_train_pred, threshold=None).labels
            dataset_test_pred.scores = inp.predict(dataset_test_pred, threshold=None).labels
            dataset_train_pred.labels = inp.predict(dataset_train_pred, threshold=0.5).labels
            dataset_test_pred.labels = inp.predict(dataset_test_pred, threshold=0.5).labels

    else:
        model.fit(dataset_train_pred.features, dataset_train_pred.labels)   # .ravel()
        fav_idx = np.where(model.classes_ == dataset_train.favorable_label)[0][0]
        dataset_train_pred.scores = model.predict_proba(dataset_train_pred.features)[:,fav_idx].reshape(-1,1) 
        dataset_train_pred.labels = model.predict(dataset_train_pred.features).reshape(-1,1) 
        dataset_test_pred.scores = model.predict_proba(dataset_test_pred.features)[:,fav_idx].reshape(-1,1) 
        dataset_test_pred.labels = model.predict(dataset_test_pred.features).reshape(-1,1)  
            
    if postprocessing_algo is not None:
        dataset_train_pred.features = dataset_train.features
        pp = postprocessing_algo
        pp = pp.fit(dataset_train, dataset_train_pred)
        dataset_test_pred = pp.predict(dataset_test_pred)

    CM = ClassificationMetric(dataset_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)
    
    BLDM = BinaryLabelDatasetMetric(dataset_test_pred,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)
    name = ""
    if preprocessing_algo is not None:
        name += get_model_name(preprocessing_algo) + " + "
    if inprocessing_algo is not None:
        name += get_model_name(inprocessing_algo) + " + "
    if postprocessing_algo is not None:
        name += get_model_name(postprocessing_algo)
    if name == "":
        name = get_model_name(model)
        
    if name.endswith(" + "):
        lastIndex = name.rindex(" + ")
        name = name[:lastIndex]
    
    #print(run_classification_metrics(CM))
    #print(run_binary_dataset_metrics(BLDM))
    metrics = np.concatenate((run_classification_metrics(CM), run_binary_dataset_metrics(BLDM)))
       
    return {"key":name, "val":metrics}

In [None]:
#datasets = ["compas", "german", "bank", "adult"]
datasets = ["compas", "german"]
for dataset_name in datasets:
    print("DATASET NAME: ", dataset_name)
    dataset, pro_attr, privileged_groups, unprivileged_groups, optim_options = get_dataset_options(dataset_name)
    
    preprocessing_algos = [preprocessing.DisparateImpactRemover(sensitive_attribute=pro_attr),
                           None,
                           #LFR(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, k=10, Ax=0.1, Ay=1.0, Az=2.0, verbose=0),
                           OptimPreproc(OptTools, optim_options, unprivileged_groups = unprivileged_groups, privileged_groups = privileged_groups),
                          #preprocessing.Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups),
                           ]
                          
    inprocessing_algos = [None,
                           #inprocessing.AdversarialDebiasing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=seed),
                          GerryFairClassifier(),
                          inprocessing.PrejudiceRemover(sensitive_attr=pro_attr),
                          inprocessing.ExponentiatedGradientReduction(LogisticRegression(), constraints="DemographicParity", drop_prot_attr=False),
                          inprocessing.GridSearchReduction(LogisticRegression(), prot_attr=pro_attr, constraints="DemographicParity", drop_prot_attr=False),
                          ]

    postprocessing_algos = [None,
                            postprocessing.EqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=0),
                            postprocessing.CalibratedEqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=0),
                            #postprocessing.RejectOptionClassification(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups),  
                            ]

    df = {}
    all_data = {}

    for pre in preprocessing_algos:
        for inproc in inprocessing_algos:
            for post in postprocessing_algos:    
                kfold_data = []   # store data for each fold
                col_name = None
                # k-fold cross vaidation - Repeat the process 3 times
                for seed in [7]:  #[3, 5, 11, 17, 29]:
                    try:
                        res = execute_intervention(dataset, dataset_name, privileged_groups, unprivileged_groups, 
                                 preprocessing_algo=copy.deepcopy(pre), 
                                 inprocessing_algo=copy.deepcopy(inproc),
                                 postprocessing_algo=copy.deepcopy(post), seed=seed)
                        col_name = res["key"]
                        kfold_data.append(res["val"])
                        all_data[col_name+" - "+str(seed)] = res["val"]
                        
                    except KeyboardInterrupt:
                        raise KeyboardInterrupt()
                    except Exception as e:
                        print("FAILED: " + get_model_name(pre) + ", " + get_model_name(inproc) + ", " + get_model_name(post) + " on dataset " + get_dataset_name(dataset), e)

                df[col_name] = np.array(kfold_data).mean(axis=0).tolist()
    
    df = pd.DataFrame.from_dict(df)
    all_data = pd.DataFrame.from_dict(all_data)
    df.index = all_data.index = ["Accuracy", "F1 Score", "Theil Index",
                    "False Positive Rate - Unprivileged", "False Positive Rate - Privileged",
                    "False Negative Rate - Unprivileged", "False Negative Rate - Privileged",
                    "Accuracy - Unprivileged", "Accuracy - Privileged",
                    "False Discovery Rate - Unprivileged", "False Discovery Rate - Privileged",
                    "False Omission Rate - Unprivileged", "False Omission Rate - Privileged",
                    "Num True Pos", "Num True Neg", "Num False Pos", "Num False Neg",
                    "Num True Pos - Privileged", "Num True Neg - Privileged", "Num False Pos - Privileged", "Num False Neg - Privileged",
                    "Num True Pos - Unprivileged", "Num True Neg - Unprivileged", "Num False Pos - Unprivileged", "Num False Neg - Unprivileged",
                    "F1 Score - Privileged", "F1 Score - Unprivileged",
                    "Privileged base Rate", "Unprivileged base Rate", "Consistency"]

    df = df.T
    all_data = all_data.T
    df.to_csv("./data/"+ dataset_name +".csv", sep=',', encoding='utf-8')
    all_data.to_csv("./data/All_Data -"+ dataset_name +".csv", sep=',', encoding='utf-8')
    display(df)        



DATASET NAME:  compas
7 DIR None None
7 DIR None EOP
7 DIR None CEOP


<h3>EXTRA</h3>

In [89]:
#all_data  #.mean(axis=0)
#g = GermanDataset()
#g.split([0.7], shuffle = True, seed=17)[0].convert_to_dataframe()[0]

In [None]:
DIR + GFC + CEOP
DIR + GFC + ROC
GFC + CEOP
GFC + ROC
OP + GFC + ROC

In [157]:
dataset_name = "adult"
dataset, pro_attr, privileged_groups, unprivileged_groups, optim_options = get_dataset_options(dataset_name)
dataset_train, dataset_test = dataset.split([0.7], shuffle = True, seed=7)

#print("Specialized function: ", dataset_name)
#dataset_train, dataset_test = get_OP_dataset(dataset_name)

dataset_train_pred = dataset_train.copy(deepcopy=True)
dataset_test_pred = dataset_test.copy(deepcopy=True)

#pre = LFR(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, k=10, Ax=0.1, Ay=1.0, Az=2.0, verbose=0)
#pre = preprocessing.DisparateImpactRemover(sensitive_attribute=pro_attr)
#pre = OptimPreproc(OptTools, optim_options, unprivileged_groups = unprivileged_groups, privileged_groups = privileged_groups)
#dataset_train_pred = pre.fit_transform(dataset_train_pred)

'''
X_train = dataset_train_pred.features
y_train = dataset_train_pred.labels #.ravel()
model = LogisticRegression()  
model.fit(X_train, y_train)
fav_idx = np.where(model.classes_ == dataset_train.favorable_label)[0][0]
dataset_train_pred.scores = model.predict_proba(dataset_train_pred.features)[:,fav_idx].reshape(-1,1) 
dataset_train_pred.labels = model.predict(dataset_train_pred.features).reshape(-1,1)
dataset_test_pred.scores = model.predict_proba(dataset_test_pred.features)[:,fav_idx].reshape(-1,1) 
dataset_test_pred.labels = model.predict(dataset_test_pred.features).reshape(-1,1)     
'''

#inp = inprocessing.GridSearchReduction(LogisticRegression(), prot_attr=pro_attr, constraints="DemographicParity", drop_prot_attr=False)
#inp = inprocessing.PrejudiceRemover(sensitive_attr=pro_attr)
inp = GerryFairClassifier()
inp.fit(dataset_train_pred)


dataset_train_pred.scores = inp.predict(dataset_train_pred, threshold=None).labels
dataset_test_pred.scores = inp.predict(dataset_test_pred, threshold=None).labels

dataset_train_pred.scores = inp.predict(dataset_train_pred, threshold=None).labels
dataset_test_pred.scores = inp.predict(dataset_test_pred, threshold=None).labels
dataset_train_pred.labels = inp.predict(dataset_train_pred, threshold=0.5).labels
dataset_test_pred.labels = inp.predict(dataset_test_pred, threshold=0.5).labels


dataset_train_pred = inp.predict(dataset_train_pred)
dataset_test_pred = inp.predict(dataset_test_pred)

'''
y_train_pred = np.zeros_like(dataset_train_pred.scores)
y_train_pred[dataset_train_pred.scores >= class_thresh] = dataset_train_pred.favorable_label
y_train_pred[~(dataset_train_pred.scores >= class_thresh)] = dataset_train_pred.unfavorable_label
dataset_train_pred.labels = y_train_pred

y_test_pred = np.zeros_like(dataset_test_pred.scores)
y_test_pred[dataset_test_pred.scores >= class_thresh] = dataset_test_pred.favorable_label
y_test_pred[~(dataset_test_pred.scores >= class_thresh)] = dataset_test_pred.unfavorable_label
dataset_test_pred.labels = y_test_pred
#dataset_test_pred.labels = y_test_pred#dataset_train_pred.features = dataset_train.features
'''

#pp = postprocessing.CalibratedEqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=7)
#pp = postprocessing.RejectOptionClassification(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
#pp = postprocessing.EqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=0)
#dataset_train_pred.features = dataset_train.features
#pp = pp.fit(dataset_train, dataset_train_pred)
#dataset_test_pred = pp.predict(dataset_test_pred)


CM = ClassificationMetric(dataset_test,
                          dataset_test_pred,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)
    
BLDM = BinaryLabelDatasetMetric(dataset_test_pred,
                                unprivileged_groups=unprivileged_groups,
                                privileged_groups=privileged_groups)

print(run_classification_metrics(CM))
print(run_binary_dataset_metrics(BLDM))



[2.4710e-01 3.9630e-01 3.4200e-02 1.0000e+00 1.0000e+00 0.0000e+00
 0.0000e+00 1.0880e-01 3.1140e-01 8.9120e-01 6.8860e-01 0.0000e+00
 0.0000e+00 3.3530e+03 0.0000e+00 1.0214e+04 0.0000e+00 2.8850e+03
 0.0000e+00 6.3800e+03 0.0000e+00 4.6800e+02 0.0000e+00 3.8340e+03
 0.0000e+00 4.7490e-01 1.9620e-01]
[1. 1. 1.]


In [165]:
CM.binary_confusion_matrix()

{'TP': 3353.0, 'FP': 10214.0, 'TN': 0.0, 'FN': 0.0}

In [160]:
dataset_train_pred.scores

array([[0.66666667],
       [0.66666667],
       [0.88888889],
       ...,
       [0.88888889],
       [0.66666667],
       [0.88888889]])

In [164]:
dataset_train_pred.labels

array([[1],
       [1],
       [1],
       ...,
       [1],
       [1],
       [1]])

In [121]:
dataset_train_pred = inp.predict(dataset_train_pred)
dataset_test_pred = inp.predict(dataset_test_pred) 

In [107]:
dataset_test_pred.labels.ravel()

array([1, 1, 1, ..., 1, 1, 1])

In [15]:
#dataset_train.metadata  #.labels.ravel()
CM.binary_confusion_matrix()

{'TP': 3330.0, 'FP': 0.0, 'TN': 10237.0, 'FN': 0.0}

In [108]:
from sklearn.metrics import accuracy_score
accuracy_score(dataset_test.labels, dataset_test_pred.labels)

0.7566666666666667

In [79]:
dataset_test_pred.labels = dataset_test_pred.labels.astype('float64') #.ravel()

In [86]:
dataset_test_pred.labels

array([1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0.

In [87]:
dataset_test.labels

array([1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1.,
       0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0.,
       0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0.,
       1., 1., 0., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0.,
       0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0.,
       1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1.,
       0., 0., 0., 0., 0.

In [88]:
#type(dataset_test_pred.labels[0][0])
type(dataset_test)

aif360.datasets.german_dataset.GermanDataset

In [54]:
dataset = GermanDataset()
dataset.labels = dataset.labels-1
inp = inprocessing.GridSearchReduction(LogisticRegression(), prot_attr=pro_attr, constraints="DemographicParity", drop_prot_attr=False)
inp.fit(dataset)

<aif360.algorithms.inprocessing.grid_search_reduction.GridSearchReduction at 0x24a2689c850>