In [1]:
import sys

In [2]:
sys.path.insert(1, "../") 

In [17]:
import numpy as np
np.random.seed(0)
import pandas as pd

In [4]:
from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing


In [23]:
from IPython.display import Markdown, display

In [9]:
dataset_orig = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes = [lambda x: x>=25],
    features_to_drop=['personal_status', 'sex'])
    

In [36]:
type(GermanDataset)

abc.ABCMeta

In [10]:
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)

In [11]:
type(dataset_orig_train)

aif360.datasets.german_dataset.GermanDataset

In [16]:
privileged_groups= [{'age':1}]
unprivileged_groups=[{'age': 0}]

In [32]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                                             unprivileged_groups=unprivileged_groups, 
                                            privileged_groups=privileged_groups)

In [33]:
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.169905


In [26]:
RW = Reweighing(unprivileged_groups=unprivileged_groups, 
               privileged_groups=privileged_groups)

In [27]:
dataset_transf_train = RW.fit_transform(dataset_orig_train)


In [34]:
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train, 
                                             unprivileged_groups=unprivileged_groups, 
                                            privileged_groups=privileged_groups)

In [35]:
display(Markdown('#### Transformed training dataset'))
print('Difference in mean outcomes between privileged and unprivileged = %f' % metric_transf_train.mean_difference())

#### Transformed training dataset

Difference in mean outcomes between privileged and unprivileged = 0.000000


In [38]:
type(dataset_transf_train)

aif360.datasets.german_dataset.GermanDataset

In [50]:
df = dataset_transf_train.convert_to_dataframe(de_dummy_code=False, sep='=', set_category=False)

In [52]:
df

(     month  credit_amount  investment_as_income_percentage  residence_since  \
 993   36.0         3959.0                              4.0              3.0   
 859    9.0         3577.0                              1.0              2.0   
 298   18.0         2515.0                              3.0              4.0   
 553   12.0         1995.0                              4.0              1.0   
 672   60.0        10366.0                              2.0              4.0   
 ..     ...            ...                              ...              ...   
 509   39.0         8588.0                              4.0              2.0   
 340   24.0         5743.0                              2.0              4.0   
 221   12.0         1200.0                              4.0              4.0   
 928   30.0         1867.0                              4.0              4.0   
 146    6.0          860.0                              1.0              4.0   
 
      age  number_of_credits  people_l

In [54]:
dataset_transf_train.protected_attribute_names

['age']

In [55]:
dataset_transf_train.feature_names

['month',
 'credit_amount',
 'investment_as_income_percentage',
 'residence_since',
 'age',
 'number_of_credits',
 'people_liable_for',
 'status=A11',
 'status=A12',
 'status=A13',
 'status=A14',
 'credit_history=A30',
 'credit_history=A31',
 'credit_history=A32',
 'credit_history=A33',
 'credit_history=A34',
 'purpose=A40',
 'purpose=A41',
 'purpose=A410',
 'purpose=A42',
 'purpose=A43',
 'purpose=A44',
 'purpose=A45',
 'purpose=A46',
 'purpose=A48',
 'purpose=A49',
 'savings=A61',
 'savings=A62',
 'savings=A63',
 'savings=A64',
 'savings=A65',
 'employment=A71',
 'employment=A72',
 'employment=A73',
 'employment=A74',
 'employment=A75',
 'other_debtors=A101',
 'other_debtors=A102',
 'other_debtors=A103',
 'property=A121',
 'property=A122',
 'property=A123',
 'property=A124',
 'installment_plans=A141',
 'installment_plans=A142',
 'installment_plans=A143',
 'housing=A151',
 'housing=A152',
 'housing=A153',
 'skill_level=A171',
 'skill_level=A172',
 'skill_level=A173',
 'skill_level=A17

In [56]:
dataset_transf_train.index

AttributeError: 'GermanDataset' object has no attribute 'index'

In [58]:
dataset_transf_train.instance_names

['993',
 '859',
 '298',
 '553',
 '672',
 '971',
 '27',
 '231',
 '306',
 '706',
 '496',
 '558',
 '784',
 '239',
 '578',
 '55',
 '906',
 '175',
 '14',
 '77',
 '31',
 '481',
 '310',
 '311',
 '883',
 '788',
 '45',
 '103',
 '760',
 '1',
 '823',
 '710',
 '614',
 '790',
 '408',
 '736',
 '957',
 '366',
 '918',
 '267',
 '230',
 '996',
 '635',
 '698',
 '251',
 '783',
 '819',
 '141',
 '316',
 '587',
 '331',
 '295',
 '262',
 '432',
 '862',
 '582',
 '272',
 '270',
 '987',
 '319',
 '569',
 '643',
 '142',
 '202',
 '413',
 '196',
 '264',
 '531',
 '252',
 '576',
 '738',
 '299',
 '740',
 '247',
 '926',
 '412',
 '389',
 '796',
 '601',
 '654',
 '261',
 '456',
 '386',
 '982',
 '909',
 '693',
 '236',
 '501',
 '497',
 '874',
 '452',
 '494',
 '923',
 '279',
 '638',
 '485',
 '568',
 '108',
 '367',
 '644',
 '785',
 '873',
 '65',
 '902',
 '317',
 '636',
 '666',
 '789',
 '958',
 '214',
 '97',
 '583',
 '466',
 '523',
 '255',
 '385',
 '545',
 '382',
 '489',
 '513',
 '780',
 '904',
 '122',
 '946',
 '60',
 '844',
 '3

In [63]:
dataset_transf_train.instance_names[0]

'993'

### Creating a Logistic Regression Model

In [66]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score