In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
!pip install aif360
!pip install BlackBoxAuditing
!pip install fairlearn==0.4.6

Collecting fairlearn==0.4.6
  Downloading fairlearn-0.4.6-py3-none-any.whl (21.2 MB)
[K     |████████████████████████████████| 21.2 MB 3.4 MB/s eta 0:00:01
Installing collected packages: fairlearn
  Attempting uninstall: fairlearn
    Found existing installation: fairlearn 0.6.1
    Uninstalling fairlearn-0.6.1:
      Successfully uninstalled fairlearn-0.6.1
Successfully installed fairlearn-0.4.6


# Load and Split Data

In [115]:
from aif360.datasets import AdultDataset, BinaryLabelDataset

In [122]:
adult_numerical = pd.read_csv('fairness/data/preprocessed/adult_numerical.csv')
filtered_idx = adult_numerical['race'] != 'Other'
adult_numerical = adult_numerical[filtered_idx]
adult_numerical['race-sex'] = adult_numerical['race-sex'].astype('category').cat.codes
adult_numerical['race'] = adult_numerical['race-sex'].astype('category').cat.codes

In [None]:
# TO FINISH
adult_binary = BinaryLabelDataset(df=adult_numerical,
                                  label_names=['<=50K', '>50K'], 
                                  protected_attribute_names=['race', 'sex'], 
                                  instance_weights_name=None, 
                                  scores_names=[], 
                                  unprivileged_protected_attributes=[], 
                                  privileged_protected_attributes=[], 
                                  metadata=None)

In [None]:
aif360.datasets.StructuredDataset(df, label_names, protected_attribute_names, instance_weights_name=None, scores_names=[], unprivileged_protected_attributes=[], privileged_protected_attributes=[], metadata=None)

In [4]:
ad = AdultDataset(instance_weights_name='fnlwgt', features_to_drop=[])



In [5]:
ad_train, ad_test = ad.split(2)

# Unconstrained Model

In [6]:
from sklearn.linear_model import LogisticRegression
from copy import deepcopy
from aif360.metrics import ClassificationMetric

In [7]:
ad_df_train, ad_attrs_train = ad_train.convert_to_dataframe(de_dummy_code=False, sep='=', set_category=True)
ad_df_test, ad_attrs_test = ad_test.convert_to_dataframe(de_dummy_code=False, sep='=', set_category=True)

In [8]:
Y_train = np.array(ad_df_train[['income-per-year']]).reshape(((len(ad_df_train),))).astype(int)
Xs_train = np.array(ad_df_train.drop(columns='income-per-year'))
Y_test = np.array(ad_df_test[['income-per-year']]).reshape(((len(ad_df_test),))).astype(int)
Xs_test = np.array(ad_df_test.drop(columns='income-per-year'))

In [9]:
clf = LogisticRegression(max_iter = 300, solver='liblinear').fit(Xs_train, Y_train)

In [10]:
predicted_labels_train = clf.predict(Xs_train).reshape((len(Y_train), 1))
predicted_labels = clf.predict(Xs_test).reshape((len(Y_test), 1))

In [11]:
ad_pred = deepcopy(ad_test)
ad_pred_train = deepcopy(ad_train)
ad_pred.labels = predicted_labels
ad_pred_train.labels = predicted_labels_train

In [12]:
u = [{'race': 0}]
p = [{'race': 1}]
metrics = ClassificationMetric(ad_test,ad_pred,unprivileged_groups=u, privileged_groups=p)
test_acc = metrics.accuracy()
DI = metrics.disparate_impact()
EO = metrics.average_abs_odds_difference()
print('Original Test Accuracy:', test_acc)
print('Original Demographic Parity ratio:', DI)
print('Original Average Absolute Odds diff:', EO)

Original Test Accuracy: 0.8516964899300383
Original Demographic Parity ratio: 0.5652787213393603
Original Average Absolute Odds diff: 0.049887323350057494


In [13]:
from mia.membership_inference_attacks import black_box_benchmarks

In [14]:
# Membership inference attack

# train shadow model
shadow_train, shadow_test = ad_test.split(2)
shadow_train_df, _ = shadow_train.convert_to_dataframe(de_dummy_code=False, sep='=', set_category=True)
shadow_test_df, _ = shadow_test.convert_to_dataframe(de_dummy_code=False, sep='=', set_category=True)
shadow_Y_train = np.array(shadow_train_df[['income-per-year']]).reshape(((len(shadow_train_df),))).astype(int)
shadow_Xs_train = np.array(shadow_train_df.drop(columns='income-per-year'))
shadow_Y_test = np.array(shadow_test_df[['income-per-year']]).reshape(((len(shadow_test_df),))).astype(int)
shadow_Xs_test = np.array(shadow_test_df.drop(columns='income-per-year'))

true_pred_on_shadow_train = clf.predict(shadow_Xs_train)
true_pred_on_shadow_test = clf.predict(shadow_Xs_test)

shadow_model = LogisticRegression(max_iter = 300, solver='liblinear').fit(shadow_Xs_train, true_pred_on_shadow_train)


In [15]:
# get outputs
shadow_out_train = shadow_model.predict_proba(shadow_Xs_train)
shadow_out_test = shadow_model.predict_proba(shadow_Xs_test)
true_model_train = clf.predict_proba(Xs_train)
true_model_test = clf.predict_proba(Xs_test)

In [16]:
shadow_train_performance = (shadow_out_train, true_pred_on_shadow_train)
shadow_test_performance = (shadow_out_test, true_pred_on_shadow_test)
target_train_performance = (true_model_train, Y_train)
target_test_performance = (true_model_test, Y_test)

In [17]:
shadow_train_df.head()

Unnamed: 0,age,education-num,race,sex,capital-gain,capital-loss,hours-per-week,workclass=Federal-gov,workclass=Local-gov,workclass=Private,...,native-country=Puerto-Rico,native-country=Scotland,native-country=South,native-country=Taiwan,native-country=Thailand,native-country=Trinadad&Tobago,native-country=United-States,native-country=Vietnam,native-country=Yugoslavia,income-per-year
24471,23.0,9.0,1.0,0.0,0.0,0.0,33.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
24472,40.0,14.0,1.0,0.0,0.0,1876.0,35.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
24473,45.0,10.0,1.0,0.0,0.0,0.0,35.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
24475,22.0,10.0,1.0,1.0,0.0,0.0,40.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
24476,38.0,13.0,1.0,1.0,0.0,0.0,50.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


In [18]:
shadow_train_df['race-sex'] = shadow_train_df['race'].astype(str) + '-' + shadow_train_df['sex'].astype(str)
shadow_train_df['race-sex'] = shadow_train_df['race-sex'].astype('category').cat.codes
shadow_test_df['race-sex'] = shadow_test_df['race'].astype(str) + '-' + shadow_test_df['sex'].astype(str)
shadow_test_df['race-sex'] = shadow_test_df['race-sex'].astype('category').cat.codes
ad_df_train['race-sex'] = ad_df_train['race'].astype(str) + '-' + ad_df_train['sex'].astype(str)
ad_df_train['race-sex'] = ad_df_train['race-sex'].astype('category').cat.codes
ad_df_test['race-sex'] = ad_df_test['race'].astype(str) + '-' + ad_df_test['sex'].astype(str)
ad_df_test['race-sex'] = ad_df_test['race-sex'].astype('category').cat.codes

In [19]:
# run MIA
MIA = black_box_benchmarks(shadow_train_performance,shadow_test_performance,
                         target_train_performance,target_test_performance,num_classes=2)

In [20]:
MIA._mem_inf_benchmarks(shadow_train_df['race-sex'].astype(int), shadow_test_df['race-sex'].astype(int), ad_df_train['race-sex'].astype(int), ad_df_test['race-sex'].astype(int))

For membership inference attack via correctness, the attack acc is 0.500, with train acc 0.847 and test acc 0.848
For membership inference attack via confidence, group attr 0, the attack acc is 0.533
For membership inference attack via confidence, group attr 1, the attack acc is 0.515
For membership inference attack via confidence, group attr 2, the attack acc is 0.506
For membership inference attack via confidence, group attr 3, the attack acc is 0.505
For membership inference attack via confidence, the shadow attack acc is 0.509
For membership inference attack via confidence, the attack acc is 0.499
For membership inference attack via entropy, group attr 0, the attack acc is 0.533
For membership inference attack via entropy, group attr 1, the attack acc is 0.515
For membership inference attack via entropy, group attr 2, the attack acc is 0.505
For membership inference attack via entropy, group attr 3, the attack acc is 0.505
For membership inference attack via entropy, the shadow att

# Privacy Risk Score Analysis

In [52]:
from mia.privacy_risk_score_utils import calculate_risk_score

In [67]:
tr_values = MIA._entr_comp(shadow_out_train)
te_values = MIA._entr_comp(shadow_out_test)
data_values = MIA._entr_comp(true_model_train)

In [106]:
# get risk scores
risk_scores = calculate_risk_score(tr_values, te_values, 
                                   true_pred_on_shadow_train, 
                                   true_pred_on_shadow_test, 
                                   data_values, Y_train, num_bins=5, log_bins=True)

In [95]:
white_idx = ad_df_train['race'] == 1
nonwhite_idx = ad_df_train['race'] == 0
male_idx = ad_df_train['sex'] == 1
female_idx = ad_df_train['sex'] == 0
income_above_idx = ad_df_train['income-per-year'] == 1
income_below_idx = ad_df_train['income-per-year'] == 0

In [96]:
plt.clf()
plt.hist(risk_scores[white_idx])
plt.savefig('white.png')

In [97]:
plt.clf()
plt.hist(risk_scores[nonwhite_idx])
plt.savefig('nonwhite.png')

In [109]:
plt.clf()
plt.hist(risk_scores[male_idx])
plt.savefig('male.png')

In [110]:
plt.clf()
plt.hist(risk_scores[female_idx])
plt.savefig('female.png')

In [108]:
plt.clf()
plt.hist(risk_scores[income_above_idx])
plt.savefig('income_above.png')

In [107]:
plt.clf()
plt.hist(risk_scores[income_below_idx])
plt.savefig('income_below.png')

# Demographic Parity (Independence)

## Feldman et al. Repair (Preprocessing)

In [21]:
import aif360.algorithms.preprocessing as AIF

In [22]:
repairer = AIF.DisparateImpactRemover(repair_level=1.0, sensitive_attribute='race')
repaired_train = repairer.fit_transform(ad_train)
repaired_test = repairer.fit_transform(ad_test)

In [23]:
repaired_df_train, repaired_attrs_train = repaired_train.convert_to_dataframe(de_dummy_code=False, sep='=', set_category=True)
repaired_df_test, repaired_attrs_test = repaired_test.convert_to_dataframe(de_dummy_code=False, sep='=', set_category=True)

In [24]:
Y_rep_train = np.array(repaired_df_train[['income-per-year']]).reshape(((len(repaired_df_train),)))
Xs_rep_train = np.array(repaired_df_train.drop(columns='income-per-year'))

Y_rep_test = np.array(repaired_df_test[['income-per-year']]).reshape(((len(repaired_df_test),)))
Xs_rep_test = np.array(repaired_df_test.drop(columns='income-per-year'))

In [25]:
clf_rep = LogisticRegression(max_iter = 300, solver='liblinear').fit(Xs_rep_train, Y_rep_train)

In [26]:
predicted_labels_rep = clf_rep.predict(Xs_rep_test).reshape((len(Y_rep_test), 1))

In [27]:
ad_pred_rep = deepcopy(ad_test)
ad_pred_rep.labels = predicted_labels_rep

In [28]:
metrics_rep = ClassificationMetric(ad_test,ad_pred_rep,unprivileged_groups=u, privileged_groups=p)
test_acc_rep = metrics_rep.accuracy()
DI_rep = metrics_rep.disparate_impact()
EO_rep = metrics_rep.average_abs_odds_difference()
print('Repair Test Accuracy:', test_acc_rep)
print('Repair Demographic Parity ratio:', DI_rep)
print('Repair Average Absolute Odds diff:', EO_rep)

Repair Test Accuracy: 0.8529882411264145
Repair Demographic Parity ratio: 0.5042660361320814
Repair Average Absolute Odds diff: 0.070722554642618


## Kamishima et al. Regularization (Regularization)

In [29]:
!pip install tensorflow



In [30]:
from aif360.algorithms.inprocessing import PrejudiceRemover

In [31]:
PrejRemover = PrejudiceRemover(eta=0.1, sensitive_attr='race', class_attr='income-per-year')

In [32]:
PrejRemover = PrejRemover.fit(ad_train)
kamishima_pred = PrejRemover.predict(ad_test)

In [33]:
metrics_kamishima = ClassificationMetric(ad_test,kamishima_pred,unprivileged_groups=u, privileged_groups=p)
test_acc_prej = metrics_kamishima.accuracy()
DI_prej = metrics_kamishima.disparate_impact()
EO_prej = metrics_kamishima.average_abs_odds_difference()
print('Prej Remover Test Accuracy:', test_acc_prej)
print('Prej Remover Demographic Parity ratio:', DI_prej)
print('Prej Remover Average Absolute Odds diff:', EO_prej)

Prej Remover Test Accuracy: 0.84544298717375
Prej Remover Demographic Parity ratio: 0.4978427301386072
Prej Remover Average Absolute Odds diff: 0.07570807950451995


## Agarwal et al (Reduction)

In [34]:
from aif360.algorithms.inprocessing.exponentiated_gradient_reduction import ExponentiatedGradientReduction

In [35]:
estimator = LogisticRegression(solver='liblinear')
np.random.seed(0) #need for reproducibility
exp_grad_red_dp = ExponentiatedGradientReduction(estimator=estimator, 
                                              constraints="DemographicParity",
                                              drop_prot_attr=False)

In [36]:
exp_grad_red_dp.fit(ad_train)
exp_grad_red_pred_dp = exp_grad_red_dp.predict(ad_test)

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


In [37]:
metrics_red_dp = ClassificationMetric(ad_test,exp_grad_red_pred_dp,unprivileged_groups=u, privileged_groups=p)
test_acc_red_dp = metrics_red_dp.accuracy()
DI_red_dp = metrics_red_dp.disparate_impact()
EO_red_dp = metrics_red_dp.average_abs_odds_difference()
print('DP Reduction Test Accuracy:', test_acc_red_dp)
print('DP Reduction Demographic Parity ratio:', DI_red_dp)
print('DP Reduction Average Absolute Odds diff:', EO_red_dp)

DP Reduction Test Accuracy: 0.8337852994174766
DP Reduction Demographic Parity ratio: 0.9537495729438386
DP Reduction Average Absolute Odds diff: 0.0704127959398408


# Equalized Odds (Separation)

## Hardt et al. (Postprocessing)

In [38]:
from aif360.algorithms.postprocessing import EqOddsPostprocessing

In [39]:
postprocessing = EqOddsPostprocessing(u, p)

In [40]:
postprocessing = postprocessing.fit(ad_train, ad_pred_train)
postprocess_pred = postprocessing.predict(ad_pred)

In [41]:
metrics_post = ClassificationMetric(ad_test,postprocess_pred,unprivileged_groups=u, privileged_groups=p)
test_acc_post = metrics_post.accuracy()
DI_post = metrics_post.disparate_impact()
EO_ppost = metrics_post.average_abs_odds_difference()
print('Postprocessing Test Accuracy:', test_acc_post)
print('Postprocessing Demographic Parity ratio:', DI_post)
print('Postprocessing Average Absolute Odds diff:', EO_ppost)

Postprocessing Test Accuracy: 0.8447642402751687
Postprocessing Demographic Parity ratio: 0.7119058923062103
Postprocessing Average Absolute Odds diff: 0.012286884883292742


## Agarwal et al (Reduction)

In [42]:
estimator = LogisticRegression(solver='liblinear')
np.random.seed(0) #need for reproducibility
exp_grad_red = ExponentiatedGradientReduction(estimator=estimator, 
                                              constraints="EqualizedOdds",
                                              drop_prot_attr=False)

In [43]:
exp_grad_red.fit(ad_train)
exp_grad_red_pred = exp_grad_red.predict(ad_test)

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


In [44]:
metrics_red_eo = ClassificationMetric(ad_test,exp_grad_red_pred,unprivileged_groups=u, privileged_groups=p)
test_acc_red_eo = metrics_red_eo.accuracy()
DI_red_eo = metrics_red_eo.disparate_impact()
EO_red_eo = metrics_red_eo.average_abs_odds_difference()
print('EO Reduction Test Accuracy:', test_acc_red_eo)
print('EO Reduction Demographic Parity ratio:', DI_red_eo)
print('EO Reduction Average Absolute Odds diff:', EO_red_eo)

EO Reduction Test Accuracy: 0.8390311892594322
EO Reduction Demographic Parity ratio: 0.6709987046165171
EO Reduction Average Absolute Odds diff: 0.010889519330083027


In [45]:
# Membership inference attack

# train shadow model
true_pred_on_shadow_train = exp_grad_red.predict(shadow_train)
true_pred_on_shadow_test = exp_grad_red.predict(shadow_test)

shadow_model = LogisticRegression(max_iter = 300, solver='liblinear').fit(shadow_Xs_train, true_pred_on_shadow_train.labels)

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


In [46]:
X_df_train = pd.DataFrame(ad_train.features, columns=ad_train.feature_names)
X_df_test = pd.DataFrame(ad_test.features, columns=ad_test.feature_names)

In [47]:
# get outputs
shadow_out_train = shadow_model.predict_proba(shadow_Xs_train)
shadow_out_test = shadow_model.predict_proba(shadow_Xs_test)
true_model_train = exp_grad_red.model.predict_proba(X_df_train)
true_model_test = exp_grad_red.model.predict_proba(X_df_test)

In [48]:
true_pred_on_shadow_train = true_pred_on_shadow_train.labels.astype(int).reshape(len(true_pred_on_shadow_train.labels),)
true_pred_on_shadow_test = true_pred_on_shadow_test.labels.astype(int).reshape(len(true_pred_on_shadow_test.labels),)

In [49]:
shadow_train_performance = (shadow_out_train, true_pred_on_shadow_train)
shadow_test_performance = (shadow_out_test, true_pred_on_shadow_test)
target_train_performance = (true_model_train, Y_train)
target_test_performance = (true_model_test, Y_test)

In [50]:
# run MIA
MIA = black_box_benchmarks(shadow_train_performance,shadow_test_performance,
                         target_train_performance,target_test_performance,num_classes=2)

In [51]:
MIA._mem_inf_benchmarks(shadow_train_df['race-sex'].astype(int), shadow_test_df['race-sex'].astype(int), ad_df_train['race-sex'].astype(int), ad_df_test['race-sex'].astype(int))

For membership inference attack via correctness, the attack acc is 0.499, with train acc 0.846 and test acc 0.847
For membership inference attack via confidence, group attr 0, the attack acc is 0.531
For membership inference attack via confidence, group attr 1, the attack acc is 0.510
For membership inference attack via confidence, group attr 2, the attack acc is 0.508
For membership inference attack via confidence, group attr 3, the attack acc is 0.510
For membership inference attack via confidence, the shadow attack acc is 0.512
For membership inference attack via confidence, the attack acc is 0.501
For membership inference attack via entropy, group attr 0, the attack acc is 0.532
For membership inference attack via entropy, group attr 1, the attack acc is 0.510
For membership inference attack via entropy, group attr 2, the attack acc is 0.507
For membership inference attack via entropy, group attr 3, the attack acc is 0.510
For membership inference attack via entropy, the shadow att