In [None]:
!pip install 'aif360[all]'  

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting aif360[all]
  Downloading aif360-0.5.0-py3-none-any.whl (214 kB)
[K     |████████████████████████████████| 214 kB 5.4 MB/s 
Collecting tempeh
  Downloading tempeh-0.1.12-py3-none-any.whl (39 kB)
Collecting jupyter
  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting adversarial-robustness-toolbox>=1.0.0
  Downloading adversarial_robustness_toolbox-1.13.0-py3-none-any.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 65.2 MB/s 
Collecting fairlearn~=0.7
  Downloading fairlearn-0.8.0-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 76.1 MB/s 
[?25hCollecting igraph[plotting]
  Downloading igraph-0.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 51.4 MB/s 
Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[K     |█████████████████████████████

In [None]:
cp adult* /usr/local/lib/python3.8/dist-packages/aif360/data/raw/adult/.

In [None]:
%matplotlib inline
# Load all necessary packages
import sys
sys.path.append("../")
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult, load_preproc_data_compas, load_preproc_data_german

from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display
import matplotlib.pyplot as plt

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

In [None]:
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
import pandas as pd
import numpy as np

def load_preproc_data_adult(protected_attributes=None, sub_samp=False, balance=False,
                            random_state=None):
    """Build an ``AdultDataset`` with binned age/education and a binary income label.

    Args:
        protected_attributes (list of str, optional): Protected attribute
            names. Defaults to ``['sex']``, the only attribute this loader has
            privileged-class and label maps for.
        sub_samp (int or False): If truthy, the number of rows to sample from
            the pre-processed frame. ``False`` keeps every row.
        balance (bool): When ``sub_samp`` is set, draw ``int(sub_samp / 2)``
            rows from each income class instead of sampling the whole frame.
        random_state: Forwarded to ``DataFrame.sample`` so a sub-sample can be
            reproduced. ``None`` (default) keeps the sampling unseeded.

    Returns:
        The ``AdultDataset`` constructed from the pre-processed frame.

    Raises:
        KeyError: If ``protected_attributes`` names an attribute other than
            ``'sex'``.
    """
    def custom_preprocessing(df):
        """Add the derived columns and optionally sub-sample the frame.

        Adapted from
        https://github.com/fair-preprocessing/nips2017/blob/master/Adult/code/Generate_Adult_Data.ipynb
        """

        def group_edu(x):
            # Collapse the tails of the education scale into two buckets.
            if x <= 5:
                return '<6'
            elif x >= 13:
                return '>12'
            else:
                return x

        def age_cut(x):
            # Everything from 70 upwards shares one bucket.
            if x >= 70:
                return '>=70'
            else:
                return x

        # Group age by decade, then cap the range.
        df['Age (decade)'] = df['age'].apply(lambda x: x//10*10)
        df['Age (decade)'] = df['Age (decade)'].apply(age_cut)

        # Limit education range.
        df['Education Years'] = df['education-num'].apply(group_edu)
        df['Education Years'] = df['Education Years'].astype('category')

        # Rename income variable and strip the trailing "." some rows carry.
        # The dot is escaped so it matches a literal period, not any character.
        df['Income Binary'] = df['income-per-year']
        df['Income Binary'] = df['Income Binary'].replace(to_replace=r'>50K\.', value='>50K', regex=True)
        df['Income Binary'] = df['Income Binary'].replace(to_replace=r'<=50K\.', value='<=50K', regex=True)

        # Recode sex as 0.0 / 1.0.
        df['sex'] = df['sex'].replace({'Female': 0.0, 'Male': 1.0})

        if sub_samp and not balance:
            df = df.sample(sub_samp, random_state=random_state)
        if sub_samp and balance:
            per_class = int(sub_samp/2)
            df_0 = df[df['Income Binary'] == '<=50K'].sample(per_class, random_state=random_state)
            df_1 = df[df['Income Binary'] == '>50K'].sample(per_class, random_state=random_state)
            df = pd.concat([df_0, df_1])
        return df

    XD_features = ['Age (decade)', 'Education Years', 'hours-per-week', 'sex']
    D_features = ['sex'] if protected_attributes is None else protected_attributes
    Y_features = ['Income Binary']
    # List filter instead of a set difference so the order is deterministic.
    X_features = [name for name in XD_features if name not in D_features]
    categorical_features = ['Age (decade)', 'Education Years', 'marital-status', 'workclass']

    # privileged classes
    all_privileged_classes = {"sex": [1.0]}

    # protected attribute maps
    all_protected_attribute_maps = {"sex": {1.0: 'Male', 0.0: 'Female'}}

    return AdultDataset(
        label_name=Y_features[0],
        favorable_classes=['>50K', '>50K.'],
        protected_attribute_names=D_features,
        privileged_classes=[all_privileged_classes[x] for x in D_features],
        instance_weights_name=None,
        categorical_features=categorical_features,
        features_to_keep=X_features+Y_features+D_features,
        na_values=['?'],
        metadata={'label_maps': [{1.0: '>50K', 0.0: '<=50K'}],
                  'protected_attribute_maps': [all_protected_attribute_maps[x]
                                for x in D_features]},
        custom_preprocessing=custom_preprocessing)

In [None]:
import os

import pandas as pd

from aif360.datasets import StandardDataset


# Default float -> str mappings attached to the dataset as metadata.
default_mappings = {
    'label_maps': [{1.0: '>50K', 0.0: '<=50K'}],
    'protected_attribute_maps': [{1.0: 'Male', 0.0: 'Female'}]
}

class AdultDataset(StandardDataset):
    """Adult Census Income Dataset, reduced to seven columns.

    See :file:`aif360/data/raw/adult/README.md`.
    """

    def __init__(self, label_name='income-per-year',
                 favorable_classes=['>50K', '>50K.'],
                 protected_attribute_names=['sex'],
                 privileged_classes=[ ['Male']],
                 instance_weights_name=None,
                 categorical_features=['workclass',
                     'marital-status'],
                 features_to_keep=[], features_to_drop=[],
                 na_values=['?'], custom_preprocessing=None,
                 metadata=default_mappings):
        """See :obj:`StandardDataset` for a description of the arguments.

        Only the seven columns listed in ``column_names`` below are read, so
        the arguments should reference those columns (or columns that
        ``custom_preprocessing`` adds).

        Examples:
            To instantiate a dataset which utilizes only numerical features and
            a single protected attribute, run:

            >>> single_protected = ['sex']
            >>> single_privileged = [['Male']]
            >>> ad = AdultDataset(protected_attribute_names=single_protected,
            ... privileged_classes=single_privileged,
            ... categorical_features=[],
            ... features_to_keep=['age', 'education-num'])

            Note: the `protected_attribute_names` and `label_name` are kept even
            if they are not explicitly given in `features_to_keep`.

            In some cases, it may be useful to keep track of a mapping from
            `float -> str` for protected attributes and/or labels. If our use
            case differs from the default, we can modify the mapping stored in
            `metadata`:

            >>> label_maps = [{1.0: '>50K', 0.0: '<=50K'}]
            >>> protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
            >>> ad = AdultDataset(protected_attribute_names=['sex'],
            ... privileged_classes=[['Male']], metadata={'label_maps': label_maps,
            ... 'protected_attribute_maps': protected_attribute_maps})

            Now this information will stay attached to the dataset and can be
            used for more descriptive visualizations.
        """
        import sys

        import aif360

        # Resolve the raw-data folder from the installed package rather than a
        # hard-coded interpreter-specific path.
        data_dir = os.path.join(os.path.dirname(os.path.abspath(aif360.__file__)),
                                'data', 'raw', 'adult')
        train_path = os.path.join(data_dir, 'adult.data')
        test_path = os.path.join(data_dir, 'adult.test')
        # Column layout applied to both files (a reduced, seven-column variant).
        column_names = ['age', 'workclass',
            'education-num', 'marital-status',
             'sex', 'hours-per-week',
             'income-per-year']
        try:
            train = pd.read_csv(train_path, header=None, names=column_names,
                skipinitialspace=True, na_values=na_values)
            # Diagnostic output kept from the original notebook.
            print(column_names)
            print(test_path)
            # header=0: the first line of the test file is discarded.
            test = pd.read_csv(test_path, header=0, names=column_names,
                skipinitialspace=True, na_values=na_values)
        except IOError as err:
            print("IOError: {}".format(err))
            print("To use this class, please download the following files:")
            print("\n\thttps://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data")
            print("\thttps://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test")
            print("\thttps://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names")
            print("\nand place them, as-is, in the folder:")
            # `__file__` is undefined inside a notebook, so report the resolved
            # data folder directly.
            print("\n\t{}\n".format(data_dir))
            sys.exit(1)

        df = pd.concat([test, train], ignore_index=True)

        super(AdultDataset, self).__init__(df=df, label_name=label_name,
            favorable_classes=favorable_classes,
            protected_attribute_names=protected_attribute_names,
            privileged_classes=privileged_classes,
            instance_weights_name=instance_weights_name,
            categorical_features=categorical_features,
            features_to_keep=features_to_keep,
            features_to_drop=features_to_drop, na_values=na_values,
            custom_preprocessing=custom_preprocessing, metadata=metadata)

In [None]:
# Get the dataset and split into train and test
dataset_orig = load_preproc_data_adult()

# Group definitions passed to every fairness metric and model below.
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

# Fixed seed so the shuffled split is the same on every run.
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True, seed=42)

['age', 'workclass', 'education-num', 'marital-status', 'sex', 'hours-per-week', 'income-per-year']
/usr/local/lib/python3.8/dist-packages/aif360/data/raw/adult/adult.test


In [None]:
# Keep only the first part of a shuffled [0.1] split of each set; the
# remainder is discarded. Seeded so the same rows are kept on every run.
# NOTE: this overwrites its own inputs, so re-running the cell without
# re-running the split above shrinks the data again.
dataset_orig_train, _ = dataset_orig_train.split([0.1], shuffle=True, seed=42)
dataset_orig_test, _ = dataset_orig_test.split([0.1], shuffle=True, seed=42)

In [None]:
# Summarise the training split: a Markdown heading per item, then its value(s).
_train_summary = [
    ("#### Training Dataset shape",
     (dataset_orig_train.features.shape,)),
    ("#### Favorable and unfavorable labels",
     (dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)),
    ("#### Protected attribute names",
     (dataset_orig_train.protected_attribute_names,)),
    ("#### Privileged and unprivileged protected attribute values",
     (dataset_orig_train.privileged_protected_attributes,
      dataset_orig_train.unprivileged_protected_attributes)),
    ("#### Dataset feature names",
     (dataset_orig_train.feature_names,)),
]
for _heading, _values in _train_summary:
    display(Markdown(_heading))
    print(*_values)

#### Training Dataset shape

(11587, 31)


#### Favorable and unfavorable labels

1.0 0.0


#### Protected attribute names

['sex']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['sex', 'hours-per-week', 'workclass=1', 'workclass=2', 'workclass=3', 'workclass=4', 'workclass=5', 'workclass=6', 'workclass=7', 'workclass=8', 'marital-status=1', 'marital-status=2', 'marital-status=3', 'marital-status=4', 'marital-status=5', 'Age (decade)=10.0', 'Age (decade)=20.0', 'Age (decade)=30.0', 'Age (decade)=40.0', 'Age (decade)=50.0', 'Age (decade)=60.0', 'Age (decade)=>=70', 'Education Years=6.0', 'Education Years=7.0', 'Education Years=8.0', 'Education Years=9.0', 'Education Years=10.0', 'Education Years=11.0', 'Education Years=12.0', 'Education Years=<6', 'Education Years=>12']


In [None]:
# Fit the scaler on the training features only, then apply the same
# transformation to the test features.
min_max_scaler = MaxAbsScaler()
dataset_orig_train.features = min_max_scaler.fit_transform(dataset_orig_train.features)
dataset_orig_test.features = min_max_scaler.transform(dataset_orig_test.features)

# Both metrics use the same group definitions.
_group_spec = dict(unprivileged_groups=unprivileged_groups,
                   privileged_groups=privileged_groups)
metric_scaled_train = BinaryLabelDatasetMetric(dataset_orig_train, **_group_spec)
metric_scaled_test = BinaryLabelDatasetMetric(dataset_orig_test, **_group_spec)

display(Markdown("#### Scaled dataset - Verify that the scaling does not affect the group label statistics"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_scaled_train.mean_difference())
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_scaled_test.mean_difference())

#### Scaled dataset - Verify that the scaling does not affect the group label statistics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.146590
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.147583


In [None]:
# Build the baseline classifier: the in-processing AdversarialDebiasing model
# with debias set to False, bound to a new TF1 session.
sess = tf.Session()
plain_model = AdversarialDebiasing(privileged_groups = privileged_groups,
                          unprivileged_groups = unprivileged_groups,
                          scope_name='plain_classifier',
                          debias=False,
                          sess=sess)

In [None]:
# Train the baseline (debias=False) model on the scaled training split.
plain_model.fit(dataset_orig_train)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


epoch 0; iter: 0; batch classifier loss: 0.692704
epoch 1; iter: 0; batch classifier loss: 0.615162
epoch 2; iter: 0; batch classifier loss: 0.525696
epoch 3; iter: 0; batch classifier loss: 0.544165
epoch 4; iter: 0; batch classifier loss: 0.511569
epoch 5; iter: 0; batch classifier loss: 0.541053
epoch 6; iter: 0; batch classifier loss: 0.521616
epoch 7; iter: 0; batch classifier loss: 0.484768
epoch 8; iter: 0; batch classifier loss: 0.546925
epoch 9; iter: 0; batch classifier loss: 0.541958
epoch 10; iter: 0; batch classifier loss: 0.524301
epoch 11; iter: 0; batch classifier loss: 0.550457
epoch 12; iter: 0; batch classifier loss: 0.493889
epoch 13; iter: 0; batch classifier loss: 0.504341
epoch 14; iter: 0; batch classifier loss: 0.554702
epoch 15; iter: 0; batch classifier loss: 0.477274
epoch 16; iter: 0; batch classifier loss: 0.509806
epoch 17; iter: 0; batch classifier loss: 0.493482
epoch 18; iter: 0; batch classifier loss: 0.527600
epoch 19; iter: 0; batch classifier loss:

<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x7fcc80bf0eb0>

In [None]:
# Apply the plain model to both the training and the test split
dataset_nodebiasing_train = plain_model.predict(dataset_orig_train)
dataset_nodebiasing_test = plain_model.predict(dataset_orig_test)

In [None]:
# Fairness and accuracy metrics for the plain (non-debiased) model's predictions.
_group_spec = dict(unprivileged_groups=unprivileged_groups,
                   privileged_groups=privileged_groups)

display(Markdown("#### Plain model - without debiasing - dataset metrics"))
metric_dataset_nodebiasing_train = BinaryLabelDatasetMetric(dataset_nodebiasing_train, **_group_spec)
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_train.mean_difference())
metric_dataset_nodebiasing_test = BinaryLabelDatasetMetric(dataset_nodebiasing_test, **_group_spec)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_test.mean_difference())

display(Markdown("#### Plain model - without debiasing - classification metrics"))
# Compares the test split's own labels with the plain model's predicted labels.
classified_metric_nodebiasing_test = ClassificationMetric(dataset_orig_test,
                                                          dataset_nodebiasing_test,
                                                          **_group_spec)
print("Test set: Classification accuracy = %f" % classified_metric_nodebiasing_test.accuracy())

# Balanced accuracy is the mean of the true positive and true negative rates.
TPR = classified_metric_nodebiasing_test.true_positive_rate()
TNR = classified_metric_nodebiasing_test.true_negative_rate()
bal_acc_nodebiasing_test = 0.5*(TPR+TNR)

for _template, _value in (
    ("Test set: Balanced classification accuracy = %f", bal_acc_nodebiasing_test),
    ("Test set: Disparate impact = %f", classified_metric_nodebiasing_test.disparate_impact()),
    ("Test set: Equal opportunity difference = %f", classified_metric_nodebiasing_test.equal_opportunity_difference()),
    ("Test set: Average odds difference = %f", classified_metric_nodebiasing_test.average_odds_difference()),
    ("Test set: Theil_index = %f", classified_metric_nodebiasing_test.theil_index()),
):
    print(_template % _value)

#### Plain model - without debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.314102
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.295687


#### Plain model - without debiasing - classification metrics

Test set: Classification accuracy = 0.726943
Test set: Balanced classification accuracy = 0.702780
Test set: Disparate impact = 0.401240
Test set: Equal opportunity difference = -0.358905
Test set: Average odds difference = -0.266772
Test set: Theil_index = 0.200454


In [None]:
# Close the session used so far, clear the default TF graph, and open a new
# session for the next model.
sess.close()
tf.reset_default_graph()
sess = tf.Session()

In [None]:
# Same model class as the baseline, but with debias set to True and its own
# variable scope, bound to the current session.
debiased_model = AdversarialDebiasing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
    scope_name='debiased_classifier',
    debias=True,
    sess=sess,
)

In [None]:
# Train the debias=True model on the same scaled training split.
debiased_model.fit(dataset_orig_train)

epoch 0; iter: 0; batch classifier loss: 0.703276; batch adversarial loss: 0.706240
epoch 1; iter: 0; batch classifier loss: 0.642951; batch adversarial loss: 0.690546
epoch 2; iter: 0; batch classifier loss: 0.576663; batch adversarial loss: 0.686421
epoch 3; iter: 0; batch classifier loss: 0.573468; batch adversarial loss: 0.683117
epoch 4; iter: 0; batch classifier loss: 0.573726; batch adversarial loss: 0.681579
epoch 5; iter: 0; batch classifier loss: 0.537503; batch adversarial loss: 0.655962
epoch 6; iter: 0; batch classifier loss: 0.588944; batch adversarial loss: 0.675696
epoch 7; iter: 0; batch classifier loss: 0.476611; batch adversarial loss: 0.686252
epoch 8; iter: 0; batch classifier loss: 0.586070; batch adversarial loss: 0.672720
epoch 9; iter: 0; batch classifier loss: 0.556531; batch adversarial loss: 0.681553
epoch 10; iter: 0; batch classifier loss: 0.504035; batch adversarial loss: 0.689391
epoch 11; iter: 0; batch classifier loss: 0.489872; batch adversarial loss:

<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x7fcc7ef8fb80>

In [None]:
# Apply the debiased model to both the training and the test split
dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
dataset_debiasing_test = debiased_model.predict(dataset_orig_test)

In [None]:
def _print_dataset_metrics(metric_train, metric_test):
    """Print the mean-outcome difference for a train and a test dataset metric."""
    print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_train.mean_difference())
    print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_test.mean_difference())


def _print_classification_metrics(metric):
    """Print the test-set classification metrics of ``metric``.

    Returns:
        tuple: ``(TPR, TNR, balanced_accuracy)``, where the balanced accuracy
        is the mean of the true positive and true negative rates.
    """
    print("Test set: Classification accuracy = %f" % metric.accuracy())
    tpr = metric.true_positive_rate()
    tnr = metric.true_negative_rate()
    balanced_accuracy = 0.5*(tpr+tnr)
    print("Test set: Balanced classification accuracy = %f" % balanced_accuracy)
    print("Test set: Disparate impact = %f" % metric.disparate_impact())
    print("Test set: Equal opportunity difference = %f" % metric.equal_opportunity_difference())
    print("Test set: Average odds difference = %f" % metric.average_odds_difference())
    print("Test set: Theil_index = %f" % metric.theil_index())
    return tpr, tnr, balanced_accuracy


# Metrics for the dataset from plain model (without debiasing); these metric
# objects were built in the earlier plain-model metrics cell.
display(Markdown("#### Plain model - without debiasing - dataset metrics"))
_print_dataset_metrics(metric_dataset_nodebiasing_train, metric_dataset_nodebiasing_test)

# Metrics for the dataset from model with debiasing
display(Markdown("#### Model - with debiasing - dataset metrics"))
metric_dataset_debiasing_train = BinaryLabelDatasetMetric(dataset_debiasing_train,
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test,
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
_print_dataset_metrics(metric_dataset_debiasing_train, metric_dataset_debiasing_test)

display(Markdown("#### Plain model - without debiasing - classification metrics"))
TPR, TNR, bal_acc_nodebiasing_test = _print_classification_metrics(classified_metric_nodebiasing_test)

display(Markdown("#### Model - with debiasing - classification metrics"))
classified_metric_debiasing_test = ClassificationMetric(dataset_orig_test,
                                                 dataset_debiasing_test,
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
# TPR / TNR end up holding the debiased model's rates, as in the original cell.
TPR, TNR, bal_acc_debiasing_test = _print_classification_metrics(classified_metric_debiasing_test)

#### Plain model - without debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.314102
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.295687


#### Model - with debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.178944
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.164469


#### Plain model - without debiasing - classification metrics

Test set: Classification accuracy = 0.726943
Test set: Balanced classification accuracy = 0.702780
Test set: Disparate impact = 0.401240
Test set: Equal opportunity difference = -0.358905
Test set: Average odds difference = -0.266772
Test set: Theil_index = 0.200454


#### Model - with debiasing - classification metrics

Test set: Classification accuracy = 0.712646
Test set: Balanced classification accuracy = 0.666136
Test set: Disparate impact = 0.531173
Test set: Equal opportunity difference = -0.203487
Test set: Average odds difference = -0.136575
Test set: Theil_index = 0.251107


In [None]:
# Convert the (scaled) test split for export. The result is indexed with [0]
# in the next cell — presumably a (DataFrame, metadata) pair; verify against
# the aif360 docs.
export_data = dataset_orig_test.convert_to_dataframe()

In [None]:
# Work on a copy of the first element so `export_data` itself is left untouched.
df = export_data[0].copy()

In [None]:
def onehot_to_label_encode(feature, df):
    """Collapse the one-hot columns of ``feature`` into one integer-coded column.

    A column belongs to ``feature`` when its name contains ``feature`` as a
    substring (e.g. ``"Age"`` selects ``"Age (decade)=10.0"``). A column named
    exactly ``feature`` is treated as the already-encoded result, never as one
    of its own one-hot inputs, so calling the function again is harmless.

    Args:
        feature (str): Substring identifying the one-hot columns; also the
            name of the resulting column.
        df (pandas.DataFrame): Frame holding the one-hot columns. It is not
            modified.

    Returns:
        pandas.DataFrame: A new frame in which the matching columns are
        replaced by a single ``feature`` column. Each value is the position,
        within the matching columns in frame order, of the row's largest entry.

    Raises:
        ValueError: If no column matches ``feature`` and no column named
            ``feature`` exists.
    """
    onehot_cols = [col for col in df.columns if feature in col and col != feature]
    if not onehot_cols:
        if feature in df.columns:
            # Already encoded (e.g. the cell was re-run): nothing to collapse.
            return df.copy()
        raise ValueError("no one-hot columns found for feature {!r}".format(feature))

    # Copy first so the caller's frame does not gain a column as a side effect.
    encoded = df.copy()
    encoded[feature] = encoded[onehot_cols].values.argmax(axis=1)
    return encoded.drop(columns=onehot_cols)

In [None]:
# Collapse the "workclass=*" one-hot columns into a single "workclass" column.
df = onehot_to_label_encode("workclass", df)

In [None]:
# Same for marital status; "Education" selects the "Education Years=*" columns
# by substring and becomes the name of the encoded column.
df = onehot_to_label_encode("marital-status", df)
df = onehot_to_label_encode("Education", df)

In [None]:
# "Age" selects the "Age (decade)=*" columns by substring.
df = onehot_to_label_encode("Age", df)

In [None]:
# Only `sex` is loaded as a protected attribute in this notebook, so the frame
# has no "race" column; ignore its absence instead of raising KeyError.
df = df.drop(columns=["race"], errors="ignore")

In [None]:
# Write the label-encoded test frame to adult.csv without the index column.
df.to_csv("adult.csv",index=False)

In [None]:
# Exploratory: list the attributes available on the debiased prediction dataset.
dir(dataset_debiasing_test)

In [None]:
# Fraction of test rows whose debiased-model label equals the label stored in
# the original test split.
(dataset_orig_test.labels == dataset_debiasing_test.labels).sum()/len(dataset_debiasing_test.labels)

In [None]:
# Inspect the column names of the export frame.
df.columns

In [None]:
# Fraction of rows where the frame's "Income Binary" column equals the
# debiased model's labels.
# NOTE(review): the recorded value 1.0 disagrees with the ~0.71 test accuracy
# reported earlier; it would arise if the assignment in the next cell had
# already been executed. Confirm with Restart & Run All.
(df.loc[:, "Income Binary"].values  == dataset_debiasing_test.labels.reshape((-1))).sum()/len(dataset_debiasing_test.labels)

1.0

In [None]:
# Replace the label column with the debiased model's labels, flattened and
# cast to int.
df.loc[:, "Income Binary"] = dataset_debiasing_test.labels.reshape((-1)).astype(int)

In [None]:
# Write the frame, now carrying the debiased model's labels, to
# acs_adv_debias.csv without the index column.
df.to_csv("acs_adv_debias.csv",index=False)

In [None]:
# Display the final export frame.
df

Unnamed: 0,sex,hours-per-week,Income Binary,workclass,marital-status,Education,Age
1464217,0.0,0.272727,0,0,0,8,3
412428,1.0,0.404040,1,4,0,8,4
1412891,0.0,0.202020,0,0,0,8,3
327833,1.0,0.404040,0,0,0,8,6
501086,0.0,0.404040,0,0,3,8,1
...,...,...,...,...,...,...,...
252636,1.0,0.404040,0,0,0,8,3
1588743,0.0,0.202020,0,3,4,8,1
1537878,0.0,0.404040,1,3,0,8,4
1350800,1.0,0.404040,0,0,4,8,2
