In [1]:
import sys
import numpy as np
import pandas as pd
from preprocess_datasets import (Adult, Arrhythmia, Bank, Compas, Contraceptive, DrugCrack, DrugHeroin,
                                 German, Heart, Student, Titanic)

# Adult (Census Income)

In [2]:
def convert_index(l, privileged_group):
    """Collapse a joined group key into one of two labels.

    Parameters
    ----------
    l : str
        Group key to classify (here a '/'-joined index entry).
    privileged_group : str
        Key that identifies the privileged group.

    Returns
    -------
    str
        'Male/White' when ``l`` equals ``privileged_group``, 'Others' otherwise.
    """
    if l == privileged_group:
        return 'Male/White'
    else:
        return 'Others'

# Rebind the name to an element-wise version so it can be applied to a whole
# list of keys at once (the scalar function above is no longer reachable).
convert_index = np.vectorize(convert_index)

# Load Adult.
adult = Adult()

# Keep only the examples whose race is White or Black.
filter_ = {'race' : ['White', 'Black']}
adult.data_filtering(filter_)

# Preprocessing (preprocess_datasets.py).
adult.basic_preprocessing()

# Build the one-level MultiIndex (AIF360): every index entry is joined with
# '/' and labelled 'Male/White' or 'Others'.
# NOTE(review): the join presumably yields 'sex/race' keys - confirm against
# preprocess_datasets.py.
frame = pd.DataFrame(
    convert_index(['/'.join(key) for key in adult.dataset.index], 'Male/White'),
    columns=['race'],
)
multindex = pd.MultiIndex.from_frame(frame)

# Swap the index on a copy instead of rebuilding the frame from to_numpy():
# a round trip through a single ndarray coerces every column to one common
# dtype, whereas assigning the index keeps each column's dtype intact.
adult.dataset = adult.dataset.copy()
adult.dataset.index = multindex

# Name the index level 'Group', then rename the label column and the two
# group values.
adult.dataset.index.set_names('Group', inplace=True)
adult.dataset.rename(
    columns={'class': 'target'},
    index={'Male/White': 'Privileged', 'Others': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
adult.dataset.to_csv('adult.csv', sep=';')

# Arrhythmia

In [3]:
# Load Arrhythmia and run its preprocessing.
arrhythmia = Arrhythmia()
arrhythmia.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
arrhythmia.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'Male' -> 'Privileged', 'Female' -> 'Unprivileged'.
arrhythmia.dataset.rename(
    columns={'class': 'target'},
    index={'Male': 'Privileged', 'Female': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
arrhythmia.dataset.to_csv('arrhythmia.csv', sep=';')

# Bank

In [4]:
# Load Bank and run its preprocessing.
bank = Bank()
bank.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
bank.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# '>25' -> 'Privileged', '<=25' -> 'Unprivileged'.
bank.dataset.rename(
    columns={'y': 'target'},
    index={'>25': 'Privileged', '<=25': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
bank.dataset.to_csv('bank.csv', sep=';')

# Compas - Men (Recidivism Male)

In [5]:
def convert_index(l, privileged_group):
    """Collapse a joined group key into one of two labels.

    Parameters
    ----------
    l : str
        Group key to classify (here a '/'-joined index entry).
    privileged_group : str
        Key that identifies the privileged group.

    Returns
    -------
    str
        'Caucasian' when ``l`` equals ``privileged_group``, 'Others' otherwise.
    """
    if l == privileged_group:
        return 'Caucasian'
    else:
        return 'Others'

# Rebind the name to an element-wise version so it can be applied to a whole
# list of keys at once (the scalar function above is no longer reachable).
convert_index = np.vectorize(convert_index)

# Load Compas.
compas = Compas()

# Keep only male examples whose race is Caucasian or African-American.
filter_ = {'sex' : ['Male'], 'race' : ['Caucasian', 'African-American']}
compas.data_filtering(filter_)

# Preprocessing.
compas.basic_preprocessing()

# Build the one-level MultiIndex (AIF360): every index entry is joined with
# '/' and labelled 'Caucasian' (key 'Male/Caucasian') or 'Others'.
# NOTE(review): the join presumably yields 'sex/race' keys - confirm against
# preprocess_datasets.py.
frame = pd.DataFrame(
    convert_index(['/'.join(key) for key in compas.dataset.index], 'Male/Caucasian'),
    columns=['race'],
)
multindex = pd.MultiIndex.from_frame(frame)

# Swap the index on a copy instead of rebuilding the frame from to_numpy():
# a round trip through a single ndarray coerces every column to one common
# dtype, whereas assigning the index keeps each column's dtype intact.
compas.dataset = compas.dataset.copy()
compas.dataset.index = multindex

# Name the index level 'Group', then rename the label column and the two
# group values.
compas.dataset.index.set_names('Group', inplace=True)
compas.dataset.rename(
    columns={'two_year_recid': 'target'},
    index={'Caucasian': 'Privileged', 'Others': 'Unprivileged'},
    inplace=True,
)

# Save Compas with only male examples.
compas.dataset.to_csv('compasMen.csv', sep=';')

# Compas - Women (Recidivism Female)

In [6]:
def convert_index(l, privileged_group):
    """Collapse a joined group key into one of two labels.

    Parameters
    ----------
    l : str
        Group key to classify (here a '/'-joined index entry).
    privileged_group : str
        Key that identifies the privileged group.

    Returns
    -------
    str
        'Caucasian' when ``l`` equals ``privileged_group``, 'Others' otherwise.
    """
    if l == privileged_group:
        return 'Caucasian'
    else:
        return 'Others'

# Rebind the name to an element-wise version so it can be applied to a whole
# list of keys at once (the scalar function above is no longer reachable).
convert_index = np.vectorize(convert_index)

# Load Compas.
compas2 = Compas()

# Keep only female examples whose race is Caucasian or African-American.
filter_ = {'sex' : ['Female'], 'race' : ['Caucasian', 'African-American']}
compas2.data_filtering(filter_)

# Preprocessing.
compas2.basic_preprocessing()

# Build the one-level MultiIndex (AIF360): every index entry is joined with
# '/' and labelled 'Caucasian' (key 'Female/Caucasian') or 'Others'.
# NOTE(review): the join presumably yields 'sex/race' keys - confirm against
# preprocess_datasets.py.
frame = pd.DataFrame(
    convert_index(['/'.join(key) for key in compas2.dataset.index], 'Female/Caucasian'),
    columns=['race'],
)
multindex = pd.MultiIndex.from_frame(frame)

# Swap the index on a copy instead of rebuilding the frame from to_numpy():
# a round trip through a single ndarray coerces every column to one common
# dtype, whereas assigning the index keeps each column's dtype intact.
compas2.dataset = compas2.dataset.copy()
compas2.dataset.index = multindex

# Name the index level 'Group', then rename the label column and the two
# group values.
compas2.dataset.index.set_names('Group', inplace=True)
compas2.dataset.rename(
    columns={'two_year_recid': 'target'},
    index={'Caucasian': 'Privileged', 'Others': 'Unprivileged'},
    inplace=True,
)

# Save Compas with only female examples.
compas2.dataset.to_csv('compasWomen.csv', sep=';')

# Contraceptive

In [7]:
# Load Contraceptive and run its preprocessing.
contraceptive = Contraceptive()
contraceptive.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
contraceptive.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'Non-Islam' -> 'Privileged', 'Islam' -> 'Unprivileged'.
contraceptive.dataset.rename(
    columns={'use_contraceptive': 'target'},
    index={'Non-Islam': 'Privileged', 'Islam': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
contraceptive.dataset.to_csv('contraceptive.csv', sep=';')

# Drug - Crack

In [8]:
# Load Drug - Crack and run its preprocessing.
crack = DrugCrack()
crack.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
crack.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'White' -> 'Privileged', 'Non-White' -> 'Unprivileged'.
crack.dataset.rename(
    columns={'crack': 'target'},
    index={'White': 'Privileged', 'Non-White': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
crack.dataset.to_csv('crack.csv', sep=';')

# Drug - Heroin

In [9]:
# Load Drug - Heroin and run its preprocessing.
heroin = DrugHeroin()
heroin.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
heroin.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'White' -> 'Privileged', 'Non-White' -> 'Unprivileged'.
heroin.dataset.rename(
    columns={'heroin': 'target'},
    index={'White': 'Privileged', 'Non-White': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
heroin.dataset.to_csv('heroin.csv', sep=';')

# German Credit

In [10]:
# Load German Credit and run its preprocessing.
german = German()
german.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
german.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'Male' -> 'Privileged', 'Female' -> 'Unprivileged'.
german.dataset.rename(
    columns={'class': 'target'},
    index={'Male': 'Privileged', 'Female': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
german.dataset.to_csv('german.csv', sep=';')

# Heart

In [11]:
# Load Heart and run its preprocessing.
heart = Heart()
heart.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
heart.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'Non-Senior' -> 'Privileged', 'Senior' -> 'Unprivileged'.
heart.dataset.rename(
    columns={'class': 'target'},
    index={'Non-Senior': 'Privileged', 'Senior': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
heart.dataset.to_csv('heart.csv', sep=';')

# Student

In [12]:
# Load Student and run its preprocessing.
student = Student()
student.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
student.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'M' -> 'Privileged', 'F' -> 'Unprivileged'.
student.dataset.rename(
    columns={'Class': 'target'},
    index={'M': 'Privileged', 'F': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
student.dataset.to_csv('student.csv', sep=';')

# Titanic

In [13]:
# Load Titanic and run its preprocessing.
titanic = Titanic()
titanic.basic_preprocessing()

# Name the index 'Group' (AIF360 layout).
titanic.dataset.index.set_names('Group', inplace=True)

# Rename the label column to 'target' and map the index values
# 'female' -> 'Privileged', 'male' -> 'Unprivileged'.
titanic.dataset.rename(
    columns={'Survived': 'target'},
    index={'female': 'Privileged', 'male': 'Unprivileged'},
    inplace=True,
)

# Save as a semicolon-separated CSV.
titanic.dataset.to_csv('titanic.csv', sep=';')