### Código que pega os conjuntos de dados originais e gera a versão pré-processada de cada um deles

In [1]:
''' 
Bibliotecas necessárias
'''
import sys
import numpy as np
import pandas as pd
from preprocess_datasets import (Adult, Arrhythmia, Bank, Compas, Contraceptive, DrugAmphet,
                                 DrugEcstasy, German, Heart, Student, Titanic)

# Adult (Census Income)

In [2]:
def convert_index(l, privileged_group):
    """Map a joined index label to the Adult group label.

    Args:
        l: Label to classify (compared with ``==``).
        privileged_group: Label that identifies the privileged group.

    Returns:
        'Male/White' when ``l`` equals ``privileged_group``, otherwise 'Others'.
    """
    return 'Male/White' if l == privileged_group else 'Others'

# Vectorise the labelling helper so it can be applied to a whole sequence
# of labels in one call.
convert_index = np.vectorize(convert_index)

# Load the Adult dataset.
adult = Adult()

# Keep only the rows whose 'race' attribute is White or Black.
filter_ = {'race': ['White', 'Black']}
adult.data_filtering(filter_)

# Run the preprocessing step (implemented in preprocess_datasets.py).
adult.basic_preprocessing()

# Build the single-level MultiIndex (the original note ties this layout to
# AIF360): every index entry is joined with '/' and then labelled either
# 'Male/White' or 'Others'.
frame = pd.DataFrame(
    convert_index(['/'.join(entry) for entry in adult.dataset.index], 'Male/White'),
    columns=['race'],
)
multindex = pd.MultiIndex.from_frame(frame)

# Re-create the dataset on top of the new index, keeping values and columns.
adult.dataset = pd.DataFrame(
    data=adult.dataset.to_numpy(),
    index=multindex,
    columns=adult.dataset.columns,
)
adult.dataset.index.names = ['Group']

# Rename the label column to 'target' and the index labels to the
# Privileged / Unprivileged group names.
adult.dataset.rename(columns={'class': 'target'}, inplace=True)
adult.dataset.rename(
    index={'Male/White': 'Privileged', 'Others': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
adult.dataset.to_csv(path_or_buf='adult.csv', sep=';')

# Arrhythmia

In [3]:
# Load the Arrhythmia dataset.
arrhythmia = Arrhythmia()

# Run the preprocessing step.
arrhythmia.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
arrhythmia.dataset.index.names = ['Group']

# Rename the label column to 'target' and map Male / Female to the
# Privileged / Unprivileged group names.
arrhythmia.dataset.rename(columns={'class': 'target'}, inplace=True)
arrhythmia.dataset.rename(
    index={'Male': 'Privileged', 'Female': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
arrhythmia.dataset.to_csv(path_or_buf='arrhythmia.csv', sep=';')

# Bank

In [4]:
# Load the Bank dataset.
bank = Bank()

# Run the preprocessing step.
bank.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
bank.dataset.index.names = ['Group']

# Rename the label column 'y' to 'target' and map the '>25' / '<=25' index
# labels to the Privileged / Unprivileged group names.
bank.dataset.rename(columns={'y': 'target'}, inplace=True)
bank.dataset.rename(
    index={'>25': 'Privileged', '<=25': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
bank.dataset.to_csv(path_or_buf='bank.csv', sep=';')

# Compas - Men (Recidivism Male)

In [5]:
def convert_index(l, privileged_group):
    """Map a joined index label to the Compas group label.

    Args:
        l: Label to classify (compared with ``!=``).
        privileged_group: Label that identifies the privileged group.

    Returns:
        'Caucasian' when ``l`` equals ``privileged_group``, otherwise 'Others'.
    """
    if l != privileged_group:
        return 'Others'
    return 'Caucasian'

# Vectorise the labelling helper so it can be applied to a whole sequence
# of labels in one call.
convert_index = np.vectorize(convert_index)

# Load the Compas dataset.
compas = Compas()

# Keep only male rows whose race is Caucasian or African-American.
filter_ = {'sex': ['Male'], 'race': ['Caucasian', 'African-American']}
compas.data_filtering(filter_)

# Run the preprocessing step.
compas.basic_preprocessing()

# Build the single-level MultiIndex (the original note ties this layout to
# AIF360): every index entry is joined with '/' and then labelled either
# 'Caucasian' (when it equals 'Male/Caucasian') or 'Others'.
frame = pd.DataFrame(
    convert_index(['/'.join(entry) for entry in compas.dataset.index], 'Male/Caucasian'),
    columns=['race'],
)
multindex = pd.MultiIndex.from_frame(frame)

# Re-create the dataset on top of the new index, keeping values and columns.
compas.dataset = pd.DataFrame(
    data=compas.dataset.to_numpy(),
    index=multindex,
    columns=compas.dataset.columns,
)
compas.dataset.index.names = ['Group']

# Rename the label column to 'target' and the index labels to the
# Privileged / Unprivileged group names.
compas.dataset.rename(columns={'two_year_recid': 'target'}, inplace=True)
compas.dataset.rename(
    index={'Caucasian': 'Privileged', 'Others': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
compas.dataset.to_csv(path_or_buf='compasMen.csv', sep=';')

# Compas - Women (Recidivism Female)

In [6]:
def convert_index(l, privileged_group):
    """Return the Compas group label for a joined index label.

    Args:
        l: Label to classify (compared with ``==``).
        privileged_group: Label that identifies the privileged group.

    Returns:
        'Caucasian' when ``l`` equals ``privileged_group``, otherwise 'Others'.
    """
    return 'Caucasian' if l == privileged_group else 'Others'

# Vectorise the labelling helper so it can be applied to a whole sequence
# of labels in one call.
convert_index = np.vectorize(convert_index)

# Load the Compas dataset.
compas2 = Compas()

# Keep only female rows whose race is Caucasian or African-American.
filter_ = {'sex': ['Female'], 'race': ['Caucasian', 'African-American']}
compas2.data_filtering(filter_)

# Run the preprocessing step.
compas2.basic_preprocessing()

# Build the single-level MultiIndex (the original note ties this layout to
# AIF360): every index entry is joined with '/' and then labelled either
# 'Caucasian' (when it equals 'Female/Caucasian') or 'Others'.
frame = pd.DataFrame(
    convert_index(['/'.join(entry) for entry in compas2.dataset.index], 'Female/Caucasian'),
    columns=['race'],
)
multindex = pd.MultiIndex.from_frame(frame)

# Re-create the dataset on top of the new index, keeping values and columns.
compas2.dataset = pd.DataFrame(
    data=compas2.dataset.to_numpy(),
    index=multindex,
    columns=compas2.dataset.columns,
)
compas2.dataset.index.names = ['Group']

# Rename the label column to 'target' and the index labels to the
# Privileged / Unprivileged group names.
compas2.dataset.rename(columns={'two_year_recid': 'target'}, inplace=True)
compas2.dataset.rename(
    index={'Caucasian': 'Privileged', 'Others': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
compas2.dataset.to_csv(path_or_buf='compasWomen.csv', sep=';')

# Contraceptive

In [7]:
# Load the Contraceptive dataset.
contraceptive = Contraceptive()

# Run the preprocessing step.
contraceptive.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
contraceptive.dataset.index.names = ['Group']

# Rename the label column to 'target' and map Non-Islam / Islam to the
# Privileged / Unprivileged group names.
contraceptive.dataset.rename(columns={'use_contraceptive': 'target'}, inplace=True)
contraceptive.dataset.rename(
    index={'Non-Islam': 'Privileged', 'Islam': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
contraceptive.dataset.to_csv(path_or_buf='contraceptive.csv', sep=';')

# Drug - Amphet

In [8]:
# Load the Drug (Amphet) dataset.
amphet = DrugAmphet()

# Run the preprocessing step.
amphet.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
amphet.dataset.index.names = ['Group']

# Rename the label column to 'target' and map White / Non-White to the
# Privileged / Unprivileged group names.
amphet.dataset.rename(columns={'amphet': 'target'}, inplace=True)
amphet.dataset.rename(
    index={'White': 'Privileged', 'Non-White': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
amphet.dataset.to_csv(path_or_buf='amphet.csv', sep=';')

# Drug - Ecstasy

In [9]:
# Load the Drug (Ecstasy) dataset.
ecstasy = DrugEcstasy()

# Run the preprocessing step.
ecstasy.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
ecstasy.dataset.index.names = ['Group']

# Rename the label column to 'target' and map White / Non-White to the
# Privileged / Unprivileged group names.
ecstasy.dataset.rename(columns={'ecstasy': 'target'}, inplace=True)
ecstasy.dataset.rename(
    index={'White': 'Privileged', 'Non-White': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
ecstasy.dataset.to_csv(path_or_buf='ecstasy.csv', sep=';')

# German Credit

In [10]:
# Load the German Credit dataset.
german = German()

# Run the preprocessing step.
german.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
german.dataset.index.names = ['Group']

# Rename the label column to 'target' and map Male / Female to the
# Privileged / Unprivileged group names.
german.dataset.rename(columns={'class': 'target'}, inplace=True)
german.dataset.rename(
    index={'Male': 'Privileged', 'Female': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
german.dataset.to_csv(path_or_buf='german.csv', sep=';')

# Heart

In [11]:
# Load the Heart dataset.
heart = Heart()

# Run the preprocessing step.
heart.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
heart.dataset.index.names = ['Group']

# Rename the label column to 'target' and map Non-Senior / Senior to the
# Privileged / Unprivileged group names.
heart.dataset.rename(columns={'class': 'target'}, inplace=True)
heart.dataset.rename(
    index={'Non-Senior': 'Privileged', 'Senior': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
heart.dataset.to_csv(path_or_buf='heart.csv', sep=';')

# Student

In [12]:
# Load the Student dataset.
student = Student()

# Run the preprocessing step.
student.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
student.dataset.index.names = ['Group']

# Rename the label column 'Class' to 'target' and map M / F to the
# Privileged / Unprivileged group names.
student.dataset.rename(columns={'Class': 'target'}, inplace=True)
student.dataset.rename(
    index={'M': 'Privileged', 'F': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
student.dataset.to_csv(path_or_buf='student.csv', sep=';')

# Titanic

In [13]:
# Load the Titanic dataset.
titanic = Titanic()

# Run the preprocessing step.
titanic.basic_preprocessing()

# Name the index 'Group' (the original note ties this layout to AIF360).
titanic.dataset.index.names = ['Group']

# Rename the label column 'Survived' to 'target'. Here 'female' is the
# Privileged group and 'male' the Unprivileged one.
titanic.dataset.rename(columns={'Survived': 'target'}, inplace=True)
titanic.dataset.rename(
    index={'female': 'Privileged', 'male': 'Unprivileged'},
    inplace=True,
)

# Save the preprocessed dataset as a ';'-separated CSV file.
titanic.dataset.to_csv(path_or_buf='titanic.csv', sep=';')