In [38]:
!pip install https://github.com/adebayoj/fairml/archive/master.zip
!pip install fairml --user

Collecting https://github.com/adebayoj/fairml/archive/master.zip
  Using cached https://github.com/adebayoj/fairml/archive/master.zip
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


First we import modules for model building and data
processing.

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

Now, we import the two key methods from fairml.
audit_model takes:

- (required) black-box function, which is the model to be audited
- (required) sample_data to be perturbed for querying the function. This has to be a pandas dataframe with no missing data.

- other optional parameters that control the mechanics of the auditing process, for example:
  - number_of_runs : number of iterations to perform
  - interactions : flag to enable checking model dependence on interactions.

audit_model returns an overloaded dictionary where the keys are the column names of the input pandas dataframe and the values are lists containing the model's dependence on that particular feature. These lists are of size number_of_runs.

In [39]:
from fairml import audit_model
from fairml import plot_dependencies
import fairml
from collections import defaultdict

In [40]:
# Exploratory check: print the package help, which lists fairml's submodules
# and the location of the installed package.
help(fairml)

Help on package fairml:

NAME
    fairml

PACKAGE CONTENTS
    black_box_functionality
    graphing
    non_linear_expansion
    orthogonal_projection
    perturbation_strategies
    utils

FILE
    c:\users\equipo\anaconda3\lib\site-packages\fairml\__init__.py




# Ryan's classes


In [4]:
#Original code is fairdetect_functions
import fairdetect_classes 
from fairdetect_classes import FairDetect, Report, Backup, EnhancedLabelEncoder, Splitting
#Everything is imported from the fairdetect_classes import list so next lines are skippable

# Read in the synthetic credit card approval data to be used for our analysis


In [5]:
# Load the synthetic credit-card approval dataset; the path is relative to the
# notebook's working directory.
data = pd.read_csv("synthetic_credit_card_approval.csv")
# Keep a separate copy of the freshly loaded frame.
# NOTE(review): `data2` is not referenced by any other visible cell — confirm it is still needed.
data2 = data.copy()

# Data Cleaning

In [6]:
from fairdetect_classes import EnhancedLabelEncoder
# Run the project's label-encoder helper over `data`; the returned frame is only
# displayed as the cell output, it is not assigned to a variable.
# NOTE(review): whether fit_transform_columns also modifies `data` in place is not
# visible from this notebook — confirm against fairdetect_classes.
le = EnhancedLabelEncoder()
le.fit_transform_columns(data)

Unnamed: 0,Num_Children,Group,Income,Own_Car,Own_Housing,Target
0,4,0,36151,0,0,0
1,0,0,36095,1,0,0
2,2,1,62110,1,1,0
3,0,1,73644,1,0,0
4,3,0,99146,0,0,1
...,...,...,...,...,...,...
499995,1,0,43299,1,1,0
499996,8,1,75956,1,1,1
499997,3,0,66476,0,0,0
499998,2,1,74524,0,0,0


In [7]:
# DataFrame.drop_duplicates() returns a new frame and leaves `data` untouched, so the
# result has to be assigned back; otherwise the later splitting/fitting cells still
# work on the frame that contains the duplicate rows.
data = data.drop_duplicates()
# Display the de-duplicated frame as the cell output (same view as before).
data

Unnamed: 0,Num_Children,Group,Income,Own_Car,Own_Housing,Target
0,4,0,36151,0,0,0
1,0,0,36095,1,0,0
2,2,1,62110,1,1,0
3,0,1,73644,1,0,0
4,3,0,99146,0,0,1
...,...,...,...,...,...,...
499995,1,0,43299,1,1,0
499996,8,1,75956,1,1,1
499997,3,0,66476,0,0,0
499998,2,1,74524,0,0,0


# Splitting

In [8]:
from fairdetect_classes import Splitting
# Use the project's Splitting helper to break `data` into four objects, unpacked as
# X_train, X_test, y_train and y_test.
# NOTE(review): the split ratio and any shuffling/seed are decided inside
# Splitting.split_data, which is not visible here — confirm for reproducibility.
sp = Splitting()
X_train, X_test, y_train, y_test = sp.split_data(data,'Target') # 'Target' is passed as the dependent (label) column
print("Original data file now splitted in 4 files")

Original data file now splitted in 4 files


# Fitting the model

In [9]:
import xgboost

# Build an XGBoost classifier with its default settings and train it on the
# training split.
model = xgboost.XGBClassifier()
model.fit(X_train, y_train)

# Predict on the test split; the predictions are stored in `y_test_predict`.
y_test_predict = model.predict(X_test)
print("Success: XGBoost model fitted")

Success: XGBoost model fitted


In [10]:
# Construct the project's FairDetect helper from the fitted model and the
# X_test / y_test split.
# NOTE(review): `fd` is not used by any other visible cell — confirm it is needed.
fd = FairDetect(model, X_test, y_test)

# This is just for demonstration; any classifier or regressor can be used here, because fairml only requires a predict function to diagnose a black-box model. The original fairml example fits a quick-and-dirty scikit-learn logistic regression; this notebook audits the XGBoost model fitted above instead.

In [48]:
import matplotlib
# Select matplotlib's non-interactive Agg backend: figures are rendered off-screen,
# and the cells below write them to disk with plt.savefig.
matplotlib.use('Agg')
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# numpy, pandas and LogisticRegression were already imported in an earlier cell;
# these repeated imports are harmless.
import pandas as pd
from sklearn.linear_model import LogisticRegression

In [45]:
# Call audit_model with the fitted model's predict function as the black box and
# X_train as the sample data to be perturbed.
# Only the first of the two returned values (`total`) is kept; the second is discarded.
total, _ = audit_model(model.predict , X_train)

In [46]:
# Two empty list-valued defaultdicts.
# NOTE(review): neither dictionary is read or written in any other visible cell —
# confirm they are still needed or remove the cell.
direct_pertubation_feature_output_dictionary = defaultdict(list)
complete_perturbation_dictionary = defaultdict(list)

In [49]:
# Show the per-feature importance summary produced by the audit.
print(total)

# Plot the median dependence per feature and write the chart to an EPS file.
fig = plot_dependencies(total.median(), title="FairML feature dependence", reverse_values=False)
plt.savefig("fairml_ldp.eps", bbox_inches='tight', transparent=False)

Feature: Num_Children,	 Importance: 0.3256
Feature: Group,	 Importance: 0.2880125
Feature: Income,	 Importance: 0.387365
Feature: Own_Car,	 Importance: 0.29948
Feature: Own_Housing,	 Importance: 0.1945575


In [52]:
# Save pyplot's current figure a second time, as a 250-dpi PNG.
# NOTE(review): this relies on the figure created in the previous cell still being
# the current pyplot figure when this cell runs.
file_name = "fairml_ldp.png"
plt.savefig(file_name, transparent=False, bbox_inches='tight', dpi=250)

In [56]:
# pyplot.show() does not accept a file name (passing one raised the TypeError seen
# here before), and the Agg backend selected above cannot open a window anyway.
# Display the PNG that was just written to disk instead.
from IPython.display import Image

Image(filename=file_name)

# Not working, moving on to the next library: EthicML

https://wearepal.ai/EthicML/tutorials/adult_dataset.html

In [None]:
!pip install ethicml

In [None]:
import ethicml as em
import ethicml.data as emda
from ethicml import metrics, models

# Evaluate several EthicML models on the Adult dataset:
#   - in-process models: SVM and Kamiran
#   - pre-processing model: Upsampler
#   - metric: Accuracy, plus ProbPos and TPR passed as `per_sens_metrics`
#     (presumably computed per sensitive group — confirm in the EthicML docs)
#   - 5 repeats
# NOTE(review): this cell has no execution count, so the call has not been checked
# against the installed EthicML version.
results = em.evaluate_models(
    datasets=[emda.Adult()],
    inprocess_models=[models.SVM(), models.Kamiran()],
    preprocess_models=[models.Upsampler()],
    metrics=[metrics.Accuracy()],
    per_sens_metrics=[metrics.ProbPos(), metrics.TPR()],
    repeats=5,
)
# Plot the results using the "Accuracy" and "prob_pos_Male_0÷Male_1" entries.
em.plot_results(results, "Accuracy", "prob_pos_Male_0÷Male_1")

In [None]:
# NOTE(review): nothing in this notebook establishes that `em.data` exposes a
# callable named `data`, and this cell was never executed (no execution count) —
# confirm against the EthicML API or remove the cell.
# NOTE(review): this rebinds `data`, which earlier cells use for the credit-card frame.
data = em.data.data()

In [None]:
# `adult` was never defined anywhere in this notebook, so `adult.load()` would fail
# with a NameError. Build the dataset object the same way the evaluation cell does
# (`emda.Adult()`) and then load it.
# Note: this rebinds `data`, which earlier cells use for the credit-card frame.
adult = emda.Adult()
data: em.DataTuple = adult.load()