# 1. Install and import libraries needed

In [1]:
!pip install azureml-contrib-fairness --q
!pip install fairlearn --q

In [2]:
from sklearn.model_selection import train_test_split
from fairlearn.widget import FairlearnDashboard
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas as pd
import numpy as np
import shap

# 2. Load the dataset

In [3]:
# Load the census dataset
X_raw, Y = shap.datasets.adult()
X_raw["Race"].value_counts().to_dict()

{4: 27816, 2: 3124, 1: 1039, 0: 311, 3: 271}

In [4]:
X_raw.head(2)

Unnamed: 0,Age,Workclass,Education-Num,Marital Status,Occupation,Relationship,Race,Sex,Capital Gain,Capital Loss,Hours per week,Country
0,39.0,7,13.0,4,1,0,4,1,2174.0,0.0,40.0,39
1,50.0,6,13.0,2,4,4,4,1,0.0,0.0,13.0,39


In [5]:
np.unique(Y, return_counts=True)

(array([False,  True]), array([24720,  7841]))

We can see the class is unbalanced and in the majority of the cases, people is denied to get a loan

## 2.1 Some feature transformation

In [6]:
# (Optional) Separate the "sex" and "race" sensitive features out and drop them from the main data prior to training your model
A = X_raw[['Sex','Race']]
X = X_raw.drop(labels=['Sex', 'Race'],axis = 1)
X = pd.get_dummies(X)

sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

# Perform some standard data preprocessing steps to convert the data into a format suitable for the ML algorithms
le = LabelEncoder()
Y = le.fit_transform(Y)

## 2.2 Split the dataset into train and test

In [7]:
# Split data into train and test
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split(X_scaled, 
                                                    Y, 
                                                    A,
                                                    test_size = 0.2,
                                                    random_state=0,
                                                    stratify=Y)

# Work around indexing issue
X_train = X_train.reset_index(drop=True)
A_train = A_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
A_test = A_test.reset_index(drop=True)

# Improve labels
A_test.Sex.loc[(A_test['Sex'] == 0)] = 'female'
A_test.Sex.loc[(A_test['Sex'] == 1)] = 'male'


A_test.Race.loc[(A_test['Race'] == 0)] = 'Amer-Indian-Eskimo'
A_test.Race.loc[(A_test['Race'] == 1)] = 'Asian-Pac-Islander'
A_test.Race.loc[(A_test['Race'] == 2)] = 'Black'
A_test.Race.loc[(A_test['Race'] == 3)] = 'Other'
A_test.Race.loc[(A_test['Race'] == 4)] = 'White'


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [8]:
X_train.head(2)

Unnamed: 0,Age,Workclass,Education-Num,Marital Status,Occupation,Relationship,Capital Gain,Capital Loss,Hours per week,Country
0,-1.435581,-2.65732,-0.03136,0.921634,-1.554283,-0.281263,-0.14592,-0.21666,-0.845327,0.291569
1,0.837109,0.09005,-0.03136,-0.406212,0.101036,0.856261,-0.14592,-0.21666,-0.035429,0.291569


# 3. Azure ML Workspace Configuration

In [9]:
from azureml.core import Workspace
ws = Workspace.from_config()

In [10]:
from azureml.core import Experiment
experiment = Experiment(workspace=ws, name="exp-loan-fairness")

# 4. Training Models

In [11]:
from sklearn.metrics import roc_auc_score,accuracy_score
from sklearn.externals import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from xgboost import plot_importance
import lightgbm as lgb



In [12]:
### LogisticRegression
run = experiment.start_logging(snapshot_directory=None)
run.log("model_type", "LogisticRegression")

lr_predictor = LogisticRegression()
lr_predictor.fit(X_train, Y_train)

y_pred_log = lr_predictor.predict_proba(X_test)[:, 1]
print('LogReg AUC: ' + str(roc_auc_score(Y_test, y_pred_log)))
run.log("AUC", roc_auc_score(Y_test, y_pred_log))

model_name = "Model_logregression.pkl"
filename = "outputs/" + model_name
joblib.dump(value=lr_predictor, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

LogReg AUC: 0.8801876766884713


In [13]:
### Support Vector Machine
run = experiment.start_logging(snapshot_directory=None)
run.log("model_type", "SVC")

svm_predictor = SVC(probability=True)
svm_predictor.fit(X_train, Y_train)

y_pred_svc = svm_predictor.predict_proba(X_test)[:, 1]
print('SVC AUC: ' + str(roc_auc_score(Y_test, y_pred_svc)))
run.log("AUC", roc_auc_score(Y_test, y_pred_svc))

model_name = "Model_svc.pkl"
filename = "outputs/" + model_name
joblib.dump(value=svm_predictor, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

SVC AUC: 0.8843116113988567


In [14]:
### Decision Tree
run = experiment.start_logging(snapshot_directory=None)
run.log("model_type", "DecisionTree")

dt_predictor = DecisionTreeClassifier()
dt_predictor.fit(X_train, Y_train)

y_pred_dt = dt_predictor.predict_proba(X_test)[:, 1]
print('DecisionTree AUC: ' + str(roc_auc_score(Y_test, y_pred_dt)))
run.log("AUC", roc_auc_score(Y_test, y_pred_dt))

model_name = "Model_decisiontree.pkl"
filename = "outputs/" + model_name
joblib.dump(value=dt_predictor, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

DecisionTree AUC: 0.7733715642475393


In [15]:
### Random Forest
run = experiment.start_logging(snapshot_directory=None)
run.log("model_type", "RandomForest")

rf_predictor = RandomForestClassifier(max_depth= 90, max_features=3, min_samples_leaf=4, min_samples_split=10, n_estimators=100)
rf_predictor.fit(X_train, Y_train)

y_pred_rf = rf_predictor.predict_proba(X_test)[:, 1]
print('RandomForest AUC: ' + str(roc_auc_score(Y_test, y_pred_rf)))
run.log("AUC", roc_auc_score(Y_test, y_pred_rf))

model_name = "Model_randomforest.pkl"
filename = "outputs/" + model_name
joblib.dump(value=rf_predictor, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

RandomForest AUC: 0.9159503905202122


In [16]:
### Gradient Boosting
run = experiment.start_logging(snapshot_directory=None)
run.log("model_type", "GradientBoosting")

gb_predictor = GradientBoostingClassifier(learning_rate=0.01, 
                                     max_depth=3, 
                                     max_features=0.1,
                                     min_samples_leaf=16, 
                                     n_estimators=1000)
gb_predictor.fit(X_train, Y_train)

y_pred_gbr = gb_predictor.predict_proba(X_test)[:, 1]
print('GradientBoosting accuracy: ' + str(roc_auc_score(Y_test, y_pred_gbr)))
run.log("Accuracy", roc_auc_score(Y_test, y_pred_gbr))

model_name = "Model_gradientboosting.pkl"
filename = "outputs/" + model_name
joblib.dump(value=gb_predictor, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

GradientBoosting accuracy: 0.9117854434493717


In [17]:
### XGBoost
run = experiment.start_logging(snapshot_directory=None)
run.log("model_type", "XGBoost")

xgb_predictor = XGBClassifier(colsample_bytree = 0.3,
                        learning_rate = 0.03,
                        max_depth = 5,
                        min_child_weight = 5, 
                        n_estimators = 500,
                        objective ='binary:logistic',
                        metric = 'auc')
xgb_predictor.fit(X_train, Y_train)

y_pred_xgbm = xgb_predictor.predict_proba(X_test)[:, 1]
print('XGBoost accuracy: ' + str(roc_auc_score(Y_test, y_pred_xgbm)))
run.log("Accuracy", roc_auc_score(Y_test, y_pred_xgbm))

model_name = "Model_xgboost.pkl"
filename = "outputs/" + model_name
joblib.dump(value=xgb_predictor, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

XGBoost accuracy: 0.9256579388580509


In [18]:
### LightGBM
run = experiment.start_logging(snapshot_directory=None)
run.log("model_type", "LightGBM")

params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': { 'AUC' },
    'num_leaves': 32,
    'max_depth': 3,
    'min_data_in_leaf': 16,
    'bagging_freq': 1,
    'feature_fraction': 0.7,
    'verbose': 1,
    'is_unbalance':True,
    'learning_rate': 0.005,
    'bagging_fraction': 0.9,
}

train_set = lgb.Dataset(X_train, Y_train)
validation_sets = lgb.Dataset(X_test, Y_test, reference=train_set)

lgbm_predictor = lgb.train(
    params,
    train_set,
    num_boost_round=10000,
    valid_sets=validation_sets,
    early_stopping_rounds=500,
    verbose_eval=False
    )

y_pred_lgbm = lgbm_predictor.predict(X_test)
print('LightGBM accuracy: ' + str(roc_auc_score(Y_test, y_pred_lgbm)))
run.log("Accuracy", roc_auc_score(Y_test, y_pred_lgbm))

model_name = "Model_lgbm.pkl"
filename = "outputs/" + model_name
joblib.dump(value=lgbm_predictor, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

LightGBM accuracy: 0.9262470079032624


# 5. Registering the Models

In [19]:
import joblib
import os
from azureml.core import  Model

def register_model(name, model):
    print("Registering ", name)
    model_path = "outputs/{0}.pkl".format(name)
    joblib.dump(value=model, filename=model_path)
    registered_model = Model.register(model_path=model_path,
                                      model_name=name,
                                      workspace=ws)
    print("Registered ", registered_model.id)
    return registered_model.id

In [22]:
model_dict = {}

lr_reg_id = register_model("Model_logregression", lr_predictor)
model_dict[lr_reg_id] = lr_predictor
svm_reg_id = register_model("Model_svc", svm_predictor)
model_dict[svm_reg_id] = svm_predictor
dt_reg_id = register_model("Model_decisiontree", dt_predictor)
model_dict[dt_reg_id] = dt_predictor
rf_reg_id = register_model("Model_randomforest", rf_predictor)
model_dict[rf_reg_id] = rf_predictor
gb_reg_id = register_model("Model_gradientboosting", gb_predictor)
model_dict[gb_reg_id] = gb_predictor
xgb_reg_id = register_model("Model_xgboost", xgb_predictor)
model_dict[xgb_reg_id] = xgb_predictor
lgbm_reg_id = register_model("Model_lgbm", lgbm_predictor)
model_dict[lgbm_reg_id] = lgbm_predictor

Registering  Model_logregression
Registering model Model_logregression
Registered  Model_logregression:2
Registering  Model_svc
Registering model Model_svc
Registered  Model_svc:2
Registering  Model_decisiontree
Registering model Model_decisiontree
Registered  Model_decisiontree:2
Registering  Model_randomforest
Registering model Model_randomforest
Registered  Model_randomforest:2
Registering  Model_gradientboosting
Registering model Model_gradientboosting
Registered  Model_gradientboosting:2
Registering  Model_xgboost
Registering model Model_xgboost
Registered  Model_xgboost:2
Registering  Model_lgbm
Registering model Model_lgbm
Registered  Model_lgbm:2


# 6. Generate Fairlearn Dashboard

In [23]:
ys_pred = {}
for n, p in model_dict.items():
    if(n.split(':')[0]=='Model_lgbm'):
        ys_pred[n] = p.predict(X_test)
    else:
        ys_pred[n] = p.predict_proba(X_test)[:, 1]

In [21]:
# ys_pred = {}
# for n, p in model_dict.items():
#     ys_pred[n] = p.predict(X_test)

In [25]:
from fairlearn.widget import FairlearnDashboard

FairlearnDashboard(sensitive_features=A_test, 
                   sensitive_feature_names=['Sex', 'Race'],
                   y_true=Y_test.tolist(),
                   y_pred=ys_pred)

FairlearnWidget(value={'true_y': [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7fcb891345c0>

# 7. Upload Fairness Dashboard to Azure

In [27]:
# ys_pred = {}
# for n, p in model_dict.items():
#     if(n.split(':')[0]=='Model_lgbm'):
#         ys_pred[n] = (p.predict(X_test) >= 0.5)*1.0
#     else:
#         ys_pred[n] = p.predict(X_test)

In [28]:
sf = { 'Race': A_test['Race'], 'Sex': A_test['Sex'] }

from fairlearn.metrics._group_metric_set import _create_group_metric_set

dash_dict = _create_group_metric_set(y_true=Y_test.astype(float),
                                     predictions=ys_pred,
                                     sensitive_features=sf,
                                     prediction_type='binary_classification')

In [30]:
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id
for i_run in experiment.get_runs():
    dashboard_title = "Fairness Dashboard"
    upload_id = upload_dashboard_dictionary(i_run,
                                            dash_dict,
                                            dashboard_name=dashboard_title)
    print("\nUploaded to id: {0}\n".format(upload_id))

    downloaded_dict = download_dashboard_by_upload_id(i_run, upload_id)


Uploaded to id: 4eb20973-bdb7-4533-aa78-169128f584d9


Uploaded to id: 3c2c93fb-5859-480c-96dd-5878e435a96a


Uploaded to id: 4ed17ce1-215b-424e-bf05-d687f91a2fa0


Uploaded to id: da033d5b-79dc-4d17-a75e-70168da4ce3c


Uploaded to id: 8c5bf082-2d73-4039-98ee-54412449dc92


Uploaded to id: aebcf83a-8cf6-43b3-8e0b-db648ba4996d


Uploaded to id: 095b4d00-5253-4eeb-9dda-1042398185cb



In [31]:
print(dash_dict == downloaded_dict)

True
