![SolidQ](https://antoniosql.github.io/images/SolidQ_Verne.png) 
# Comprobando la responsabilidad de nuestros modelos

In [11]:
!pip install catboost

Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/90/86/c3dcb600b4f9e7584ed90ea9d30a717fb5c0111574675f442c3e7bc19535/catboost-0.24.1-cp36-none-manylinux1_x86_64.whl (66.1MB)
[K     |████████████████████████████████| 66.1MB 56kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.24.1


In [1]:
import shap

# Vamos a cargar un dataset muy conocido de información de censo americano, ya limpio que nos proporciona shap. La definición del dataset podéis encontrarla aquí https://archive.ics.uci.edu/ml/datasets/adult 
X_raw, Y = shap.datasets.adult()
print ("X_raw shape:", X_raw.shape)
print("Split by race id:",X_raw["Race"].value_counts().to_dict())
X_raw.head()

X_raw shape: (32561, 12)
Split by race id: {4: 27816, 2: 3124, 1: 1039, 0: 311, 3: 271}


Unnamed: 0,Age,Workclass,Education-Num,Marital Status,Occupation,Relationship,Race,Sex,Capital Gain,Capital Loss,Hours per week,Country
0,39.0,7,13.0,4,1,0,4,1,2174.0,0.0,40.0,39
1,50.0,6,13.0,2,4,4,4,1,0.0,0.0,13.0,39
2,38.0,4,9.0,0,6,0,4,1,0.0,0.0,40.0,39
3,53.0,4,7.0,2,6,4,2,1,0.0,0.0,40.0,39
4,28.0,4,13.0,2,10,5,2,0,0.0,0.0,40.0,5


In [2]:
# Preparamos los datos. Separamos características categóricas
import numpy as np
print(X_raw.dtypes)
categorical_features_indices = np.where(np.logical_or(X_raw.dtypes == np.int8, X_raw.dtypes == np.int32))[0]

print('categorical_features_indices:',categorical_features_indices)

numeric_features_indices = np.where(X_raw.dtypes == np.float32)[0]
numeric_features_indices
print('numeric_features_indices:',numeric_features_indices)

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

column_transformer = ColumnTransformer ([
    ('onehot', OneHotEncoder(handle_unknown='ignore'),
    categorical_features_indices),
    ('scaler', StandardScaler(),
    numeric_features_indices)
])

Age               float32
Workclass            int8
Education-Num     float32
Marital Status       int8
Occupation           int8
Relationship        int32
Race                 int8
Sex                  int8
Capital Gain      float32
Capital Loss      float32
Hours per week    float32
Country              int8
dtype: object
categorical_features_indices: [ 1  3  4  5  6  7 11]
numeric_features_indices: [ 0  2  8  9 10]


In [3]:
#Codificamos
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
print("Before label encoding:",Y) # --> [False False False  ... False False True]
Y=le.fit_transform(Y)
print("After label encoding:",Y) # --> [0 0 0  ... 0 0 1]

Before label encoding: [False False False ... False False  True]
After label encoding: [0 0 0 ... 0 0 1]


In [4]:
#Creamos el dataframe para predecir
A=X_raw[['Sex','Race']]
A.head()

Unnamed: 0,Sex,Race
0,1,4
1,1,4
2,1,4
3,1,2
4,0,2


In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split(
    X_raw, Y, A,
    test_size=0.2, random_state=0, stratify=Y)

X_train.reset_index(drop=True)
X_test.reset_index(drop=True)
A_train.reset_index(drop=True)
A_test.reset_index(drop=True)

print("X_raw shape: {}, X_train shape: {}, X_test shape: {}".format(
    X_raw.shape, X_train.shape, X_test.shape))
    
# test dataframe: features enrichment
import pandas as pd

pandas_warnings=pd.get_option('mode.chained_assignment')
# to avoid warning 'A value is trying to be set on a copy of a slice from a DataFrame'

pd.set_option('mode.chained_assignment', None)

# improve labels by replacing numbers with labels
A_test.Sex.loc[(A_test['Sex']==0)] = 'female'
A_test.Sex.loc[(A_test['Sex']==1)] = 'male'

A_test.Race.loc[(A_test['Race']==0)] = 'Amer-Indian-Eskimo'
A_test.Race.loc[(A_test['Race']==1)] = 'Asian-Pac-Islander'
A_test.Race.loc[(A_test['Race']==2)] = 'Black'
A_test.Race.loc[(A_test['Race']==3)] = 'Other'
A_test.Race.loc[(A_test['Race']==4)] = 'White'

pd.set_option('mode.chained_assignment', pandas_warnings)

A_test.head()

X_raw shape: (32561, 12), X_train shape: (26048, 12), X_test shape: (6513, 12)


Unnamed: 0,Sex,Race
13077,male,White
25002,male,Asian-Pac-Islander
23777,female,White
71,female,Black
955,male,White


In [6]:
# Train your first classification model with Logistic Regression
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

clf = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('classifier_LR', LogisticRegression(solver='liblinear', fit_intercept=True))])

unmitigated_predictor1 = clf.fit(X_train, Y_train)
print('unmitigated_predictor1.score:', unmitigated_predictor1.score(X_test, Y_test))

##############################

# Train your second classification model with SVM
from sklearn import svm
svm_predictor = svm.SVC()
clf = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('classifier_SVM', svm_predictor)])

unmitigated_predictor2 = clf.fit(X_train, Y_train)
print('unmitigated_predictor2.score:', unmitigated_predictor2.score(X_test, Y_test))

##############################

# Train your third classification model with Catboost Classifier, chosen for 2 reasons:
# 1) no needs for categoryzing features
# 2) works well with default-values parameters

from catboost import CatBoostClassifier # !pip install catboost==0.18.1

cbc = CatBoostClassifier(
    random_seed=42, logging_level="Silent", iterations=150)


clf = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('classifier_CBC', cbc)])

unmitigated_predictor3 = clf.fit(X_train, Y_train)
                                 
print('unmitigated_predictor3.score:', unmitigated_predictor3.score(X_test, Y_test))

unmitigated_predictor1.score: 0.8466144633809305
unmitigated_predictor2.score: 0.8516812528788577
unmitigated_predictor3.score: 0.873637340703209


In [7]:
unmitigated_predictors=[]
unmitigated_predictors.append(unmitigated_predictor1)
unmitigated_predictors.append(unmitigated_predictor2)
unmitigated_predictors.append(unmitigated_predictor3)

In [8]:
import sklearn.metrics as skm
from sklearn.metrics import confusion_matrix

for unmitigated_predictor in unmitigated_predictors:
    Y_pred=unmitigated_predictor.predict(X_test)
    conf_mx = confusion_matrix(Y_test, Y_pred)
    print("CLASSIFIER:",unmitigated_predictor.steps[-1][0])
    print("Confusion matrix:\n",skm.confusion_matrix(Y_test,Y_pred))

    print("Recall: {}\nAccuracy: {}\nPrecision: {}\nF1 Score: {}\n".format(
        skm.recall_score(Y_test, Y_pred,average='binary'),
        skm.accuracy_score(Y_test, Y_pred),
        skm.precision_score(Y_test, Y_pred),
        skm.f1_score(Y_test, Y_pred)))

CLASSIFIER: classifier_LR
Confusion matrix:
 [[4587  358]
 [ 641  927]]
Recall: 0.5911989795918368
Accuracy: 0.8466144633809305
Precision: 0.7214007782101167
F1 Score: 0.6498422712933755

CLASSIFIER: classifier_SVM
Confusion matrix:
 [[4638  307]
 [ 659  909]]
Recall: 0.579719387755102
Accuracy: 0.8516812528788577
Precision: 0.7475328947368421
F1 Score: 0.6530172413793104

CLASSIFIER: classifier_CBC
Confusion matrix:
 [[4672  273]
 [ 550 1018]]
Recall: 0.6492346938775511
Accuracy: 0.873637340703209
Precision: 0.7885360185902401
F1 Score: 0.7121371108779294



In [17]:
#!pip install fairlearn

Collecting fairlearn
[?25l  Downloading https://files.pythonhosted.org/packages/c0/ec/15764c20d27f4ec53e826eff160139c937274b646491c4de936a73444fd8/fairlearn-0.4.6-py3-none-any.whl (21.2MB)
[K     |████████████████████████████████| 21.2MB 56.3MB/s 
Installing collected packages: fairlearn
Successfully installed fairlearn-0.4.6


In [9]:
import fairlearn.metrics as flm

group_metrics_accuracy = flm.group_summary(skm.accuracy_score,
    Y_test, Y_pred,
    sensitive_features=A_test.Race,
    sample_weight=None)

group_metrics_precision = flm.group_summary(skm.precision_score,
    Y_test, Y_pred,
    sensitive_features=A_test.Race,
    sample_weight=None)

group_metrics_recall = flm.group_summary(skm.recall_score,
    Y_test, Y_pred,
    sensitive_features=A_test.Race,
    sample_weight=None)

print ("Split recall   : {}\n\nSplit accuracy : {}\n\nSplit precision: {}".format(
    group_metrics_recall.by_group, group_metrics_accuracy.by_group, group_metrics_precision.by_group))


Split recall   : {'Amer-Indian-Eskimo': 0.8333333333333334, 'Asian-Pac-Islander': 0.64, 'Black': 0.5428571428571428, 'Other': 0.4, 'White': 0.6548364648573417}

Split accuracy : {'Amer-Indian-Eskimo': 0.9807692307692307, 'Asian-Pac-Islander': 0.8507462686567164, 'Black': 0.9369085173501577, 'Other': 0.9148936170212766, 'White': 0.8659257931528948}

Split precision: {'Amer-Indian-Eskimo': 1.0, 'Asian-Pac-Islander': 0.7272727272727273, 'Black': 0.8260869565217391, 'Other': 0.6666666666666666, 'White': 0.7887678122380554}


In [10]:
# the following dict contains (<model_id>, <predictions>) pairs
ys_pred = {}
id=0
for rup in unmitigated_predictors:
    ys_pred[id]=rup.predict(X_test)
    id=id+1
    print(ys_pred)

{0: array([0, 0, 0, ..., 0, 1, 1], dtype=int64)}
{0: array([0, 0, 0, ..., 0, 1, 1], dtype=int64), 1: array([0, 0, 0, ..., 0, 0, 1], dtype=int64)}
{0: array([0, 0, 0, ..., 0, 1, 1], dtype=int64), 1: array([0, 0, 0, ..., 0, 0, 1], dtype=int64), 2: array([0, 0, 0, ..., 0, 0, 0], dtype=int64)}


In [11]:
%matplotlib inline

In [12]:
from fairlearn.widget import FairlearnDashboard

FairlearnDashboard(
    sensitive_features=A_test,
    sensitive_feature_names=np.array(A_test.columns),
    y_true=Y_test,
    y_pred=ys_pred)

FairlearnWidget(value={'true_y': [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x1d316064af0>