In [3]:
pip install fairlearn matplotlib


Note: you may need to restart the kernel to use updated packages.


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fairlearn.datasets import fetch_adult
data = fetch_adult(as_frame=True)
X = pd.get_dummies(data.data)
y_true = (data.target == '>50K') * 1
sex = data.data['sex']
sex.value_counts()

sex
Male      32650
Female    16192
Name: count, dtype: int64

## Model Bias Detection and Mitigation using Fairlearn
In this example, we'll use the Adult dataset from the UCI Machine Learning Repository to demonstrate how to detect and mitigate model bias. We'll explore bias based on the 'sex' attribute and apply different techniques to address any detected bias. We'll use Fairlearn and other tools for bias detection and mitigation.

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fairlearn.datasets import fetch_adult

# Load the dataset
data = fetch_adult(as_frame=True)
X = pd.get_dummies(data.data)
y_true = (data.target == '>50K') * 1
sex = data.data['sex']

# Check the distribution of the sensitive attribute
print(sex.value_counts())


sex
Male      32650
Female    16192
Name: count, dtype: int64


### Step 1: Load and Prepare the Data


In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fairlearn.datasets import fetch_adult

# Load the dataset
data = fetch_adult(as_frame=True)
X = pd.get_dummies(data.data)
y_true = (data.target == '>50K') * 1
sex = data.data['sex']

# Check the distribution of the sensitive attribute
print(sex.value_counts())


sex
Male      32650
Female    16192
Name: count, dtype: int64


### Step 2: Train Initial Model

We train an initial Logistic Regression model to establish a baseline.

In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Split the data into train and test sets
X_train, X_test, y_train, y_test, sex_train, sex_test = train_test_split(X, y_true, sex, test_size=0.3, random_state=0, stratify=y_true)

# Train a Logistic Regression model
model = LogisticRegression(solver='liblinear')
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.796901658363475
              precision    recall  f1-score   support

           0       0.80      0.97      0.88     11147
           1       0.71      0.25      0.37      3506

    accuracy                           0.80     14653
   macro avg       0.76      0.61      0.63     14653
weighted avg       0.78      0.80      0.76     14653



### Step 3: Detect Bias using Fairlearn

We'll check for fairness issues using metrics such as demographic parity and equalized odds.

In [19]:
from fairlearn.metrics import MetricFrame
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference

# Custom wrapper functions for metrics
def demographic_parity_diff(y_true, y_pred, **kwargs):
    return demographic_parity_difference(y_true, y_pred, **kwargs)

def equalized_odds_diff(y_true, y_pred, **kwargs):
    return equalized_odds_difference(y_true, y_pred, **kwargs)

# Calculate fairness metrics
metrics = {
    'accuracy': accuracy_score,
    'demographic_parity_difference': demographic_parity_diff,
    'equalized_odds_difference': equalized_odds_diff
}

# Ensure sensitive features match y_true
y_true_test = y_test.reset_index(drop=True)
sex_test = sex_test.reset_index(drop=True)

metric_frame = MetricFrame(metrics=metrics, y_true=y_true_test, y_pred=y_pred, sensitive_features=sex_test)
print(metric_frame.by_group)


TypeError: demographic_parity_difference() missing 1 required keyword-only argument: 'sensitive_features'

In [24]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.explainers import MetricTextExplainer

# Load Adult Income Dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status',
                'occupation', 'relationship', 'race', 'sex', 'capital_gain', 'capital_loss', 
                'hours_per_week', 'native_country', 'income']

adult_data = pd.read_csv(url, header=None, names=column_names, na_values=' ?')

# Preprocess Adult Income Dataset
adult_data.dropna(inplace=True)
adult_data['income'] = adult_data['income'].apply(lambda x: 1 if x == ' >50K' else 0)

# Define features and target variable
X_adult = adult_data.drop('income', axis=1)
y_adult = adult_data['income']

# Preprocessing pipeline
numeric_features = ['age', 'fnlwgt', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week']
categorical_features = ['workclass', 'education', 'marital_status', 'occupation', 'relationship', 'race', 'sex', 'native_country']

numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)])

# Model pipeline
model_adult = Pipeline(steps=[('preprocessor', preprocessor),
                              ('classifier', LogisticRegression(max_iter=1000))])

# Split the data
X_train_adult, X_test_adult, y_train_adult, y_test_adult = train_test_split(X_adult, y_adult, test_size=0.2, random_state=42)

# Train the model
model_adult.fit(X_train_adult, y_train_adult)
y_pred_adult = model_adult.predict(X_test_adult)

# Evaluate the model
print('Adult Income Dataset')
print('Accuracy:', accuracy_score(y_test_adult, y_pred_adult))
print('Precision:', precision_score(y_test_adult, y_pred_adult))
print('Recall:', recall_score(y_test_adult, y_pred_adult))
print('F1 Score:', f1_score(y_test_adult, y_pred_adult))

# Fairness-aware preprocessing
X_adult_encoded = pd.DataFrame(preprocessor.fit_transform(X_adult).toarray())
adult_data_encoded = pd.concat([X_adult_encoded, y_adult.reset_index(drop=True)], axis=1)

# Find the column name for 'race_ White'
onehot_columns = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
race_white_column = [col for col in onehot_columns if 'race_ White' in col][0]

# Ensure the column exists
assert race_white_column in adult_data_encoded.columns, f"Column {race_white_column} not found in DataFrame."

adult_dataset = BinaryLabelDataset(favorable_label=1, unfavorable_label=0,
                                   df=adult_data_encoded, label_names=['income'],
                                   protected_attribute_names=[race_white_column])

# Reweighing
RW = Reweighing(unprivileged_groups=[{race_white_column: 0}], privileged_groups=[{race_white_column: 1}])
adult_dataset_transf = RW.fit_transform(adult_dataset)

# Split transformed data
X_train_adult_transf, X_test_adult_transf, y_train_adult_transf, y_test_adult_transf = train_test_split(
    adult_dataset_transf.features, adult_dataset_transf.labels, test_size=0.2, random_state=42)

# Train the model on transformed data
model_adult.fit(X_train_adult_transf, y_train_adult_transf)
y_pred_adult_transf = model_adult.predict(X_test_adult_transf)

# Evaluate the model on transformed data
print('Transformed Adult Income Dataset')
print('Accuracy:', accuracy_score(y_test_adult_transf, y_pred_adult_transf))
print('Precision:', precision_score(y_test_adult_transf, y_pred_adult_transf))
print('Recall:', recall_score(y_test_adult_transf, y_pred_adult_transf))
print('F1 Score:', f1_score(y_test_adult_transf, y_pred_adult_transf))

# Fairness metrics
metric_transf = BinaryLabelDatasetMetric(adult_dataset_transf, 
                                         unprivileged_groups=[{race_white_column: 0}], 
                                         privileged_groups=[{race_white_column: 1}])
explainer_transf = MetricTextExplainer(metric_transf)
print(explainer_transf.disparate_impact())
print(explainer_transf.statistical_parity_difference())

# Load COMPAS Recidivism Dataset
url = "https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv"
compas_data = pd.read_csv(url)

# Preprocess COMPAS Recidivism Dataset
compas_data = compas_data[(compas_data.days_b_screening_arrest <= 30) & 
                          (compas_data.days_b_screening_arrest >= -30) & 
                          (compas_data.is_recid != -1) & 
                          (compas_data.c_charge_degree != "O") & 
                          (compas_data.score_text != "N/A")]

compas_data['recidivism'] = compas_data['two_year_recid'].apply(lambda x: 1 if x == 1 else 0)

# Define features and target variable
X_compas = compas_data[['age', 'sex', 'race', 'juv_fel_count', 'juv_misd_count', 'juv_other_count',
                        'priors_count', 'c_charge_degree']]
y_compas = compas_data['recidivism']

# Split the data
X_train_compas, X_test_compas, y_train_compas, y_test_compas = train_test_split(X_compas, y_compas, test_size=0.2, random_state=42)

# Preprocessing pipeline
numeric_features = ['age', 'juv_fel_count', 'juv_misd_count', 'juv_other_count', 'priors_count']
categorical_features = ['sex', 'race', 'c_charge_degree']

numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)])

# Model pipeline
model_compas = Pipeline(steps=[('preprocessor', preprocessor),
                               ('classifier', LogisticRegression(max_iter=1000))])

# Train the model
model_compas.fit(X_train_compas, y_train_compas)
y_pred_compas = model_compas.predict(X_test_compas)

# Evaluate the model
print('COMPAS Recidivism Dataset')
print('Accuracy:', accuracy_score(y_test_compas, y_pred_compas))
print('Precision:', precision_score(y_test_compas, y_pred_compas))
print('Recall:', recall_score(y_test_compas, y_pred_compas))
print('F1 Score:', f1_score(y_test_compas, y_pred_compas))

# Fairness-aware preprocessing
X_compas_encoded = pd.DataFrame(preprocessor.fit_transform(X_compas).toarray())
compas_data_encoded = pd.concat([X_compas_encoded, y_compas.reset_index(drop=True)], axis=1)

# # Find the column name for 'race_ African-American'
# onehot_columns = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
# race_black_column = [col for col in onehot_columns if 'race_ African-American'


Adult Income Dataset
Accuracy: 0.8534725675451682
Precision: 0.7584
Recall: 0.6196078431372549
F1 Score: 0.6820143884892087


AssertionError: Column race_ White not found in DataFrame.

In [1]:
# Set up feedback system
from learntools.core import binder
binder.bind(globals())
from learntools.ethics.ex4 import *
import pandas as pd
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'learntools.core'

In [30]:
pip install learntools


Note: you may need to restart the kernel to use updated packages.


# Detect Bias using Fairlearn

In [7]:
%pip install lightgbm


Note: you may need to restart the kernel to use updated packages.


### Step 1: Load and Prepare the Data


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fairlearn.datasets import fetch_adult

# Load the dataset
data = fetch_adult(as_frame=True)
X = pd.get_dummies(data.data)
y_true = (data.target == '>50K') * 1
sex = data.data['sex']

# Check the distribution of the sensitive attribute
print(sex.value_counts())


sex
Male      32650
Female    16192
Name: count, dtype: int64


### Step 2: Train Initial Model


In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Split the data into train and test sets
X_train, X_test, y_train, y_test, sex_train, sex_test = train_test_split(X, y_true, sex, test_size=0.3, random_state=0, stratify=y_true)

# Train a Logistic Regression model
model = LogisticRegression(solver='liblinear')
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.796901658363475
              precision    recall  f1-score   support

           0       0.80      0.97      0.88     11147
           1       0.71      0.25      0.37      3506

    accuracy                           0.80     14653
   macro avg       0.76      0.61      0.63     14653
weighted avg       0.78      0.80      0.76     14653



### Step 3: Detect Bias using Fairlearn


In [5]:
from fairlearn.metrics import MetricFrame, demographic_parity_difference, equalized_odds_difference, selection_rate
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Calculate fairness metrics
metrics = {
    'accuracy': accuracy_score,
    'precision': precision_score,
    'recall': recall_score,
    'demographic_parity_difference': lambda y_true, y_pred: demographic_parity_difference(y_true, y_pred, sensitive_features=sex_test),
    'equalized_odds_difference': lambda y_true, y_pred: equalized_odds_difference(y_true, y_pred, sensitive_features=sex_test),
    'selection_rate': selection_rate
}

metric_frame = MetricFrame(metrics=metrics, y_true=y_test, y_pred=y_pred, sensitive_features=sex_test)
print("Overall metrics:", metric_frame.overall)
print("Metrics by group:", metric_frame.by_group)

# Plot the metrics by group
metric_frame.by_group.plot.bar(
    subplots=True,
    layout=[3, 3],
    legend=False,
    figsize=[12, 8],
    title="Metrics by Group"
)
plt.show()


ValueError: Found input variables with inconsistent numbers of samples: [14653, 4907]