In [1]:
!pip install 'aif360[All]'

Collecting aif360[All]
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Collecting skorch (from aif360[All])
  Downloading skorch-1.0.0-py3-none-any.whl.metadata (11 kB)
Collecting jupyter (from aif360[All])
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting sphinx-rtd-theme (from aif360[All])
  Downloading sphinx_rtd_theme-3.0.2-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting igraph[plotting] (from aif360[All])
  Downloading igraph-0.11.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting lime (from aif360[All])
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fairlearn~=0.7 (from aif360[All])
  Downloading fairlearn-0.11.0-py3-none-any.whl.metadata (7.0 kB)
Collecting colorama (from aif360[All])
  Downloading colorama-0.4.6-p

In [2]:
%matplotlib inline
# Load all necessary packages
import sys
sys.path.append("../")
import numpy as np
from tqdm import tqdm

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    classification_report,
    precision_score,
    confusion_matrix
)
from collections import defaultdict

  vect_normalized_discounted_cumulative_gain = vmap(
  monte_carlo_vect_ndcg = vmap(vect_normalized_discounted_cumulative_gain, in_dims=(0,))


In [3]:
!wget https://raw.githubusercontent.com/Trusted-AI/AIF360/main/examples/common_utils.py
from common_utils import compute_metrics

--2024-11-17 23:22:33--  https://raw.githubusercontent.com/Trusted-AI/AIF360/main/examples/common_utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1340 (1.3K) [text/plain]
Saving to: ‘common_utils.py’


2024-11-17 23:22:33 (55.8 MB/s) - ‘common_utils.py’ saved [1340/1340]



In [56]:
# Load the Drug Consumption dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00373/drug_consumption.data"
columns = [
    "ID", "Age", "Gender", "Education", "Country", "Ethnicity", "Neuroticism", "Extraversion",
    "Openness", "Agreeableness", "Conscientiousness", "Impulsiveness", "Sensation-seeking",
    "Alcohol", "Amphetamines", "Amyl_nitrite", "Benzodiazepines", "Caffeine", "Cannabis",
    "Chocolate", "Cocaine", "Crack", "Ecstasy", "Heroin", "Ketamine", "Legal_highs", "LSD",
    "Methadone", "Mushrooms", "Nicotine", "Semer", "Volatile_substance"
]
data = pd.read_csv(url, header=None, names=columns)

In [57]:
# Define target (Y) and bias group (B)
target = "Cannabis"
bias_group = "Age"

# Binarize the target and bias group
data[target] = data[target].apply(lambda x: 1 if x in ["CL3", "CL4", "CL5", "CL6"] else 0)
data[bias_group] = data[bias_group].apply(lambda x: 1 if x > 0 else 0)

In [58]:
# Preprocess categorical features
categorical_columns = data.select_dtypes(include=["object"]).columns
label_encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    label_encoders[col] = encoder

# Ensure all columns are numeric
for col in data.columns:
    if data[col].dtype == "object":
        data[col] = pd.to_numeric(data[col], errors="coerce")

In [59]:
# Split dataset into train and test sets
dataset_orig_train, dataset_orig_test = train_test_split(data, test_size=0.3, random_state=42)

# Define features and target for training and testing
X_train = dataset_orig_train.drop(columns=[target, bias_group])
y_train = dataset_orig_train[target]
X_test = dataset_orig_test.drop(columns=[target, bias_group])
y_test = dataset_orig_test[target]

# Convert features to float to avoid type issues
X_train = X_train.astype(float)
X_test = X_test.astype(float)

# Create a BinaryLabelDataset instance with a different variable name
dataset_bld_train = BinaryLabelDataset(df=dataset_orig_train,  # Use the original DataFrame here
                                        label_names=[target],
                                        protected_attribute_names=[bias_group])

privileged_groups = [{bias_group: 1}]
unprivileged_groups = [{bias_group: 0}]

In [60]:
dataset_orig_train = BinaryLabelDataset(df=dataset_orig_train,
                                        label_names=[target],
                                        protected_attribute_names=[bias_group])

privileged_groups = [{bias_group: 1}]
unprivileged_groups = [{bias_group: 0}]

metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = 0.386189


In [61]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
               privileged_groups=privileged_groups)
RW.fit(dataset_orig_train)
dataset_transf_train = RW.transform(dataset_orig_train)
assert np.abs(dataset_transf_train.instance_weights.sum()-dataset_orig_train.instance_weights.sum())<1e-6

In [62]:
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train,
                                         unprivileged_groups=unprivileged_groups,
                                         privileged_groups=privileged_groups)
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_transf_train.mean_difference())

#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = 0.000000


In [63]:
print("Original training dataset shape:", dataset_orig_train.features.shape)
print("Original testing dataset shape:", dataset_orig_test.shape)

Original training dataset shape: (1319, 31)
Original testing dataset shape: (566, 32)


In [64]:
def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, group, model_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    balanced_acc = balanced_accuracy_score(y_test, y_pred)
    print(f"\n{model_name} - Regular Data")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Balanced Accuracy: {balanced_acc:.2f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

    # Fairness metrics
    equalized_odds = calculate_equalized_odds(y_test, y_pred, group)
    predictive_parity = calculate_predictive_value_parity(y_test, y_pred, group)

    print("\nEqualized Odds (TPR, FPR) by Group:")
    for g, metrics in equalized_odds.items():
        print(f"Group {g}: TPR = {metrics['TPR']:.2f}, FPR = {metrics['FPR']:.2f}")

    print("\nPredictive Parity (PPV) by Group:")
    for g, ppv in predictive_parity.items():
        print(f"Group {g}: PPV = {ppv:.2f}")

    return y_pred

In [65]:
def calculate_equalized_odds(y_true, y_pred, group):
    tpr_fpr = defaultdict(dict)
    unique_groups = np.unique(group)

    for g in unique_groups:
        mask = group == g
        tn, fp, fn, tp = confusion_matrix(y_true[mask], y_pred[mask]).ravel()
        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0  # Sensitivity
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0  # Fall-out
        tpr_fpr[g] = {"TPR": tpr, "FPR": fpr}

    return tpr_fpr

In [66]:
def calculate_predictive_value_parity(y_true, y_pred, group):
    ppv = {}
    unique_groups = np.unique(group)

    for g in unique_groups:
        mask = group == g
        precision = precision_score(y_true[mask], y_pred[mask], zero_division=0)
        ppv[g] = precision

    return ppv

In [67]:
# Create a StandardScaler object
scaler = StandardScaler()

# Fit the scaler to the training data and transform it
X_train_scaled = scaler.fit_transform(X_train)

# Transform the testing data using the fitted scaler
X_test_scaled = scaler.transform(X_test)

# Then call the train_and_evaluate_model function with the scaled data
knn_regular = KNeighborsClassifier(n_neighbors=5)
y_pred_knn_regular = train_and_evaluate_model(
    knn_regular, X_train_scaled, y_train, X_test_scaled, y_test, dataset_orig_test[bias_group], "KNN Model"
)



KNN Model - Regular Data
Accuracy: 0.83
Balanced Accuracy: 0.83

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.87      0.82       249
           1       0.88      0.79      0.84       317

    accuracy                           0.83       566
   macro avg       0.83      0.83      0.83       566
weighted avg       0.83      0.83      0.83       566


Equalized Odds (TPR, FPR) by Group:
Group 0: TPR = 0.83, FPR = 0.20
Group 1: TPR = 0.66, FPR = 0.08

Predictive Parity (PPV) by Group:
Group 0: PPV = 0.91
Group 1: PPV = 0.78


In [68]:
rf_regular = RandomForestClassifier(random_state=42)
y_pred_rf_regular = train_and_evaluate_model(
    rf_regular, X_train, y_train, X_test, y_test, dataset_orig_test[bias_group], "Random Forest Model"
)


Random Forest Model - Regular Data
Accuracy: 0.86
Balanced Accuracy: 0.86

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.85      0.85       249
           1       0.88      0.88      0.88       317

    accuracy                           0.86       566
   macro avg       0.86      0.86      0.86       566
weighted avg       0.86      0.86      0.86       566


Equalized Odds (TPR, FPR) by Group:
Group 0: TPR = 0.92, FPR = 0.24
Group 1: TPR = 0.72, FPR = 0.09

Predictive Parity (PPV) by Group:
Group 0: PPV = 0.90
Group 1: PPV = 0.78


In [69]:
logistic_regular = LogisticRegression(random_state=42, max_iter=1000)
y_pred_logistic_regular = train_and_evaluate_model(
    logistic_regular, X_train_scaled, y_train, X_test_scaled, y_test, dataset_orig_test[bias_group], "Logistic Regression Model"
)


Logistic Regression Model - Regular Data
Accuracy: 0.84
Balanced Accuracy: 0.84

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.86      0.83       249
           1       0.88      0.83      0.85       317

    accuracy                           0.84       566
   macro avg       0.84      0.84      0.84       566
weighted avg       0.84      0.84      0.84       566


Equalized Odds (TPR, FPR) by Group:
Group 0: TPR = 0.85, FPR = 0.22
Group 1: TPR = 0.72, FPR = 0.08

Predictive Parity (PPV) by Group:
Group 0: PPV = 0.90
Group 1: PPV = 0.79


In [70]:
dataset_transf_train

               instance weights features                               \
                                         protected attribute            
                                      ID                 Age   Gender   
instance names                                                          
236                    1.501616    237.0                 0.0  0.48246   
551                    1.501616    554.0                 0.0  0.48246   
1760                   0.762190   1764.0                 0.0  0.48246   
865                    0.762190    869.0                 0.0  0.48246   
208                    1.769560    209.0                 1.0  0.48246   
...                         ...      ...                 ...      ...   
1130                   0.762190   1134.0                 0.0 -0.48246   
1294                   0.762190   1298.0                 0.0 -0.48246   
860                    0.762190    864.0                 0.0 -0.48246   
1459                   0.762190   1463.0           

In [78]:
knn_reweighted = KNeighborsClassifier(n_neighbors=5)
y_pred_knn_reweighted = train_and_evaluate_model(
    knn_reweighted, X_train_scaled_rw, y_train, X_test_scaled_rw, y_test, dataset_orig_test[bias_group], "KNN Model"
)


KNN Model - Regular Data
Accuracy: 0.77
Balanced Accuracy: 0.77

Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.79      0.75       249
           1       0.82      0.75      0.78       317

    accuracy                           0.77       566
   macro avg       0.77      0.77      0.77       566
weighted avg       0.77      0.77      0.77       566


Equalized Odds (TPR, FPR) by Group:
Group 0: TPR = 0.75, FPR = 0.25
Group 1: TPR = 0.75, FPR = 0.18

Predictive Parity (PPV) by Group:
Group 0: PPV = 0.88
Group 1: PPV = 0.65


In [75]:
feature_columns = dataset_orig_test.columns[1:]  # Exclude the 'ID' column, assuming it's the first

# Select the same columns from dataset_orig_test to create X_test_rw
X_test_rw = dataset_orig_test[feature_columns].astype(float)

# Scale the data
scaler = StandardScaler()
X_train_scaled_rw = scaler.fit_transform(X_train_rw)
X_test_scaled_rw = scaler.transform(X_test_rw)



In [79]:

logistic_reweighted = LogisticRegression(random_state=42, max_iter=1000)
y_pred_logistic_reweighted = train_and_evaluate_model(
    logistic_reweighted, X_train_scaled_rw, dataset_transf_train.labels.ravel(), X_test_scaled_rw, y_test, dataset_orig_test[bias_group], "Logistic Regression Model - Reweighted"
)


Logistic Regression Model - Reweighted - Regular Data
Accuracy: 0.61
Balanced Accuracy: 0.55

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.12      0.21       249
           1       0.59      0.99      0.74       317

    accuracy                           0.61       566
   macro avg       0.74      0.55      0.47       566
weighted avg       0.72      0.61      0.51       566


Equalized Odds (TPR, FPR) by Group:
Group 0: TPR = 0.98, FPR = 0.86
Group 1: TPR = 1.00, FPR = 0.89

Predictive Parity (PPV) by Group:
Group 0: PPV = 0.74
Group 1: PPV = 0.33


In [80]:
# Assuming dataset_transf_train and dataset_orig_test are already defined and processed
# Extract features and labels from dataset_transf_train
X_train_rw = dataset_transf_train.features
y_train = dataset_transf_train.labels.ravel()  # Flatten labels

# Extract features from dataset_orig_test
feature_columns = dataset_orig_test.columns[1:]  # Exclude the 'ID' column, assuming it's the first
X_test_rw = dataset_orig_test[feature_columns].astype(float)

# Scale the data
scaler = StandardScaler()
X_train_scaled_rw = scaler.fit_transform(X_train_rw)
X_test_scaled_rw = scaler.transform(X_test_rw)

rf_reweighted = RandomForestClassifier(random_state=42)
y_pred_rf_reweighted = train_and_evaluate_model(
    rf_reweighted, X_train_scaled_rw, y_train, X_test_scaled_rw, y_test, dataset_orig_test[bias_group], "Random Forest Model - Reweighted"
)




Random Forest Model - Reweighted - Regular Data
Accuracy: 0.84
Balanced Accuracy: 0.84

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.81      0.82       249
           1       0.85      0.87      0.86       317

    accuracy                           0.84       566
   macro avg       0.84      0.84      0.84       566
weighted avg       0.84      0.84      0.84       566


Equalized Odds (TPR, FPR) by Group:
Group 0: TPR = 0.89, FPR = 0.23
Group 1: TPR = 0.81, FPR = 0.16

Predictive Parity (PPV) by Group:
Group 0: PPV = 0.90
Group 1: PPV = 0.69
