In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
#!pip install --user fairlearn



In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the German Credit dataset: space-separated values, no header row,
# so the column names are supplied here.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
columns = [
    'Status', 'Month', 'CreditHistory', 'Purpose', 'CreditAmount', 'Savings',
    'Employment', 'Investment', 'Sex', 'Debtors', 'Residence', 'Property',
    'Age', 'OtherInstallments', 'Housing', 'ExistingCredits', 'Job',
    'NumPeople', 'Telephone', 'Foreign', 'Class',
]
data = pd.read_csv(url, delimiter=' ', header=None, names=columns)

# Binarise the target: raw label 1 stays 1, every other label becomes 0.
# (Per the UCI attribute description 1 = good credit, 2 = bad credit --
# verify against the dataset documentation.)
data['Class'] = (data['Class'] == 1).astype(int)

# Binarise the sensitive attribute: codes A91 / A93 / A94 -> 1, all other
# codes -> 0. (Per the UCI attribute description these three codes are the
# male categories -- verify against the dataset documentation.)
data['Sex'] = data['Sex'].isin(['A91', 'A93', 'A94']).astype(int)

# Feature columns by type. 'Sex' and 'Class' are deliberately left out of
# both lists: they are the sensitive attribute and the target.
categorical_columns = ['Status', 'CreditHistory', 'Purpose', 'Savings', 'Employment', 'Debtors', 'Property', 'OtherInstallments', 'Housing', 'Job', 'Telephone', 'Foreign']
numerical_columns = ['Month', 'CreditAmount', 'Investment', 'Residence', 'Age', 'ExistingCredits', 'NumPeople']

# Standardise numeric columns and one-hot encode categorical ones.
# handle_unknown='ignore' keeps transform() from raising when a rare
# category ends up only in the test split; such rows get an all-zero
# encoding for that feature instead.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ])

# Features, target and sensitive attribute.
X = data[categorical_columns + numerical_columns]
y = data['Class']
sensitive_feature = data['Sex']

# Stratify on the (Class, Sex) combination so both splits keep the same
# joint distribution of outcome and sensitive group.
stratify_data = pd.concat([y, sensitive_feature], axis=1)
X_train, X_test, y_train, y_test, sensitive_train, sensitive_test = train_test_split(
    X, y, sensitive_feature,
    test_size=0.2,
    random_state=42,
    stratify=stratify_data,
)

# Fit the preprocessor on the training split only, then reuse the fitted
# transform on the test split to avoid leaking test statistics.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)


In [12]:
from fairlearn.metrics import MetricFrame

# Baseline: plain logistic regression with no fairness constraint.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard class predictions on the held-out split.
y_pred = model.predict(X_test)

# Overall accuracy on the test split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

# The same metric broken down by sensitive-feature group.
metric_frame = MetricFrame(
    metrics={"accuracy": accuracy_score},
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sensitive_test,
)
print(metric_frame.by_group)


Accuracy: 0.75
     accuracy
Sex          
0    0.693548
1    0.775362


In [13]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity

# In-training mitigation: ExponentiatedGradient wraps a fresh logistic
# regression and fits it under a demographic-parity constraint.
mitigator = ExponentiatedGradient(
    LogisticRegression(max_iter=1000),
    constraints=DemographicParity(),
)
mitigator.fit(X_train, y_train, sensitive_features=sensitive_train)

# ExponentiatedGradient.predict is randomized (it samples from the learned
# ensemble of predictors), so pass a fixed random_state; otherwise the
# numbers printed below change from run to run.
y_pred_mitigated = mitigator.predict(X_test, random_state=42)

# Overall and per-group accuracy of the mitigated model.
accuracy_mitigated = accuracy_score(y_test, y_pred_mitigated)
print(f"Accuracy after mitigation: {accuracy_mitigated}")

metric_frame_mitigated = MetricFrame(
    metrics={"accuracy": accuracy_score},
    y_true=y_test,
    y_pred=y_pred_mitigated,
    sensitive_features=sensitive_test,
)
print(metric_frame_mitigated.by_group)


Accuracy after mitigation: 0.735
     accuracy
Sex          
0    0.709677
1    0.746377


In [13]:
from fairlearn.postprocessing import ThresholdOptimizer

# Re-train a LogisticRegression model to serve as the already-fitted
# estimator inside ThresholdOptimizer (prefit=True below).
base_model = LogisticRegression(max_iter=1000)
base_model.fit(X_train, y_train)

# Post-processing mitigation: learn group-specific decision thresholds that
# satisfy equalized odds on the training data.
# NOTE(review): depending on the installed fairlearn version, the default
# predict_method may fall back to 'predict' (hard labels) and only emit a
# warning, which this notebook's blanket warning filter would hide --
# confirm, and consider passing predict_method explicitly.
eop = ThresholdOptimizer(estimator=base_model, constraints="equalized_odds", prefit=True)
eop.fit(X_train, y_train, sensitive_features=sensitive_train)

# ThresholdOptimizer.predict is randomized (it interpolates between
# thresholds), so pass a fixed random_state to make the reported numbers
# reproducible across runs.
y_pred_eop = eop.predict(X_test, sensitive_features=sensitive_test, random_state=42)

# Overall and per-group accuracy after post-processing.
accuracy_eop = accuracy_score(y_test, y_pred_eop)
print(f"Accuracy after Equalized Odds Postprocessing: {accuracy_eop}")

metric_frame_eop = MetricFrame(
    metrics={"accuracy": accuracy_score},
    y_true=y_test,
    y_pred=y_pred_eop,
    sensitive_features=sensitive_test,
)
print(metric_frame_eop.by_group)


Accuracy after Equalized Odds Postprocessing: 0.74
     accuracy
Sex          
0    0.661290
1    0.775362
