In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import warnings
from sklearn.exceptions import ConvergenceWarning

# Filter out ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Set up parameters
dataset_name = "credit"
y_label = "class"
csv_file_path = f"{dataset_name}.csv"

# Load the CSV file as a DataFrame, ignoring the first column
df = pd.read_csv(csv_file_path, index_col=0)


In [3]:
df.head()

Unnamed: 0,checking_status,credit_history,credit_amount,savings,employment,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,telephone,foreign_worker,class,Gender
0,A11,A34,1169,A65,A75,A101,4,A121,67,A143,A152,2,A173,1,A192,A201,0.0,0
1,A12,A32,5951,A61,A73,A101,2,A121,22,A143,A152,1,A173,1,A191,A201,1.0,1
2,A14,A34,2096,A61,A74,A101,3,A121,49,A143,A152,1,A172,2,A191,A201,0.0,0
3,A11,A32,7882,A61,A74,A103,4,A122,45,A143,A153,1,A173,2,A191,A201,0.0,0
4,A11,A33,4870,A61,A73,A101,4,A124,53,A143,A153,2,A173,2,A191,A201,1.0,0


In [4]:
# One-hot encode categorical features
categorical_features = df.select_dtypes(include=['category', 'object']).columns.tolist()
for feature in categorical_features:
    onehot = pd.get_dummies(df[feature], prefix=feature)
    df = df.drop(feature, axis=1)
    df = df.join(onehot)

# Separate features and target
X = df.drop(y_label, axis=1)
y = df[y_label]

# Balance the dataset if it's imbalanced
if y.value_counts().min() < y.value_counts().max() / 2:
    df_majority = df[df[y_label] == y.value_counts().idxmax()]
    df_minority = df[df[y_label] == y.value_counts().idxmin()]
    
    df_minority_upsampled = resample(df_minority,
                                     replace=True,
                                     n_samples=len(df_majority),
                                     random_state=42)
    
    df_upsampled = pd.concat([df_majority, df_minority_upsampled])
    
    X = df_upsampled.drop(y_label, axis=1)
    y = df_upsampled[y_label]

In [5]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define parameter grid for GridSearchCV
param_grid = {
    'solver': ['liblinear', 'lbfgs'],
    'C': [0.1, 1, 10, 100],
    'max_iter': [100, 200, 300],
    'class_weight': ['balanced', None]
}

# Perform GridSearchCV
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5, scoring='f1')
grid_search.fit(X_train_scaled, y_train)

# Get the best model
model = grid_search.best_estimator_

In [6]:
# Evaluate the model
print("Model performance:")
y_pred = model.predict(X_test_scaled)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.5f}", end=", ")
print(f"Precision: {precision_score(y_test, y_pred):.5f}", end=", ")
print(f"F1 Score: {f1_score(y_test, y_pred):.5f}", end=", ")
print(f"Recall: {recall_score(y_test, y_pred):.5f}")


Model performance:
Accuracy: 0.69643, Precision: 0.67361, F1 Score: 0.69534, Recall: 0.71852


In [7]:
# Import necessary method from fairlearn
from fairlearn.metrics import demographic_parity_difference

sensitive_features_test = X_test['Gender'].values

# Calculate Demographic Parity Difference using fairlearn
dpd = demographic_parity_difference(y_true=y_test, 
                                    y_pred=y_pred, 
                                    sensitive_features=sensitive_features_test)

# Print the Demographic Parity Difference
print(f"Demographic Parity Difference: {dpd:.5f}")

Demographic Parity Difference: 0.19181


In [None]:
# Import necessary libraries and methods for mitigation
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize the ThresholdOptimizer with the trained model and specify the constraint
threshold_optimizer = ThresholdOptimizer(
    estimator=model,
    constraints='demographic_parity',
    objective='balanced_accuracy_score',
    prefit=True
)

# Fit the ThresholdOptimizer on the training data
# Note: The sensitive feature should be a part of the training data
sensitive_features_train = X_train['Gender'].values
threshold_optimizer.fit(X_train_scaled, y_train, sensitive_features=sensitive_features_train)

# Use the fitted ThresholdOptimizer to make predictions on the test data
y_pred_mitigated = threshold_optimizer.predict(X_test_scaled, sensitive_features=sensitive_features_test)

# Evaluate the performance of the model after mitigation
print("Model performance after mitigation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_mitigated):.5f}", end=", ")
print(f"Precision: {precision_score(y_test, y_pred_mitigated):.5f}", end=", ")
print(f"F1 Score: {f1_score(y_test, y_pred_mitigated):.5f}", end=", ")
print(f"Recall: {recall_score(y_test, y_pred_mitigated):.5f}")

# Calculate and print the Demographic Parity Difference after mitigation
dpd_mitigated = demographic_parity_difference(y_true=y_test, 
                                              y_pred=y_pred_mitigated, 
                                              sensitive_features=sensitive_features_test)
print(f"Demographic Parity Difference after mitigation: {dpd_mitigated:.5f}")