In [1]:
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.exceptions import ConvergenceWarning

# Silence scikit-learn's ConvergenceWarning for the rest of the session
warnings.simplefilter("ignore", category=ConvergenceWarning)

In [2]:
# --- Run settings (edit here; later cells read these names) ---
# Where the data lives
dataset_name = "ausprivauto0405"
file_path = f"{dataset_name}.csv"

# What is being predicted
target_column = "ClaimNb"

# How the train/test split is made
test_size = 0.2
random_state = 42

# Read the CSV, using its first column as the DataFrame index
df = pd.read_csv(file_path, index_col=0)

In [3]:
# One-hot encode every string/categorical column in a single pass.
# One get_dummies call replaces the per-column drop/join loop. It yields the
# same columns in the same order (untouched columns first, then the indicator
# columns of each encoded feature, prefixed with the feature name), but it
# avoids DataFrame.join, which multiplies rows when the index contains
# duplicate labels, and it does not copy the frame once per feature.
categorical_features = df.select_dtypes(include=['category', 'object']).columns.tolist()

df = pd.get_dummies(df, columns=categorical_features)


In [4]:
# Separate the label column from the predictor columns
y = df[target_column]
X = df.drop(columns=target_column)

# Hold out `test_size` of the rows for evaluation; the split is seeded
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
)

# Train the baseline classifier.
# class_weight='balanced' asks scikit-learn to weight classes inversely to
# their frequency in y_train.
# random_state is passed so the model is seeded the same way as the
# train/test split; scikit-learn documents it as used by the liblinear solver
# to shuffle the data.
model = LogisticRegression(
    solver='liblinear',
    class_weight='balanced',
    max_iter=100,
    random_state=random_state,
)
model.fit(x_train, y_train)

# Predict labels for the held-out rows with the fitted baseline
y_pred = model.predict(x_test)

# Report each metric on its own line, five decimals each
print("Initial model performance:")
for label, metric in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("F1 Score", f1_score),
    ("Recall", recall_score),
):
    print(f"{label}: {metric(y_test, y_pred):.5f}")

Initial model performance:
Accuracy: 0.60662
Precision: 0.09745
F1 Score: 0.16799
Recall: 0.60835


In [5]:
from fairlearn.metrics import equalized_odds_difference

# Sensitive attribute: the Gender column of the held-out feature frame
sensitive_features = x_test['Gender']

# Equalized-odds difference of the baseline predictions across Gender groups
eo = equalized_odds_difference(
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sensitive_features,
)

print("Equalized Odds Difference: ", eo)

Equalized Odds Difference:  0.011526788382003716


In [6]:
from fairlearn.postprocessing import ThresholdOptimizer

# Wrap the baseline classifier in a post-processing mitigator constrained by
# equalized odds and optimised for balanced accuracy.
# NOTE: this is a wrapper, not a copy of `model`; with fairlearn's default
# prefit=False, fit() below re-trains a clone of the estimator.
# predict_method is set explicitly: leaving it unset emits a FutureWarning on
# fairlearn releases where the default was being changed, and 'predict' is
# what the unset default resolved to on those releases.
# NOTE(review): 'auto' would threshold on predict_proba scores instead of hard
# labels - consider it if the recorded numbers do not need to be reproduced.
mitigated_model = ThresholdOptimizer(
    estimator=model,
    constraints='equalized_odds',
    objective='balanced_accuracy_score',
    predict_method='predict',
)

# Fit on the training data together with its sensitive attribute
mitigated_model.fit(x_train, y_train, sensitive_features=x_train['Gender'])

# ThresholdOptimizer predictions are randomized, so seed them with the
# notebook's random_state to make the reported metrics repeatable
y_pred_mitigated = mitigated_model.predict(
    x_test,
    sensitive_features=x_test["Gender"],
    random_state=random_state,
)

# Score the mitigated predictions on the held-out labels
accuracy_mitigated, precision_mitigated, recall_mitigated, f1_mitigated = (
    metric(y_test, y_pred_mitigated)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Equalized-odds difference of the mitigated predictions across Gender groups
eo_mitigated = equalized_odds_difference(
    y_test,
    y_pred_mitigated,
    sensitive_features=x_test['Gender'],
)

# One comma-separated line for the four metrics, then the fairness figure
mitigated_summary = ", ".join([
    f"Accuracy: {accuracy_mitigated:.5f}",
    f"Precision: {precision_mitigated:.5f}",
    f"F1 Score: {f1_mitigated:.5f}",
    f"Recall: {recall_mitigated:.5f}",
])
print("Mitigated model performance: " + mitigated_summary)
print("Equalized Odds Difference: ", eo_mitigated)

  warn(


Mitigated model performance: Accuracy: 0.60529, Precision: 0.09728, F1 Score: 0.16778, Recall: 0.60948
Equalized Odds Difference:  0.00881676128173281
