In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import tensorflow as tf
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing

# Read the raw credit-risk records into a DataFrame
DATASET_PATH = 'credit_risk_dataset.csv'  # Replace with the actual dataset path
data = pd.read_csv(DATASET_PATH)

# Model inputs and the binary target column
FEATURE_COLUMNS = ['Age', 'Gender', 'Income', 'CreditScore']
features = data[FEATURE_COLUMNS]
labels = data['Approved']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Baseline Random Forest used for the initial analysis (fit() returns the estimator itself)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

# Per-class precision / recall / F1 of the baseline on the held-out rows
print("Random Forest Model Report:")
print(classification_report(y_test, rf_predictions))

# Fairness audit configuration for AIF360
privileged_groups = [{'Gender': 1}]  # Define privileged group based on Gender (e.g., 1 for males)
unprivileged_groups = [{'Gender': 0}]  # Define unprivileged group based on Gender (e.g., 0 for females)
protected_attribute = 'Gender'  # The attribute to test for bias


def _make_binary_label_dataset(feature_frame, label_series):
    """Wrap model features and their labels in an AIF360 BinaryLabelDataset.

    Args:
        feature_frame: DataFrame holding the model's feature columns; it must
            contain the protected attribute column.
        label_series: Series of 0/1 outcomes aligned with ``feature_frame``;
            its name is used as the label column name.

    Returns:
        A BinaryLabelDataset where 1 is the favorable outcome and 0 the
        unfavorable one.
    """
    return BinaryLabelDataset(
        favorable_label=1,  # Define the favorable label
        unfavorable_label=0,  # Define the unfavorable label
        # Only the model's feature columns plus the label are passed in, so
        # unrelated columns of the raw CSV never reach AIF360.
        df=pd.concat([feature_frame, label_series], axis=1),
        label_names=[label_series.name],
        protected_attribute_names=[protected_attribute],
        # These two arguments take one list of raw values per protected
        # attribute (e.g. [[1]]), not the group dicts used by the metric classes.
        privileged_protected_attributes=[
            [group[protected_attribute] for group in privileged_groups]
        ],
        unprivileged_protected_attributes=[
            [group[protected_attribute] for group in unprivileged_groups]
        ],
    )


# Dataset-level view: all rows, true labels only
dataset = _make_binary_label_dataset(features, labels)

# Compute metrics on the dataset
metric = BinaryLabelDatasetMetric(
    dataset,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

# ClassificationMetric compares two BinaryLabelDatasets over the same rows:
# one with the true labels and one with the model's predicted labels.
test_dataset = _make_binary_label_dataset(X_test, y_test)
test_predictions_dataset = test_dataset.copy(deepcopy=True)
test_predictions_dataset.labels = (
    np.asarray(rf_predictions, dtype=np.float64).reshape(-1, 1)
)
classification_metric = ClassificationMetric(
    test_dataset,
    test_predictions_dataset,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

# Check for disparate impact
disparate_impact = metric.disparate_impact()
print(f"Disparate Impact: {disparate_impact}")

# Check for equal opportunity difference
equal_opportunity_difference = classification_metric.equal_opportunity_difference()
print(f"Equal Opportunity Difference: {equal_opportunity_difference}")

# Apply reweighing to mitigate bias.
# The weights are learned from the training split only, so the held-out rows
# used for evaluation below never influence the retrained model.
train_dataset = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=pd.concat([X_train, y_train], axis=1),
    label_names=[y_train.name],
    protected_attribute_names=[protected_attribute],
    # One list of raw values per protected attribute, derived from the group dicts.
    privileged_protected_attributes=[
        [group[protected_attribute] for group in privileged_groups]
    ],
    unprivileged_protected_attributes=[
        [group[protected_attribute] for group in unprivileged_groups]
    ],
)
RW = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
transformed_dataset = RW.fit_transform(train_dataset)

# Train a Random Forest model on the transformed dataset.
# The features are rebuilt as a DataFrame with the training column names so
# that fitting and predicting use the same named columns.
X_transformed = pd.DataFrame(
    transformed_dataset.features,
    columns=transformed_dataset.feature_names,
    index=X_train.index,
)
y_transformed = transformed_dataset.labels.ravel().astype(y_train.dtype)

rf_model_transformed = RandomForestClassifier(n_estimators=100, random_state=42)
# Reweighing's result lives in instance_weights; without sample_weight the
# retrained model would not reflect the mitigation at all.
rf_model_transformed.fit(
    X_transformed,
    y_transformed,
    sample_weight=transformed_dataset.instance_weights,
)
rf_predictions_transformed = rf_model_transformed.predict(X_test[X_transformed.columns])

# Evaluate the transformed model
print("Random Forest Model (Transformed) Report:")
print(classification_report(y_test, rf_predictions_transformed))


ModuleNotFoundError: No module named 'tensorflow'