In [2]:
import warnings
from sklearn.metrics import accuracy_score

from preprocess import load_dataset, preprocess_dataset
from model import MLPClassifier, predict_nn, train_nn_adversarial
from causal_analysis import estimate_causal_effect, creat_causal_model
from fairness_metrics import compute_fairness_metrics
from visualization import plot_repair_effect

# Silence the two noisiest warning categories so the cell outputs stay readable.
# Order matters only for the position of the filters in warnings.filters:
# UserWarning is registered first, FutureWarning second.
for _category in (UserWarning, FutureWarning):
    warnings.simplefilter(action='ignore', category=_category)


ImportError: attempted relative import with no known parent package

In [ ]:
# Dataset location, the label column, and the raw columns handed to preprocessing
file_path = '../data/adult.csv'
target_column = 'income'
relevant_columns = [
    'age',
    'workclass',
    'fnlwgt',
    'education.num',
    'marital.status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'capital.gain',
    'capital.loss',
    'hours.per.week',
    'native.country',
    'income',
]

# Read the raw dataset
data = load_dataset(file_path)

# Recode the label as 0/1: rows whose income equals '>50K' become 1, all others 0
is_high_income = data['income'] == '>50K'
data['income'] = is_high_income.astype(int)

# preprocess_dataset returns a 4-part train/test split together with the
# processed frame; the split is unpacked into the usual X/y names.
split, processed_data = preprocess_dataset(
    data,
    relevant_cols=relevant_columns,
    target_column=target_column,
)
X_train, X_test, y_train, y_test = split

print(f"Training Data Shape: {X_train.shape}")
print(f"Testing Data Shape: {X_test.shape}")


In [ ]:
# Baseline fairness report on the test split, using the true labels
compute_fairness_metrics(X_test, y_test, title="BEFORE REPAIR")

# Assumed causal structure, expressed as a DOT digraph: sex_Male influences
# income directly and through education_num and hours_per_week; age
# influences income directly.
causal_graph = """
digraph {
    sex_Male -> income;
    sex_Male -> education_num;
    education_num -> income;
    hours_per_week -> income;
    sex_Male -> hours_per_week;
    age -> income;
}
"""

# Keep only the test rows of the processed frame; the copy keeps later column
# assignments on this frame from touching processed_data.
processed_data_test = processed_data.loc[X_test.index].copy()

# Build the causal model from the graph, with "sex_Male" as the sensitive
# attribute and "income" as the outcome.
causal_model = creat_causal_model(
    processed_data_test,
    causal_graph,
    "sex_Male",
    "income",
)
# Expected to render the causal graph (not verifiable from this cell alone)
causal_model.view_model()

# Causal effect estimated on the unmodified outcome
effect_before = estimate_causal_effect(causal_model)
print("\n=== Causal Effect BEFORE Repair ===")
print(f"Estimated Total Effect of Gender on Income: {effect_before.value:.4f}")


In [ ]:
# Columns fed to the network as input features
feature_cols = ['education_num', 'hours_per_week', 'age']

# Sensitive attribute for the training rows.
# NOTE(review): assumes X_train still carries a 'sex_Male' column — confirm
# against preprocess_dataset.
s_train = X_train['sex_Male']

# Fit the network with the adversarial training routine; it returns the
# trained model together with the scaler that predict_nn needs afterwards.
model_nn, scaler = train_nn_adversarial(
    X_train,
    y_train,
    s_train,
    feature_cols,
    n_epochs=500,
    lambda_=10,
)

# Score the held-out rows and report accuracy against the true labels
y_pred = predict_nn(model_nn, X_test, feature_cols, scaler)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"\nTest Accuracy (NN): {test_accuracy:.4f}")


In [ ]:
# Optional extension point: if a counterfactual repair function is available,
# counterfactual predictions could be generated here, for example:
# df_results = generate_counterfactuals(X_test, model_nn, scaler, feature_cols, y_test)

# Remove length-1 axes from the predictions once, so the fairness metrics,
# the causal model and the plot below all receive exactly the same values
# in the same shape.
y_pred_flat = y_pred.squeeze()

# Fairness Metrics After Repair
compute_fairness_metrics(X_test, y_pred_flat, title="AFTER REPAIR")

# Replace the income column of the test frame with the model's predictions so
# the causal effect is re-estimated on the predicted outcome.
# NOTE: this overwrites the existing "income" column of processed_data_test
# in place.
processed_data_test["income"] = y_pred_flat

# Create a new causal model with the repaired/predicted outcome
causal_model_after = creat_causal_model(
    processed_data_test,
    causal_graph,
    "sex_Male",
    "income",
)
effect_after = estimate_causal_effect(causal_model_after)
print("\n=== Causal Effect AFTER Repair ===")
print(f"Estimated Total Effect of Gender on Income: {effect_after.value:.4f}")

# Visualize the repair effect (true labels vs. predictions on the test rows)
plot_repair_effect(X_test, y_test, y_pred_flat)
