### Bias & Fairness in Data: Bias Mitigation Techniques
**Question**: Use the Adult Income dataset and apply the reweighing technique to balance the
class weights based on sensitive attributes (e.g., gender).

In [1]:
# write your code from here
import pandas as pd
import numpy as np
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import Reweighing
from sklearn.model_selection import train_test_split

# Load Dataset
# The Adult dataset conventionally marks unknown values with '?' (often with a
# leading space after the comma), which `isnull()` would not detect on its own.
# Parsing them as NaN here lets the missing-value step below actually see them.
try:
    df = pd.read_csv(
        'adult.csv',  # Change path if needed
        na_values='?',
        skipinitialspace=True,
    )
except FileNotFoundError as err:
    raise FileNotFoundError("Dataset file not found. Please check the file path.") from err
else:
    print("Dataset loaded successfully.")

# Handle Missing Values
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`.
# Note: values missing before the first observed entry of a column stay NaN.
if df.isnull().values.any():
    print("Missing values found! Applying forward fill...")
    df = df.ffill()

# Encode categorical variables
# Check the schema *before* touching the columns, so a missing column is
# reported directly instead of surfacing as a generic encoding failure.
if 'sex' not in df.columns:
    raise ValueError("Sensitive attribute 'sex' not found.")
if 'income' not in df.columns:
    raise ValueError("Target variable 'income' not found.")

try:
    # income: 1 for the '>50K' bracket, 0 otherwise.
    df['income'] = df['income'].apply(lambda x: 1 if '>50K' in x else 0)
    # sex: 1 for 'male' (case/whitespace-insensitive), 0 otherwise.
    df['sex'] = df['sex'].apply(lambda x: 1 if x.strip().lower() == 'male' else 0)
except (TypeError, AttributeError) as e:
    # Raised when a cell is not a string (e.g. NaN or an already-encoded int).
    raise ValueError(f"Encoding error: {e}") from e

# Validation Checks
# Explicit raises instead of `assert`, which is stripped when running with -O.
if set(df['income'].unique()) != {0, 1}:
    raise ValueError("Target variable 'income' not binary encoded correctly.")

# Split data
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

if len(train_df) == 0 or len(test_df) == 0:
    raise ValueError("Train/Test split failed — empty dataset encountered.")

# Convert to AIF360 StandardDataset
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

# AIF360 datasets require an all-numeric frame. Every remaining non-numeric
# feature column is therefore handed to `categorical_features` so that
# StandardDataset one-hot encodes it instead of failing on string values.
categorical_features = [
    col
    for col in train_df.select_dtypes(exclude=['number', 'bool']).columns
    if col not in ('income', 'sex')
]

train_data = StandardDataset(train_df,
                             label_name='income',
                             favorable_classes=[1],
                             protected_attribute_names=['sex'],
                             privileged_classes=[[1]],
                             categorical_features=categorical_features)

# Apply Reweighing
# Fit on the training set, then transform that same set; the result carries
# the adjusted values in `instance_weights`.
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)

RW.fit(train_data)
train_transf = RW.transform(train_data)

# Unit Test for instance weights
def test_reweighing_weights(dataset):
    weights = dataset.instance_weights
    assert weights is not None, "Instance weights are None after reweighing."
    assert np.all(weights >= 0), "Negative weights detected in reweighed dataset."
    print("✅ Reweighing unit test passed.")

# Run the weight sanity check on the reweighed training set.
test_reweighing_weights(train_transf)

ModuleNotFoundError: No module named 'aif360' (install the dependency first, e.g. `pip install aif360`, then re-run the cell)