# Folktables: Real‐World Bias Detection with MSD

## Configuration & Imports

In [1]:
import numpy as np
import pandas as pd

from folktables import ACSDataSource, ACSIncome
from humancompatible.detect import detect_bias, detect_bias_two_samples

#### Main parameters

In [2]:
# Two-letter postal abbreviations of the two states whose survey data is loaded.
state1, state2 = "HI", "ME"

#### Extra parameters

In [3]:
# --- Which ACS release to read, and where it is stored on disk ---
survey_year, horizon = "2018", "1-Year"
data_root = "../data/folktables"  # passed to ACSDataSource as root_dir

# --- Column selection ---
# Every selected column is also listed as a protected attribute.
# Wider column list kept for reference:
# ['AGEP', 'COW', 'SCHL', 'MAR', 'OCCP', 'POBP', 'RELP', 'WKHP', 'SEX', 'RAC1P']
selected_columns = ["AGEP", "MAR", "POBP", "SEX", "RAC1P"]
protected_attrs = list(selected_columns)  # separate list object, same contents
continuous_feats = list()
feature_map = dict()

# --- Sampling / solver settings ---
# NOTE(review): presumably consumed by the detection calls further down — confirm.
seed = 42
n_samples = 1000
method = "MSD"
method_kwargs = dict(time_limit=120)  # 2 min per solve

## Load & Prepare Data via Folktables

In [4]:
def load_state_manual():
    """
    Return the result of ACSDataSource.get_data for `state1` and `state2`.

    The first attempt asks folktables to download the data (download=True).
    If that attempt raises, the error and manual-download instructions are
    printed, and the data is requested a second time with download=False so
    that it is read from files already placed under
    data_root/{survey_year}/{horizon}/.

    Reads the module-level settings survey_year, horizon, data_root, state1
    and state2. An exception raised by the second attempt is not caught here.
    """
    source = ACSDataSource(
        root_dir=data_root,
        survey="person",
        horizon=horizon,
        survey_year=survey_year,
    )

    try:
        # Happy path: let folktables fetch the files itself.
        return source.get_data(states=[state1, state2], download=True)
    except Exception as err:
        print("\n⚠️  Automatic download failed:")
        print(f"    {err!r}\n")
        print("→ Please manually download these two files and unzip them under:")
        for abbrev in (state1, state2):
            print(f"    {data_root}/{survey_year}/{horizon}/csv_p{abbrev.lower()}.zip")
        print("\nYou can get them from:")
        print(f"https://www2.census.gov/programs-surveys/acs/data/pums/{survey_year}/{horizon}/\n")
        # Retry inside the handler, with downloading off, so folktables reads
        # from disk; a failure here propagates to the caller.
        return source.get_data(states=[state1, state2], download=False)