We use the [US Bible Belt states](https://en.wikipedia.org/wiki/Bible_Belt) from the ACSIncome prediction task of [Folktables](https://github.com/socialfoundations/folktables) to showcase the code of Lazzari, Alvarez, and Ruggieri (2022).

In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
from folktables import ACSDataSource, ACSIncome

# Two-letter postal abbreviations of all 50 US states, sorted alphabetically.
states = sorted(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
                 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
                 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
                 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
                 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'])

# State-code lookup table shipped with the repository (../data/state_codes.csv).
state_codes = pd.read_csv(os.path.join('..', 'data', 'state_codes.csv'))

# Bible Belt states used in the experiments.
# NOTE: this list previously contained 'AK', which is the postal code for
# Alaska; Arkansas ('AR') is the Bible Belt state that was intended.
bible_belt = ['AL', 'AR', 'GA', 'LA', 'MS', 'OK', 'TN', 'NC', 'SC']

# Folktables data source: ACS 2017, 1-year horizon, person-level survey.
# root_dir points at ../data, the same folder the CSV exports are written to.
data_source = ACSDataSource(survey_year='2017', horizon='1-Year', survey='person', root_dir=os.path.join('..', 'data'))

In [None]:
# Preview the first 50 rows of the state-code lookup table.
state_codes.head(n=50)

In [None]:
# For every Bible Belt state: fetch the ACS data, build the ACSIncome feature
# matrix and labels, and write them to ../data/lar_<state>_adult.csv using '|'
# as the field separator.
for state in bible_belt:
    print(state)
    data = data_source.get_data(states=[state], download=True)
    features, labels, _ = ACSIncome.df_to_numpy(data)

    # Take the counts from the feature matrix itself. The previous
    # df.shape[1]-1 was evaluated after both 'Y' and 'STATE' had been added,
    # so it reported one feature too many.
    n_individuals, n_features = features.shape
    print("{} features for {} individuals".format(n_features, n_individuals))

    df = pd.DataFrame(features, columns=ACSIncome.features)
    df['Y'] = labels      # target column
    df['STATE'] = state   # keep track of the state each row belongs to
    df.to_csv(os.path.join('..', 'data', 'lar_{}_adult.csv'.format(state.lower())), sep='|', index=False)

    # Release the per-state objects before loading the next state.
    del data, features, labels, df
print('DONE')

In [None]:
# let's run the pipeline for Florida
fl_data = data_source.get_data(states=['FL'], download=True)
fl_features, fl_labels, _ = ACSIncome.df_to_numpy(fl_data)

df = pd.DataFrame(fl_features, columns=ACSIncome.features)
df['Y'] = fl_labels  # target column
# NOTE(review): unlike the per-state loop over bible_belt, no 'STATE' column is
# added here -- confirm that run_experiments expects this schema.

print("{} features for {} individuals".format(df.shape[1]-1, df.shape[0]))

# save for run_experiments
df.to_csv(os.path.join('..', 'data', 'lar_fl_adult.csv'), sep='|', index=False)

# Preview the first rows. This must be the last expression of the cell: a
# notebook only displays the value of the final expression, so the preview
# was silently discarded when it sat in the middle of the cell.
df.head(5)