# Create a table converting state codes to HHS region numbers
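Manual data processing: text copied from the HHS regional offices page is parsed into (region number, state name) pairs, joined to the US Census Bureau state codes table on state name, and written out as a CSV crosswalk.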

In [8]:
import pandas as pd

In [13]:
# Verbatim text copied from
# https://www.hhs.gov/about/agencies/iea/regional-offices/index.html
# NOTE(review): machine-specific absolute path; this cell only runs where the file exists.
HHS_TEXT_PATH = "/home/dskel/Code/HHS-Region-Numbers-by-State/hhs.txt"

with open(HHS_TEXT_PATH) as f:
    hhs_lines = f.readlines()

# Process text into (region number, state name) pairs.
# A line containing "Region" is treated as a header whose number sits at
# characters 7-8 (presumably "Region N ..." -- verify against hhs.txt); every
# other non-blank line is a ", "-separated list of names for the most recent header.
hhs_state_pairs = []
region_number = None
for raw_line in hhs_lines:
    if "Region" in raw_line:
        region_number = int(raw_line[7:9])
        continue

    names_line = raw_line.strip()
    if not names_line:
        # Tolerate blank separator lines instead of mis-pairing headers and lists
        continue
    if region_number is None:
        raise ValueError(f"State list found before any region header: {names_line!r}")

    for name in names_line.split(", "):
        name = name.strip()
        # Drop the list conjunction ("..., and Vermont"). An explicit prefix check
        # is used because str.lstrip(" and") strips a *set of characters*, which
        # would also eat the start of any name beginning with a, n or d.
        if name.startswith("and "):
            name = name[len("and "):].lstrip()
        hhs_state_pairs.append((region_number, name))

# Make naming adjustments: each old pair is removed and its replacement appended
# at the end of the list. list.remove raises ValueError if the old name is
# missing, which signals that the source text no longer matches.
name_adjustments = [
    ((2, "the Virgin Islands"), (2, "U.S. Virgin Islands")),
    (
        (9, "Commonwealth of the Northern Mariana Islands"),
        (9, "Northern Mariana Islands"),
    ),
]
for old_pair, new_pair in name_adjustments:
    hhs_state_pairs.remove(old_pair)
    hhs_state_pairs.append(new_pair)

In [14]:
# Display the parsed (region number, state name) pairs for a visual check
hhs_state_pairs

[(1, 'Connecticut'),
 (1, 'Maine'),
 (1, 'Massachusetts'),
 (1, 'New Hampshire'),
 (1, 'Rhode Island'),
 (1, 'Vermont'),
 (2, 'New Jersey'),
 (2, 'New York'),
 (2, 'Puerto Rico'),
 (3, 'Delaware'),
 (3, 'District of Columbia'),
 (3, 'Maryland'),
 (3, 'Pennsylvania'),
 (3, 'Virginia'),
 (3, 'West Virginia'),
 (4, 'Alabama'),
 (4, 'Florida'),
 (4, 'Georgia'),
 (4, 'Kentucky'),
 (4, 'Mississippi'),
 (4, 'North Carolina'),
 (4, 'South Carolina'),
 (4, 'Tennessee'),
 (5, 'Illinois'),
 (5, 'Indiana'),
 (5, 'Michigan'),
 (5, 'Minnesota'),
 (5, 'Ohio'),
 (5, 'Wisconsin'),
 (6, 'Arkansas'),
 (6, 'Louisiana'),
 (6, 'New Mexico'),
 (6, 'Oklahoma'),
 (6, 'Texas'),
 (7, 'Iowa'),
 (7, 'Kansas'),
 (7, 'Missouri'),
 (7, 'Nebraska'),
 (8, 'Colorado'),
 (8, 'Montana'),
 (8, 'North Dakota'),
 (8, 'South Dakota'),
 (8, 'Utah'),
 (8, 'Wyoming'),
 (9, 'Arizona'),
 (9, 'California'),
 (9, 'Hawaii'),
 (9, 'Nevada'),
 (9, 'American Samoa'),
 (9, 'Federated States of Micronesia'),
 (9, 'Guam'),
 (9, 'Marshall I

In [15]:
# Tabulate the (region number, state name) pairs, storing the region number as text
hhs_df = pd.DataFrame(
    hhs_state_pairs,
    columns=["hhs_region_number", "state_name"],
).astype({"hhs_region_number": str})

Load the state codes crosswalk published by the US Census Bureau.

In [16]:
# Read the pipe-delimited Census reference file, drop the STATENS column and
# rename the remaining columns to this notebook's naming.
census_states_df = (
    pd.read_csv("http://www2.census.gov/geo/docs/reference/state.txt?#", delimiter="|")
    .drop(columns="STATENS")
    .rename(
        columns={
            "STATE": "state_code",
            "STUSAB": "state_id",
            "STATE_NAME": "state_name",
        }
    )
)
# Codes become zero-padded two-character strings; ids become lowercase strings
census_states_df["state_code"] = census_states_df["state_code"].astype(str).str.zfill(2)
census_states_df["state_id"] = census_states_df["state_id"].astype(str).str.lower()

# Add three more jurisdictions manually (presumably absent from the Census
# file -- verify). Codes are written as strings so the state_code column keeps
# a single type, matching the zero-padded strings above.
extra_states_df = pd.DataFrame(
    [
        {
            "state_code": "70",
            "state_name": "Republic of Palau",
            "state_id": "pw",
        },
        {
            "state_code": "68",
            "state_name": "Marshall Islands",
            "state_id": "mh",
        },
        {
            "state_code": "64",
            "state_name": "Federated States of Micronesia",
            "state_id": "fm",
        },
    ],
    columns=["state_code", "state_id", "state_name"],
)

# ignore_index=True gives the combined table one unique 0..n-1 index instead
# of repeating the labels 0-2 for the appended rows.
states_df = pd.concat((census_states_df, extra_states_df), ignore_index=True)


In [17]:
# Display the combined state codes table for a visual check
states_df

Unnamed: 0,state_code,state_id,state_name
0,1,al,Alabama
1,2,ak,Alaska
2,4,az,Arizona
3,5,ar,Arkansas
4,6,ca,California
5,8,co,Colorado
6,9,ct,Connecticut
7,10,de,Delaware
8,11,dc,District of Columbia
9,12,fl,Florida


In [20]:
# Attach the HHS region number to each state row by name. The left join keeps
# every row of states_df; dropna() then discards any row with a missing value,
# which includes rows that found no HHS region.
merged_df = states_df.merge(hhs_df, on="state_name", how="left")
crosswalk_df = merged_df.dropna()

# Write the crosswalk without the DataFrame index column
crosswalk_df.to_csv(
    "/home/dskel/Code/HHS-Region-Numbers-by-State/state_hhs_crosswalk.csv",
    index=False,
)