Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add popsafe-fips to geomapper #1787

Merged
merged 20 commits into from
Mar 14, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4d04b19
add popsafe county -> fips and state data files
nmdefries Feb 16, 2023
b3a03d4
construct class attributes using CROSSWALK keys
nmdefries Feb 16, 2023
0284c28
support popsafe-fips in geomap.py; get_geos_within to take "fips"
nmdefries Feb 16, 2023
1f8a21b
combine fips and popsafe-fips get_geo_within logic; comments
nmdefries Feb 20, 2023
fb82be4
code to generate popsafe tables
nmdefries Feb 20, 2023
f284b91
document group assignment procedure
nmdefries Feb 21, 2023
ba62241
add lowpop county group mapping file
nmdefries Feb 21, 2023
a1c491c
test popsafe-fips
nmdefries Feb 20, 2023
f74aefd
linting
nmdefries Feb 21, 2023
1a1bba7
change code name
nmdefries Mar 2, 2023
bb7df1a
generate local county mapping from CHNG spreadsheet
nmdefries Mar 3, 2023
be8da31
remove "population" wording in relation to chng-fips
nmdefries Mar 3, 2023
54faa45
rename local county groups csv to not mention population
nmdefries Mar 3, 2023
18a08a5
drop split-out county fields; don't always exist in input
nmdefries Mar 13, 2023
0ef6b13
split out single counties from fips_list field
nmdefries Mar 13, 2023
f9ed4d4
raise error if new groups seen
nmdefries Mar 13, 2023
4fa39b7
state_fips and group are already str; don't need to cast
nmdefries Mar 13, 2023
4b9e836
Merge branch 'ndefries/geomapper/popsafe-county-level' into ndefries/…
nmdefries Mar 13, 2023
dc02bb5
make error more actionable
nmdefries Mar 13, 2023
f785b22
Merge pull request #1803 from cmu-delphi/ndefries/geomapper/chng-fips…
korlaxxalrok Mar 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
402 changes: 402 additions & 0 deletions _delphi_utils_python/data_proc/geomap/chng_county_groups.csv

Large diffs are not rendered by default.

75 changes: 75 additions & 0 deletions _delphi_utils_python/data_proc/geomap/geo_data_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,18 @@
FIPS_PUERTO_RICO_POPULATION_URL = "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt?"
STATE_HHS_FILE = "hhs.txt"
ZIP_POP_MISSING_FILE = "zip_pop_filling.csv"
# County-group definitions read by derive_fips_chngfips_crosswalk().
CHNG_COUNTY_GROUPS_FILE = "chng_county_groups.csv"

# Out files
FIPS_STATE_OUT_FILENAME = "fips_state_table.csv"
FIPS_MSA_OUT_FILENAME = "fips_msa_table.csv"
FIPS_HRR_OUT_FILENAME = "fips_hrr_table.csv"
FIPS_ZIP_OUT_FILENAME = "fips_zip_table.csv"
FIPS_HHS_FILENAME = "fips_hhs_table.csv"
# FIPS -> CHNG FIPS crosswalk, written by derive_fips_chngfips_crosswalk().
FIPS_CHNGFIPS_OUT_FILENAME = "fips_chng-fips_table.csv"
FIPS_POPULATION_OUT_FILENAME = "fips_pop.csv"

# CHNG FIPS -> state crosswalk, written by derive_chngfips_state_crosswalk().
CHNGFIPS_STATE_OUT_FILENAME = "chng-fips_state_table.csv"
ZIP_HSA_OUT_FILENAME = "zip_hsa_table.csv"
ZIP_HRR_OUT_FILENAME = "zip_hrr_table.csv"
ZIP_FIPS_OUT_FILENAME = "zip_fips_table.csv"
Expand Down Expand Up @@ -475,6 +478,76 @@ def derive_zip_hhs_crosswalk():
zip_state.sort_values(["zip", "hhs"]).to_csv(join(OUTPUT_DIR, ZIP_HHS_FILENAME), index=False)


def derive_fips_chngfips_crosswalk():
    """Build a crosswalk table for FIPS to CHNG FIPS.

    Reads the county-group definitions from ``CHNG_COUNTY_GROUPS_FILE`` and
    writes a two-column (``fips``, ``chng-fips``) table to
    ``FIPS_CHNGFIPS_OUT_FILENAME`` inside ``OUTPUT_DIR``.

    Counties that belong to a group are mapped to a code of the form
    ``<state_fips>g<group>`` (both parts zero-padded to two characters).
    Every county present in the FIPS -> state table but absent from the
    groupings is mapped to its own FIPS code.

    The FIPS -> state table is generated first if it does not exist yet.
    """
    if not isfile(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME)):
        derive_fips_state_crosswalk()

    # County mapping file is derived from
    # https://docs.google.com/spreadsheets/d/1PEce4CjjHbRM1Z5xEMNI6Xsq_b2kkCh0/edit#gid=871427657.
    # We assign an incrementing integer to be the group id of each county
    # grouping within the given state via:
    #
    # county_groups["group"] = (county_groups.groupby("state_fips").cumcount() + 1).astype("string")
    county_groups = pd.read_csv(
        CHNG_COUNTY_GROUPS_FILE, dtype="string", index_col=False
    ).drop(columns="fips_list")

    # Change to long format: every column other than the two id columns is
    # melted into "fips", giving one row per (group, county). Cells that were
    # empty in the wide layout become NA and are dropped.
    county_groups = pd.melt(
        county_groups,
        id_vars=["state_fips", "group"],
        var_name="county_num",
        value_name="fips",
    ).drop(
        columns="county_num"
    ).dropna()

    # Restore leading zeros. The file is read with dtype="string", so
    # state_fips and group need no further cast after zero-padding.
    county_groups["state_fips"] = county_groups["state_fips"].str.zfill(2)
    county_groups["group"] = county_groups["group"].str.zfill(2)
    # The explicit cast is kept for the melted value column only.
    county_groups["fips"] = county_groups["fips"].str.zfill(5).astype("string")
    # Combine state codes and group ids into a single FIPS code.
    county_groups["chng-fips"] = county_groups["state_fips"] + "g" + county_groups["group"]

    county_groups = county_groups[["fips", "chng-fips"]]
    fips_to_state = pd.read_csv(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME), dtype="string", index_col=False)

    # Get all the fips that aren't included in the chng groupings.
    extra_fips_list = list(set(fips_to_state.fips) - set(county_groups.fips))
    # Normal fips codes and CHNG fips codes are the same for ungrouped counties.
    extra_fips_df = pd.DataFrame({"fips": extra_fips_list, "chng-fips": extra_fips_list}, dtype="string")

    # Combine grouped and ungrouped counties. Sorting makes the output
    # independent of the arbitrary ordering of the set difference above.
    pd.concat(
        [county_groups, extra_fips_df]
    ).sort_values(
        ["fips", "chng-fips"]
    ).to_csv(
        join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME), index=False
    )


def derive_chngfips_state_crosswalk():
    """Build a crosswalk table for CHNG FIPS to state.

    Joins the FIPS -> CHNG FIPS table with the FIPS -> state table on
    ``fips``, drops the county-level ``fips`` column, removes duplicate rows,
    and writes the result to ``CHNGFIPS_STATE_OUT_FILENAME`` inside
    ``OUTPUT_DIR``.

    Either input table is generated first if it does not exist yet.
    """
    if not isfile(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME)):
        derive_fips_state_crosswalk()

    if not isfile(join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME)):
        derive_fips_chngfips_crosswalk()

    fips_to_group = pd.read_csv(join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME), dtype="string", index_col=False)
    fips_to_state = pd.read_csv(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME), dtype="string", index_col=False)

    # Attach the state columns to every county row, then discard the county
    # code; counties sharing a CHNG FIPS code yield identical rows, which
    # drop_duplicates() removes.
    group_to_state = fips_to_group.join(
        fips_to_state.set_index("fips"), on="fips", how="left"
    ).drop(
        columns="fips"
    ).drop_duplicates(
    ).sort_values(
        ["chng-fips", "state_code"]
    )
    group_to_state.to_csv(join(OUTPUT_DIR, CHNGFIPS_STATE_OUT_FILENAME), index=False)


def clear_dir(dir_path: str):
    """Remove every entry that ``listdir`` reports directly inside *dir_path*.

    Each entry is deleted with ``os.remove``; the directory itself is kept.
    """
    for entry_name in listdir(dir_path):
        entry_path = join(dir_path, entry_name)
        remove(entry_path)
Expand All @@ -501,3 +574,5 @@ def clear_dir(dir_path: str):
derive_zip_population_table()
derive_fips_hhs_crosswalk()
derive_zip_hhs_crosswalk()
derive_fips_chngfips_crosswalk()
derive_chngfips_state_crosswalk()
Loading