removes suppression logic

benhammondmusic · Aug 30, 2023 · 9425fe0
1 parent 33bfcad
commit 9425fe0
Showing 1 changed file with 0 additions and 25 deletions.
diff --git a/python/datasources/cdc_restricted_local.py b/python/datasources/cdc_restricted_local.py
@@ -126,14 +126,6 @@
     'race_and_age': ([RACE_COL, ETH_COL, AGE_COL], {**AGE_NAMES_MAPPING, **RACE_NAMES_MAPPING}),
 }
 
-# States that we have decided to suppress different kinds of data for, due to
-# very incomplete data. Note that states that have all data suppressed will
-# have case, hospitalization, and death data suppressed.
-# See https://github.com/SatcherInstitute/health-equity-tracker/issues/617.
-ALL_DATA_SUPPRESSION_STATES = ("MP", "MS", "WV")
-HOSP_DATA_SUPPRESSION_STATES = ("HI", "NE", "RI", "SD")
-DEATH_DATA_SUPPRESSION_STATES = ("HI", "NE", "SD", "DE")
-
 
 def combine_race_eth(df):
     """Combines the race and ethnicity fields into the legacy race/ethnicity category.
@@ -326,9 +318,6 @@ def process_data(dir, files):
             df[COUNTY_FIPS_COL] = df[COUNTY_FIPS_COL].map(
                 lambda x: x.zfill(5) if len(x) > 0 else x)
 
-            # Remove records from states where we want to suppress all data.
-            df = df[~df[STATE_COL].isin(ALL_DATA_SUPPRESSION_STATES)]
-
             # For each of ({state, county} x {race, sex, age}), we slice the
             # data to focus on that dimension and aggregate.
             for (geo, demo), _ in all_dfs.items():
@@ -367,20 +356,6 @@ def process_data(dir, files):
         # Standardize the column names and race/age/sex values.
         all_dfs[key] = all_dfs[key].rename(columns=COL_NAME_MAPPING)
 
-        # Set hospitalization and death data for states we want to suppress to
-        # an empty string, indicating missing data.
-        rows_to_modify = all_dfs[key][std_col.STATE_POSTAL_COL].isin(
-            HOSP_DATA_SUPPRESSION_STATES)
-        all_dfs[key].loc[rows_to_modify, std_col.COVID_HOSP_Y] = ""
-        all_dfs[key].loc[rows_to_modify, std_col.COVID_HOSP_N] = ""
-        all_dfs[key].loc[rows_to_modify, std_col.COVID_HOSP_UNKNOWN] = ""
-
-        rows_to_modify = all_dfs[key][std_col.STATE_POSTAL_COL].isin(
-            DEATH_DATA_SUPPRESSION_STATES)
-        all_dfs[key].loc[rows_to_modify, std_col.COVID_DEATH_Y] = ""
-        all_dfs[key].loc[rows_to_modify, std_col.COVID_DEATH_N] = ""
-        all_dfs[key].loc[rows_to_modify, std_col.COVID_DEATH_UNKNOWN] = ""
-
         # Standardize all None/NaNs in the data to an empty string, and convert
         # everything to string before returning & writing to CSV.
         all_dfs[key] = all_dfs[key].fillna("").astype(str)