diff --git a/safegraph/DETAILS.md b/safegraph/DETAILS.md index cf8e0dc1d..88331a99e 100644 --- a/safegraph/DETAILS.md +++ b/safegraph/DETAILS.md @@ -2,13 +2,15 @@ We import census block group-level raw mobility indicators from Safegraph, calculate functions of the raw data, and then aggregate the data to the -county and state levels. MSA and HRR not yet implemented. +county, state, HHS, and nation levels. ## Geographical Levels * `county`: reported using zero-padded FIPS codes. The FIPS codes are obtained by zero-padding the census block group codes and taking the first five digits, which are by construction the corresponding county FIPS code. +* `hhs`: reported using HHS region number * `state`: reported using two-letter postal code +* `nation`: reported using two-letter nation code. Just 'us' for now ## Metrics * `completely_home_prop`, defined as: diff --git a/safegraph/delphi_safegraph/constants.py b/safegraph/delphi_safegraph/constants.py index e6679df52..8d1e80f59 100644 --- a/safegraph/delphi_safegraph/constants.py +++ b/safegraph/delphi_safegraph/constants.py @@ -16,5 +16,7 @@ 'county', 'state', 'msa', - 'hrr' + 'hrr', + 'hhs', + 'nation' ] diff --git a/safegraph/delphi_safegraph/process.py b/safegraph/delphi_safegraph/process.py index 64e617e20..6dd7181e9 100644 --- a/safegraph/delphi_safegraph/process.py +++ b/safegraph/delphi_safegraph/process.py @@ -134,20 +134,14 @@ def aggregate(df, signal_names, geo_resolution='county'): new_code='state_id', new_col='geo_id', dropna=False) - elif geo_resolution == 'msa': + elif geo_resolution in ['msa', 'nation', 'hrr', 'hhs']: geo_transformed_df = gmpr.add_geocode(df, from_col='county_fips', from_code='fips', - new_code='msa', - new_col='geo_id', - dropna=False) - elif geo_resolution == 'hrr': - geo_transformed_df = gmpr.add_geocode(df, - from_col='county_fips', - from_code='fips', - new_code='hrr', + new_code=geo_resolution, new_col='geo_id', dropna=False) + else: raise ValueError( f'`geo_resolution` must be one of 
{GEO_RESOLUTIONS}.') diff --git a/safegraph/tests/test_process.py b/safegraph/tests/test_process.py index 57ad8b7a4..25eca8722 100644 --- a/safegraph/tests/test_process.py +++ b/safegraph/tests/test_process.py @@ -57,8 +57,7 @@ def test_aggregate_state(self): assert df.shape == (52, 17) def test_aggregate_msa(self): - """Tests that aggregation at the state level creates non-zero-valued - signals.""" + """Tests that aggregation at the MSA level creates non-zero-valued signals.""" cbg_df = construct_signals(pd.read_csv('raw_data/sample_raw_data.csv'), SIGNALS) df = aggregate(cbg_df, SIGNALS, 'msa') @@ -69,8 +68,7 @@ def test_aggregate_msa(self): assert df.shape == (372, 17) def test_aggregate_hrr(self): - """Tests that aggregation at the state level creates non-zero-valued - signals.""" + """Tests that aggregation at the HRR level creates non-zero-valued signals.""" cbg_df = construct_signals(pd.read_csv('raw_data/sample_raw_data.csv'), SIGNALS) df = aggregate(cbg_df, SIGNALS, 'hrr') @@ -80,6 +78,28 @@ def test_aggregate_hrr(self): assert np.all(x[~np.isnan(x)] >= 0) assert df.shape == (306, 17) + def test_aggregate_nation(self): + """Tests that aggregation at the nation level creates non-zero-valued signals.""" + cbg_df = construct_signals(pd.read_csv('raw_data/sample_raw_data.csv'), + SIGNALS) + df = aggregate(cbg_df, SIGNALS, 'nation') + + assert np.all(df[f'{SIGNALS[0]}_n'].values > 0) + x = df[f'{SIGNALS[0]}_se'].values + assert np.all(x[~np.isnan(x)] >= 0) + assert df.shape == (1, 17) + + def test_aggregate_hhs(self): + """Tests that aggregation at the HHS level creates non-zero-valued signals.""" + cbg_df = construct_signals(pd.read_csv('raw_data/sample_raw_data.csv'), + SIGNALS) + df = aggregate(cbg_df, SIGNALS, 'hhs') + + assert np.all(df[f'{SIGNALS[0]}_n'].values > 0) + x = df[f'{SIGNALS[0]}_se'].values + assert np.all(x[~np.isnan(x)] >= 0) + assert df.shape == (10, 17) + def test_files_in_past_week(self): """Tests that `files_in_past_week()` finds the 
file names corresponding to the previous 6 days."""