Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nssp/delphi_nssp/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"state",
"county",
"hhs",
"hsa-nci",
]

SIGNALS_MAP = {
Expand Down
4 changes: 2 additions & 2 deletions nssp/delphi_nssp/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def pull_with_socrata_api(socrata_token: str, dataset_id: str):
-------
list of dictionaries, each representing a row in the dataset
"""
client = Socrata("data.cdc.gov", socrata_token)
client = Socrata("data.cdc.gov", socrata_token, timeout=50) # set timeout to avoid read timed out error
results = []
offset = 0
limit = 50000 # maximum limit allowed by SODA 2.0
Expand Down Expand Up @@ -177,5 +177,5 @@ def pull_nssp_data(
# Format county fips to all be 5 digits with leading zeros
df_ervisits["fips"] = df_ervisits["fips"].apply(lambda x: str(x).zfill(5) if str(x) != "0" else "0")

keep_columns = ["timestamp", "geography", "county", "fips"]
keep_columns = ["timestamp", "geography", "county", "fips", "hsa_nci_id"]
return df_ervisits[SIGNALS + keep_columns]
9 changes: 9 additions & 0 deletions nssp/delphi_nssp/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def add_needed_columns(df, col_names=None):
df = add_default_nancodes(df)
return df


def logging(start_time, run_stats, logger):
"""Boilerplate making logs."""
elapsed_time_in_seconds = round(time.time() - start_time, 2)
Expand Down Expand Up @@ -137,6 +138,14 @@ def run_module(params, logger=None):
df = geo_mapper.add_geocode(df, "state_code", "hhs", from_col="state_code", new_col="geo_id")
df = geo_mapper.aggregate_by_weighted_sum(df, "geo_id", "val", "timestamp", "population")
df = df.rename(columns={"weighted_val": "val"})
elif geo == "hsa-nci":
df = df[["hsa_nci_id", "val", "timestamp"]]
df = df[df["hsa_nci_id"] != "All"]
# The source data is reported at the HSA-NCI level, so rows that belong to the
# same HSA-NCI region repeat the same value for a given timestamp.
# drop_duplicates below keeps one representative row per (hsa_nci_id, timestamp, val).
df.drop_duplicates(["hsa_nci_id", "timestamp", "val"], inplace=True)
df = df.rename(columns={"hsa_nci_id": "geo_id"})
else:
df = df[df["county"] != "All"]
df["geo_id"] = df["fips"]
Expand Down
2 changes: 1 addition & 1 deletion nssp/tests/test_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_normal_pull_nssp_data(self, mock_socrata, params, caplog):
pd.testing.assert_frame_equal(expected_data, actual_data)

# Check that Socrata client was initialized with correct arguments
mock_socrata.assert_called_once_with("data.cdc.gov", test_token)
mock_socrata.assert_called_once_with("data.cdc.gov", test_token, timeout=50)

# Check that get method was called with correct arguments
mock_client.get.assert_any_call("rdmq-nq56", limit=50000, offset=0)
Expand Down