diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py new file mode 100644 index 000000000..4c4ac93eb --- /dev/null +++ b/src/acquisition/flusurv/api.py @@ -0,0 +1,406 @@ +""" +=============== +=== Purpose === +=============== + +Fetches FluSurv-NET data (flu hospitalization rates) from CDC. Unlike the other +CDC-hosted datasets (e.g. FluView), FluSurv is not available as a direct +download. This program emulates web browser requests for the web app and +extracts data of interest from the JSON response. + +For unknown reasons, the server appears to provide two separate rates for any +given location, epiweek, and age group. These rates are usually identical--but +not always. When two given rates differ, the first is kept. This appears to be +the behavior of the web app, at the following location: + - https://gis.cdc.gov/GRASP/Fluview/FluView3References/Main/FluView3.js:859 + +See also: + - flusurv_update.py + - https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html + - https://wwwnc.cdc.gov/eid/article/21/9/14-1912_article + - Chaves, S., Lynfield, R., Lindegren, M., Bresee, J., & Finelli, L. (2015). + The US Influenza Hospitalization Surveillance Network. Emerging Infectious + Diseases, 21(9), 1543-1550. https://dx.doi.org/10.3201/eid2109.141912. 
def fetch_json(path, payload, call_count=1, requests_impl=requests):
    """
    Send a request to the FluSurv server and return the parsed JSON response.

    Args:
        path: endpoint path, appended to the base URL
            "https://gis.cdc.gov/GRASP/Flu3/".
        payload: JSON-serializable request body. When `None`, a GET request
            is sent; otherwise the payload is JSON-encoded and POSTed.
        call_count: 1-based number of the current attempt. A response with
            status 500 is retried while `call_count <= 2`, i.e. at most two
            retries, after waiting 10 and then 20 seconds.
        requests_impl: object exposing `get` and `post` callables that accept
            `(url, headers=..., data=...)`, like the `requests` module.
            Injectable so tests can avoid real network traffic.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: if the final response status is not 200, or if the
            response's Content-Type header does not mention
            "application/json".
    """

    # it's polite to self-identify this "bot"
    delphi_url = "https://delphi.cmu.edu/index.html"
    user_agent = f"Mozilla/5.0 (compatible; delphibot/1.0; +{delphi_url})"

    # the FluSurv AMF server
    flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path

    # request headers
    headers = {
        "Accept-Encoding": "gzip",
        "User-Agent": user_agent,
    }

    # send the request and read the response
    if payload is None:
        method = requests_impl.get
        data = None
    else:
        headers["Content-Type"] = "application/json;charset=UTF-8"
        method = requests_impl.post
        data = json.dumps(payload)
    resp = method(flusurv_url, headers=headers, data=data)

    # check the HTTP status code
    if resp.status_code == 500 and call_count <= 2:
        # the server often fails with this status, so wait and retry
        delay = 10 * call_count
        print(f"got status {int(resp.status_code)}, will retry in {int(delay)} sec...")
        time.sleep(delay)
        # Forward `requests_impl` so a retry uses the same (possibly injected)
        # HTTP implementation as the first attempt instead of silently
        # falling back to the real `requests` module.
        return fetch_json(
            path,
            payload,
            call_count=call_count + 1,
            requests_impl=requests_impl,
        )
    elif resp.status_code != 200:
        raise Exception(["status code != 200", resp.status_code])

    # check response mime type
    if "application/json" not in resp.headers.get("Content-Type", ""):
        raise Exception("response is not json")

    # return the decoded json object
    return resp.json()
class FlusurvLocationFetcher:
    """
    Fetch FluSurv hospitalization rates, one location at a time.

    FluSurv metadata (location codes, recent season ids, and the maps from
    group/season ids to labels) is fetched once, when the object is created,
    and is then reused for every location requested through `get_data`.
    """

    def __init__(self, max_age_weeks):
        """
        Args:
            max_age_weeks: passed to `FlusurvMetadata`, which uses it to
                decide which seasons are recent enough to request.
        """
        self.metadata = FlusurvMetadata(max_age_weeks)

    def get_data(self, location):
        """
        Fetch and parse flu data for a given location.

        This method performs the following operations:
            - fetches location-specific FluSurv data from the CDC API
            - extracts hospitalization rates for each epiweek and group

        Args:
            location: short location name; must be a key of
                `self.metadata.location_to_code`.

        Returns:
            Dictionary keyed by epiweek, in the layout described in
            `_group_by_epiweek`.
        """
        # fetch
        print("[fetching flusurv data...]")
        data_in = self._fetch_flusurv_location(location)

        # extract
        print("[reformatting flusurv result...]")
        data_out = self._add_sex_breakdowns_ut(
            self._group_by_epiweek(data_in), location
        )

        # return
        print(f"[successfully fetched data for {location}]")
        return data_out

    def _fetch_flusurv_location(self, location):
        """
        Return the FluSurv JSON object for a given location.

        A single request is made that lists every season id in
        `self.metadata.seasonids`.

        Args:
            location: short location name; must be a key of
                `self.metadata.location_to_code`.

        Returns:
            The decoded API response. When the API reports no data, a warning
            is issued and `{"default_data": []}` is returned instead, so that
            downstream code can treat the result uniformly.
        """
        networkid, catchmentid = self.metadata.location_to_code[location]

        result = fetch_json(
            "PostPhase03DataTool",
            {
                "appversion": "Public",
                "key": "getdata",
                "injson": [
                    {
                        "networkid": networkid,
                        "catchmentid": catchmentid,
                        "seasonid": seasonid,
                    }
                    for seasonid in self.metadata.seasonids
                ],
            },
        )

        # If no data is returned (a given seasonid is not reported,
        # location codes are invalid, etc), the API returns a JSON like:
        #     {
        #         'default_data': {
        #             'response': 'No Data'
        #         }
        #     }
        #
        # If data is returned, then result["default_data"] is a list
        # and result["default_data"]["response"] doesn't exist.
        default_data = result["default_data"]
        reports_no_data = (
            isinstance(default_data, dict)
            and default_data.get("response") == "No Data"
        )
        if len(default_data) == 0 or reports_no_data:
            warn(f"warning: No data was returned from the API for {location}")
            # Return empty obs with right format to avoid downstream errors
            return {"default_data": []}

        return result

    def _group_by_epiweek(self, data):
        """
        Convert the API response for a single location into an
        epiweek-grouped dictionary.

        Args:
            data: A GRASP API response object, as returned by
                `_fetch_flusurv_location`. Its "default_data" element must be
                a list of observations, each a dictionary with (at least) the
                keys "mmwrid", "seasonid", "ageid", "sexid", "raceid", and
                "weeklyrate".

        Returns a dictionary of the format
            {
                <epiweek>: {
                    "season": <season description>,
                    <group name>: <weekly rate>,
                    ...
                }
                ...
            }
        where group names are those produced by `_groupid_to_name`. Both
        layers are `defaultdict`s: a missing epiweek yields a new empty inner
        dictionary and a missing group yields `None`.

        When several observations exist for the same epiweek and group, the
        first rate seen is kept; a warning is issued if a later one differs.
        """
        observations = data["default_data"]

        # First layer of keys is epiweeks. Second layer of keys is group
        # names. A missing top-level key creates a new empty inner dict; a
        # missing secondary key gets a default value of None.
        data_out = defaultdict(lambda: defaultdict(lambda: None))

        # `observations` is a list of dictionaries, with the format
        # [
        #   {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493},
        #   {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513},
        #   ...
        # ]
        for obs in observations:
            epiweek = mmwrid_to_epiweek(obs["mmwrid"])
            groupname = self._groupid_to_name(
                ageid=obs["ageid"], sexid=obs["sexid"], raceid=obs["raceid"]
            )
            week_data = data_out[epiweek]

            # Set season description. This will be overwritten every
            # iteration, but should always have the same value per epiweek.
            week_data["season"] = self.metadata.id_to_season[obs["seasonid"]]

            rate = obs["weeklyrate"]
            prev_rate = week_data[groupname]
            if prev_rate is None:
                # This is the first time to see a rate for this epiweek-group
                # combo
                week_data[groupname] = rate
            elif prev_rate != rate:
                # Skip and warn; a different rate was already found for this
                # epiweek-group combo
                warn((f"warning: Multiple rates seen for {epiweek} "
                      f"{groupname}, but previous value {prev_rate} does not "
                      f"equal new value {rate}. Using the first value."))

        print(f"found data for {len(data_out)} epiweeks")

        return data_out

    def _lookup_group_label(self, variable, valueid):
        """
        Return the cleaned metadata label for `valueid` within `variable`.

        Raises:
            ValueError: if the metadata has no label for this id. Without
                this check the caller would try to concatenate a string with
                `None` and fail with an uninformative `TypeError`.
        """
        # `.get` works for both plain dicts and defaultdicts, and avoids
        # inserting placeholder entries into the metadata maps.
        label = self.metadata.id_to_group.get(variable, {}).get(valueid)
        if label is None:
            raise ValueError(
                f"No label found for {variable} id {valueid}; "
                "please check for changes in the API"
            )
        return label

    def _groupid_to_name(self, ageid, sexid, raceid):
        """
        Map a triple of group ids to the name of the matching rate field.

        At most one of the three ids may be non-zero; all zeros means the
        "overall" group.

        Returns:
            A string starting with "rate_", e.g. "rate_overall" or
            "rate_age_0".

        Raises:
            ValueError: if more than one id is non-zero, if `ageid` is 6, or
                if an id has no label in the metadata.
        """
        ids = (ageid, sexid, raceid)
        zero_count = ids.count(0)
        if zero_count < 2:
            raise ValueError("Expect at least two of three group ids to be 0")

        if zero_count == 3:
            group = "overall"
        elif ageid != 0:
            # The column names used in the DB for the original age groups
            # are ordinal, such that:
            #     "rate_age_0" corresponds to age group 1, 0-4 yr
            #     "rate_age_1" corresponds to age group 2, 5-17 yr
            #     "rate_age_2" corresponds to age group 3, 18-49 yr
            #     "rate_age_3" corresponds to age group 4, 50-64 yr
            #     "rate_age_4" corresponds to age group 5, 65+ yr
            #     "rate_age_5" corresponds to age group 7, 65-74 yr
            #     "rate_age_6" corresponds to age group 8, 75-84 yr
            #     "rate_age_7" corresponds to age group 9, 85+ yr
            #
            # Group 6 was the "overall" category and not included in the
            # ordinal naming scheme. Because of that, groups 1-5 have column
            # ids equal to the ageid - 1; groups 7-9 have column ids equal
            # to ageid - 2.
            #
            # Automatically map from ageids 1-9 to column ids to match
            # the historical convention.
            if ageid <= 5:
                age_group = str(ageid - 1)
            elif ageid == 6:
                # Ageid of 6 used to be used for the "overall" category.
                # Now "overall" is represented by a valueid of 0, and ageid
                # of 6 is not used for any group. If we see an ageid of 6,
                # something has gone wrong.
                raise ValueError("Ageid cannot be 6; please check for changes in the API")
            elif ageid <= 9:
                age_group = str(ageid - 2)
            else:
                age_group = self._lookup_group_label("Age", ageid)
            group = "age_" + age_group
        elif sexid != 0:
            group = "sex_" + self._lookup_group_label("Sex", sexid)
        else:
            # Only raceid can be non-zero at this point.
            group = "race_" + self._lookup_group_label("Race", raceid)

        return "rate_" + group

    def _add_sex_breakdowns_ut(self, data, location):
        """
        Make sure every epiweek of UT data has an entry for each sex group.

        UT doesn't have sex breakdowns available at least for 2022-23, so
        missing sex groups are filled in with `None` to avoid downstream
        errors. Data for any other location is returned untouched.

        Args:
            data: epiweek-grouped dictionary; modified in place.
            location: short location name.

        Returns:
            The same `data` object.
        """
        if location == "UT":
            for week_data in data.values():
                for group in SEX_GROUPS:
                    week_data.setdefault(group, None)
        return data
+ 'master_lookup' = [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True} + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 
'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ] + +All 23 strata are available starting epiweek 200935, inclusive. + +The previous version of the GRASP API +(https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) used the following age-id +mapping, as described in the top-level "ages" element, below. See +`./reference/old_grasp_result.json` for a full example response. + 'ages' = [ + {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, + {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} + ] + +In addition to the new age, race, and sex breakdowns, the group id for overall +reporting has changed from 6 to 0. Age ids 1-5 and 7-9 retain the same the +same meanings; age id 6 is not reported. 
# Names of the rate fields reported before the additional strata were added:
# the overall rate plus the eight ordinally-named age groups.
HISTORICAL_GROUPS = (
    "rate_overall",
    "rate_age_0",
    "rate_age_1",
    "rate_age_2",
    "rate_age_3",
    "rate_age_4",
    "rate_age_5",
    "rate_age_6",
    "rate_age_7",
)
# Age groups named after their age range rather than by ordinal.
NEW_AGE_GROUPS = (
    "rate_age_18t29",
    "rate_age_30t39",
    "rate_age_40t49",
    "rate_age_5t11",
    "rate_age_12t17",
    "rate_age_lt18",
    "rate_age_gte18",
)
RACE_GROUPS = (
    "rate_race_white",
    "rate_race_black",
    "rate_race_hisp",
    "rate_race_asian",
    "rate_race_natamer",
)
SEX_GROUPS = ("rate_sex_male", "rate_sex_female")
# Every rate field, in the order historical, new age, race, sex.
EXPECTED_GROUPS = HISTORICAL_GROUPS + NEW_AGE_GROUPS + RACE_GROUPS + SEX_GROUPS


MAX_AGE_TO_CONSIDER_WEEKS = 52


# Map from full state name to upper-cased state id, built from the GeoMapper
# state crosswalk.
gmpr = GeoMapper()
map_state_names = {
    record["state_name"]: record["state_id"].upper()
    for record in gmpr.get_crosswalk("state", "state").to_dict(orient="records")
}

# Region names that are not plain state names.
map_nonstandard_names = {
    "New York - Albany": "NY_albany",
    "New York - Rochester": "NY_rochester",
}

# Non-standard names take precedence if a key appears in both maps.
MAP_REGION_NAMES_TO_ABBR = {**map_state_names, **map_nonstandard_names}

# Short names for the "Entire Network" catchment of each network.
MAP_ENTIRE_NETWORK_NAMES = {
    "FluSurv-NET": "network_all",
    "EIP": "network_eip",
    "IHSP": "network_ihsp",
}
This appears to be -the behavior of the web app, at the following location: - - https://gis.cdc.gov/GRASP/Fluview/FluView3References/Main/FluView3.js:859 - -See also: - - flusurv_update.py - - https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html - - https://wwwnc.cdc.gov/eid/article/21/9/14-1912_article - - Chaves, S., Lynfield, R., Lindegren, M., Bresee, J., & Finelli, L. (2015). - The US Influenza Hospitalization Surveillance Network. Emerging Infectious - Diseases, 21(9), 1543-1550. https://dx.doi.org/10.3201/eid2109.141912. - - -================= -=== Changelog === -================= - -2017-05-22 - * rewrite for new data source -2017-02-17 - * handle discrepancies by prefering more recent values -2017-02-03 - + initial version -""" - -# standard library -from datetime import datetime -import json -import time - -# third party -import requests - -# first party -from delphi.utils.epidate import EpiDate - - -# all currently available FluSurv locations and their associated codes -# the number pair represents NetworkID and CatchmentID -location_codes = { - "CA": (2, 1), - "CO": (2, 2), - "CT": (2, 3), - "GA": (2, 4), - "IA": (3, 5), - "ID": (3, 6), - "MD": (2, 7), - "MI": (3, 8), - "MN": (2, 9), - "NM": (2, 11), - "NY_albany": (2, 13), - "NY_rochester": (2, 14), - "OH": (3, 15), - "OK": (3, 16), - "OR": (2, 17), - "RI": (3, 18), - "SD": (3, 19), - "TN": (2, 20), - "UT": (3, 21), - "network_all": (1, 22), - "network_eip": (2, 22), - "network_ihsp": (3, 22), -} - - -def fetch_json(path, payload, call_count=1, requests_impl=requests): - """Send a request to the server and return the parsed JSON response.""" - - # it's polite to self-identify this "bot" - delphi_url = "https://delphi.cmu.edu/index.html" - user_agent = f"Mozilla/5.0 (compatible; delphibot/1.0; +{delphi_url})" - - # the FluSurv AMF server - flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path - - # request headers - headers = { - "Accept-Encoding": "gzip", - "User-Agent": user_agent, - } - if payload is 
not None: - headers["Content-Type"] = "application/json;charset=UTF-8" - - # send the request and read the response - if payload is None: - method = requests_impl.get - data = None - else: - method = requests_impl.post - data = json.dumps(payload) - resp = method(flusurv_url, headers=headers, data=data) - - # check the HTTP status code - if resp.status_code == 500 and call_count <= 2: - # the server often fails with this status, so wait and retry - delay = 10 * call_count - print(f"got status {int(resp.status_code)}, will retry in {int(delay)} sec...") - time.sleep(delay) - return fetch_json(path, payload, call_count=call_count + 1) - elif resp.status_code != 200: - raise Exception(["status code != 200", resp.status_code]) - - # check response mime type - if "application/json" not in resp.headers.get("Content-Type", ""): - raise Exception("response is not json") - - # return the decoded json object - return resp.json() - - -def fetch_flusurv_object(location_code): - """Return decoded FluSurv JSON object for the given location.""" - return fetch_json( - "PostPhase03GetData", - { - "appversion": "Public", - "networkid": location_code[0], - "cacthmentid": location_code[1], - }, - ) - - -def mmwrid_to_epiweek(mmwrid): - """Convert a CDC week index into an epiweek.""" - - # Add the difference in IDs, which are sequential, to a reference epiweek, - # which is 2003w40 in this case. - epiweek_200340 = EpiDate(2003, 9, 28) - mmwrid_200340 = 2179 - return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() - - -def extract_from_object(data_in): - """ - Given a FluSurv data object, return hospitaliation rates. - - The returned object is indexed first by epiweek, then by zero-indexed age - group. 
- """ - - # an object to hold the result - data_out = {} - - # iterate over all seasons and age groups - for obj in data_in["busdata"]["dataseries"]: - if obj["age"] in (10, 11, 12): - # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): - # capture as-of-yet undefined age groups 10, 11, and 12 - continue - age_index = obj["age"] - 1 - # iterage over weeks - for mmwrid, _, _, rate in obj["data"]: - epiweek = mmwrid_to_epiweek(mmwrid) - if epiweek not in data_out: - # weekly rate of each age group - data_out[epiweek] = [None] * 9 - prev_rate = data_out[epiweek][age_index] - if prev_rate is None: - # this is the first time to see a rate for this epiweek/age - data_out[epiweek][age_index] = rate - elif prev_rate != rate: - # a different rate was already found for this epiweek/age - format_args = (epiweek, obj["age"], prev_rate, rate) - print("warning: %d %d %f != %f" % format_args) - - # sanity check the result - if len(data_out) == 0: - raise Exception("no data found") - - # print the result and return flu data - print(f"found data for {len(data_out)} weeks") - return data_out - - -def get_data(location_code): - """ - Fetch and parse flu data for the given location. 
- - This method performs the following operations: - - fetches FluSurv data from CDC - - extracts and returns hospitaliation rates - """ - - # fetch - print("[fetching flusurv data...]") - data_in = fetch_flusurv_object(location_code) - - # extract - print("[extracting values...]") - data_out = extract_from_object(data_in) - - # return - print("[scraped successfully]") - return data_out - - -def get_current_issue(): - """Scrape the current issue from the FluSurv main page.""" - - # fetch - data = fetch_json("GetPhase03InitApp?appVersion=Public", None) - - # extract - date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") - - # convert and return - return EpiDate(date.year, date.month, date.day).get_ew() diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 1aa8e9885..b0073be4c 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -70,15 +70,17 @@ # standard library import argparse +from warnings import warn # third party import mysql.connector # first party -from delphi.epidata.acquisition.flusurv import flusurv import delphi.operations.secrets as secrets from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks +from .api import FlusurvLocationFetcher +from .constants import (MAX_AGE_TO_CONSIDER_WEEKS, EXPECTED_GROUPS) def get_rows(cur): @@ -90,17 +92,13 @@ def get_rows(cur): return num -def update(issue, location_name, test_mode=False): - """Fetch and store the currently avialble weekly FluSurv dataset.""" - - # fetch data - location_code = flusurv.location_codes[location_name] - print("fetching data for", location_name, location_code) - data = flusurv.get_data(location_code) +def update(fetcher, location, test_mode=False): + """Fetch and store the currently available weekly FluSurv dataset.""" + # Fetch location-specific data + data = fetcher.get_data(location) # metadata epiweeks = sorted(data.keys()) - location = 
location_name release_date = str(EpiDate.today()) # connect to the database @@ -111,40 +109,61 @@ def update(issue, location_name, test_mode=False): print(f"rows before: {int(rows1)}") # SQL for insert/update - sql = """ + nonrelease_fields = ("issue", "epiweek", "location", "lag", "season") + EXPECTED_GROUPS + other_field_names = ", ".join( + f"`{name}`" for name in nonrelease_fields + ) + other_field_values = ", ".join( + f"%({name})s" for name in nonrelease_fields + ) + # Updates on duplicate key only for release date + signal fields, not metadata. + other_field_coalesce = ", ".join( + f"`{name}` = coalesce(%({name})s, `{name}`)" for name in EXPECTED_GROUPS + ) + + sql = f""" INSERT INTO `flusurv` ( - `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, - `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, - `rate_age_5`, `rate_age_6`, `rate_age_7` + `release_date`, + {other_field_names} ) VALUES ( - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + %(release_date)s, + {other_field_values} ) ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `rate_age_0` = coalesce(%s, `rate_age_0`), - `rate_age_1` = coalesce(%s, `rate_age_1`), - `rate_age_2` = coalesce(%s, `rate_age_2`), - `rate_age_3` = coalesce(%s, `rate_age_3`), - `rate_age_4` = coalesce(%s, `rate_age_4`), - `rate_overall` = coalesce(%s, `rate_overall`), - `rate_age_5` = coalesce(%s, `rate_age_5`), - `rate_age_6` = coalesce(%s, `rate_age_6`), - `rate_age_7` = coalesce(%s, `rate_age_7`) + `release_date` = least(`release_date`, %(release_date)s), + {other_field_coalesce} """ # insert/update each row of data (one per epiweek) for epiweek in epiweeks: - lag = delta_epiweeks(epiweek, issue) - if lag > 52: + lag = delta_epiweeks(epiweek, fetcher.metadata.issue) + if lag > MAX_AGE_TO_CONSIDER_WEEKS: # Ignore values older than one year, as (1) they are assumed not to # change, and (2) it would adversely affect database performance if all # values 
(including duplicates) were stored on each run. continue - args_meta = [release_date, issue, epiweek, location, lag] - args_insert = data[epiweek] - args_update = [release_date] + data[epiweek] - cur.execute(sql, tuple(args_meta + args_insert + args_update)) + + missing_expected_groups = EXPECTED_GROUPS - data[epiweek].keys() + # Remove the season description since we also store it in each epiweek obj + unexpected_groups = data[epiweek].keys() - EXPECTED_GROUPS - {"season"} + if len(missing_expected_groups) != 0: + raise Exception( + f"{location} {epiweek} data is missing group(s) {missing_expected_groups}" + ) + if len(unexpected_groups) != 0: + raise Exception( + f"{location} {epiweek} data includes new group(s) {unexpected_groups}" + ) + + args_meta = { + "release_date": release_date, + "issue": fetcher.metadata.issue, + "epiweek": epiweek, + "location": location, + "lag": lag + } + cur.execute(sql, {**args_meta, **data[epiweek]}) # commit and disconnect rows2 = get_rows(cur) @@ -175,18 +194,19 @@ def main(): # fmt: on args = parser.parse_args() - # scrape current issue from the main page - issue = flusurv.get_current_issue() - print(f"current issue: {int(issue)}") + fetcher = FlusurvLocationFetcher(MAX_AGE_TO_CONSIDER_WEEKS) + print(f"current issue: {int(fetcher.metadata.issue)}") # fetch flusurv data if args.location == "all": # all locations - for location in flusurv.location_codes.keys(): - update(issue, location, args.test) + for location in fetcher.metadata.locations: + update(fetcher, location, args.test) else: # single location - update(issue, args.location, args.test) + if (args.location not in fetcher.metadata.locations): + raise KeyError("Requested location {args.location} not available") + update(fetcher, args.location, args.test) if __name__ == "__main__": diff --git a/src/acquisition/flusurv/reference/new_grasp_location_result.json b/src/acquisition/flusurv/reference/new_grasp_location_result.json new file mode 100644 index 000000000..fe4fc09d9 --- 
/dev/null +++ b/src/acquisition/flusurv/reference/new_grasp_location_result.json @@ -0,0 +1,34 @@ +### New location-specific API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool?appVersion=Public +# with payload +# {"appversion": "Public", "key": "getdata", "injson": [ +# {"networkid": 1, "catchmentid": 22, "seasonid": 30 }, +# {"networkid": 1, "catchmentid": 22, "seasonid": 49} +# ]} +# The "seasonid" parameter is required. To fetch all historical data, +# each season must be listed separately. Seasonids that don't exist don't +# cause errors, they are simply ignored. +# +# However, if no listed seasonids exist for the specified location, then +# the returned JSON is: +# { +# 'default_data': { +# 'response': 'No Data' +# } +# } +{ + # Data!! Format is list of dicts. Each obs' fields are + # labelled, so we could easily convert this to dataframe. + 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 
0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ... + ] +} diff --git a/src/acquisition/flusurv/reference/new_grasp_result.json b/src/acquisition/flusurv/reference/new_grasp_result.json new file mode 100644 index 000000000..dca59c6f0 --- /dev/null +++ b/src/acquisition/flusurv/reference/new_grasp_result.json @@ -0,0 +1,167 @@ +### New API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool?appVersion=Public +# with payload +# {"appversion": "Public", "key": "", "injson": []} +{ + # Last data update date + 'loaddatetime': 'Aug 26, 2023', + # Text appearing on dashboard + 'app_text': [ + { + 'description': 'SplashScreenDisclaimerText', + 'text': 'The Influenza Hospitalization Surveillance Network (FluSurv-NET) conducts population-based surveillance for laboratory-confirmed influenza-associated hospitalizations in children (persons younger than 18 years) and adults. The current network covers over 70 counties in the 10 Emerging Infection..." 
+ } + ], + # IDs for different data sources/networks + 'networks': [ + {'networkid': 1, 'name': 'FluSurv-NET'}, + {'networkid': 2, 'name': 'EIP'}, + {'networkid': 3, 'name': 'IHSP'} + ], + # IDs (network ID + catchment ID) specifying geos and data sources available + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 
'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + # Season labels, descriptions, IDs, and date ranges + 'seasons': [ + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2012-13', 'enabled': True, 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2015-16', 'enabled': True, 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'IncludeWeeklyRatesAndStrata': True}, + ... + ], + # Descriptions of data reporting groups (age, race, sex, and overall). Careful, some of these overlap: + # "parentid" appears to link a subgroup to the broader group containing it (e.g. '65-74 yr' has parentid 5, i.e. '65+ yr'). 
+ 'master_lookup': [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 
'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ], + # Colors by ID, presumably used in dashboard. Appears to be only some of the age groups. Also doesn't agree with "master_lookup" above. + 'colors_qualitative': [ + {'id': 1, 'hex': '#a6cee3'}, + {'id': 2, 'hex': '#1f78b4'}, + {'id': 3, 'hex': '#b2df8a'}, + {'id': 4, 'hex': '#33a02c'}, + {'id': 5, 'hex': '#fb9a99'}, + {'id': 6, 'hex': '#e31a1c'}, + {'id': 7, 'hex': '#fdbf6f'}, + {'id': 8, 'hex': '#ff7f00'}, + {'id': 9, 'hex': '#cab2d6'}, + {'id': 10, 'hex': '#6a3d9a'}, + {'id': 12, 'hex': '#b15928'} + ], + # List of ageids available for each network+catchment combo + 'age_catchment_lookup': [ + {'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 1, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 2, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 3, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 4, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 5, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 7, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 8, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 9, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 10, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 11, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 12, 
'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 21, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 22, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 97, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 98, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 99, 'NetworkId': 1, 'CatchmentID': 22}, + {'NetworkId': 2, 'CatchmentID': 1}, + {'ageID': 1, 'NetworkId': 2, 'CatchmentID': 1}, + {'ageID': 2, 'NetworkId': 2, 'CatchmentID': 1}, + ... + ], + # List of seasonids available for each network+catchment combo + 'season_catchment_lookup': [ + {'seasonid': 49, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 50, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 51, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 52, 'NetworkId': 1, 'CatchmentID': 22}, + ... + {'raceid': 4, 'NetworkId': 2, 'CatchmentID': 14}, + {'raceid': 5, 'NetworkId': 2, 'CatchmentID': 14}, + {'raceid': 99, 'NetworkId': 2, 'CatchmentID': 14}, + {'NetworkId': 2, 'CatchmentID': 17}, + ... + ], + # Data!! But only for the overall category. Format is list + # of dicts. Each obs' fields are labelled, so we could easily + # convert this to dataframe. 
+ 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ... + ], + # Mapping each mmwrid to a week number, season, and date. Could use this instead of our current epoch-based function. 
+ 'mmwr': [ + ..., + {'mmwrid': 2828, 'weekend': '2016-03-12', 'weeknumber': 10, 'weekstart': '2016-03-06', 'year': 2016, 'yearweek': 201610, 'seasonid': 55, 'label': 'Mar-12-2016', 'weekendlabel': 'Mar 12, 2016', 'weekendlabel2': 'Mar-12-2016'}, + {'mmwrid': 2885, 'weekend': '2017-04-15', 'weeknumber': 15, 'weekstart': '2017-04-09', 'year': 2017, 'yearweek': 201715, 'seasonid': 56, 'label': 'Apr-15-2017', 'weekendlabel': 'Apr 15, 2017', 'weekendlabel2': 'Apr-15-2017'}, + {'mmwrid': 2911, 'weekend': '2017-10-14', 'weeknumber': 41, 'weekstart': '2017-10-08', 'year': 2017, 'yearweek': 201741, 'seasonid': 57, 'label': 'Oct-14-2017', 'weekendlabel': 'Oct 14, 2017', 'weekendlabel2': 'Oct-14-2017'}, + {'mmwrid': 2928, 'weekend': '2018-02-10', 'weeknumber': 6, 'weekstart': '2018-02-04', 'year': 2018, 'yearweek': 201806, 'seasonid': 57, 'label': 'Feb-10-2018', 'weekendlabel': 'Feb 10, 2018', 'weekendlabel2': 'Feb-10-2018'}, + {'mmwrid': 2974, 'weekend': '2018-12-29', 'weeknumber': 52, 'weekstart': '2018-12-23', 'year': 2018, 'yearweek': 201852, 'seasonid': 58, 'label': 'Dec-29-2018', 'weekendlabel': 'Dec 29, 2018', 'weekendlabel2': 'Dec-29-2018'}, + {'mmwrid': 3031, 'weekend': '2020-02-01', 'weeknumber': 5, 'weekstart': '2020-01-26', 'year': 2020, 'yearweek': 202005, 'seasonid': 59, 'label': 'Feb-01-2020', 'weekendlabel': 'Feb 01, 2020', 'weekendlabel2': 'Feb-01-2020'}, + {'mmwrid': 3037, 'weekend': '2020-03-14', 'weeknumber': 11, 'weekstart': '2020-03-08', 'year': 2020, 'yearweek': 202011, 'seasonid': 59, 'label': 'Mar-14-2020', 'weekendlabel': 'Mar 14, 2020', 'weekendlabel2': 'Mar-14-2020'}, + {'mmwrid': 3077, 'weekend': '2020-12-19', 'weeknumber': 51, 'weekstart': '2020-12-13', 'year': 2020, 'yearweek': 202051, 'seasonid': 60, 'label': 'Dec-19-2020', 'weekendlabel': 'Dec 19, 2020', 'weekendlabel2': 'Dec-19-2020'}, + {'mmwrid': 3140, 'weekend': '2022-03-05', 'weeknumber': 9, 'weekstart': '2022-02-27', 'year': 2022, 'yearweek': 202209, 'seasonid': 61, 'label': 
'Mar-05-2022', 'weekendlabel': 'Mar 05, 2022', 'weekendlabel2': 'Mar-05-2022'}, + {'mmwrid': 3183, 'weekend': '2022-12-31', 'weeknumber': 52, 'weekstart': '2022-12-25', 'year': 2022, 'yearweek': 202252, 'seasonid': 62, 'label': 'Dec-31-2022', 'weekendlabel': 'Dec 31, 2022', 'weekendlabel2': 'Dec-31-2022'}, + ] +} diff --git a/src/acquisition/flusurv/reference/old_grasp_location_result.json b/src/acquisition/flusurv/reference/old_grasp_location_result.json new file mode 100644 index 000000000..6ef4952e8 --- /dev/null +++ b/src/acquisition/flusurv/reference/old_grasp_location_result.json @@ -0,0 +1,42 @@ +### Old location-specific API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03GetData?appVersion=Public +# with payload +# { +# "appversion": "Public", +# "networkid": location_code[0], +# "cacthmentid": location_code[1], +# } +{ + 'busdata': { + 'datafields': ['mmwrid', 'weeknumber', 'rate', 'weeklyrate'], + "dataseries": [ + {'season': 55, 'age': 8, 'data': [[2806, 40, 0.7, 0.7], [2807, 41, 0.7, 0], [2808, 42, 1.4, 0.7], [2809, 43, 1.4, 0], [2810, 44, 2.1, 0.7], [2811, 45, 2.1, 0], [2812, 46, 2.1, 0], [2813, 47, 2.1, 0], [2814, 48, 2.8, 0.7], [2815, 49, 4.2, 1.4], [2816, 50, 4.2, 0], [2817, 51, 6.4, 2.1], [2818, 52, 8.5, 2.1], [2819, 1, 13.4, 4.9], [2820, 2, 17, 3.5], [2821, 3, 20.5, 3.5], [2822, 4, 26.2, 5.7], [2823, 5, 30.4, 4.2], [2824, 6, 40.3, 9.9], [2825, 7, 47.4, 7.1], [2826, 8, 53.7, 6.4], [2827, 9, 63.6, 9.9], [2828, 10, 74.9, 11.3], [2829, 11, 82, 7.1], [2830, 12, 91.2, 9.2], [2831, 13, 98.3, 7.1], [2832, 14, 103.9, 5.7], [2833, 15, 106.8, 2.8], [2834, 16, 109.6, 2.8], [2835, 17, 111.7, 2.1]]}, + {'season': 57, 'age': 2, 'data': [[2910, 40, 0, 0], [2911, 41, 0, 0], [2912, 42, 0, 0], [2913, 43, 0, 0], [2914, 44, 0.2, 0.2], [2915, 45, 0.4, 0.2], [2916, 46, 0.8, 0.4], [2917, 47, 0.8, 0], [2918, 48, 0.8, 0], [2919, 49, 1.3, 0.6], [2920, 50, 1.7, 0.4], [2921, 51, 1.9, 0.2], [2922, 52, 3.1, 1.1], [2923, 1, 4.8, 1.7], [2924, 2, 6.9, 2.1], [2925, 3, 7.9, 
1], [2926, 4, 9.2, 1.3], [2927, 5, 10.5, 1.3], [2928, 6, 11.3, 0.8], [2929, 7, 11.5, 0.2], [2930, 8, 12.6, 1.1], [2931, 9, 13.8, 1.1], [2932, 10, 15.1, 1.3], [2933, 11, 15.9, 0.8], [2934, 12, 16.3, 0.4], [2935, 13, 16.7, 0.4], [2936, 14, 16.9, 0.2], [2937, 15, 16.9, 0], [2938, 16, 16.9, 0], [2939, 17, 17, 0.2]]}, + ... + ] + }, + 'seasons': [ + {'description': 'Season 2003-04', 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'color': 'Dark Purple', 'color_hexvalue': '#4A298B'}, + {'description': 'Season 2004-05', 'endweek': 2283, 'label': '2004-05', 'seasonid': 44, 'startweek': 2232, 'color': 'Brown', 'color_hexvalue': '#76522E'}, + {'description': 'Season 2005-06', 'endweek': 2335, 'label': '2005-06', 'seasonid': 45, 'startweek': 2284, 'color': 'Salmon', 'color_hexvalue': '#C76751'}, + {'description': 'Season 2006-07', 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'color': 'Purple', 'color_hexvalue': '#B92CC6'}, + {'description': 'Season 2007-08', 'endweek': 2439, 'label': '2007-08', 'seasonid': 47, 'startweek': 2388, 'color': 'Blue', 'color_hexvalue': '#2A44C8'}, + {'description': 'Season 2008-09', 'endweek': 2487, 'label': '2008-09', 'seasonid': 48, 'startweek': 2440, 'color': 'Green', 'color_hexvalue': '#299A42'}, + {'description': 'Season 2009-10', 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'color': 'Red', 'color_hexvalue': '#A12732'}, + {'description': 'Season 2010-11', 'endweek': 2596, 'label': '2010-11', 'seasonid': 50, 'startweek': 2545, 'color': 'Mustard', 'color_hexvalue': '#BDAA2A'}, + {'description': 'Season 2011-12', 'endweek': 2648, 'label': '2011-12', 'seasonid': 51, 'startweek': 2597, 'color': 'Light Blue', 'color_hexvalue': '#3289A4'}, + {'description': 'Season 2012-13', 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'color': 'Grey', 'color_hexvalue': '#5E5E5E'}, + {'description': 'Season 2013-14', 'endweek': 2752, 'label': '2013-14', 
'seasonid': 53, 'startweek': 2701, 'color': 'Light Blue', 'color_hexvalue': '#42B5C8'}, + {'description': 'Season 2014-15', 'endweek': 2805, 'label': '2014-15', 'seasonid': 54, 'startweek': 2753, 'color': 'Mint', 'color_hexvalue': '#4EC87B'}, + {'description': 'Season 2015-16', 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'color': 'Orange', 'color_hexvalue': '#C7852E'}, + {'description': 'Season 2016-17', 'endweek': 2909, 'label': '2016-17', 'seasonid': 56, 'startweek': 2858, 'color': 'Purple', 'color_hexvalue': '#7F42A9'}, + {'description': 'Season 2017-18', 'endweek': 2961, 'label': '2017-18', 'seasonid': 57, 'startweek': 2910, 'color': 'Lime', 'color_hexvalue': '#8AC73C'}, + {'description': 'Season 2018-19', 'endweek': 3013, 'label': '2018-19', 'seasonid': 58, 'startweek': 2962, 'color': 'Brown', 'color_hexvalue': '#A06C3A'}, + {'description': 'Season 2019-20', 'endweek': 3065, 'label': '2019-20', 'seasonid': 59, 'startweek': 3014, 'color': 'Light Orange', 'color_hexvalue': '#FFCF48'} + ], + 'mmwr': [ + {'mmwrid': 2179, 'weekend': '2003-10-04', 'weeknumber': 40, 'weekstart': '2003-09-28', 'year': 2003, 'yearweek': 200340, 'seasonid': 43, 'label': '40', 'weekendlabel': 'Oct 04, 2003', 'weekendlabel2': 'Oct-04-2003'}, + {'mmwrid': 2180, 'weekend': '2003-10-11', 'weeknumber': 41, 'weekstart': '2003-10-05', 'year': 2003, 'yearweek': 200341, 'seasonid': 43, 'label': '', 'weekendlabel': 'Oct 11, 2003', 'weekendlabel2': 'Oct-11-2003'}, + {'mmwrid': 2181, 'weekend': '2003-10-18', 'weeknumber': 42, 'weekstart': '2003-10-12', 'year': 2003, 'yearweek': 200342, 'seasonid': 43, 'label': '', 'weekendlabel': 'Oct 18, 2003', 'weekendlabel2': 'Oct-18-2003'}, + ... 
+ ] +} diff --git a/src/acquisition/flusurv/reference/old_grasp_result.json b/src/acquisition/flusurv/reference/old_grasp_result.json new file mode 100644 index 000000000..14fb72b66 --- /dev/null +++ b/src/acquisition/flusurv/reference/old_grasp_result.json @@ -0,0 +1,89 @@ +### Old API response from https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp?appVersion=Public +{ + 'loaddatetime': 'Mar 20, 2021', + 'disclaimer': { + 'id': 1, + 'splashscreensisclaimersext': 'The Influenza Hospitalization Surveillance Network (FluSurv-NET) conducts population-based surveillance for laboratory-confirmed influenza-associated hospitalizations in children (persons younger than 18 years) and adults. The current network covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, and TN) and three additional states (MI, OH, and UT). The network represents approximately 8.5% of US population (~27 million people).

Cases are identified by reviewing hospital, laboratory, and admission databases and infection control logs for patients hospitalized during the influenza season with a documented positive influenza test (i.e., viral culture, direct/indirect fluorescent antibody assay (DFA/IFA), reverse transcription-polymerase chain reaction (RT-PCR), or a rapid influenza diagnostic test (RIDT)).

Data gathered are used to estimate age-specific hospitalization rates on a weekly basis, and describe characteristics of persons hospitalized with severe influenza illness. Laboratory-confirmation is dependent on clinician-ordered influenza testing. Therefore, the rates provided are likely to be underestimated as influenza-related hospitalizations can be missed, either because testing is not performed, or because cases may be attributed to other causes of pneumonia or other common influenza-related complications.

FluSurv-NET hospitalization data are preliminary and subject to change as more data become available. Please use the following citation when referencing these data: “FluView: Influenza Hospitalization Surveillance Network, Centers for Disease Control and Prevention. WEBSITE. Accessed on DATE”.', + 'pagedisclaimertext': "The Influenza Hospitalization Surveillance Network (FluSurv-NET) data are preliminary and subject to change as more data become available. \r\n All incidence rates are unadjusted. FluSurv-NET conducts population-based surveillance for laboratory-confirmed influenza associated \r\n hospitalizations in children <18 years of age (since 2003-2004 influenza season) and adults (since 2005-2006 influenza season). \r\n The FluSurv-NET covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, TN) \r\n and additional Influenza Hospitalization Surveillance Project (IHSP) states. The IHSP began during the 2009-2010 season to enhance \r\n surveillance during the 2009 H1N1 pandemic. IHSP sites included IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, \r\n and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014 season and later. Cumulative and weekly unadjusted incidence rates (per 100,000 population) are calculated using the National Center for Health Statistics' (NCHS) \r\n population estimates for the counties included in the surveillance catchment area. The rates provided are likely to be underestimated as \r\n influenza-associated hospitalizations can be missed if influenza is not suspected and tested for.", + 'surveillancehelptext': 'The Emerging Infections Program (EIP) has conducted ongoing population-based influenza-hospitalization surveillance since the 2003-2004 season. EIP sites include counties within CA, CO, CT, GA, MD, MN, NM, NY, OR, TN. 
The Influenza Hospitalization Surveillance Project (IHSP) began during the 2009-2010 season to enhance surveillance during the 2009 H1N1 pandemic. IHSP sites included counties within IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014, 2014-15 and 2015-2016 seasons. Together, the EIP and IHSP sites comprise the Influenza Hospitalization Surveillance Network (FluSurv-NET). The FluSurv-NET represents approximately 9% of the US population (~27 million people). FluSurv-NET hospitalization data are preliminary and subject to change as more data become available. All incidence rates are unadjusted.', + 'groupbyhelptext': 'Age Groups: During the 2003-2004 and 2004-2005 seasons, Emerging Infections Program (EIP) sites conducted population-based influenza-hospitalization surveillance for pediatric (persons younger than 18 years) cases only. Surveillance for adults began during the 2005-2006 season and all-age surveillance for laboratory-confirmed influenza associated hospitalizations has been ongoing since then. Seasons: Most calendar years can be divided into 52 epidemiologic weeks (MMWR weeks), but occasionally some years will have 53 weeks. For the Influenza Surveillance Network (FluSurv-Net), the influenza season is normally from October 1 through April 30 (MMWR Week 40-17). However, due to the onset of the influenza A (H1N1)pdm09 pandemic in spring of 2009, the 2008-2009 influenza season ended April 14, 2009 and the 2009-2010 season began September 1, 2009 (MMWR Week 35). The number of sites contributing to the influenza-hospitalization surveillance network has changed over the years. 
Please, refer to the Surveillance Area Help Button for more detailed information.', + 'downloaddatadisclaimertext': "The Influenza Hospitalization Surveillance Network (FluSurv-NET) data are preliminary and subject to change as more data become available. All incidence rates are unadjusted. FluSurv-NET conducts population-based surveillance for laboratory-confirmed influenza related hospitalizations in children <18 years of age (since 2003-2004 influenza season) and adults (since 2005-2006 influenza season). The FluSurv-NET covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, TN) and additional Influenza Hospitalization Surveillance Project (IHSP) states. The IHSP began during the 2009-2010 season to enhance surveillance during the 2009 H1N1 pandemic. IHSP sites included IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014 , 2014-2015 and 2015-2016 seasons. Cumulative and weekly unadjusted incidence rates (per 100,000 population) are calculated using the National Center for Health Statistics' (NCHS) population estimates for the counties included in the surveillance catchment area. The rates provided are likely to be underestimated as influenza-related hospitalizations can be missed, either because testing is not performed, or because cases may be attributed to other causes of pneumonia or other common influenza-related complications." 
+ }, + 'networks': [ + {'networkid': 1, 'name': 'FluSurv-NET'}, + {'networkid': 2, 'name': 'EIP'}, + {'networkid': 3, 'name': 'IHSP'} + ], + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 
'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + 'seasons': [ + {'description': 'Season 2009-10', 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'color': 'Red', 'color_hexvalue': '#A12732'}, + {'description': 'Season 2010-11', 'endweek': 2596, 'label': '2010-11', 'seasonid': 50, 'startweek': 2545, 'color': 'Mustard', 'color_hexvalue': '#BDAA2A'}, + {'description': 'Season 2011-12', 'endweek': 2648, 'label': '2011-12', 'seasonid': 51, 'startweek': 2597, 'color': 'Light Blue', 'color_hexvalue': '#3289A4'}, + {'description': 'Season 2012-13', 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'color': 'Grey', 'color_hexvalue': '#5E5E5E'}, + {'description': 'Season 2013-14', 'endweek': 2752, 'label': '2013-14', 'seasonid': 53, 'startweek': 2701, 'color': 'Light Blue', 'color_hexvalue': '#42B5C8'}, + {'description': 'Season 2014-15', 'endweek': 2805, 'label': '2014-15', 'seasonid': 54, 'startweek': 2753, 'color': 'Mint', 'color_hexvalue': '#4EC87B'}, + {'description': 'Season 2015-16', 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'color': 'Orange', 'color_hexvalue': '#C7852E'}, + {'description': 'Season 2016-17', 'endweek': 2909, 'label': '2016-17', 'seasonid': 56, 'startweek': 2858, 'color': 
'Purple', 'color_hexvalue': '#7F42A9'}, + {'description': 'Season 2017-18', 'endweek': 2961, 'label': '2017-18', 'seasonid': 57, 'startweek': 2910, 'color': 'Lime', 'color_hexvalue': '#8AC73C'}, + {'description': 'Season 2018-19', 'endweek': 3013, 'label': '2018-19', 'seasonid': 58, 'startweek': 2962, 'color': 'Brown', 'color_hexvalue': '#A06C3A'}, + {'description': 'Season 2019-20', 'endweek': 3065, 'label': '2019-20', 'seasonid': 59, 'startweek': 3014, 'color': 'Light Orange', 'color_hexvalue': '#FFCF48'} + ], + 'ages': [ + {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'}, + {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'} + ], + 'busdata': { + 'datafields': ['mmwrid', 'weeknumber', 'rate', 'weeklyrate'], + 'dataseries': [ + ..., + {'season': 56, 'age': 4, 'data': [[2858, 40, 0.1, 0.1], [2859, 41, 0.2, 0.1], [2860, 42, 0.3, 0.1], [2861, 43, 0.5, 0.1], [2862, 44, 0.6, 0.1], [2863, 45, 0.8, 0.2], [2864, 46, 1, 0.2], [2865, 47, 1.4, 0.4], [2866, 48, 1.7, 0.4], [2867, 49, 2.3, 0.5], [2868, 50, 3.3, 1.1], [2869, 51, 4.8, 1.5], [2870, 52, 7.8, 2.9], [2871, 1, 11.7, 4], [2872, 2, 15.7, 3.9], [2873, 3, 19.5, 3.8], [2874, 4, 22.9, 3.4], [2875, 5, 26.3, 3.4], [2876, 6, 31.1, 4.8], [2877, 7, 36.1, 5], [2878, 8, 41.5, 5.3], [2879, 9, 44.7, 3.2], [2880, 10, 48, 3.3], [2881, 11, 50.9, 2.9], [2882, 12, 54, 3.1], [2883, 13, 57.3, 3.3], [2884, 14, 59.3, 2], [2885, 15, 60.8, 1.5], [2886, 16, 62, 1.2], [2887, 17, 62.7, 0.8]]}, + {'season': 55, 'age': 8, 'data': [[2806, 40, 0.6, 0.6], [2807, 41, 1, 0.4], [2808, 
42, 1.3, 0.3], [2809, 43, 1.3, 0], [2810, 44, 1.8, 0.5], [2811, 45, 2.3, 0.5], [2812, 46, 2.4, 0.1], [2813, 47, 2.4, 0], [2814, 48, 2.8, 0.4], [2815, 49, 3.2, 0.4], [2816, 50, 4, 0.8], [2817, 51, 5.5, 1.6], [2818, 52, 6.4, 0.9], [2819, 1, 8, 1.6], [2820, 2, 9.6, 1.6], [2821, 3, 10.9, 1.3], [2822, 4, 12.6, 1.8], [2823, 5, 14.8, 2.2], [2824, 6, 19.6, 4.7], [2825, 7, 25.1, 5.5], [2826, 8, 30.9, 5.8], [2827, 9, 41.4, 10.5], [2828, 10, 54.2, 12.8], [2829, 11, 63.5, 9.3], [2830, 12, 73.9, 10.4], [2831, 13, 83.5, 9.6], [2832, 14, 90.3, 6.8], [2833, 15, 95.6, 5.3], [2834, 16, 100.7, 5.1], [2835, 17, 103.7, 3]]}, + {'season': 59, 'age': 11, 'data': [[3014, 40, 0, 0], [3015, 41, 0.1, 0], [3016, 42, 0.2, 0.1], [3017, 43, 0.2, 0], [3018, 44, 0.3, 0.1], [3019, 45, 0.5, 0.2], [3020, 46, 0.8, 0.3], [3021, 47, 1.1, 0.4], [3022, 48, 1.7, 0.5], [3023, 49, 2.5, 0.9], [3024, 50, 3.5, 1], [3025, 51, 4.8, 1.3], [3026, 52, 7.2, 2.4], [3027, 1, 10.2, 3], [3028, 2, 12.7, 2.5], [3029, 3, 14.7, 2], [3030, 4, 17.1, 2.3], [3031, 5, 19.9, 2.8], [3032, 6, 23.5, 3.6], [3033, 7, 25.7, 2.2], [3034, 8, 27.9, 2.2], [3035, 9, 30, 2], [3036, 10, 31.9, 1.9], [3037, 11, 33.2, 1.4], [3038, 12, 34.1, 0.8], [3039, 13, 34.2, 0.1], [3040, 14, 34.2, 0], [3041, 15, 34.2, 0], [3042, 16, 34.3, 0.1], [3043, 17, 34.3, 0]]}, + {'season': 50, 'age': 4, 'data': [[2545, 40, 0.1, 0.1], [2546, 41, 0.1, 0.1], [2547, 42, 0.2, 0.1], [2548, 43, 0.2, 0.1], [2549, 44, 0.3, 0.1], [2550, 45, 0.3, 0.1], [2551, 46, 0.6, 0.2], [2552, 47, 0.7, 0.2], [2553, 48, 0.9, 0.2], [2554, 49, 1.2, 0.3], [2555, 50, 1.5, 0.3], [2556, 51, 2, 0.5], [2557, 52, 2.7, 0.7], [2558, 1, 3.6, 0.9], [2559, 2, 4.6, 1], [2560, 3, 5.9, 1.2], [2561, 4, 7.3, 1.4], [2562, 5, 8.9, 1.6], [2563, 6, 10.5, 1.6], [2564, 7, 12.7, 2.2], [2565, 8, 15.2, 2.5], [2566, 9, 17.2, 2], [2567, 10, 18.7, 1.5], [2568, 11, 19.9, 1.3], [2569, 12, 20.7, 0.8], [2570, 13, 21.2, 0.5], [2571, 14, 21.5, 0.4], [2572, 15, 21.7, 0.2], [2573, 16, 21.8, 0.1], [2574, 17, 21.9, 0.1]]}, + 
{'season': 58, 'age': 1, 'data': [[2962, 40, 0.1, 0.1], [2963, 41, 0.3, 0.2], [2964, 42, 0.3, 0.1], [2965, 43, 0.5, 0.2], [2966, 44, 1, 0.5], [2967, 45, 1.4, 0.4], [2968, 46, 1.7, 0.3], [2969, 47, 2.3, 0.6], [2970, 48, 3.6, 1.3], [2971, 49, 5.5, 1.9], [2972, 50, 8.5, 3], [2973, 51, 12.3, 3.8], [2974, 52, 17.7, 5.4], [2975, 1, 21.7, 3.9], [2976, 2, 24.9, 3.2], [2977, 3, 27.4, 2.5], [2978, 4, 30.5, 3.1], [2979, 5, 34.5, 4.1], [2980, 6, 37.6, 3.1], [2981, 7, 41.6, 4], [2982, 8, 46.2, 4.6], [2983, 9, 50.2, 3.9], [2984, 10, 54.9, 4.7], [2985, 11, 59.2, 4.3], [2986, 12, 62.6, 3.4], [2987, 13, 65.3, 2.7], [2988, 14, 67.6, 2.3], [2989, 15, 68.9, 1.3], [2990, 16, 69.8, 0.9], [2991, 17, 70.9, 1]]}, + {'season': 52, 'age': 10, 'data': [[2649, 40, 0, 0], [2649, 40, 0, 0], [2650, 41, 0, 0], [2650, 41, 0, 0], [2651, 42, 0.1, 0], [2651, 42, 0, 0], [2652, 43, 0.1, 0], [2652, 43, 0, 0], [2653, 44, 0.1, 0], [2653, 44, 0, 0], [2654, 45, 0.2, 0.1], [2654, 45, 0.1, 0], [2655, 46, 0.5, 0.1], [2655, 46, 0.1, 0], [2656, 47, 0.8, 0.2], [2656, 47, 0.2, 0], [2657, 48, 1.3, 0.2], [2657, 48, 0.3, 0], [2658, 49, 2.3, 0.7], [2658, 49, 0.5, 0.1], [2659, 50, 3.7, 0.7], [2659, 50, 0.7, 0.1], [2660, 51, 5.3, 0.9], [2660, 51, 1.1, 0.2], [2661, 52, 7.4, 1.2], [2661, 52, 1.5, 0.2], [2662, 1, 9.9, 1.3], [2662, 1, 1.9, 0.2], [2663, 2, 12.2, 1.1], [2663, 2, 2.3, 0.2], [2664, 3, 14.4, 1], [2664, 3, 2.6, 0.2], [2665, 4, 16.3, 0.9], [2665, 4, 2.9, 0.2], [2666, 5, 17.8, 0.6], [2666, 5, 3.2, 0.1], [2667, 6, 19, 0.5], [2667, 6, 3.3, 0.1], [2668, 7, 20, 0.6], [2668, 7, 3.5, 0.1], [2669, 8, 21.1, 0.5], [2669, 8, 3.7, 0.1], [2670, 9, 22, 0.4], [2670, 9, 3.8, 0.1], [2671, 10, 22.8, 0.4], [2671, 10, 4, 0.1], [2672, 11, 23.4, 0.3], [2672, 11, 4.1, 0], [2673, 12, 23.8, 0.2], [2673, 12, 4.1, 0], [2674, 13, 24.2, 0.2], [2674, 13, 4.2, 0], [2675, 14, 24.6, 0.2], [2675, 14, 4.2, 0], [2676, 15, 24.9, 0.1], [2676, 15, 4.3, 0], [2677, 16, 25.1, 0.1], [2677, 16, 4.3, 0], [2678, 17, 25.2, 0.1], [2678, 17, 4.3, 0]]}, + 
{'season': 54, 'age': 7, 'data': [[2753, 40, 0.2, 0.2], [2754, 41, 0.4, 0.1], [2755, 42, 0.9, 0.5], [2756, 43, 1, 0.1], [2757, 44, 1.1, 0.1], [2758, 45, 1.6, 0.4], [2759, 46, 2, 0.5], [2760, 47, 2.9, 0.9], [2761, 48, 5.3, 2.3], [2762, 49, 9.9, 4.7], [2763, 50, 17.6, 7.7], [2764, 51, 30.1, 12.5], [2765, 52, 49.4, 19.3], [2766, 53, 69, 19.5], [2767, 1, 83.2, 14.2], [2768, 2, 91.4, 8.2], [2769, 3, 98.6, 7.3], [2770, 4, 104.6, 5.9], [2771, 5, 110.9, 6.3], [2772, 6, 116, 5.2], [2773, 7, 119.2, 3.1], [2774, 8, 122.3, 3.2], [2775, 9, 124.5, 2.1], [2776, 10, 127.5, 3], [2777, 11, 130.1, 2.7], [2778, 12, 132.4, 2.3], [2779, 13, 135, 2.6], [2780, 14, 137.7, 2.7], [2781, 15, 139.1, 1.4], [2782, 16, 140.3, 1.1], [2783, 17, 141, 0.8]]} + ] + }, + 'mmwr': [ + ..., + {'mmwrid': 3038, 'weekend': '2020-03-21', 'weeknumber': 12, 'weekstart': '2020-03-15', 'year': 2020, 'yearweek': 202012, 'seasonid': 59, 'label': '', 'weekendlabel': 'Mar 21, 2020', 'weekendlabel2': 'Mar-21-2020'}, + {'mmwrid': 3039, 'weekend': '2020-03-28', 'weeknumber': 13, 'weekstart': '2020-03-22', 'year': 2020, 'yearweek': 202013, 'seasonid': 59, 'label': '', 'weekendlabel': 'Mar 28, 2020', 'weekendlabel2': 'Mar-28-2020'}, + {'mmwrid': 3040, 'weekend': '2020-04-04', 'weeknumber': 14, 'weekstart': '2020-03-29', 'year': 2020, 'yearweek': 202014, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 04, 2020', 'weekendlabel2': 'Apr-04-2020'}, + {'mmwrid': 3041, 'weekend': '2020-04-11', 'weeknumber': 15, 'weekstart': '2020-04-05', 'year': 2020, 'yearweek': 202015, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 11, 2020', 'weekendlabel2': 'Apr-11-2020'}, + {'mmwrid': 3042, 'weekend': '2020-04-18', 'weeknumber': 16, 'weekstart': '2020-04-12', 'year': 2020, 'yearweek': 202016, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 18, 2020', 'weekendlabel2': 'Apr-18-2020'}, + {'mmwrid': 3043, 'weekend': '2020-04-25', 'weeknumber': 17, 'weekstart': '2020-04-19', 'year': 2020, 'yearweek': 202017, 'seasonid': 59, 'label': '17', 
'weekendlabel': 'Apr 25, 2020', 'weekendlabel2': 'Apr-25-2020'} + ] +} diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index e0c5acaad..a08696255 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -2,13 +2,184 @@ # standard library import unittest -from unittest.mock import MagicMock -from unittest.mock import sentinel +from collections import defaultdict +from unittest.mock import (MagicMock, sentinel, patch) -from delphi.epidata.acquisition.flusurv.flusurv import fetch_json +import delphi.epidata.acquisition.flusurv.api as flusurv # py3tester coverage target -__test_target__ = "delphi.epidata.acquisition.flusurv.flusurv" +__test_target__ = "delphi.epidata.acquisition.flusurv.api" + + +network_all_example_data = [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'rate': 42, 'weeklyrate': 0.5, 'mmwrid': 2519}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + + {'networkid': 1, 'catchmentid': 22, 
'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ] + +metadata_result = { + # Last data update date + 'loaddatetime': 'Sep 12, 2023', + # IDs (network ID + catchment ID) specifying geos and data sources available + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 
51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + # "seasons" element, used for mapping between seasonids and season year spans. 
+ 'seasons': [ + {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2021-22', 'enabled': True, 'endweek': 3170, 'label': '2021-22', 'seasonid': 61, 'startweek': 3119, 'IncludeWeeklyRatesAndStrata': False}, + {'description': 'Season 2022-23', 'enabled': True, 'endweek': 3222, 'label': '2022-23', 'seasonid': 62, 'startweek': 3171, 'IncludeWeeklyRatesAndStrata': False}, + # sic + {'description': 'Season 2023-24 ', 'enabled': True, 'endweek': 3274, 'label': '2023-24', 'seasonid': 63, 'startweek': 3223, 'IncludeWeeklyRatesAndStrata': False}, + ], + # "master_lookup" element, used for mapping between valueids and strata descriptions + 'master_lookup': [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 
5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ], + 'default_data': network_all_example_data, + # Mapping each mmwrid to a week 
number, season, and date. Could use this instead of our current epoch-based function. + 'mmwr': [ + {'mmwrid': 2828, 'weekend': '2016-03-12', 'weeknumber': 10, 'weekstart': '2016-03-06', 'year': 2016, 'yearweek': 201610, 'seasonid': 55, 'label': 'Mar-12-2016', 'weekendlabel': 'Mar 12, 2016', 'weekendlabel2': 'Mar-12-2016'}, + {'mmwrid': 2885, 'weekend': '2017-04-15', 'weeknumber': 15, 'weekstart': '2017-04-09', 'year': 2017, 'yearweek': 201715, 'seasonid': 56, 'label': 'Apr-15-2017', 'weekendlabel': 'Apr 15, 2017', 'weekendlabel2': 'Apr-15-2017'}, + {'mmwrid': 2911, 'weekend': '2017-10-14', 'weeknumber': 41, 'weekstart': '2017-10-08', 'year': 2017, 'yearweek': 201741, 'seasonid': 57, 'label': 'Oct-14-2017', 'weekendlabel': 'Oct 14, 2017', 'weekendlabel2': 'Oct-14-2017'}, + {'mmwrid': 2928, 'weekend': '2018-02-10', 'weeknumber': 6, 'weekstart': '2018-02-04', 'year': 2018, 'yearweek': 201806, 'seasonid': 57, 'label': 'Feb-10-2018', 'weekendlabel': 'Feb 10, 2018', 'weekendlabel2': 'Feb-10-2018'}, + {'mmwrid': 2974, 'weekend': '2018-12-29', 'weeknumber': 52, 'weekstart': '2018-12-23', 'year': 2018, 'yearweek': 201852, 'seasonid': 58, 'label': 'Dec-29-2018', 'weekendlabel': 'Dec 29, 2018', 'weekendlabel2': 'Dec-29-2018'}, + {'mmwrid': 3031, 'weekend': '2020-02-01', 'weeknumber': 5, 'weekstart': '2020-01-26', 'year': 2020, 'yearweek': 202005, 'seasonid': 59, 'label': 'Feb-01-2020', 'weekendlabel': 'Feb 01, 2020', 'weekendlabel2': 'Feb-01-2020'}, + {'mmwrid': 3037, 'weekend': '2020-03-14', 'weeknumber': 11, 'weekstart': '2020-03-08', 'year': 2020, 'yearweek': 202011, 'seasonid': 59, 'label': 'Mar-14-2020', 'weekendlabel': 'Mar 14, 2020', 'weekendlabel2': 'Mar-14-2020'}, + {'mmwrid': 3077, 'weekend': '2020-12-19', 'weeknumber': 51, 'weekstart': '2020-12-13', 'year': 2020, 'yearweek': 202051, 'seasonid': 60, 'label': 'Dec-19-2020', 'weekendlabel': 'Dec 19, 2020', 'weekendlabel2': 'Dec-19-2020'}, + {'mmwrid': 3140, 'weekend': '2022-03-05', 'weeknumber': 9, 'weekstart': 
'2022-02-27', 'year': 2022, 'yearweek': 202209, 'seasonid': 61, 'label': 'Mar-05-2022', 'weekendlabel': 'Mar 05, 2022', 'weekendlabel2': 'Mar-05-2022'}, + {'mmwrid': 3183, 'weekend': '2022-12-31', 'weeknumber': 52, 'weekstart': '2022-12-25', 'year': 2022, 'yearweek': 202252, 'seasonid': 62, 'label': 'Dec-31-2022', 'weekendlabel': 'Dec 31, 2022', 'weekendlabel2': 'Dec-31-2022'}, + ] +} + +# Example location-specific return JSON from CDC GRASP API. Contains +# partial data for "network_all" location and season 49. +location_api_result = {'default_data': network_all_example_data} + + +# Map derived from "master_lookup" dictionary above, mapping between valueids +# by type and cleaned-up descriptions (no spaces or capital letters, etc) +id_group_map = { + "Age": { + 1: "0t4", + 2: "5t17", + 3: "18t49", + 4: "50t64", + 5: "65+", + 7: "65t74", + 8: "75t84", + 9: "85+", + 10: "18t29", + 11: "30t39", + 12: "40t49", + 21: "5t11", + 22: "12t17", + 97: "lt18", + 98: "gte18", + }, + "Race": { + 1: "white", + 2: "black", + 3: "hisp", + 4: "asian", + 5: "natamer", + }, + "Sex": { + 1: "male", + 2: "female", + }, +} + +catchment_name_map = { + "CA": (2, 1), + "CO": (2, 2), + "CT": (2, 3), + "GA": (2, 4), + "IA": (3, 5), + "ID": (3, 6), + "MD": (2, 7), + "MI": (3, 8), + "MN": (2, 9), + "NM": (2, 11), + "NY_albany": (2, 13), + "NY_rochester": (2, 14), + "OH": (3, 15), + "OK": (3, 16), + "OR": (2, 17), + "RI": (3, 18), + "SD": (3, 19), + "TN": (2, 20), + "UT": (3, 21), + "network_all": (1, 22), + "network_eip": (2, 22), + "network_ihsp": (3, 22), +} + +with patch(__test_target__ + ".fetch_json", + return_value = metadata_result) as MockFlusurvMetadata: + metadata_fetcher = flusurv.FlusurvMetadata(52) + api_fetcher = flusurv.FlusurvLocationFetcher(52) class FunctionTests(unittest.TestCase): @@ -28,6 +199,171 @@ def test_fetch_json(self): requests_impl = MagicMock() requests_impl.get.return_value = response_object - actual = fetch_json(path, payload, requests_impl=requests_impl) + 
actual = flusurv.fetch_json(path, payload, requests_impl=requests_impl) self.assertEqual(actual, sentinel.expected) + + def test_mmwrid_to_epiweek(self): + # Test epoch + self.assertEqual(flusurv.mmwrid_to_epiweek(2179), 200340) + + for mmwr in metadata_result["mmwr"]: + self.assertEqual(flusurv.mmwrid_to_epiweek(mmwr["mmwrid"]), mmwr["yearweek"]) + + def test_metadata_attributes(self): + self.assertEqual(metadata_fetcher.metadata, metadata_result) + self.assertEqual(metadata_fetcher.issue, 202337) + self.assertEqual(metadata_fetcher.max_age_weeks, 52) + self.assertEqual(metadata_fetcher.seasonids, {61, 62, 63}) + + self.assertEqual(metadata_fetcher.location_to_code, catchment_name_map) + self.assertEqual(metadata_fetcher.locations, catchment_name_map.keys()) + + self.assertEqual(metadata_fetcher.id_to_group, id_group_map) + self.assertEqual(metadata_fetcher.id_to_season, { + 43: '2003-04', + 46: '2006-07', + 49: '2009-10', + 61: '2021-22', + 62: '2022-23', + 63: '2023-24', + }) + + def test_geo_name_conversion(self): + geos = ( + "California", + "Utah", + "Entire Network", + "Entire Network", + "Entire Network", + "New York - Albany", + "New York - Rochester", + ) + networks = ( + "FluSurv-NET", + "FluSurv-NET", + "FluSurv-NET", + "IHSP", + "EIP", + "FluSurv-NET", + ) + expected_list = [ + "CA", + "UT", + "network_all", + "network_ihsp", + "network_eip", + "NY_albany", + "NY_rochester", + ] + + for (geo, network), expected in zip(zip(geos, networks), expected_list): + self.assertEqual(metadata_fetcher._location_name_to_abbr(geo, network), expected) + + @patch(__test_target__ + ".fetch_json") + def test_get_data(self, MockFlusurvLocation): + MockFlusurvLocation.return_value = location_api_result + + season_api_fetcher = api_fetcher + season_api_fetcher.metadata.seasonids = [30, 49] + + self.assertEqual(season_api_fetcher.get_data("network_all"), { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, + 200940: 
{"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10"}, + } + ) + + @patch(__test_target__ + ".fetch_json") + def test_fetch_flusurv_location(self, MockFlusurvLocation): + # API returns normal result + MockFlusurvLocation.return_value = location_api_result + self.assertEqual(api_fetcher._fetch_flusurv_location("network_all"), location_api_result) + + # API returns empty result formatted normally + empty_expected_result = {"default_data": []} + MockFlusurvLocation.return_value = empty_expected_result + with self.assertWarnsRegex(Warning, "No data was returned from the API for network_all"): + empty_data_result = api_fetcher._fetch_flusurv_location("network_all") + self.assertEqual(empty_data_result, empty_expected_result) + + # API returns "no data" result + MockFlusurvLocation.return_value = {"default_data": {"response": "No Data"}} + with self.assertWarnsRegex(Warning, "No data was returned from the API for network_all"): + no_data_result = api_fetcher._fetch_flusurv_location("network_all") + self.assertEqual(no_data_result, empty_expected_result) + + def test_group_by_epiweek(self): + self.assertEqual(api_fetcher._group_by_epiweek(metadata_result), { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10"}, + } + ) + + duplicate_input_data = { + 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'rate': 42, 'weeklyrate': 0.5, 'mmwrid': 2519}, + {'networkid': 1, 
'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'rate': 42, 'weeklyrate': 54, 'mmwrid': 2519}, + ] + } + + with self.assertWarnsRegex(Warning, "warning: Multiple rates seen for 201014"): + api_fetcher._group_by_epiweek(duplicate_input_data) + + self.assertEqual( + api_fetcher._group_by_epiweek({"default_data": []}), + defaultdict(lambda: defaultdict(lambda: None)) + ) + + @patch('builtins.print') + def test_group_by_epiweek_print_msgs(self, mock_print): + api_fetcher._group_by_epiweek(metadata_result) + mock_print.assert_called_with("found data for 4 epiweeks") + + def test_groupids_to_name(self): + ids = ( + (1, 0, 0), + (9, 0, 0), + (0, 2, 0), + (0, 0, 3), + (0, 0, 5), + (0, 0, 0), + ) + expected_list = [ + "rate_age_0", + "rate_age_7", + "rate_sex_female", + "rate_race_hisp", + "rate_race_natamer", + "rate_overall", + ] + + for (ageid, sexid, raceid), expected in zip(ids, expected_list): + self.assertEqual(api_fetcher._groupid_to_name(ageid, sexid, raceid), expected) + + with self.assertRaisesRegex(ValueError, "Ageid cannot be 6"): + api_fetcher._groupid_to_name(6, 0, 0) + with self.assertRaisesRegex(ValueError, "Expect at least two of three group ids to be 0"): + api_fetcher._groupid_to_name(1, 1, 0) + api_fetcher._groupid_to_name(0, 1, 1) + api_fetcher._groupid_to_name(1, 1, 1) + + def test_groupids_to_name(self): + input_data = api_fetcher._group_by_epiweek(metadata_result) + self.assertEqual(api_fetcher._add_sex_breakdowns_ut(input_data, "network_all"), { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10"}, + } + ) + self.assertEqual(api_fetcher._add_sex_breakdowns_ut(input_data, "UT"), { + 201014: 
{"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None}, + } + )