From d99d2d0c229d371598b5a9ace2551df2d380502b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 21 Aug 2023 11:03:24 -0400 Subject: [PATCH 01/38] initial switch to new API endpoint; doesn't account for format change --- src/acquisition/flusurv/flusurv.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 28105d933..c743433f0 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -123,11 +123,15 @@ def fetch_json(path, payload, call_count=1, requests_impl=requests): def fetch_flusurv_object(location_code): """Return decoded FluSurv JSON object for the given location.""" return fetch_json( - "PostPhase03GetData", + "PostPhase03DataTool", { "appversion": "Public", - "networkid": location_code[0], - "cacthmentid": location_code[1], + "key": "getdata", + "injson": [{ + "networkid": location_code[0], + "cacthmentid": location_code[1], + "seasonid": seasonid + }], }, ) @@ -210,7 +214,10 @@ def get_current_issue(): """Scrape the current issue from the FluSurv main page.""" # fetch - data = fetch_json("GetPhase03InitApp?appVersion=Public", None) + data = fetch_json( + "PostPhase03DataTool", + {"appversion": "Public", "key": "", "injson": []} + ) # extract date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") From 70c3e16e503c355d4e8a4ef5d231475f5942d404 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 31 Aug 2023 11:39:54 -0400 Subject: [PATCH 02/38] separate fetch fn for whole api obj --- src/acquisition/flusurv/flusurv.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index c743433f0..d18fd3d9e 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -120,7 +120,7 @@ def fetch_json(path, 
payload, call_count=1, requests_impl=requests): return resp.json() -def fetch_flusurv_object(location_code): +def fetch_flusurv_location(location_code): """Return decoded FluSurv JSON object for the given location.""" return fetch_json( "PostPhase03DataTool", @@ -135,6 +135,13 @@ def fetch_flusurv_object(location_code): }, ) +def fetch_flusurv_object(): + """Return raw FluSurv JSON object for all locations.""" + return fetch_json( + "PostPhase03DataTool", + {"appversion": "Public", "key": "", "injson": []} + ) + def mmwrid_to_epiweek(mmwrid): """Convert a CDC week index into an epiweek.""" @@ -199,7 +206,7 @@ def get_data(location_code): # fetch print("[fetching flusurv data...]") - data_in = fetch_flusurv_object(location_code) + data_in = fetch_flusurv_location(location_code) # extract print("[extracting values...]") From 3580f3fdaa2d0b68de3ffb51b7c639720d58f9e8 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 31 Aug 2023 11:42:44 -0400 Subject: [PATCH 03/38] get_current_issue to use existing json response --- src/acquisition/flusurv/flusurv.py | 13 +++++-------- src/acquisition/flusurv/flusurv_update.py | 4 +++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index d18fd3d9e..0db2d6be0 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -217,15 +217,12 @@ def get_data(location_code): return data_out -def get_current_issue(): - """Scrape the current issue from the FluSurv main page.""" - - # fetch - data = fetch_json( - "PostPhase03DataTool", - {"appversion": "Public", "key": "", "injson": []} - ) +def get_current_issue(data): + """ + Extract the current issue from the FluSurv API result. 
+ data: dictionary representing a JSON response from the FluSurv API + """ # extract date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 1aa8e9885..2e481ac75 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -175,8 +175,10 @@ def main(): # fmt: on args = parser.parse_args() + data = fetch_flusurv_object() + # scrape current issue from the main page - issue = flusurv.get_current_issue() + issue = flusurv.get_current_issue(data) print(f"current issue: {int(issue)}") # fetch flusurv data From 9cb1da40d2f1a0588f3fed8cfb4712cb191629cf Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 1 Sep 2023 15:36:48 -0400 Subject: [PATCH 04/38] cleanup names and comments - rename input arg to `update` to avoid reassignment later - comment and reuse args_insert - spelling - comment magic constant used in output format - rename location-network/catchmentid map --- src/acquisition/flusurv/flusurv.py | 15 ++++++++++----- src/acquisition/flusurv/flusurv_update.py | 12 ++++++------ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 0db2d6be0..2b0d3c36a 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -3,7 +3,7 @@ === Purpose === =============== -Fetches FluSurv-NET data (flu hospitaliation rates) from CDC. Unlike the other +Fetches FluSurv-NET data (flu hospitalization rates) from CDC. Unlike the other CDC-hosted datasets (e.g. FluView), FluSurv is not available as a direct download. This program emulates web browser requests for the web app and extracts data of interest from the JSON response. 
@@ -49,7 +49,7 @@ # all currently available FluSurv locations and their associated codes # the number pair represents NetworkID and CatchmentID -location_codes = { +location_to_code = { "CA": (2, 1), "CO": (2, 2), "CT": (2, 3), @@ -155,7 +155,7 @@ def mmwrid_to_epiweek(mmwrid): def extract_from_object(data_in): """ - Given a FluSurv data object, return hospitaliation rates. + Given a FluSurv data object, return hospitalization rates. The returned object is indexed first by epiweek, then by zero-indexed age group. @@ -171,11 +171,16 @@ def extract_from_object(data_in): # capture as-of-yet undefined age groups 10, 11, and 12 continue age_index = obj["age"] - 1 - # iterage over weeks + # iterate over weeks for mmwrid, _, _, rate in obj["data"]: epiweek = mmwrid_to_epiweek(mmwrid) if epiweek not in data_out: # weekly rate of each age group + # TODO what is this magic constant? Maybe total # of age + # groups?? Appears to be assuming that age groups are + # numbered sequentially. Better to store data_out in a + # dictionary of dictionaries, given new age group ids + # (e.g. 
99, 21, etc) data_out[epiweek] = [None] * 9 prev_rate = data_out[epiweek][age_index] if prev_rate is None: @@ -201,7 +206,7 @@ def get_data(location_code): This method performs the following operations: - fetches FluSurv data from CDC - - extracts and returns hospitaliation rates + - extracts and returns hospitalization rates """ # fetch diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 2e481ac75..37557038a 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -90,17 +90,16 @@ def get_rows(cur): return num -def update(issue, location_name, test_mode=False): +def update(issue, location, test_mode=False): """Fetch and store the currently avialble weekly FluSurv dataset.""" # fetch data - location_code = flusurv.location_codes[location_name] - print("fetching data for", location_name, location_code) + location_code = flusurv.location_to_code[location] + print("fetching data for", location, location_code) data = flusurv.get_data(location_code) # metadata epiweeks = sorted(data.keys()) - location = location_name release_date = str(EpiDate.today()) # connect to the database @@ -142,8 +141,9 @@ def update(issue, location_name, test_mode=False): # values (including duplicates) were stored on each run. 
continue args_meta = [release_date, issue, epiweek, location, lag] + # List of values in order of columns specified in sql statement above args_insert = data[epiweek] - args_update = [release_date] + data[epiweek] + args_update = [release_date] + args_insert cur.execute(sql, tuple(args_meta + args_insert + args_update)) # commit and disconnect @@ -184,7 +184,7 @@ def main(): # fetch flusurv data if args.location == "all": # all locations - for location in flusurv.location_codes.keys(): + for location in flusurv.location_to_code.keys(): update(issue, location, args.test) else: # single location From 9bf3c82381f550b43f4940ce0d06f92d7b850059 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 1 Sep 2023 18:17:00 -0400 Subject: [PATCH 05/38] store extracted rates by strata name rather than position Previously, age strata were numbered sequentially which allowed us to store rate values by position in a list. With the introduction of the new strata, this system is not robust enough to track all the different groups (e.g. ageids are no longer sequential and there are now race and sex groupings with separate numbering systems). --- src/acquisition/flusurv/flusurv.py | 39 ++++++++++++----------- src/acquisition/flusurv/flusurv_update.py | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 2b0d3c36a..539b43623 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -36,6 +36,7 @@ """ # standard library +from collections import defaultdict from datetime import datetime import json import time @@ -161,38 +162,38 @@ def extract_from_object(data_in): group. """ - # an object to hold the result - data_out = {} + # Create output object + # First layer of keys is epiweeks. Second layer of keys is age groups + # (by id, not age). + # + # If a top-level key doesn't already exist, create a new empty dict. 
+ # If a secondary key doesn't already exist, create a new dict. Default + # value is None if not provided. + data_out = defaultdict(lambda: defaultdict(lambda: None)) # iterate over all seasons and age groups for obj in data_in["busdata"]["dataseries"]: - if obj["age"] in (10, 11, 12): + age_group = obj["age"] + if age_group in (10, 11, 12): # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): # capture as-of-yet undefined age groups 10, 11, and 12 continue - age_index = obj["age"] - 1 # iterate over weeks for mmwrid, _, _, rate in obj["data"]: epiweek = mmwrid_to_epiweek(mmwrid) - if epiweek not in data_out: - # weekly rate of each age group - # TODO what is this magic constant? Maybe total # of age - # groups?? Appears to be assuming that age groups are - # numbered sequentially. Better to store data_out in a - # dictionary of dictionaries, given new age group ids - # (e.g. 99, 21, etc) - data_out[epiweek] = [None] * 9 - prev_rate = data_out[epiweek][age_index] + prev_rate = data_out[epiweek][age_group] if prev_rate is None: - # this is the first time to see a rate for this epiweek/age - data_out[epiweek][age_index] = rate + # this is the first time to see a rate for this epiweek-age + # group combo + data_out[epiweek][age_group] = rate elif prev_rate != rate: - # a different rate was already found for this epiweek/age - format_args = (epiweek, obj["age"], prev_rate, rate) + # a different rate was already found for this epiweek-age + # group combo + format_args = (epiweek, age_group, prev_rate, rate) print("warning: %d %d %f != %f" % format_args) - # sanity check the result - if len(data_out) == 0: + # Sanity check the result. 
We expect to have seen some epiweeks + if len(data_out.keys()) == 0: raise Exception("no data found") # print the result and return flu data diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 37557038a..8bf90ade6 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -142,7 +142,7 @@ def update(issue, location, test_mode=False): continue args_meta = [release_date, issue, epiweek, location, lag] # List of values in order of columns specified in sql statement above - args_insert = data[epiweek] + args_insert = [week_rate_tuple[1] for week_rate_tuple in sorted(data[epiweek].items())] args_update = [release_date] + args_insert cur.execute(sql, tuple(args_meta + args_insert + args_update)) From cea25ab65fbad1e459c878f31e4851c38c9f7d78 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:36:59 -0400 Subject: [PATCH 06/38] define function to convert json obs to dict grouped by location and epiweek --- src/acquisition/flusurv/flusurv.py | 97 +++++++++++++++++++----------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 539b43623..ed21aa011 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -40,6 +40,7 @@ from datetime import datetime import json import time +from warnings import warn # third party import requests @@ -154,50 +155,74 @@ def mmwrid_to_epiweek(mmwrid): return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() -def extract_from_object(data_in): +def reformat_to_nested(data): """ - Given a FluSurv data object, return hospitalization rates. + Convert the default data object into a dictionary grouped by location and epiweek - The returned object is indexed first by epiweek, then by zero-indexed age - group. 
+ Arg data is a list of dictionaries of the format + [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + ... + ] + + This object is stored as the value associated with the 'default_data' key in the + GRASP API response object, as fetched with 'fetch_flusurv_object()' + + Returns a dictionary of the format + { + : { + : { + : , + ... + : , + ... + } + ... + } + ... + } """ + # Sanity check the input. We expect to see some epiweeks + if len(data["default_data"]) == 0: + raise Exception("no data found") # Create output object - # First layer of keys is epiweeks. Second layer of keys is age groups - # (by id, not age). + # First layer of keys is locations. Second layer of keys is epiweeks. + # Third layer of keys is groups (by id, not age in years, sex abbr, etc). # # If a top-level key doesn't already exist, create a new empty dict. - # If a secondary key doesn't already exist, create a new dict. Default - # value is None if not provided. 
- data_out = defaultdict(lambda: defaultdict(lambda: None)) - - # iterate over all seasons and age groups - for obj in data_in["busdata"]["dataseries"]: - age_group = obj["age"] - if age_group in (10, 11, 12): - # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): - # capture as-of-yet undefined age groups 10, 11, and 12 - continue - # iterate over weeks - for mmwrid, _, _, rate in obj["data"]: - epiweek = mmwrid_to_epiweek(mmwrid) - prev_rate = data_out[epiweek][age_group] - if prev_rate is None: - # this is the first time to see a rate for this epiweek-age - # group combo - data_out[epiweek][age_group] = rate - elif prev_rate != rate: - # a different rate was already found for this epiweek-age - # group combo - format_args = (epiweek, age_group, prev_rate, rate) - print("warning: %d %d %f != %f" % format_args) - - # Sanity check the result. We expect to have seen some epiweeks + # If a secondary key doesn't already exist, create a new empty dict. + # If a tertiary key doesn't already exist, create a new key with a + # default value of None if not provided. + data_out = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None))) + + for obs in data["default_data"]: + epiweek = mmwrid_to_epiweek(obs["mmwrid"]) + location = code_to_location[(obs["networkid"], obs["catchmentid"])] + groupname = groupids_to_name((obs["ageid"], obs["sexid"], obs["raceid"])) + + prev_rate = data_out[location][epiweek][groupname] + if prev_rate is None: + # this is the first time to see a rate for this location-epiweek- + # group combo + data_out[location][epiweek][groupname] = rate + elif prev_rate != rate: + # Skip and warn + # a different rate was already found for this location-epiweek- + # group combo + warn((f"warning: Multiple rates seen for {location} {epiweek} " + f"{groupname}, but previous value {prev_rate} does not " + f"equal new value {rate}. Using the first value.")) + + # Sanity check the input. 
We expect to have populated our dictionary if len(data_out.keys()) == 0: - raise Exception("no data found") + raise Exception("no data loaded") + + print(f"found data for {len(data_out.keys())} locations") + print(f"found data for {len(data_out[location].keys())} epiweeks for {location}") - # print the result and return flu data - print(f"found data for {len(data_out)} weeks") return data_out @@ -216,7 +241,7 @@ def get_data(location_code): # extract print("[extracting values...]") - data_out = extract_from_object(data_in) + data_out = reformat_to_nested(data_in) # return print("[scraped successfully]") From 24dc0889e7af87148d8caa5b554f091f81b0ef47 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 6 Sep 2023 16:18:51 -0400 Subject: [PATCH 07/38] auto-map from valueids to ordinal and label-based group names --- src/acquisition/flusurv/flusurv.py | 102 ++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index ed21aa011..f52a3d38b 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -159,16 +159,8 @@ def reformat_to_nested(data): """ Convert the default data object into a dictionary grouped by location and epiweek - Arg data is a list of dictionaries of the format - [ - {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, - {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, - {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, - ... 
- ] - - This object is stored as the value associated with the 'default_data' key in the - GRASP API response object, as fetched with 'fetch_flusurv_object()' + Args: + A GRASP API response object, as fetched with 'fetch_flusurv_object()' Returns a dictionary of the format { @@ -188,6 +180,8 @@ def reformat_to_nested(data): if len(data["default_data"]) == 0: raise Exception("no data found") + id_label_map = make_id_label_map(data) + # Create output object # First layer of keys is locations. Second layer of keys is epiweeks. # Third layer of keys is groups (by id, not age in years, sex abbr, etc). @@ -198,20 +192,30 @@ def reformat_to_nested(data): # default value of None if not provided. data_out = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None))) + # data["default_data"] is a list of dictionaries, with the format + # [ + # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + # ... 
+ # ] for obs in data["default_data"]: epiweek = mmwrid_to_epiweek(obs["mmwrid"]) location = code_to_location[(obs["networkid"], obs["catchmentid"])] - groupname = groupids_to_name((obs["ageid"], obs["sexid"], obs["raceid"])) + groupname = groupids_to_name( + ageid = obs["ageid"], sexid = obs["sexid"], raceid = obs["raceid"], + id_label_map = id_label_map + ) + rate = obs["weeklyrate"] prev_rate = data_out[location][epiweek][groupname] if prev_rate is None: - # this is the first time to see a rate for this location-epiweek- + # This is the first time to see a rate for this location-epiweek- # group combo data_out[location][epiweek][groupname] = rate elif prev_rate != rate: - # Skip and warn - # a different rate was already found for this location-epiweek- - # group combo + # Skip and warn; a different rate was already found for this + # location-epiweek-group combo warn((f"warning: Multiple rates seen for {location} {epiweek} " f"{groupname}, but previous value {prev_rate} does not " f"equal new value {rate}. Using the first value.")) @@ -221,6 +225,8 @@ def reformat_to_nested(data): raise Exception("no data loaded") print(f"found data for {len(data_out.keys())} locations") + # Just check one location to avoid iterating through the entire + # dictionary. 
print(f"found data for {len(data_out[location].keys())} epiweeks for {location}") return data_out @@ -259,3 +265,69 @@ def get_current_issue(data): # convert and return return EpiDate(date.year, date.month, date.day).get_ew() + + +def make_id_label_map(data): + """Create a map from valueid to group description""" + id_to_label = defaultdict(lambda: defaultdict(lambda: None)) + for group in data["master_lookup"]: + # Skip "overall" group + if group["Variable"] is None: + continue + id_to_label[group["Variable"]][group["valueid"]] = group["Label"].replace( + " ", "" + ).replace( + "/", "" + ).replace( + "-", "t" + ).replace( + "yr", "" + ).lower() + + return id_to_label + + +def groupids_to_name(ageid, sexid, raceid, id_label_map): + # Expect at least 2 of three ids to be 0 + assert (ageid, sexid, raceid).count(0) >= 2, \ + "At most one groupid can be non-zero" + if (ageid, sexid, raceid).count(0) == 3: + group = "overall" + elif ageid != 0: + # The column names used in the DB for the original age groups + # are ordinal, such that: + # "rate_age_0" corresponds to age group 1, 0-4 yr + # "rate_age_1" corresponds to age group 2, 5-17 yr + # "rate_age_2" corresponds to age group 3, 18-49 yr + # "rate_age_3" corresponds to age group 4, 50-64 yr + # "rate_age_4" corresponds to age group 5, 65+ yr + # "rate_age_5" corresponds to age group 7, 65-74 yr + # "rate_age_6" corresponds to age group 8, 75-84 yr + # "rate_age_7" corresponds to age group 9, 85+ yr + # + # Group 6 was the "overall" category and not included in the + # ordinal naming scheme. Because of that, groups 1-5 have column + # ids equal to the ageid - 1; groups 7-9 have column ids equal + # to ageid - 2. + # + # Automatically map from ageids 1-9 to column ids to match + # the historical convention. + if ageid <= 5: + age_group = str(ageid - 1) + elif ageid == 6: + # Ageid of 6 used to be used for the "overall" category. 
+ # Now "overall" is represented by a valueid of 0, and ageid of 6 + # is not used for any group. If we see an ageid of 6, something + # has gone wrong. + raise ValueError("Ageid cannot be 6; please check for changes in the API") + elif ageid <= 9: + age_group = str(ageid - 2) + else: + age_group = id_label_map["Age"][ageid] + group = "age_" + age_group + elif sexid != 0: + group = "sex_" + id_label_map["Sex"][sexid] + elif raceid != 0: + group = "race_" + id_label_map["Race"][raceid] + + return "rate_" + group From 26eff97c9978f9e0907e77b8c1a12990aa560bae Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 6 Sep 2023 17:25:09 -0400 Subject: [PATCH 08/38] add new strata to sql insert statement by name, not order --- src/acquisition/flusurv/flusurv_update.py | 176 +++++++++++++++++++--- 1 file changed, 157 insertions(+), 19 deletions(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 8bf90ade6..8851a686d 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -112,39 +112,177 @@ def update(issue, location, test_mode=False): # SQL for insert/update sql = """ INSERT INTO `flusurv` ( - `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, - `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, - `rate_age_5`, `rate_age_6`, `rate_age_7` + `release_date`, + `issue`, + `epiweek`, + `location`, + `lag`, + + `rate_overall`, + + `rate_age_0`, + `rate_age_1`, + `rate_age_2`, + `rate_age_3`, + `rate_age_4`, + `rate_age_5`, + `rate_age_6`, + `rate_age_7`, + + `rate_age_18t29`, + `rate_age_30t39`, + `rate_age_40t49`, + `rate_age_5t11`, + `rate_age_12t17`, + `rate_age_lt18`, + `rate_age_gte18`, + + `rate_race_white`, + `rate_race_black`, + `rate_race_hisp`, + `rate_race_asian`, + `rate_race_natamer`, + + `rate_sex_male`, + `rate_sex_female` ) VALUES ( - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 
%s, %s, %s, %s + %(release_date)s, + %(issue)s, + %(epiweek)s, + %(location)s, + %(lag)s, + + %(rate_overall)s, + + %(rate_age_0)s, + %(rate_age_1)s, + %(rate_age_2)s, + %(rate_age_3)s, + %(rate_age_4)s, + %(rate_age_5)s, + %(rate_age_6)s, + %(rate_age_7)s, + + %(rate_age_18t29)s, + %(rate_age_30t39)s, + %(rate_age_40t49)s, + %(rate_age_5t11)s, + %(rate_age_12t17)s, + %(rate_age_<18)s, + %(rate_age_>=18)s, + + %(rate_race_white)s, + %(rate_race_black)s, + %(rate_race_hispaniclatino)s, + %(rate_race_asianpacificislander)s, + %(rate_race_americanindianalaskanative)s, + + %(rate_sex_male)s, + %(rate_sex_female)s ) ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `rate_age_0` = coalesce(%s, `rate_age_0`), - `rate_age_1` = coalesce(%s, `rate_age_1`), - `rate_age_2` = coalesce(%s, `rate_age_2`), - `rate_age_3` = coalesce(%s, `rate_age_3`), - `rate_age_4` = coalesce(%s, `rate_age_4`), - `rate_overall` = coalesce(%s, `rate_overall`), - `rate_age_5` = coalesce(%s, `rate_age_5`), - `rate_age_6` = coalesce(%s, `rate_age_6`), - `rate_age_7` = coalesce(%s, `rate_age_7`) + `release_date` = least(`release_date`, %(release_date)s), + `rate_overall` = coalesce(%(rate_overall)s, `rate_overall`), + + `rate_age_0` = coalesce(%(rate_age_0)s, `rate_age_0`), + `rate_age_1` = coalesce(%(rate_age_1)s, `rate_age_1`), + `rate_age_2` = coalesce(%(rate_age_2)s, `rate_age_2`), + `rate_age_3` = coalesce(%(rate_age_3)s, `rate_age_3`), + `rate_age_4` = coalesce(%(rate_age_4)s, `rate_age_4`), + `rate_age_5` = coalesce(%(rate_age_5)s, `rate_age_5`), + `rate_age_6` = coalesce(%(rate_age_6)s, `rate_age_6`), + `rate_age_7` = coalesce(%(rate_age_7)s, `rate_age_7`), + + `rate_age_18t29` = coalesce(%(rate_age_18t29)s, `rate_age_18t29`), + `rate_age_30t39` = coalesce(%(rate_age_30t39)s, `rate_age_30t39`), + `rate_age_40t49` = coalesce(%(rate_age_40t49)s, `rate_age_40t49`), + `rate_age_5t11` = coalesce(%(rate_age_5t11)s, `rate_age_5t11`), + `rate_age_12t17` = 
coalesce(%(rate_age_12t17)s, `rate_age_12t17`), + `rate_age_lt18` = coalesce(%(rate_age_<18)s, `rate_age_lt18`), + `rate_age_gte18` = coalesce(%(rate_age_>=18)s, `rate_age_gte18`), + + `rate_race_white` = coalesce(%(rate_race_white)s, `rate_race_white`), + `rate_race_black` = coalesce(%(rate_race_black)s, `rate_race_black`), + `rate_race_hisp` = coalesce(%(rate_race_hispaniclatino)s, `rate_race_hisp`), + `rate_race_asian` = coalesce(%(rate_race_asianpacificislander)s, `rate_race_asian`), + `rate_race_natamer` = coalesce(%(rate_race_americanindianalaskanative)s, `rate_race_natamer`), + + `rate_sex_male` = coalesce(%(rate_sex_male)s, `rate_sex_male`), + `rate_sex_female` = coalesce(%(rate_sex_female)s, `rate_sex_female`) """ # insert/update each row of data (one per epiweek) for epiweek in epiweeks: + # As of Sept 2023, we expect to see these 24 groups, as described in + # the top-level "master_lookup" element of the new GRASP API + # (https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response + # object: + # 'master_lookup' = [ + # {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 
'Enabled': True}, + # {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True} + # {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + # {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + # + # {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + # {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + # {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + # {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + # {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + # + # {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + # {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + # + # {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + # ] + # + # The previous version of the GRASP API + # 
(https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) + # used a different age group-id mapping, as described in the + # top-level "ages" element: + # 'ages' = [ + # {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, + # {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + # {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + # {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + # {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + # {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + # {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + # {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + # {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} + # ] + # + # In addition to the new age, race, and sex breakdowns, the + # group id for overall reporting has changed from 6 to 0. + n_max_expected_groups = 24 + assert len(epiweek.keys()) == n_max_expected_groups, \ + f"{location} {epiweek} data does not contain the expected {n_max_expected_groups} groups" + lag = delta_epiweeks(epiweek, issue) if lag > 52: # Ignore values older than one year, as (1) they are assumed not to # change, and (2) it would adversely affect database performance if all # values (including duplicates) were stored on each run. 
continue - args_meta = [release_date, issue, epiweek, location, lag] - # List of values in order of columns specified in sql statement above - args_insert = [week_rate_tuple[1] for week_rate_tuple in sorted(data[epiweek].items())] - args_update = [release_date] + args_insert - cur.execute(sql, tuple(args_meta + args_insert + args_update)) + args_meta = { + "release_date": release_date, + "issue": issue, + "epiweek": epiweek, + "location": location, + "lag": lag + } + cur.execute(sql, {**args_meta, **data[epiweek]}) # commit and disconnect rows2 = get_rows(cur) From 229a96cbde2a93b36f8eccde764ff07c25172fa8 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 12 Sep 2023 11:28:04 -0400 Subject: [PATCH 09/38] pass seasonids around to use in requests for location-specific data --- src/acquisition/flusurv/flusurv.py | 100 +++++++++++++--------- src/acquisition/flusurv/flusurv_update.py | 65 ++++++++------ 2 files changed, 98 insertions(+), 67 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index f52a3d38b..18326bbf2 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -122,23 +122,41 @@ def fetch_json(path, payload, call_count=1, requests_impl=requests): return resp.json() -def fetch_flusurv_location(location_code): - """Return decoded FluSurv JSON object for the given location.""" - return fetch_json( +def fetch_flusurv_location(location, seasonids): + """Return FluSurv JSON object for the given location.""" + location_code = location_to_code[location] + + result = fetch_json( "PostPhase03DataTool", { "appversion": "Public", "key": "getdata", - "injson": [{ - "networkid": location_code[0], - "cacthmentid": location_code[1], - "seasonid": seasonid - }], + "injson": [ + { + "networkid": location_code[0], + "catchmentid": location_code[1], + "seasonid": elem, + } for elem in seasonids], }, ) -def fetch_flusurv_object(): - """Return raw 
FluSurv JSON object for all locations.""" + # If no data is returned (a given seasonid is not reported, + # location codes are invalid, etc), the API returns a JSON like: + # { + # 'default_data': { + # 'response': 'No Data' + # } + # } + # + # If data is returned, then data["default_data"] is a list + # and data["default_data"]["response"] doesn't exist. + assert isinstance(result["default_data"], list) and len(result["default_data"]) > 0, \ + f"Data was not correctly returned from the API for {location}" + return result + + +def fetch_flusurv_metadata(): + """Return FluSurv JSON metadata object.""" return fetch_json( "PostPhase03DataTool", {"appversion": "Public", "key": "", "injson": []} @@ -155,12 +173,13 @@ def mmwrid_to_epiweek(mmwrid): return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() -def reformat_to_nested(data): +def group_by_epiweek(data): """ - Convert the default data object into a dictionary grouped by location and epiweek + Convert default data for a single location into an epiweek-grouped dictionary Args: - A GRASP API response object, as fetched with 'fetch_flusurv_object()' + data: The "default_data" element of a GRASP API response object, + as fetched with 'fetch_flusurv_location' or `fetch_flusurv_metadata` Returns a dictionary of the format { @@ -176,21 +195,22 @@ def reformat_to_nested(data): ... } """ + data = data["default_data"] + # Sanity check the input. We expect to see some epiweeks - if len(data["default_data"]) == 0: + if len(data) == 0: raise Exception("no data found") - id_label_map = make_id_label_map(data) + id_label_map = make_id_label_map() # Create output object - # First layer of keys is locations. Second layer of keys is epiweeks. - # Third layer of keys is groups (by id, not age in years, sex abbr, etc). + # First layer of keys is epiweeks. Second layer of keys is groups + # (by id, not age in years, sex abbr, etc). # # If a top-level key doesn't already exist, create a new empty dict. 
- # If a secondary key doesn't already exist, create a new empty dict. - # If a tertiary key doesn't already exist, create a new key with a + # If a secondary key doesn't already exist, create a new key with a # default value of None if not provided. - data_out = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None))) + data_out = defaultdict(lambda: defaultdict(lambda: None)) # data["default_data"] is a list of dictionaries, with the format # [ @@ -199,24 +219,23 @@ def reformat_to_nested(data): # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, # ... # ] - for obs in data["default_data"]: + for obs in data: epiweek = mmwrid_to_epiweek(obs["mmwrid"]) - location = code_to_location[(obs["networkid"], obs["catchmentid"])] groupname = groupids_to_name( ageid = obs["ageid"], sexid = obs["sexid"], raceid = obs["raceid"], id_label_map = id_label_map ) rate = obs["weeklyrate"] - prev_rate = data_out[location][epiweek][groupname] + prev_rate = data_out[epiweek][groupname] if prev_rate is None: - # This is the first time to see a rate for this location-epiweek- - # group combo - data_out[location][epiweek][groupname] = rate + # This is the first time to see a rate for this epiweek-group + # combo + data_out[epiweek][groupname] = rate elif prev_rate != rate: # Skip and warn; a different rate was already found for this - # location-epiweek-group combo - warn((f"warning: Multiple rates seen for {location} {epiweek} " + # epiweek-group combo + warn((f"warning: Multiple rates seen for {epiweek} " f"{groupname}, but previous value {prev_rate} does not " f"equal new value {rate}. Using the first value.")) @@ -224,33 +243,29 @@ def reformat_to_nested(data): if len(data_out.keys()) == 0: raise Exception("no data loaded") - print(f"found data for {len(data_out.keys())} locations") - # Just check one location to avoid iterating through the entire - # dictionary. 
- print(f"found data for {len(data_out[location].keys())} epiweeks for {location}") + print(f"found data for {len(data_out.keys())} epiweeks") return data_out -def get_data(location_code): +def get_data(location, seasonids): """ Fetch and parse flu data for the given location. This method performs the following operations: - - fetches FluSurv data from CDC + - filters location-specific FluSurv data from CDC API response object - extracts and returns hospitalization rates """ - # fetch print("[fetching flusurv data...]") - data_in = fetch_flusurv_location(location_code) + data_in = fetch_flusurv_location(location, seasonids) # extract - print("[extracting values...]") - data_out = reformat_to_nested(data_in) + print("[reformatting flusurv result...]") + data_out = group_by_epiweek(data_in) # return - print("[scraped successfully]") + print(f"[successfully fetched data for {location}]") return data_out @@ -258,7 +273,8 @@ def get_current_issue(data): """ Extract the current issue from the FluSurv API result. 
- data: dictionary representing a JSON response from the FluSurv API + Args: + data: dictionary representing a JSON response from the FluSurv API """ # extract date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") @@ -267,8 +283,10 @@ def get_current_issue(data): return EpiDate(date.year, date.month, date.day).get_ew() -def make_id_label_map(data): +def make_id_label_map(): """Create a map from valueid to group description""" + data = fetch_flusurv_metadata() + id_to_label = defaultdict(lambda: defaultdict(lambda: None)) for group in data["master_lookup"]: # Skip "overall" group diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 8851a686d..ed04252f2 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -70,6 +70,7 @@ # standard library import argparse +from warnings import warn # third party import mysql.connector @@ -81,6 +82,8 @@ from delphi.utils.epiweek import delta_epiweeks +max_age_to_consider_weeks = 52 + def get_rows(cur): """Return the number of rows in the `flusurv` table.""" @@ -90,13 +93,10 @@ def get_rows(cur): return num -def update(issue, location, test_mode=False): - """Fetch and store the currently avialble weekly FluSurv dataset.""" - - # fetch data - location_code = flusurv.location_to_code[location] - print("fetching data for", location, location_code) - data = flusurv.get_data(location_code) +def update(issue, location, seasonids, test_mode=False): + """Fetch and store the currently available weekly FluSurv dataset.""" + # Fetch location-specific data + data = flusurv.get_data(location, seasonids) # metadata epiweeks = sorted(data.keys()) @@ -214,10 +214,16 @@ def update(issue, location, test_mode=False): # insert/update each row of data (one per epiweek) for epiweek in epiweeks: - # As of Sept 2023, we expect to see these 24 groups, as described in - # the top-level "master_lookup" element of the new GRASP API - # 
(https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response - # object: + lag = delta_epiweeks(epiweek, issue) + if lag > max_age_to_consider_weeks: + # Ignore values older than one year, as (1) they are assumed not to + # change, and (2) it would adversely affect database performance if all + # values (including duplicates) were stored on each run. + continue + + # As of Sept 2023, for new data we expect to see these 23 groups, as + # described in the top-level "master_lookup" element of the new GRASP API + # (https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response object: # 'master_lookup' = [ # {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, # {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, @@ -247,9 +253,11 @@ def update(issue, location, test_mode=False): # {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, # ] # + # All 23 strata are available starting with epiweek 200935. + # # The previous version of the GRASP API # (https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) - # used a different age group-id mapping, as described in the + # used the following age groupid mapping, as described in the # top-level "ages" element: # 'ages' = [ # {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, @@ -263,18 +271,15 @@ def update(issue, location, test_mode=False): # {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} # ] # - # In addition to the new age, race, and sex breakdowns, the - # group id for overall reporting has changed from 6 to 0. - n_max_expected_groups = 24 - assert len(epiweek.keys()) == n_max_expected_groups, \ - f"{location} {epiweek} data does not contain the expected {n_max_expected_groups} groups" + # In addition to the new age, race, and sex breakdowns, the group + # id for overall reporting has changed from 6 to 0. 
Ageids 1-5 + # and 7-9 retain the same the same meanings. + n_expected_groups = 23 + if len(data[epiweek].keys()) != n_expected_groups: + warnings.warn( + f"{location} {epiweek} data does not contain the expected {n_expected_groups} groups" + ) - lag = delta_epiweeks(epiweek, issue) - if lag > 52: - # Ignore values older than one year, as (1) they are assumed not to - # change, and (2) it would adversely affect database performance if all - # values (including duplicates) were stored on each run. - continue args_meta = { "release_date": release_date, "issue": issue, @@ -313,20 +318,28 @@ def main(): # fmt: on args = parser.parse_args() - data = fetch_flusurv_object() + data = flusurv.fetch_flusurv_metadata() # scrape current issue from the main page issue = flusurv.get_current_issue(data) print(f"current issue: {int(issue)}") + # Ignore seasons with all dates older than one year + seasonids = { + season_blob["seasonid"] for season_blob in data["seasons"] + if delta_epiweeks(flusurv.mmwrid_to_epiweek(season_blob["endweek"]), issue) < max_age_to_consider_weeks + } + # fetch flusurv data if args.location == "all": # all locations for location in flusurv.location_to_code.keys(): - update(issue, location, args.test) + update(issue, location, seasonids, args.test) else: # single location - update(issue, args.location, args.test) + assert args.location in flusurv.location_to_code.keys(), \ + f"Requested location {args.location} not available" + update(issue, args.location, seasonids, args.test) if __name__ == "__main__": From 1e942a5568b96229f4d16a8d79c0308d2e72cbc6 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 8 Sep 2023 15:43:09 -0400 Subject: [PATCH 10/38] include old and new example API responses --- .../flusurv/new_grasp_location_result.json | 34 ++++ src/acquisition/flusurv/new_grasp_result.json | 167 ++++++++++++++++++ .../flusurv/old_grasp_location_result.json | 42 +++++ 
src/acquisition/flusurv/old_grasp_result.json | 89 ++++++++++ 4 files changed, 332 insertions(+) create mode 100644 src/acquisition/flusurv/new_grasp_location_result.json create mode 100644 src/acquisition/flusurv/new_grasp_result.json create mode 100644 src/acquisition/flusurv/old_grasp_location_result.json create mode 100644 src/acquisition/flusurv/old_grasp_result.json diff --git a/src/acquisition/flusurv/new_grasp_location_result.json b/src/acquisition/flusurv/new_grasp_location_result.json new file mode 100644 index 000000000..fe4fc09d9 --- /dev/null +++ b/src/acquisition/flusurv/new_grasp_location_result.json @@ -0,0 +1,34 @@ +### New location-specific API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool?appVersion=Public +# with payload +# {"appversion": "Public", "key": "getdata", "injson": [ +# {"networkid": 1, "catchmentid": 22, "seasonid": 30 }, +# {"networkid": 1, "catchmentid": 22, "seasonid": 49} +# ]} +# The "seasonid" parameter is required. To fetch all historical data, +# each season must be listed separately. Seasonids that don't exist don't +# cause errors, they are simply ignored. +# +# However, if no listed seasonids exist for the specified location, then +# the returned JSON is: +# { +# 'default_data': { +# 'response': 'No Data' +# } +# } +{ + # Data!! Format is list of dicts. Each obs' fields are + # labelled, so we could easily convert this to dataframe. 
+ 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ... 
+ ] +} diff --git a/src/acquisition/flusurv/new_grasp_result.json b/src/acquisition/flusurv/new_grasp_result.json new file mode 100644 index 000000000..dca59c6f0 --- /dev/null +++ b/src/acquisition/flusurv/new_grasp_result.json @@ -0,0 +1,167 @@ +### New API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool?appVersion=Public +# with payload +# {"appversion": "Public", "key": "", "injson": []} +{ + # Last data update date + 'loaddatetime': 'Aug 26, 2023', + # Text appearing on dashboard + 'app_text': [ + { + 'description': 'SplashScreenDisclaimerText', + 'text': 'The Influenza Hospitalization Surveillance Network (FluSurv-NET) conducts population-based surveillance for laboratory-confirmed influenza-associated hospitalizations in children (persons younger than 18 years) and adults. The current network covers over 70 counties in the 10 Emerging Infection..." + } + ], + # IDs for different data sources/networks + 'networks': [ + {'networkid': 1, 'name': 'FluSurv-NET'}, + {'networkid': 2, 'name': 'EIP'}, + {'networkid': 3, 'name': 'IHSP'} + ], + # IDs (network ID + catchment ID) specifying geos and data sources available + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 
51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + # Season labels, descriptions, IDs, and date ranges + 'seasons': [ + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 
'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2012-13', 'enabled': True, 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2015-16', 'enabled': True, 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'IncludeWeeklyRatesAndStrata': True}, + ... + ], + # Descriptions of data repporting groups (age, race, sex, and overall). Careful, some of these overlap. + # some of "parentidoverlap. + 'master_lookup': [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 
'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ], + # Colors by ID, presumably used in dashboard. Appears to be only some of the age groups. Also doesn't agree with "master_lookup" above. 
+ 'colors_qualitative': [ + {'id': 1, 'hex': '#a6cee3'}, + {'id': 2, 'hex': '#1f78b4'}, + {'id': 3, 'hex': '#b2df8a'}, + {'id': 4, 'hex': '#33a02c'}, + {'id': 5, 'hex': '#fb9a99'}, + {'id': 6, 'hex': '#e31a1c'}, + {'id': 7, 'hex': '#fdbf6f'}, + {'id': 8, 'hex': '#ff7f00'}, + {'id': 9, 'hex': '#cab2d6'}, + {'id': 10, 'hex': '#6a3d9a'}, + {'id': 12, 'hex': '#b15928'} + ], + # List of ageids available for each network+catchement combo + 'age_catchment_lookup': [ + {'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 1, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 2, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 3, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 4, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 5, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 7, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 8, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 9, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 10, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 11, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 12, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 21, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 22, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 97, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 98, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 99, 'NetworkId': 1, 'CatchmentID': 22}, + {'NetworkId': 2, 'CatchmentID': 1}, + {'ageID': 1, 'NetworkId': 2, 'CatchmentID': 1}, + {'ageID': 2, 'NetworkId': 2, 'CatchmentID': 1}, + ... + ], + # List of seasonids available for each network+catchement combo + 'season_catchment_lookup': [ + {'seasonid': 49, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 50, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 51, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 52, 'NetworkId': 1, 'CatchmentID': 22}, + ... 
+ {'raceid': 4, 'NetworkId': 2, 'CatchmentID': 14}, + {'raceid': 5, 'NetworkId': 2, 'CatchmentID': 14}, + {'raceid': 99, 'NetworkId': 2, 'CatchmentID': 14}, + {'NetworkId': 2, 'CatchmentID': 17}, + ... + ], + # Data!! But only for the overall category. Format is list + # of dicts. Each obs' fields are labelled, so we could easily + # convert this to dataframe. + 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ... + ], + # Mapping each mmwrid to a week number, season, and date. Could use this instead of our current epoch-based function. 
+ 'mmwr': [ + ..., + {'mmwrid': 2828, 'weekend': '2016-03-12', 'weeknumber': 10, 'weekstart': '2016-03-06', 'year': 2016, 'yearweek': 201610, 'seasonid': 55, 'label': 'Mar-12-2016', 'weekendlabel': 'Mar 12, 2016', 'weekendlabel2': 'Mar-12-2016'}, + {'mmwrid': 2885, 'weekend': '2017-04-15', 'weeknumber': 15, 'weekstart': '2017-04-09', 'year': 2017, 'yearweek': 201715, 'seasonid': 56, 'label': 'Apr-15-2017', 'weekendlabel': 'Apr 15, 2017', 'weekendlabel2': 'Apr-15-2017'}, + {'mmwrid': 2911, 'weekend': '2017-10-14', 'weeknumber': 41, 'weekstart': '2017-10-08', 'year': 2017, 'yearweek': 201741, 'seasonid': 57, 'label': 'Oct-14-2017', 'weekendlabel': 'Oct 14, 2017', 'weekendlabel2': 'Oct-14-2017'}, + {'mmwrid': 2928, 'weekend': '2018-02-10', 'weeknumber': 6, 'weekstart': '2018-02-04', 'year': 2018, 'yearweek': 201806, 'seasonid': 57, 'label': 'Feb-10-2018', 'weekendlabel': 'Feb 10, 2018', 'weekendlabel2': 'Feb-10-2018'}, + {'mmwrid': 2974, 'weekend': '2018-12-29', 'weeknumber': 52, 'weekstart': '2018-12-23', 'year': 2018, 'yearweek': 201852, 'seasonid': 58, 'label': 'Dec-29-2018', 'weekendlabel': 'Dec 29, 2018', 'weekendlabel2': 'Dec-29-2018'}, + {'mmwrid': 3031, 'weekend': '2020-02-01', 'weeknumber': 5, 'weekstart': '2020-01-26', 'year': 2020, 'yearweek': 202005, 'seasonid': 59, 'label': 'Feb-01-2020', 'weekendlabel': 'Feb 01, 2020', 'weekendlabel2': 'Feb-01-2020'}, + {'mmwrid': 3037, 'weekend': '2020-03-14', 'weeknumber': 11, 'weekstart': '2020-03-08', 'year': 2020, 'yearweek': 202011, 'seasonid': 59, 'label': 'Mar-14-2020', 'weekendlabel': 'Mar 14, 2020', 'weekendlabel2': 'Mar-14-2020'}, + {'mmwrid': 3077, 'weekend': '2020-12-19', 'weeknumber': 51, 'weekstart': '2020-12-13', 'year': 2020, 'yearweek': 202051, 'seasonid': 60, 'label': 'Dec-19-2020', 'weekendlabel': 'Dec 19, 2020', 'weekendlabel2': 'Dec-19-2020'}, + {'mmwrid': 3140, 'weekend': '2022-03-05', 'weeknumber': 9, 'weekstart': '2022-02-27', 'year': 2022, 'yearweek': 202209, 'seasonid': 61, 'label': 
'Mar-05-2022', 'weekendlabel': 'Mar 05, 2022', 'weekendlabel2': 'Mar-05-2022'}, + {'mmwrid': 3183, 'weekend': '2022-12-31', 'weeknumber': 52, 'weekstart': '2022-12-25', 'year': 2022, 'yearweek': 202252, 'seasonid': 62, 'label': 'Dec-31-2022', 'weekendlabel': 'Dec 31, 2022', 'weekendlabel2': 'Dec-31-2022'}, + ] +} diff --git a/src/acquisition/flusurv/old_grasp_location_result.json b/src/acquisition/flusurv/old_grasp_location_result.json new file mode 100644 index 000000000..6ef4952e8 --- /dev/null +++ b/src/acquisition/flusurv/old_grasp_location_result.json @@ -0,0 +1,42 @@ +### Old location-specific API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03GetData?appVersion=Public +# with payload +# { +# "appversion": "Public", +# "networkid": location_code[0], +# "cacthmentid": location_code[1], +# } +{ + 'busdata': { + 'datafields': ['mmwrid', 'weeknumber', 'rate', 'weeklyrate'], + "dataseries": [ + {'season': 55, 'age': 8, 'data': [[2806, 40, 0.7, 0.7], [2807, 41, 0.7, 0], [2808, 42, 1.4, 0.7], [2809, 43, 1.4, 0], [2810, 44, 2.1, 0.7], [2811, 45, 2.1, 0], [2812, 46, 2.1, 0], [2813, 47, 2.1, 0], [2814, 48, 2.8, 0.7], [2815, 49, 4.2, 1.4], [2816, 50, 4.2, 0], [2817, 51, 6.4, 2.1], [2818, 52, 8.5, 2.1], [2819, 1, 13.4, 4.9], [2820, 2, 17, 3.5], [2821, 3, 20.5, 3.5], [2822, 4, 26.2, 5.7], [2823, 5, 30.4, 4.2], [2824, 6, 40.3, 9.9], [2825, 7, 47.4, 7.1], [2826, 8, 53.7, 6.4], [2827, 9, 63.6, 9.9], [2828, 10, 74.9, 11.3], [2829, 11, 82, 7.1], [2830, 12, 91.2, 9.2], [2831, 13, 98.3, 7.1], [2832, 14, 103.9, 5.7], [2833, 15, 106.8, 2.8], [2834, 16, 109.6, 2.8], [2835, 17, 111.7, 2.1]]}, + {'season': 57, 'age': 2, 'data': [[2910, 40, 0, 0], [2911, 41, 0, 0], [2912, 42, 0, 0], [2913, 43, 0, 0], [2914, 44, 0.2, 0.2], [2915, 45, 0.4, 0.2], [2916, 46, 0.8, 0.4], [2917, 47, 0.8, 0], [2918, 48, 0.8, 0], [2919, 49, 1.3, 0.6], [2920, 50, 1.7, 0.4], [2921, 51, 1.9, 0.2], [2922, 52, 3.1, 1.1], [2923, 1, 4.8, 1.7], [2924, 2, 6.9, 2.1], [2925, 3, 7.9, 1], [2926, 4, 9.2, 1.3], 
[2927, 5, 10.5, 1.3], [2928, 6, 11.3, 0.8], [2929, 7, 11.5, 0.2], [2930, 8, 12.6, 1.1], [2931, 9, 13.8, 1.1], [2932, 10, 15.1, 1.3], [2933, 11, 15.9, 0.8], [2934, 12, 16.3, 0.4], [2935, 13, 16.7, 0.4], [2936, 14, 16.9, 0.2], [2937, 15, 16.9, 0], [2938, 16, 16.9, 0], [2939, 17, 17, 0.2]]}, + ... + ] + }, + 'seasons': [ + {'description': 'Season 2003-04', 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'color': 'Dark Purple', 'color_hexvalue': '#4A298B'}, + {'description': 'Season 2004-05', 'endweek': 2283, 'label': '2004-05', 'seasonid': 44, 'startweek': 2232, 'color': 'Brown', 'color_hexvalue': '#76522E'}, + {'description': 'Season 2005-06', 'endweek': 2335, 'label': '2005-06', 'seasonid': 45, 'startweek': 2284, 'color': 'Salmon', 'color_hexvalue': '#C76751'}, + {'description': 'Season 2006-07', 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'color': 'Purple', 'color_hexvalue': '#B92CC6'}, + {'description': 'Season 2007-08', 'endweek': 2439, 'label': '2007-08', 'seasonid': 47, 'startweek': 2388, 'color': 'Blue', 'color_hexvalue': '#2A44C8'}, + {'description': 'Season 2008-09', 'endweek': 2487, 'label': '2008-09', 'seasonid': 48, 'startweek': 2440, 'color': 'Green', 'color_hexvalue': '#299A42'}, + {'description': 'Season 2009-10', 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'color': 'Red', 'color_hexvalue': '#A12732'}, + {'description': 'Season 2010-11', 'endweek': 2596, 'label': '2010-11', 'seasonid': 50, 'startweek': 2545, 'color': 'Mustard', 'color_hexvalue': '#BDAA2A'}, + {'description': 'Season 2011-12', 'endweek': 2648, 'label': '2011-12', 'seasonid': 51, 'startweek': 2597, 'color': 'Light Blue', 'color_hexvalue': '#3289A4'}, + {'description': 'Season 2012-13', 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'color': 'Grey', 'color_hexvalue': '#5E5E5E'}, + {'description': 'Season 2013-14', 'endweek': 2752, 'label': '2013-14', 'seasonid': 53, 'startweek': 2701, 
'color': 'Light Blue', 'color_hexvalue': '#42B5C8'}, + {'description': 'Season 2014-15', 'endweek': 2805, 'label': '2014-15', 'seasonid': 54, 'startweek': 2753, 'color': 'Mint', 'color_hexvalue': '#4EC87B'}, + {'description': 'Season 2015-16', 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'color': 'Orange', 'color_hexvalue': '#C7852E'}, + {'description': 'Season 2016-17', 'endweek': 2909, 'label': '2016-17', 'seasonid': 56, 'startweek': 2858, 'color': 'Purple', 'color_hexvalue': '#7F42A9'}, + {'description': 'Season 2017-18', 'endweek': 2961, 'label': '2017-18', 'seasonid': 57, 'startweek': 2910, 'color': 'Lime', 'color_hexvalue': '#8AC73C'}, + {'description': 'Season 2018-19', 'endweek': 3013, 'label': '2018-19', 'seasonid': 58, 'startweek': 2962, 'color': 'Brown', 'color_hexvalue': '#A06C3A'}, + {'description': 'Season 2019-20', 'endweek': 3065, 'label': '2019-20', 'seasonid': 59, 'startweek': 3014, 'color': 'Light Orange', 'color_hexvalue': '#FFCF48'} + ], + 'mmwr': [ + {'mmwrid': 2179, 'weekend': '2003-10-04', 'weeknumber': 40, 'weekstart': '2003-09-28', 'year': 2003, 'yearweek': 200340, 'seasonid': 43, 'label': '40', 'weekendlabel': 'Oct 04, 2003', 'weekendlabel2': 'Oct-04-2003'}, + {'mmwrid': 2180, 'weekend': '2003-10-11', 'weeknumber': 41, 'weekstart': '2003-10-05', 'year': 2003, 'yearweek': 200341, 'seasonid': 43, 'label': '', 'weekendlabel': 'Oct 11, 2003', 'weekendlabel2': 'Oct-11-2003'}, + {'mmwrid': 2181, 'weekend': '2003-10-18', 'weeknumber': 42, 'weekstart': '2003-10-12', 'year': 2003, 'yearweek': 200342, 'seasonid': 43, 'label': '', 'weekendlabel': 'Oct 18, 2003', 'weekendlabel2': 'Oct-18-2003'}, + ... 
+ ] +} diff --git a/src/acquisition/flusurv/old_grasp_result.json b/src/acquisition/flusurv/old_grasp_result.json new file mode 100644 index 000000000..14fb72b66 --- /dev/null +++ b/src/acquisition/flusurv/old_grasp_result.json @@ -0,0 +1,89 @@ +### Old API response from https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp?appVersion=Public +{ + 'loaddatetime': 'Mar 20, 2021', + 'disclaimer': { + 'id': 1, + 'splashscreensisclaimersext': 'The Influenza Hospitalization Surveillance Network (FluSurv-NET) conducts population-based surveillance for laboratory-confirmed influenza-associated hospitalizations in children (persons younger than 18 years) and adults. The current network covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, and TN) and three additional states (MI, OH, and UT). The network represents approximately 8.5% of US population (~27 million people).

Cases are identified by reviewing hospital, laboratory, and admission databases and infection control logs for patients hospitalized during the influenza season with a documented positive influenza test (i.e., viral culture, direct/indirect fluorescent antibody assay (DFA/IFA), reverse transcription-polymerase chain reaction (RT-PCR), or a rapid influenza diagnostic test (RIDT)).

Data gathered are used to estimate age-specific hospitalization rates on a weekly basis, and describe characteristics of persons hospitalized with severe influenza illness. Laboratory-confirmation is dependent on clinician-ordered influenza testing. Therefore, the rates provided are likely to be underestimated as influenza-related hospitalizations can be missed, either because testing is not performed, or because cases may be attributed to other causes of pneumonia or other common influenza-related complications.

FluSurv-NET hospitalization data are preliminary and subject to change as more data become available. Please use the following citation when referencing these data: “FluView: Influenza Hospitalization Surveillance Network, Centers for Disease Control and Prevention. WEBSITE. Accessed on DATE”.', + 'pagedisclaimertext': "The Influenza Hospitalization Surveillance Network (FluSurv-NET) data are preliminary and subject to change as more data become available. \r\n All incidence rates are unadjusted. FluSurv-NET conducts population-based surveillance for laboratory-confirmed influenza associated \r\n hospitalizations in children <18 years of age (since 2003-2004 influenza season) and adults (since 2005-2006 influenza season). \r\n The FluSurv-NET covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, TN) \r\n and additional Influenza Hospitalization Surveillance Project (IHSP) states. The IHSP began during the 2009-2010 season to enhance \r\n surveillance during the 2009 H1N1 pandemic. IHSP sites included IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, \r\n and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014 season and later. Cumulative and weekly unadjusted incidence rates (per 100,000 population) are calculated using the National Center for Health Statistics' (NCHS) \r\n population estimates for the counties included in the surveillance catchment area. The rates provided are likely to be underestimated as \r\n influenza-associated hospitalizations can be missed if influenza is not suspected and tested for.", + 'surveillancehelptext': 'The Emerging Infections Program (EIP) has conducted ongoing population-based influenza-hospitalization surveillance since the 2003-2004 season. EIP sites include counties within CA, CO, CT, GA, MD, MN, NM, NY, OR, TN. 
The Influenza Hospitalization Surveillance Project (IHSP) began during the 2009-2010 season to enhance surveillance during the 2009 H1N1 pandemic. IHSP sites included counties within IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014, 2014-15 and 2015-2016 seasons. Together, the EIP and IHSP sites comprise the Influenza Hospitalization Surveillance Network (FluSurv-NET). The FluSurv-NET represents approximately 9% of the US population (~27 million people). FluSurv-NET hospitalization data are preliminary and subject to change as more data become available. All incidence rates are unadjusted.', + 'groupbyhelptext': 'Age Groups: During the 2003-2004 and 2004-2005 seasons, Emerging Infections Program (EIP) sites conducted population-based influenza-hospitalization surveillance for pediatric (persons younger than 18 years) cases only. Surveillance for adults began during the 2005-2006 season and all-age surveillance for laboratory-confirmed influenza associated hospitalizations has been ongoing since then. Seasons: Most calendar years can be divided into 52 epidemiologic weeks (MMWR weeks), but occasionally some years will have 53 weeks. For the Influenza Surveillance Network (FluSurv-Net), the influenza season is normally from October 1 through April 30 (MMWR Week 40-17). However, due to the onset of the influenza A (H1N1)pdm09 pandemic in spring of 2009, the 2008-2009 influenza season ended April 14, 2009 and the 2009-2010 season began September 1, 2009 (MMWR Week 35). The number of sites contributing to the influenza-hospitalization surveillance network has changed over the years. 
Please, refer to the Surveillance Area Help Button for more detailed information.', + 'downloaddatadisclaimertext': "The Influenza Hospitalization Surveillance Network (FluSurv-NET) data are preliminary and subject to change as more data become available. All incidence rates are unadjusted. FluSurv-NET conducts population-based surveillance for laboratory-confirmed influenza related hospitalizations in children <18 years of age (since 2003-2004 influenza season) and adults (since 2005-2006 influenza season). The FluSurv-NET covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, TN) and additional Influenza Hospitalization Surveillance Project (IHSP) states. The IHSP began during the 2009-2010 season to enhance surveillance during the 2009 H1N1 pandemic. IHSP sites included IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014 , 2014-2015 and 2015-2016 seasons. Cumulative and weekly unadjusted incidence rates (per 100,000 population) are calculated using the National Center for Health Statistics' (NCHS) population estimates for the counties included in the surveillance catchment area. The rates provided are likely to be underestimated as influenza-related hospitalizations can be missed, either because testing is not performed, or because cases may be attributed to other causes of pneumonia or other common influenza-related complications." 
+ }, + 'networks': [ + {'networkid': 1, 'name': 'FluSurv-NET'}, + {'networkid': 2, 'name': 'EIP'}, + {'networkid': 3, 'name': 'IHSP'} + ], + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 
'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + 'seasons': [ + {'description': 'Season 2009-10', 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'color': 'Red', 'color_hexvalue': '#A12732'}, + {'description': 'Season 2010-11', 'endweek': 2596, 'label': '2010-11', 'seasonid': 50, 'startweek': 2545, 'color': 'Mustard', 'color_hexvalue': '#BDAA2A'}, + {'description': 'Season 2011-12', 'endweek': 2648, 'label': '2011-12', 'seasonid': 51, 'startweek': 2597, 'color': 'Light Blue', 'color_hexvalue': '#3289A4'}, + {'description': 'Season 2012-13', 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'color': 'Grey', 'color_hexvalue': '#5E5E5E'}, + {'description': 'Season 2013-14', 'endweek': 2752, 'label': '2013-14', 'seasonid': 53, 'startweek': 2701, 'color': 'Light Blue', 'color_hexvalue': '#42B5C8'}, + {'description': 'Season 2014-15', 'endweek': 2805, 'label': '2014-15', 'seasonid': 54, 'startweek': 2753, 'color': 'Mint', 'color_hexvalue': '#4EC87B'}, + {'description': 'Season 2015-16', 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'color': 'Orange', 'color_hexvalue': '#C7852E'}, + {'description': 'Season 2016-17', 'endweek': 2909, 'label': '2016-17', 'seasonid': 56, 'startweek': 2858, 'color': 
'Purple', 'color_hexvalue': '#7F42A9'}, + {'description': 'Season 2017-18', 'endweek': 2961, 'label': '2017-18', 'seasonid': 57, 'startweek': 2910, 'color': 'Lime', 'color_hexvalue': '#8AC73C'}, + {'description': 'Season 2018-19', 'endweek': 3013, 'label': '2018-19', 'seasonid': 58, 'startweek': 2962, 'color': 'Brown', 'color_hexvalue': '#A06C3A'}, + {'description': 'Season 2019-20', 'endweek': 3065, 'label': '2019-20', 'seasonid': 59, 'startweek': 3014, 'color': 'Light Orange', 'color_hexvalue': '#FFCF48'} + ], + 'ages': [ + {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'}, + {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'} + ], + 'busdata': { + 'datafields': ['mmwrid', 'weeknumber', 'rate', 'weeklyrate'], + 'dataseries': [ + ..., + {'season': 56, 'age': 4, 'data': [[2858, 40, 0.1, 0.1], [2859, 41, 0.2, 0.1], [2860, 42, 0.3, 0.1], [2861, 43, 0.5, 0.1], [2862, 44, 0.6, 0.1], [2863, 45, 0.8, 0.2], [2864, 46, 1, 0.2], [2865, 47, 1.4, 0.4], [2866, 48, 1.7, 0.4], [2867, 49, 2.3, 0.5], [2868, 50, 3.3, 1.1], [2869, 51, 4.8, 1.5], [2870, 52, 7.8, 2.9], [2871, 1, 11.7, 4], [2872, 2, 15.7, 3.9], [2873, 3, 19.5, 3.8], [2874, 4, 22.9, 3.4], [2875, 5, 26.3, 3.4], [2876, 6, 31.1, 4.8], [2877, 7, 36.1, 5], [2878, 8, 41.5, 5.3], [2879, 9, 44.7, 3.2], [2880, 10, 48, 3.3], [2881, 11, 50.9, 2.9], [2882, 12, 54, 3.1], [2883, 13, 57.3, 3.3], [2884, 14, 59.3, 2], [2885, 15, 60.8, 1.5], [2886, 16, 62, 1.2], [2887, 17, 62.7, 0.8]]}, + {'season': 55, 'age': 8, 'data': [[2806, 40, 0.6, 0.6], [2807, 41, 1, 0.4], [2808, 
42, 1.3, 0.3], [2809, 43, 1.3, 0], [2810, 44, 1.8, 0.5], [2811, 45, 2.3, 0.5], [2812, 46, 2.4, 0.1], [2813, 47, 2.4, 0], [2814, 48, 2.8, 0.4], [2815, 49, 3.2, 0.4], [2816, 50, 4, 0.8], [2817, 51, 5.5, 1.6], [2818, 52, 6.4, 0.9], [2819, 1, 8, 1.6], [2820, 2, 9.6, 1.6], [2821, 3, 10.9, 1.3], [2822, 4, 12.6, 1.8], [2823, 5, 14.8, 2.2], [2824, 6, 19.6, 4.7], [2825, 7, 25.1, 5.5], [2826, 8, 30.9, 5.8], [2827, 9, 41.4, 10.5], [2828, 10, 54.2, 12.8], [2829, 11, 63.5, 9.3], [2830, 12, 73.9, 10.4], [2831, 13, 83.5, 9.6], [2832, 14, 90.3, 6.8], [2833, 15, 95.6, 5.3], [2834, 16, 100.7, 5.1], [2835, 17, 103.7, 3]]}, + {'season': 59, 'age': 11, 'data': [[3014, 40, 0, 0], [3015, 41, 0.1, 0], [3016, 42, 0.2, 0.1], [3017, 43, 0.2, 0], [3018, 44, 0.3, 0.1], [3019, 45, 0.5, 0.2], [3020, 46, 0.8, 0.3], [3021, 47, 1.1, 0.4], [3022, 48, 1.7, 0.5], [3023, 49, 2.5, 0.9], [3024, 50, 3.5, 1], [3025, 51, 4.8, 1.3], [3026, 52, 7.2, 2.4], [3027, 1, 10.2, 3], [3028, 2, 12.7, 2.5], [3029, 3, 14.7, 2], [3030, 4, 17.1, 2.3], [3031, 5, 19.9, 2.8], [3032, 6, 23.5, 3.6], [3033, 7, 25.7, 2.2], [3034, 8, 27.9, 2.2], [3035, 9, 30, 2], [3036, 10, 31.9, 1.9], [3037, 11, 33.2, 1.4], [3038, 12, 34.1, 0.8], [3039, 13, 34.2, 0.1], [3040, 14, 34.2, 0], [3041, 15, 34.2, 0], [3042, 16, 34.3, 0.1], [3043, 17, 34.3, 0]]}, + {'season': 50, 'age': 4, 'data': [[2545, 40, 0.1, 0.1], [2546, 41, 0.1, 0.1], [2547, 42, 0.2, 0.1], [2548, 43, 0.2, 0.1], [2549, 44, 0.3, 0.1], [2550, 45, 0.3, 0.1], [2551, 46, 0.6, 0.2], [2552, 47, 0.7, 0.2], [2553, 48, 0.9, 0.2], [2554, 49, 1.2, 0.3], [2555, 50, 1.5, 0.3], [2556, 51, 2, 0.5], [2557, 52, 2.7, 0.7], [2558, 1, 3.6, 0.9], [2559, 2, 4.6, 1], [2560, 3, 5.9, 1.2], [2561, 4, 7.3, 1.4], [2562, 5, 8.9, 1.6], [2563, 6, 10.5, 1.6], [2564, 7, 12.7, 2.2], [2565, 8, 15.2, 2.5], [2566, 9, 17.2, 2], [2567, 10, 18.7, 1.5], [2568, 11, 19.9, 1.3], [2569, 12, 20.7, 0.8], [2570, 13, 21.2, 0.5], [2571, 14, 21.5, 0.4], [2572, 15, 21.7, 0.2], [2573, 16, 21.8, 0.1], [2574, 17, 21.9, 0.1]]}, + 
{'season': 58, 'age': 1, 'data': [[2962, 40, 0.1, 0.1], [2963, 41, 0.3, 0.2], [2964, 42, 0.3, 0.1], [2965, 43, 0.5, 0.2], [2966, 44, 1, 0.5], [2967, 45, 1.4, 0.4], [2968, 46, 1.7, 0.3], [2969, 47, 2.3, 0.6], [2970, 48, 3.6, 1.3], [2971, 49, 5.5, 1.9], [2972, 50, 8.5, 3], [2973, 51, 12.3, 3.8], [2974, 52, 17.7, 5.4], [2975, 1, 21.7, 3.9], [2976, 2, 24.9, 3.2], [2977, 3, 27.4, 2.5], [2978, 4, 30.5, 3.1], [2979, 5, 34.5, 4.1], [2980, 6, 37.6, 3.1], [2981, 7, 41.6, 4], [2982, 8, 46.2, 4.6], [2983, 9, 50.2, 3.9], [2984, 10, 54.9, 4.7], [2985, 11, 59.2, 4.3], [2986, 12, 62.6, 3.4], [2987, 13, 65.3, 2.7], [2988, 14, 67.6, 2.3], [2989, 15, 68.9, 1.3], [2990, 16, 69.8, 0.9], [2991, 17, 70.9, 1]]}, + {'season': 52, 'age': 10, 'data': [[2649, 40, 0, 0], [2649, 40, 0, 0], [2650, 41, 0, 0], [2650, 41, 0, 0], [2651, 42, 0.1, 0], [2651, 42, 0, 0], [2652, 43, 0.1, 0], [2652, 43, 0, 0], [2653, 44, 0.1, 0], [2653, 44, 0, 0], [2654, 45, 0.2, 0.1], [2654, 45, 0.1, 0], [2655, 46, 0.5, 0.1], [2655, 46, 0.1, 0], [2656, 47, 0.8, 0.2], [2656, 47, 0.2, 0], [2657, 48, 1.3, 0.2], [2657, 48, 0.3, 0], [2658, 49, 2.3, 0.7], [2658, 49, 0.5, 0.1], [2659, 50, 3.7, 0.7], [2659, 50, 0.7, 0.1], [2660, 51, 5.3, 0.9], [2660, 51, 1.1, 0.2], [2661, 52, 7.4, 1.2], [2661, 52, 1.5, 0.2], [2662, 1, 9.9, 1.3], [2662, 1, 1.9, 0.2], [2663, 2, 12.2, 1.1], [2663, 2, 2.3, 0.2], [2664, 3, 14.4, 1], [2664, 3, 2.6, 0.2], [2665, 4, 16.3, 0.9], [2665, 4, 2.9, 0.2], [2666, 5, 17.8, 0.6], [2666, 5, 3.2, 0.1], [2667, 6, 19, 0.5], [2667, 6, 3.3, 0.1], [2668, 7, 20, 0.6], [2668, 7, 3.5, 0.1], [2669, 8, 21.1, 0.5], [2669, 8, 3.7, 0.1], [2670, 9, 22, 0.4], [2670, 9, 3.8, 0.1], [2671, 10, 22.8, 0.4], [2671, 10, 4, 0.1], [2672, 11, 23.4, 0.3], [2672, 11, 4.1, 0], [2673, 12, 23.8, 0.2], [2673, 12, 4.1, 0], [2674, 13, 24.2, 0.2], [2674, 13, 4.2, 0], [2675, 14, 24.6, 0.2], [2675, 14, 4.2, 0], [2676, 15, 24.9, 0.1], [2676, 15, 4.3, 0], [2677, 16, 25.1, 0.1], [2677, 16, 4.3, 0], [2678, 17, 25.2, 0.1], [2678, 17, 4.3, 0]]}, + 
{'season': 54, 'age': 7, 'data': [[2753, 40, 0.2, 0.2], [2754, 41, 0.4, 0.1], [2755, 42, 0.9, 0.5], [2756, 43, 1, 0.1], [2757, 44, 1.1, 0.1], [2758, 45, 1.6, 0.4], [2759, 46, 2, 0.5], [2760, 47, 2.9, 0.9], [2761, 48, 5.3, 2.3], [2762, 49, 9.9, 4.7], [2763, 50, 17.6, 7.7], [2764, 51, 30.1, 12.5], [2765, 52, 49.4, 19.3], [2766, 53, 69, 19.5], [2767, 1, 83.2, 14.2], [2768, 2, 91.4, 8.2], [2769, 3, 98.6, 7.3], [2770, 4, 104.6, 5.9], [2771, 5, 110.9, 6.3], [2772, 6, 116, 5.2], [2773, 7, 119.2, 3.1], [2774, 8, 122.3, 3.2], [2775, 9, 124.5, 2.1], [2776, 10, 127.5, 3], [2777, 11, 130.1, 2.7], [2778, 12, 132.4, 2.3], [2779, 13, 135, 2.6], [2780, 14, 137.7, 2.7], [2781, 15, 139.1, 1.4], [2782, 16, 140.3, 1.1], [2783, 17, 141, 0.8]]} + ] + }, + 'mmwr': [ + ..., + {'mmwrid': 3038, 'weekend': '2020-03-21', 'weeknumber': 12, 'weekstart': '2020-03-15', 'year': 2020, 'yearweek': 202012, 'seasonid': 59, 'label': '', 'weekendlabel': 'Mar 21, 2020', 'weekendlabel2': 'Mar-21-2020'}, + {'mmwrid': 3039, 'weekend': '2020-03-28', 'weeknumber': 13, 'weekstart': '2020-03-22', 'year': 2020, 'yearweek': 202013, 'seasonid': 59, 'label': '', 'weekendlabel': 'Mar 28, 2020', 'weekendlabel2': 'Mar-28-2020'}, + {'mmwrid': 3040, 'weekend': '2020-04-04', 'weeknumber': 14, 'weekstart': '2020-03-29', 'year': 2020, 'yearweek': 202014, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 04, 2020', 'weekendlabel2': 'Apr-04-2020'}, + {'mmwrid': 3041, 'weekend': '2020-04-11', 'weeknumber': 15, 'weekstart': '2020-04-05', 'year': 2020, 'yearweek': 202015, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 11, 2020', 'weekendlabel2': 'Apr-11-2020'}, + {'mmwrid': 3042, 'weekend': '2020-04-18', 'weeknumber': 16, 'weekstart': '2020-04-12', 'year': 2020, 'yearweek': 202016, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 18, 2020', 'weekendlabel2': 'Apr-18-2020'}, + {'mmwrid': 3043, 'weekend': '2020-04-25', 'weeknumber': 17, 'weekstart': '2020-04-19', 'year': 2020, 'yearweek': 202017, 'seasonid': 59, 'label': '17', 
'weekendlabel': 'Apr 25, 2020', 'weekendlabel2': 'Apr-25-2020'} + ] +} From f8a6706eeaeb118d667e26837f7512b9d309917e Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 12 Sep 2023 19:17:28 -0400 Subject: [PATCH 11/38] flusurv tests --- src/acquisition/flusurv/flusurv.py | 4 +- tests/acquisition/flusurv/test_flusurv.py | 187 +++++++++++++++++++++- 2 files changed, 185 insertions(+), 6 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 18326bbf2..898f453fe 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -253,8 +253,8 @@ def get_data(location, seasonids): Fetch and parse flu data for the given location. This method performs the following operations: - - filters location-specific FluSurv data from CDC API response object - - extracts and returns hospitalization rates + - fetch location-specific FluSurv data from CDC API + - extracts and returns hospitalization rates for each epiweek """ # fetch print("[fetching flusurv data...]") diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index e0c5acaad..e4f771c12 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -2,15 +2,102 @@ # standard library import unittest -from unittest.mock import MagicMock -from unittest.mock import sentinel +from unittest.mock import (MagicMock, sentinel, patch) -from delphi.epidata.acquisition.flusurv.flusurv import fetch_json +import delphi.epidata.acquisition.flusurv.flusurv as flusurv # py3tester coverage target __test_target__ = "delphi.epidata.acquisition.flusurv.flusurv" +# Example location-specific return JSON from CDC GRASP API. Contains +# partial data for "network_all" location and season 49. 
+network_all_example_data = { + 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'rate': 42, 'weeklyrate': 0.5, 'mmwrid': 2519}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ] +} + +# Example metadata response containing "master_lookup" element only, used +# for mapping between valueids and strata descriptions +master_lookup_metadata = { + 'master_lookup': [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 
'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 
'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ], +} + +# Map derived from "master_lookup" dictionary above mapping between valueids +# by type and cleaned-up descriptions (no spaces or capital letters, etc) +id_label_map = { + "Age": { + 1: "0t4", + 2: "5t17", + 3: "18t49", + 4: "50t64", + 5: "65+", + 7: "65t74", + 8: "75t84", + 9: "85+", + 10: "18t29", + 11: "30t39", + 12: "40t49", + 21: "5t11", + 22: "12t17", + 97: "<18", + 98: ">=18", + }, + "Race": { + 1: "white", + 2: "black", + 3: "hispaniclatino", + 4: "asianpacificislander", + 5: "americanindianalaskanative", + }, + "Sex": { + 1: "male", + 2: "female", + }, +} + + class FunctionTests(unittest.TestCase): """Tests each function individually.""" @@ -28,6 +115,98 @@ def test_fetch_json(self): requests_impl = MagicMock() requests_impl.get.return_value = response_object - actual = fetch_json(path, payload, requests_impl=requests_impl) + actual = flusurv.fetch_json(path, payload, requests_impl=requests_impl) self.assertEqual(actual, sentinel.expected) + + def test_mmwrid_to_epiweek(self): + # Test epoch + self.assertEqual(flusurv.mmwrid_to_epiweek(2179), 200340) + + metadata = flusurv.fetch_flusurv_metadata() + for mmwr in metadata["mmwr"]: + self.assertEqual(flusurv.mmwrid_to_epiweek(mmwr["mmwrid"]), mmwr["yearweek"]) + + @patch(__test_target__ + ".fetch_flusurv_location") + def test_get_data(self, MockFlusurvLocation): + MockFlusurvLocation.return_value = 
network_all_example_data + + self.assertEqual(flusurv.get_data("network_all", [30, 49]), { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1}, + } + ) + + @patch(__test_target__ + ".fetch_flusurv_metadata") + def test_group_by_epiweek(self, MockFlusurvMetadata): + # Flusurv metadata is fetched by `make_id_label_map()`. + MockFlusurvMetadata.return_value = master_lookup_metadata + + input_data = network_all_example_data + self.assertEqual(flusurv.group_by_epiweek(input_data), { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1}, + } + ) + + duplicate_input_data = { + 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'rate': 42, 'weeklyrate': 0.5, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'rate': 42, 'weeklyrate': 54, 'mmwrid': 2519}, + ] + } + + with self.assertWarnsRegex(Warning, "warning: Multiple rates seen for 201014"): + flusurv.group_by_epiweek(duplicate_input_data) + + with self.assertRaisesRegex(Exception, "no data found"): + flusurv.group_by_epiweek({"default_data": []}) + + @patch('builtins.print') + def test_group_by_epiweek_print_msgs(self, mock_print): + input_data = network_all_example_data + flusurv.group_by_epiweek(input_data) + mock_print.assert_called_with("found data for 4 epiweeks") + + def test_get_current_issue(self): + input_data = { + 'loaddatetime': 'Sep 12, 2023' + } + 
self.assertEqual(flusurv.get_current_issue(input_data), 202337) + + @patch(__test_target__ + ".fetch_flusurv_metadata") + def test_make_id_label_map(self, MockFlusurvMetadata): + MockFlusurvMetadata.return_value = master_lookup_metadata + self.assertEqual(flusurv.make_id_label_map(), id_label_map) + + def test_groupids_to_name(self): + ids = ( + (1, 0, 0), + (9, 0, 0), + (0, 2, 0), + (0, 0, 3), + (0, 0, 5), + (0, 0, 0), + ) + expected_list = [ + "rate_age_0", + "rate_age_7", + "rate_sex_female", + "rate_race_hispaniclatino", + "rate_race_americanindianalaskanative", + "rate_overall", + ] + + for (ageid, sexid, raceid), expected in zip(ids, expected_list): + self.assertEqual(flusurv.groupids_to_name(ageid, sexid, raceid, id_label_map), expected) + + with self.assertRaisesRegex(ValueError, "Ageid cannot be 6"): + flusurv.groupids_to_name(6, 0, 0, id_label_map) + with self.assertRaisesRegex(AssertionError, "At most one groupid can be non-zero"): + flusurv.groupids_to_name(1, 1, 0, id_label_map) + flusurv.groupids_to_name(0, 1, 1, id_label_map) + flusurv.groupids_to_name(1, 1, 1, id_label_map) From 9ac291881854cf0036f9be99ebdf9eefa9688c0f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Sep 2023 15:59:10 -0400 Subject: [PATCH 12/38] pass metadata around to reduce API calls --- src/acquisition/flusurv/flusurv.py | 14 ++++++-------- src/acquisition/flusurv/flusurv_update.py | 14 +++++++------- tests/acquisition/flusurv/test_flusurv.py | 22 ++++++++-------------- 3 files changed, 21 insertions(+), 29 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 898f453fe..15a2f006f 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -173,7 +173,7 @@ def mmwrid_to_epiweek(mmwrid): return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() -def group_by_epiweek(data): +def group_by_epiweek(data, metadata): """ Convert default data for 
a single location into an epiweek-grouped dictionary @@ -201,7 +201,7 @@ def group_by_epiweek(data): if len(data) == 0: raise Exception("no data found") - id_label_map = make_id_label_map() + id_label_map = make_id_label_map(metadata) # Create output object # First layer of keys is epiweeks. Second layer of keys is groups @@ -248,7 +248,7 @@ def group_by_epiweek(data): return data_out -def get_data(location, seasonids): +def get_data(location, seasonids, metadata): """ Fetch and parse flu data for the given location. @@ -262,7 +262,7 @@ def get_data(location, seasonids): # extract print("[reformatting flusurv result...]") - data_out = group_by_epiweek(data_in) + data_out = group_by_epiweek(data_in, metadata) # return print(f"[successfully fetched data for {location}]") @@ -283,12 +283,10 @@ def get_current_issue(data): return EpiDate(date.year, date.month, date.day).get_ew() -def make_id_label_map(): +def make_id_label_map(metadata): """Create a map from valueid to group description""" - data = fetch_flusurv_metadata() - id_to_label = defaultdict(lambda: defaultdict(lambda: None)) - for group in data["master_lookup"]: + for group in metadata["master_lookup"]: # Skip "overall" group if group["Variable"] is None: continue diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index ed04252f2..87cfd582e 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -93,10 +93,10 @@ def get_rows(cur): return num -def update(issue, location, seasonids, test_mode=False): +def update(issue, location, seasonids, metadata, test_mode=False): """Fetch and store the currently available weekly FluSurv dataset.""" # Fetch location-specific data - data = flusurv.get_data(location, seasonids) + data = flusurv.get_data(location, seasonids, metadata) # metadata epiweeks = sorted(data.keys()) @@ -318,15 +318,15 @@ def main(): # fmt: on args = parser.parse_args() - data = 
flusurv.fetch_flusurv_metadata() + metadata = flusurv.fetch_flusurv_metadata() # scrape current issue from the main page - issue = flusurv.get_current_issue(data) + issue = flusurv.get_current_issue(metadata) print(f"current issue: {int(issue)}") # Ignore seasons with all dates older than one year seasonids = { - season_blob["seasonid"] for season_blob in data["seasons"] + season_blob["seasonid"] for season_blob in metadata["seasons"] if delta_epiweeks(flusurv.mmwrid_to_epiweek(season_blob["endweek"]), issue) < max_age_to_consider_weeks } @@ -334,12 +334,12 @@ def main(): if args.location == "all": # all locations for location in flusurv.location_to_code.keys(): - update(issue, location, seasonids, args.test) + update(issue, location, seasonids, metadata, args.test) else: # single location assert args.location in flusurv.location_to_code.keys(), \ f"Requested location {args.location} not available" - update(issue, args.location, seasonids, args.test) + update(issue, args.location, seasonids, metadata, args.test) if __name__ == "__main__": diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index e4f771c12..7ffb4dfdd 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -131,7 +131,7 @@ def test_mmwrid_to_epiweek(self): def test_get_data(self, MockFlusurvLocation): MockFlusurvLocation.return_value = network_all_example_data - self.assertEqual(flusurv.get_data("network_all", [30, 49]), { + self.assertEqual(flusurv.get_data("network_all", [30, 49], master_lookup_metadata), { 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5}, 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8}, 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5}, @@ -139,13 +139,9 @@ def test_get_data(self, MockFlusurvLocation): } ) - @patch(__test_target__ + ".fetch_flusurv_metadata") - def test_group_by_epiweek(self, 
MockFlusurvMetadata): - # Flusurv metadata is fetched by `make_id_label_map()`. - MockFlusurvMetadata.return_value = master_lookup_metadata - + def test_group_by_epiweek(self): input_data = network_all_example_data - self.assertEqual(flusurv.group_by_epiweek(input_data), { + self.assertEqual(flusurv.group_by_epiweek(input_data, master_lookup_metadata), { 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5}, 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8}, 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5}, @@ -161,15 +157,15 @@ def test_group_by_epiweek(self, MockFlusurvMetadata): } with self.assertWarnsRegex(Warning, "warning: Multiple rates seen for 201014"): - flusurv.group_by_epiweek(duplicate_input_data) + flusurv.group_by_epiweek(duplicate_input_data, master_lookup_metadata) with self.assertRaisesRegex(Exception, "no data found"): - flusurv.group_by_epiweek({"default_data": []}) + flusurv.group_by_epiweek({"default_data": []}, master_lookup_metadata) @patch('builtins.print') def test_group_by_epiweek_print_msgs(self, mock_print): input_data = network_all_example_data - flusurv.group_by_epiweek(input_data) + flusurv.group_by_epiweek(input_data, master_lookup_metadata) mock_print.assert_called_with("found data for 4 epiweeks") def test_get_current_issue(self): @@ -178,10 +174,8 @@ def test_get_current_issue(self): } self.assertEqual(flusurv.get_current_issue(input_data), 202337) - @patch(__test_target__ + ".fetch_flusurv_metadata") - def test_make_id_label_map(self, MockFlusurvMetadata): - MockFlusurvMetadata.return_value = master_lookup_metadata - self.assertEqual(flusurv.make_id_label_map(), id_label_map) + def test_make_id_label_map(self): + self.assertEqual(flusurv.make_id_label_map(master_lookup_metadata), id_label_map) def test_groupids_to_name(self): ids = ( From aa3dd8cffa2beb0afd4f13be4f628de813ea1007 Mon Sep 17 00:00:00 2001 From: Nat DeFries 
<42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Sep 2023 16:47:52 -0400 Subject: [PATCH 13/38] add season label as a descriptive column --- src/acquisition/flusurv/flusurv.py | 15 +++++++ src/acquisition/flusurv/flusurv_update.py | 5 ++- tests/acquisition/flusurv/test_flusurv.py | 51 +++++++++++++++-------- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 15a2f006f..eb5c44a9e 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -202,6 +202,7 @@ def group_by_epiweek(data, metadata): raise Exception("no data found") id_label_map = make_id_label_map(metadata) + id_season_map = make_id_season_map(metadata) # Create output object # First layer of keys is epiweeks. Second layer of keys is groups @@ -221,11 +222,16 @@ def group_by_epiweek(data, metadata): # ] for obs in data: epiweek = mmwrid_to_epiweek(obs["mmwrid"]) + season = id_season_map[obs["seasonid"]] groupname = groupids_to_name( ageid = obs["ageid"], sexid = obs["sexid"], raceid = obs["raceid"], id_label_map = id_label_map ) + # Set season description. This will be overwritten every iteration, + # but should always have the same value per epiweek group. 
+ data_out[epiweek]["season"] = season + rate = obs["weeklyrate"] prev_rate = data_out[epiweek][groupname] if prev_rate is None: @@ -303,6 +309,15 @@ def make_id_label_map(metadata): return id_to_label +def make_id_season_map(metadata): + """Create a map from seasonid to season description, in the format "YYYY-YY" """ + id_to_label = defaultdict(lambda: defaultdict(lambda: None)) + for season in metadata["seasons"]: + id_to_label[season["seasonid"]] = season["label"] + + return id_to_label + + def groupids_to_name(ageid, sexid, raceid, id_label_map): # Expect at least 2 of three ids to be 0 assert (ageid, sexid, raceid).count(0) >= 2, \ diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 87cfd582e..43473b1ec 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -117,6 +117,7 @@ def update(issue, location, seasonids, metadata, test_mode=False): `epiweek`, `location`, `lag`, + `season`, `rate_overall`, @@ -152,6 +153,7 @@ def update(issue, location, seasonids, metadata, test_mode=False): %(epiweek)s, %(location)s, %(lag)s, + %(season)s, %(rate_overall)s, @@ -275,7 +277,8 @@ def update(issue, location, seasonids, metadata, test_mode=False): # id for overall reporting has changed from 6 to 0. Ageids 1-5 # and 7-9 retain the same the same meanings. 
n_expected_groups = 23 - if len(data[epiweek].keys()) != n_expected_groups: + # Subtract one since we also store the season description in each epiweek value + if len(data[epiweek].keys() - 1) != n_expected_groups: warnings.warn( f"{location} {epiweek} data does not contain the expected {n_expected_groups} groups" ) diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index 7ffb4dfdd..caa86d4b9 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -31,9 +31,10 @@ ] } -# Example metadata response containing "master_lookup" element only, used -# for mapping between valueids and strata descriptions -master_lookup_metadata = { +# Example metadata response containing "master_lookup" element, used +# for mapping between valueids and strata descriptions, and "seasons" +# element, used for mapping between seasonids and season year spans. +metadata = { 'master_lookup': [ {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, @@ -62,6 +63,13 @@ {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, ], + 'seasons': [ + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2012-13', 'enabled': True, 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 
'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2015-16', 'enabled': True, 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'IncludeWeeklyRatesAndStrata': True}, + ], } # Map derived from "master_lookup" dictionary above mapping between valueids @@ -131,21 +139,21 @@ def test_mmwrid_to_epiweek(self): def test_get_data(self, MockFlusurvLocation): MockFlusurvLocation.return_value = network_all_example_data - self.assertEqual(flusurv.get_data("network_all", [30, 49], master_lookup_metadata), { - 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5}, - 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8}, - 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5}, - 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1}, + self.assertEqual(flusurv.get_data("network_all", [30, 49], metadata), { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8, "season": "2009-10"}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1, "season": "2009-10"}, } ) def test_group_by_epiweek(self): input_data = network_all_example_data - self.assertEqual(flusurv.group_by_epiweek(input_data, master_lookup_metadata), { - 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5}, - 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8}, - 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5}, - 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1}, + self.assertEqual(flusurv.group_by_epiweek(input_data, metadata), { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, + 
200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8, "season": "2009-10"}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1, "season": "2009-10"}, } ) @@ -157,15 +165,15 @@ def test_group_by_epiweek(self): } with self.assertWarnsRegex(Warning, "warning: Multiple rates seen for 201014"): - flusurv.group_by_epiweek(duplicate_input_data, master_lookup_metadata) + flusurv.group_by_epiweek(duplicate_input_data, metadata) with self.assertRaisesRegex(Exception, "no data found"): - flusurv.group_by_epiweek({"default_data": []}, master_lookup_metadata) + flusurv.group_by_epiweek({"default_data": []}, metadata) @patch('builtins.print') def test_group_by_epiweek_print_msgs(self, mock_print): input_data = network_all_example_data - flusurv.group_by_epiweek(input_data, master_lookup_metadata) + flusurv.group_by_epiweek(input_data, metadata) mock_print.assert_called_with("found data for 4 epiweeks") def test_get_current_issue(self): @@ -175,7 +183,16 @@ def test_get_current_issue(self): self.assertEqual(flusurv.get_current_issue(input_data), 202337) def test_make_id_label_map(self): - self.assertEqual(flusurv.make_id_label_map(master_lookup_metadata), id_label_map) + self.assertEqual(flusurv.make_id_label_map(metadata), id_label_map) + + def test_make_id_season_map(self): + self.assertEqual(flusurv.make_id_season_map(metadata), { + 46: '2006-07', + 43: '2003-04', + 49: '2009-10', + 52: '2012-13', + 55: '2015-16', + }) def test_groupids_to_name(self): ids = ( From ca7e47832c3605f9b59b0e361e86525756855404 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Sep 2023 17:39:59 -0400 Subject: [PATCH 14/38] move example API responses to make it clear they are not for prod use --- .../flusurv/{ => reference}/new_grasp_location_result.json | 0 src/acquisition/flusurv/{ => 
reference}/new_grasp_result.json | 0 .../flusurv/{ => reference}/old_grasp_location_result.json | 0 src/acquisition/flusurv/{ => reference}/old_grasp_result.json | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename src/acquisition/flusurv/{ => reference}/new_grasp_location_result.json (100%) rename src/acquisition/flusurv/{ => reference}/new_grasp_result.json (100%) rename src/acquisition/flusurv/{ => reference}/old_grasp_location_result.json (100%) rename src/acquisition/flusurv/{ => reference}/old_grasp_result.json (100%) diff --git a/src/acquisition/flusurv/new_grasp_location_result.json b/src/acquisition/flusurv/reference/new_grasp_location_result.json similarity index 100% rename from src/acquisition/flusurv/new_grasp_location_result.json rename to src/acquisition/flusurv/reference/new_grasp_location_result.json diff --git a/src/acquisition/flusurv/new_grasp_result.json b/src/acquisition/flusurv/reference/new_grasp_result.json similarity index 100% rename from src/acquisition/flusurv/new_grasp_result.json rename to src/acquisition/flusurv/reference/new_grasp_result.json diff --git a/src/acquisition/flusurv/old_grasp_location_result.json b/src/acquisition/flusurv/reference/old_grasp_location_result.json similarity index 100% rename from src/acquisition/flusurv/old_grasp_location_result.json rename to src/acquisition/flusurv/reference/old_grasp_location_result.json diff --git a/src/acquisition/flusurv/old_grasp_result.json b/src/acquisition/flusurv/reference/old_grasp_result.json similarity index 100% rename from src/acquisition/flusurv/old_grasp_result.json rename to src/acquisition/flusurv/reference/old_grasp_result.json From c1614a1fae4c71f7ba2fd0c0994f115f322f3923 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 28 Sep 2023 18:11:17 -0400 Subject: [PATCH 15/38] review cleanup --- src/acquisition/flusurv/flusurv.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index eb5c44a9e..286f1967d 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -179,7 +179,9 @@ def group_by_epiweek(data, metadata): Args: data: The "default_data" element of a GRASP API response object, - as fetched with 'fetch_flusurv_location' or `fetch_flusurv_metadata` + as fetched with 'fetch_flusurv_location' or `fetch_flusurv_metadata` + metadata: The JSON result returned from `fetch_flusurv_metadata()` + containing mappings from strata IDs and season IDs to descriptions. Returns a dictionary of the format { @@ -222,7 +224,6 @@ def group_by_epiweek(data, metadata): # ] for obs in data: epiweek = mmwrid_to_epiweek(obs["mmwrid"]) - season = id_season_map[obs["seasonid"]] groupname = groupids_to_name( ageid = obs["ageid"], sexid = obs["sexid"], raceid = obs["raceid"], id_label_map = id_label_map @@ -230,7 +231,7 @@ def group_by_epiweek(data, metadata): # Set season description. This will be overwritten every iteration, # but should always have the same value per epiweek group. - data_out[epiweek]["season"] = season + data_out[epiweek]["season"] = id_season_map[obs["seasonid"]] rate = obs["weeklyrate"] prev_rate = data_out[epiweek][groupname] @@ -275,15 +276,15 @@ def get_data(location, seasonids, metadata): return data_out -def get_current_issue(data): +def get_current_issue(metadata): """ Extract the current issue from the FluSurv API result. 
Args: - data: dictionary representing a JSON response from the FluSurv API + metadata: dictionary representing a JSON response from the FluSurv API """ # extract - date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") + date = datetime.strptime(metadata["loaddatetime"], "%b %d, %Y") # convert and return return EpiDate(date.year, date.month, date.day).get_ew() From 19a5b425c105f07e3dd96e817a0b8e683d8832f8 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 27 Oct 2023 12:46:10 -0400 Subject: [PATCH 16/38] capitalize constant max_age --- src/acquisition/flusurv/flusurv_update.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 43473b1ec..e6de40c67 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -82,7 +82,7 @@ from delphi.utils.epiweek import delta_epiweeks -max_age_to_consider_weeks = 52 +MAX_AGE_TO_CONSIDER_WEEKS = 52 def get_rows(cur): """Return the number of rows in the `flusurv` table.""" @@ -217,7 +217,7 @@ def update(issue, location, seasonids, metadata, test_mode=False): # insert/update each row of data (one per epiweek) for epiweek in epiweeks: lag = delta_epiweeks(epiweek, issue) - if lag > max_age_to_consider_weeks: + if lag > MAX_AGE_TO_CONSIDER_WEEKS: # Ignore values older than one year, as (1) they are assumed not to # change, and (2) it would adversely affect database performance if all # values (including duplicates) were stored on each run. 
@@ -330,7 +330,7 @@ def main(): # Ignore seasons with all dates older than one year seasonids = { season_blob["seasonid"] for season_blob in metadata["seasons"] - if delta_epiweeks(flusurv.mmwrid_to_epiweek(season_blob["endweek"]), issue) < max_age_to_consider_weeks + if delta_epiweeks(flusurv.mmwrid_to_epiweek(season_blob["endweek"]), issue) < MAX_AGE_TO_CONSIDER_WEEKS } # fetch flusurv data From 2f18451dbcbcef0d349d8d6906331a8ca1aaa130 Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 27 Oct 2023 12:58:20 -0400 Subject: [PATCH 17/38] move paren to subtract from # of dates Co-authored-by: melange396 --- src/acquisition/flusurv/flusurv_update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index e6de40c67..7e11b39ed 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -278,7 +278,7 @@ def update(issue, location, seasonids, metadata, test_mode=False): # and 7-9 retain the same the same meanings. 
n_expected_groups = 23 # Subtract one since we also store the season description in each epiweek value - if len(data[epiweek].keys() - 1) != n_expected_groups: + if len(data[epiweek].keys()) - 1 != n_expected_groups: warnings.warn( f"{location} {epiweek} data does not contain the expected {n_expected_groups} groups" ) From d64c4c06f8e97aa102a4cc2677e89db2015fafca Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 27 Oct 2023 13:13:03 -0400 Subject: [PATCH 18/38] move n_expected_groups and big groupid comment to global --- src/acquisition/flusurv/flusurv_update.py | 115 +++++++++++----------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 7e11b39ed..1c309e95c 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -82,6 +82,63 @@ from delphi.utils.epiweek import delta_epiweeks +"""
+As of Sept 2023, for new data we expect to see these 23 groups, as described
+in the top-level "master_lookup" element, below, of the new GRASP API
+(https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response object.
+See `./reference/new_grasp_result.json` for a full example response. 
+ 'master_lookup' = [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True} + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 
'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ] + +All 23 strata are available starting epiweek 200935, inclusive. + +The previous version of the GRASP API +(https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) used the following age-id +mapping, as described in the top-level "ages" element, below. See +`./reference/old_grasp_result.json` for a full example response. + 'ages' = [ + {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, + {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} + ] + +In addition to the new age, race, and sex breakdowns, the group id for overall +reporting has changed from 6 to 0. Age ids 1-5 and 7-9 retain the same the +same meanings; age id 6 is not reported. 
+""" +N_EXPECTED_GROUPS = 23 MAX_AGE_TO_CONSIDER_WEEKS = 52 def get_rows(cur): @@ -223,64 +280,10 @@ def update(issue, location, seasonids, metadata, test_mode=False): # values (including duplicates) were stored on each run. continue - # As of Sept 2023, for new data we expect to see these 23 groups, as - # described in the top-level "master_lookup" element of the new GRASP API - # (https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response object: - # 'master_lookup' = [ - # {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 
yr', 'Color_HexValue': '#707070', 'Enabled': True} - # {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, - # {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, - # - # {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, - # {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - # {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, - # {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - # {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, - # - # {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - # {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, - # - # {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, - # ] - # - # All 23 strata are available starting with epiweek 200935. 
- # - # The previous version of the GRASP API - # (https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) - # used the following age groupid mapping, as described in the - # top-level "ages" element: - # 'ages' = [ - # {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, - # {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, - # {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, - # {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, - # {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, - # {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, - # {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, - # {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, - # {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} - # ] - # - # In addition to the new age, race, and sex breakdowns, the group - # id for overall reporting has changed from 6 to 0. Ageids 1-5 - # and 7-9 retain the same the same meanings. - n_expected_groups = 23 # Subtract one since we also store the season description in each epiweek value - if len(data[epiweek].keys()) - 1 != n_expected_groups: + if len(data[epiweek].keys()) - 1 != N_EXPECTED_GROUPS: warnings.warn( - f"{location} {epiweek} data does not contain the expected {n_expected_groups} groups" + f"{location} {epiweek} data does not contain the expected {N_EXPECTED_GROUPS} groups" ) args_meta = { From c7829ee70ef0d458cd66d233953af67fa684949e Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 27 Oct 2023 17:53:20 -0400 Subject: [PATCH 19/38] convert asserts to if-raise --- src/acquisition/flusurv/flusurv.py | 5 ++--- src/acquisition/flusurv/flusurv_update.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 286f1967d..bbd20314c 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py 
@@ -320,9 +320,8 @@ def make_id_season_map(metadata): def groupids_to_name(ageid, sexid, raceid, id_label_map): - # Expect at least 2 of three ids to be 0 - assert (ageid, sexid, raceid).count(0) >= 2, \ - "At most one groupid can be non-zero" + if ((ageid, sexid, raceid).count(0) < 2): + raise ValueError("Expect at least two of three group ids to be 0") if (ageid, sexid, raceid).count(0) == 3: group = "overall" elif ageid != 0: diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 1c309e95c..bc20e87f6 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -343,8 +343,8 @@ def main(): update(issue, location, seasonids, metadata, args.test) else: # single location - assert args.location in flusurv.location_to_code.keys(), \ - f"Requested location {args.location} not available" + if (args.location not in flusurv.location_to_code.keys()): + raise KeyError("Requested location {args.location} not available") update(issue, args.location, seasonids, metadata, args.test) From c91b7e84824bdeed6a0ee510f3992bc96e0a7b55 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 31 Oct 2023 17:15:51 -0400 Subject: [PATCH 20/38] for robust group checking, list all expected groups and do set diff --- src/acquisition/flusurv/flusurv_update.py | 44 ++++++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index bc20e87f6..f5a19f3de 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -138,7 +138,35 @@ reporting has changed from 6 to 0. Age ids 1-5 and 7-9 retain the same the same meanings; age id 6 is not reported. 
""" -N_EXPECTED_GROUPS = 23 +EXPECTED_GROUPS = ( + "rate_overall", + + "rate_age_0", + "rate_age_1", + "rate_age_2", + "rate_age_3", + "rate_age_4", + "rate_age_5", + "rate_age_6", + "rate_age_7", + + "rate_age_18t29", + "rate_age_30t39", + "rate_age_40t49", + "rate_age_5t11", + "rate_age_12t17", + "rate_age_lt18", + "rate_age_gte18", + + "rate_race_white", + "rate_race_black", + "rate_race_hisp", + "rate_race_asian", + "rate_race_natamer", + + "rate_sex_male", + "rate_sex_female" +) MAX_AGE_TO_CONSIDER_WEEKS = 52 def get_rows(cur): @@ -280,10 +308,16 @@ def update(issue, location, seasonids, metadata, test_mode=False): # values (including duplicates) were stored on each run. continue - # Subtract one since we also store the season description in each epiweek value - if len(data[epiweek].keys()) - 1 != N_EXPECTED_GROUPS: - warnings.warn( - f"{location} {epiweek} data does not contain the expected {N_EXPECTED_GROUPS} groups" + missing_expected_groups = EXPECTED_GROUPS - data[epiweek].keys() + # Remove the season description since we also store it in each epiweek obj + unexpected_groups = data[epiweek].keys() - EXPECTED_GROUPS - {"season"} + if len(missing_expected_groups) != 0: + raise Exception( + f"{location} {epiweek} data is missing group(s) {missing_expected_groups}" + ) + if len(unexpected_groups) != 0: + raise Exception( + f"{location} {epiweek} data includes new group(s) {unexpected_groups}" ) args_meta = { From 03926ddab239598f55c3e55419b11eef1c897a53 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 1 Nov 2023 12:31:50 -0400 Subject: [PATCH 21/38] separate data fetching fns into metadata and location classes --- src/acquisition/flusurv/api.py | 387 ++++++++++++++++++++++ src/acquisition/flusurv/flusurv.py | 364 -------------------- src/acquisition/flusurv/flusurv_update.py | 31 +- 3 files changed, 398 insertions(+), 384 deletions(-) create mode 100644 src/acquisition/flusurv/api.py delete mode 100644 
src/acquisition/flusurv/flusurv.py diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py new file mode 100644 index 000000000..5eb4dc12a --- /dev/null +++ b/src/acquisition/flusurv/api.py @@ -0,0 +1,387 @@ +""" +=============== +=== Purpose === +=============== + +Fetches FluSurv-NET data (flu hospitalization rates) from CDC. Unlike the other +CDC-hosted datasets (e.g. FluView), FluSurv is not available as a direct +download. This program emulates web browser requests for the web app and +extracts data of interest from the JSON response. + +For unknown reasons, the server appears to provide two separate rates for any +given location, epiweek, and age group. These rates are usually identical--but +not always. When two given rates differ, the first is kept. This appears to be +the behavior of the web app, at the following location: + - https://gis.cdc.gov/GRASP/Fluview/FluView3References/Main/FluView3.js:859 + +See also: + - flusurv_update.py + - https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html + - https://wwwnc.cdc.gov/eid/article/21/9/14-1912_article + - Chaves, S., Lynfield, R., Lindegren, M., Bresee, J., & Finelli, L. (2015). + The US Influenza Hospitalization Surveillance Network. Emerging Infectious + Diseases, 21(9), 1543-1550. https://dx.doi.org/10.3201/eid2109.141912. 
+ + +================= +=== Changelog === +================= + +2017-05-22 + * rewrite for new data source +2017-02-17 + * handle discrepancies by prefering more recent values +2017-02-03 + + initial version +""" + +# standard library +from collections import defaultdict +from datetime import datetime +import json +import time +from warnings import warn + +# third party +import requests + +# first party +from delphi.utils.epidate import EpiDate +from delphi.utils.epiweek import delta_epiweeks + + +def fetch_json(path, payload, call_count=1, requests_impl=requests): + """Send a request to the server and return the parsed JSON response.""" + + # it's polite to self-identify this "bot" + DELPHI_URL = "https://delphi.cmu.edu/index.html" + USER_AGENT = f"Mozilla/5.0 (compatible; delphibot/1.0; +{DELPHI_URL})" + + # the FluSurv AMF server + flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path + + # request headers + headers = { + "Accept-Encoding": "gzip", + "User-Agent": USER_AGENT, + } + if payload is not None: + headers["Content-Type"] = "application/json;charset=UTF-8" + + # send the request and read the response + if payload is None: + method = requests_impl.get + data = None + else: + method = requests_impl.post + data = json.dumps(payload) + resp = method(flusurv_url, headers=headers, data=data) + + # check the HTTP status code + if resp.status_code == 500 and call_count <= 2: + # the server often fails with this status, so wait and retry + delay = 10 * call_count + print(f"got status {int(resp.status_code)}, will retry in {int(delay)} sec...") + time.sleep(delay) + return fetch_json(path, payload, call_count=call_count + 1) + elif resp.status_code != 200: + raise Exception(["status code != 200", resp.status_code]) + + # check response mime type + if "application/json" not in resp.headers.get("Content-Type", ""): + raise Exception("response is not json") + + # return the decoded json object + return resp.json() + + +def mmwrid_to_epiweek(mmwrid): + """Convert a 
CDC week index into an epiweek.""" + + # Add the difference in IDs, which are sequential, to a reference epiweek, + # which is 2003w40 in this case. + epiweek_200340 = EpiDate(2003, 9, 28) + mmwrid_200340 = 2179 + return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() + + +class FlusurvMetadata: + def __init__(self, max_age_weeks): + self.metadata = self._fetch_flusurv_metadata() + + self.location_to_code = self._make_location_to_code_map() + self.locations = self.location_to_code.keys() + + self.id_to_group = self._make_id_group_map() + self.id_to_season = self._make_id_season_map() + + self.issue = self._get_current_issue() + self.max_age_weeks = max_age_weeks + self.seasonids = self._get_recent_seasonids() + + def _fetch_flusurv_metadata(self): + """Return FluSurv JSON metadata object.""" + return fetch_json( + "PostPhase03DataTool", + {"appversion": "Public", "key": "", "injson": []} + ) + + def _make_location_to_code_map(self): + # all currently available FluSurv locations and their associated codes + # the number pair represents NetworkID and CatchmentID + location_to_code = { + "CA": (2, 1), + "CO": (2, 2), + "CT": (2, 3), + "GA": (2, 4), + "IA": (3, 5), + "ID": (3, 6), + "MD": (2, 7), + "MI": (3, 8), + "MN": (2, 9), + "NM": (2, 11), + "NY_albany": (2, 13), + "NY_rochester": (2, 14), + "OH": (3, 15), + "OK": (3, 16), + "OR": (2, 17), + "RI": (3, 18), + "SD": (3, 19), + "TN": (2, 20), + "UT": (3, 21), + "network_all": (1, 22), + "network_eip": (2, 22), + "network_ihsp": (3, 22), + } + return location_to_code + + def fetch_location_to_code_map(self): + return self.location_to_code + + def _get_current_issue(self): + """ + Extract the current issue from the FluSurv metadata result. 
+ + Args: + metadata: dictionary representing a JSON response from the FluSurv API + """ + # extract + date = datetime.strptime(self.metadata["loaddatetime"], "%b %d, %Y") + + # convert and return + return EpiDate(date.year, date.month, date.day).get_ew() + + def _get_recent_seasonids(self): + # Ignore seasons with all dates older than one year + seasonids = { + season_blob["seasonid"] for season_blob in self.metadata["seasons"] + if delta_epiweeks(mmwrid_to_epiweek(season_blob["endweek"]), issue) < self.max_age_weeks + } + + return seasonids + + def _make_id_group_map(self): + """Create a map from valueid to strata description""" + id_to_label = defaultdict(lambda: defaultdict(lambda: None)) + for group in self.metadata["master_lookup"]: + # Skip "overall" group + if group["Variable"] is None: + continue + id_to_label[group["Variable"]][group["valueid"]] = group["Label"].replace( + " ", "" + ).replace( + "/", "" + ).replace( + "-", "t" + ).replace( + "yr", "" + ).lower() + + return id_to_label + + + def _make_id_season_map(self): + """Create a map from seasonid to season description, in the format "YYYY-YY" """ + id_to_label = defaultdict(lambda: defaultdict(lambda: None)) + for season in self.metadata["seasons"]: + id_to_label[season["seasonid"]] = season["label"] + + return id_to_label + + +class FlusurvLocationFetcher: + def __init__(self, max_age_weeks): + self.metadata = FlusurvMetadata(max_age_weeks) + + def get_data(self, location): + """ + Fetch and parse flu data for a given location. 
+ + This method performs the following operations: + - fetch location-specific FluSurv data from CDC API + - extracts and returns hospitalization rates for each epiweek + """ + # fetch + print("[fetching flusurv data...]") + data_in = self._fetch_flusurv_location(location, self.metadata.seasonids) + + # extract + print("[reformatting flusurv result...]") + data_out = self._group_by_epiweek(data_in) + + # return + print(f"[successfully fetched data for {location}]") + return data_out + + def _fetch_flusurv_location(self): + """Return FluSurv JSON object for a given location.""" + location_code = self.metadata.location_to_code[location] + + result = fetch_json( + "PostPhase03DataTool", + { + "appversion": "Public", + "key": "getdata", + "injson": [ + { + "networkid": location_code[0], + "catchmentid": location_code[1], + "seasonid": elem, + } for elem in self.metadata.seasonids], + }, + ) + + # If no data is returned (a given seasonid is not reported, + # location codes are invalid, etc), the API returns a JSON like: + # { + # 'default_data': { + # 'response': 'No Data' + # } + # } + # + # If data is returned, then data["default_data"] is a list + # and data["default_data"]["response"] doesn't exist. + assert isinstance(result["default_data"], list) and len(result["default_data"]) > 0, \ + f"Data was not correctly returned from the API for {location}" + return result + + def _group_by_epiweek(self, data): + """ + Convert default data for a single location into an epiweek-grouped dictionary + + Args: + data: The "default_data" element of a GRASP API response object, + as fetched with 'fetch_flusurv_location' or `fetch_flusurv_metadata` + metadata: The JSON result returned from `fetch_flusurv_metadata()` + containing mappings from strata IDs and season IDs to descriptions. + + Returns a dictionary of the format + { + : { + : { + : , + ... + : , + ... + } + ... + } + ... + } + """ + data = data["default_data"] + + # Sanity check the input. 
We expect to see some epiweeks + if len(data) == 0: + raise Exception("no data found") + + # Create output object + # First layer of keys is epiweeks. Second layer of keys is groups + # (by id, not age in years, sex abbr, etc). + # + # If a top-level key doesn't already exist, create a new empty dict. + # If a secondary key doesn't already exist, create a new key with a + # default value of None. + data_out = defaultdict(lambda: defaultdict(lambda: None)) + + # data["default_data"] is a list of dictionaries, with the format + # [ + # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + # ... + # ] + for obs in data: + epiweek = mmwrid_to_epiweek(obs["mmwrid"]) + groupname = self._groupid_to_name( + ageid = obs["ageid"], sexid = obs["sexid"], raceid = obs["raceid"] + ) + + # Set season description. This will be overwritten every iteration, + # but should always have the same value per epiweek group. + data_out[epiweek]["season"] = self.metadata.id_season_map[obs["seasonid"]] + + rate = obs["weeklyrate"] + prev_rate = data_out[epiweek][groupname] + if prev_rate is None: + # This is the first time to see a rate for this epiweek-group + # combo + data_out[epiweek][groupname] = rate + elif prev_rate != rate: + # Skip and warn; a different rate was already found for this + # epiweek-group combo + warn((f"warning: Multiple rates seen for {epiweek} " + f"{groupname}, but previous value {prev_rate} does not " + f"equal new value {rate}. Using the first value.")) + + # Sanity check the input. 
We expect to have populated our dictionary + if len(data_out.keys()) == 0: + raise Exception("no data loaded") + + print(f"found data for {len(data_out.keys())} epiweeks") + + return data_out + + def _groupid_to_name(self, ageid, sexid, raceid): + if ((ageid, sexid, raceid).count(0) < 2): + raise ValueError("Expect at least two of three group ids to be 0") + if (ageid, sexid, raceid).count(0) == 3: + group = "overall" + elif ageid != 0: + # The column names used in the DB for the original age groups + # are ordinal, such that: + # "rate_age_0" corresponds to age group 1, 0-4 yr + # "rate_age_1" corresponds to age group 2, 5-17 yr + # "rate_age_2" corresponds to age group 3, 18-49 yr + # "rate_age_3" corresponds to age group 4, 50-64 yr + # "rate_age_4" corresponds to age group 5, 65+ yr + # "rate_age_5" corresponds to age group 7, 65-74 yr + # "rate_age_6" corresponds to age group 8, 75-84 yr + # "rate_age_7" corresponds to age group 9, 85+ yr + # + # Group 6 was the "overall" category and not included in the + # ordinal naming scheme. Because of that, groups 1-5 have column + # ids equal to the ageid - 1; groups 7-9 have column ids equal + # to ageid - 2. + # + # Automatically map from ageids 1-9 to column ids to match + # the historical convention. + if ageid <= 5: + age_group = str(ageid - 1) + elif ageid == 6: + # Ageid of 6 used to be used for the "overall" category. + # Now "overall" is represented by a valueid of 0, and ageid of 6 + # is not used for any group. If we see an ageid of 6, something + # has gone wrong. 
+ raise ValueError("Ageid cannot be 6; please check for changes in the API") + elif ageid <= 9: + age_group = str(ageid - 2) + else: + age_group = self.metadata.id_group_map["Age"][ageid] + group = "age_" + age_group + elif sexid != 0: + group = "sex_" + self.metadata.id_group_map["Sex"][sexid] + elif raceid != 0: + group = "race_" + self.metadata.id_group_map["Race"][raceid] + + return "rate_" + group diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py deleted file mode 100644 index bbd20314c..000000000 --- a/src/acquisition/flusurv/flusurv.py +++ /dev/null @@ -1,364 +0,0 @@ -""" -=============== -=== Purpose === -=============== - -Fetches FluSurv-NET data (flu hospitalization rates) from CDC. Unlike the other -CDC-hosted datasets (e.g. FluView), FluSurv is not available as a direct -download. This program emulates web browser requests for the web app and -extracts data of interest from the JSON response. - -For unknown reasons, the server appears to provide two separate rates for any -given location, epiweek, and age group. These rates are usually identical--but -not always. When two given rates differ, the first is kept. This appears to be -the behavior of the web app, at the following location: - - https://gis.cdc.gov/GRASP/Fluview/FluView3References/Main/FluView3.js:859 - -See also: - - flusurv_update.py - - https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html - - https://wwwnc.cdc.gov/eid/article/21/9/14-1912_article - - Chaves, S., Lynfield, R., Lindegren, M., Bresee, J., & Finelli, L. (2015). - The US Influenza Hospitalization Surveillance Network. Emerging Infectious - Diseases, 21(9), 1543-1550. https://dx.doi.org/10.3201/eid2109.141912. 
- - -================= -=== Changelog === -================= - -2017-05-22 - * rewrite for new data source -2017-02-17 - * handle discrepancies by prefering more recent values -2017-02-03 - + initial version -""" - -# standard library -from collections import defaultdict -from datetime import datetime -import json -import time -from warnings import warn - -# third party -import requests - -# first party -from delphi.utils.epidate import EpiDate - - -# all currently available FluSurv locations and their associated codes -# the number pair represents NetworkID and CatchmentID -location_to_code = { - "CA": (2, 1), - "CO": (2, 2), - "CT": (2, 3), - "GA": (2, 4), - "IA": (3, 5), - "ID": (3, 6), - "MD": (2, 7), - "MI": (3, 8), - "MN": (2, 9), - "NM": (2, 11), - "NY_albany": (2, 13), - "NY_rochester": (2, 14), - "OH": (3, 15), - "OK": (3, 16), - "OR": (2, 17), - "RI": (3, 18), - "SD": (3, 19), - "TN": (2, 20), - "UT": (3, 21), - "network_all": (1, 22), - "network_eip": (2, 22), - "network_ihsp": (3, 22), -} - - -def fetch_json(path, payload, call_count=1, requests_impl=requests): - """Send a request to the server and return the parsed JSON response.""" - - # it's polite to self-identify this "bot" - delphi_url = "https://delphi.cmu.edu/index.html" - user_agent = f"Mozilla/5.0 (compatible; delphibot/1.0; +{delphi_url})" - - # the FluSurv AMF server - flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path - - # request headers - headers = { - "Accept-Encoding": "gzip", - "User-Agent": user_agent, - } - if payload is not None: - headers["Content-Type"] = "application/json;charset=UTF-8" - - # send the request and read the response - if payload is None: - method = requests_impl.get - data = None - else: - method = requests_impl.post - data = json.dumps(payload) - resp = method(flusurv_url, headers=headers, data=data) - - # check the HTTP status code - if resp.status_code == 500 and call_count <= 2: - # the server often fails with this status, so wait and retry - delay = 10 * 
call_count - print(f"got status {int(resp.status_code)}, will retry in {int(delay)} sec...") - time.sleep(delay) - return fetch_json(path, payload, call_count=call_count + 1) - elif resp.status_code != 200: - raise Exception(["status code != 200", resp.status_code]) - - # check response mime type - if "application/json" not in resp.headers.get("Content-Type", ""): - raise Exception("response is not json") - - # return the decoded json object - return resp.json() - - -def fetch_flusurv_location(location, seasonids): - """Return FluSurv JSON object for the given location.""" - location_code = location_to_code[location] - - result = fetch_json( - "PostPhase03DataTool", - { - "appversion": "Public", - "key": "getdata", - "injson": [ - { - "networkid": location_code[0], - "catchmentid": location_code[1], - "seasonid": elem, - } for elem in seasonids], - }, - ) - - # If no data is returned (a given seasonid is not reported, - # location codes are invalid, etc), the API returns a JSON like: - # { - # 'default_data': { - # 'response': 'No Data' - # } - # } - # - # If data is returned, then data["default_data"] is a list - # and data["default_data"]["response"] doesn't exist. - assert isinstance(result["default_data"], list) and len(result["default_data"]) > 0, \ - f"Data was not correctly returned from the API for {location}" - return result - - -def fetch_flusurv_metadata(): - """Return FluSurv JSON metadata object.""" - return fetch_json( - "PostPhase03DataTool", - {"appversion": "Public", "key": "", "injson": []} - ) - - -def mmwrid_to_epiweek(mmwrid): - """Convert a CDC week index into an epiweek.""" - - # Add the difference in IDs, which are sequential, to a reference epiweek, - # which is 2003w40 in this case. 
- epiweek_200340 = EpiDate(2003, 9, 28) - mmwrid_200340 = 2179 - return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() - - -def group_by_epiweek(data, metadata): - """ - Convert default data for a single location into an epiweek-grouped dictionary - - Args: - data: The "default_data" element of a GRASP API response object, - as fetched with 'fetch_flusurv_location' or `fetch_flusurv_metadata` - metadata: The JSON result returned from `fetch_flusurv_metadata()` - containing mappings from strata IDs and season IDs to descriptions. - - Returns a dictionary of the format - { - : { - : { - : , - ... - : , - ... - } - ... - } - ... - } - """ - data = data["default_data"] - - # Sanity check the input. We expect to see some epiweeks - if len(data) == 0: - raise Exception("no data found") - - id_label_map = make_id_label_map(metadata) - id_season_map = make_id_season_map(metadata) - - # Create output object - # First layer of keys is epiweeks. Second layer of keys is groups - # (by id, not age in years, sex abbr, etc). - # - # If a top-level key doesn't already exist, create a new empty dict. - # If a secondary key doesn't already exist, create a new key with a - # default value of None if not provided. - data_out = defaultdict(lambda: defaultdict(lambda: None)) - - # data["default_data"] is a list of dictionaries, with the format - # [ - # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, - # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, - # {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, - # ... 
- # ] - for obs in data: - epiweek = mmwrid_to_epiweek(obs["mmwrid"]) - groupname = groupids_to_name( - ageid = obs["ageid"], sexid = obs["sexid"], raceid = obs["raceid"], - id_label_map = id_label_map - ) - - # Set season description. This will be overwritten every iteration, - # but should always have the same value per epiweek group. - data_out[epiweek]["season"] = id_season_map[obs["seasonid"]] - - rate = obs["weeklyrate"] - prev_rate = data_out[epiweek][groupname] - if prev_rate is None: - # This is the first time to see a rate for this epiweek-group - # combo - data_out[epiweek][groupname] = rate - elif prev_rate != rate: - # Skip and warn; a different rate was already found for this - # epiweek-group combo - warn((f"warning: Multiple rates seen for {epiweek} " - f"{groupname}, but previous value {prev_rate} does not " - f"equal new value {rate}. Using the first value.")) - - # Sanity check the input. We expect to have populated our dictionary - if len(data_out.keys()) == 0: - raise Exception("no data loaded") - - print(f"found data for {len(data_out.keys())} epiweeks") - - return data_out - - -def get_data(location, seasonids, metadata): - """ - Fetch and parse flu data for the given location. - - This method performs the following operations: - - fetch location-specific FluSurv data from CDC API - - extracts and returns hospitalization rates for each epiweek - """ - # fetch - print("[fetching flusurv data...]") - data_in = fetch_flusurv_location(location, seasonids) - - # extract - print("[reformatting flusurv result...]") - data_out = group_by_epiweek(data_in, metadata) - - # return - print(f"[successfully fetched data for {location}]") - return data_out - - -def get_current_issue(metadata): - """ - Extract the current issue from the FluSurv API result. 
- - Args: - metadata: dictionary representing a JSON response from the FluSurv API - """ - # extract - date = datetime.strptime(metadata["loaddatetime"], "%b %d, %Y") - - # convert and return - return EpiDate(date.year, date.month, date.day).get_ew() - - -def make_id_label_map(metadata): - """Create a map from valueid to group description""" - id_to_label = defaultdict(lambda: defaultdict(lambda: None)) - for group in metadata["master_lookup"]: - # Skip "overall" group - if group["Variable"] is None: - continue - id_to_label[group["Variable"]][group["valueid"]] = group["Label"].replace( - " ", "" - ).replace( - "/", "" - ).replace( - "-", "t" - ).replace( - "yr", "" - ).lower() - - return id_to_label - - -def make_id_season_map(metadata): - """Create a map from seasonid to season description, in the format "YYYY-YY" """ - id_to_label = defaultdict(lambda: defaultdict(lambda: None)) - for season in metadata["seasons"]: - id_to_label[season["seasonid"]] = season["label"] - - return id_to_label - - -def groupids_to_name(ageid, sexid, raceid, id_label_map): - if ((ageid, sexid, raceid).count(0) < 2): - raise ValueError("Expect at least two of three group ids to be 0") - if (ageid, sexid, raceid).count(0) == 3: - group = "overall" - elif ageid != 0: - # The column names used in the DB for the original age groups - # are ordinal, such that: - # "rate_age_0" corresponds to age group 1, 0-4 yr - # "rate_age_1" corresponds to age group 2, 5-17 yr - # "rate_age_2" corresponds to age group 3, 18-49 yr - # "rate_age_3" corresponds to age group 4, 50-64 yr - # "rate_age_4" corresponds to age group 5, 65+ yr - # "rate_age_5" corresponds to age group 7, 65-74 yr - # "rate_age_6" corresponds to age group 8, 75-84 yr - # "rate_age_7" corresponds to age group 9, 85+ yr - # - # Group 6 was the "overall" category and not included in the - # ordinal naming scheme. Because of that, groups 1-5 have column - # ids equal to the ageid - 1; groups 7-9 have column ids equal - # to ageid - 2. 
- # - # Automatically map from ageids 1-9 to column ids to match - # the historical convention. - if ageid <= 5: - age_group = str(ageid - 1) - elif ageid == 6: - # Ageid of 6 used to be used for the "overall" category. - # Now "overall" is represented by a valueid of 0, and ageid of 6 - # is not used for any group. If we see an ageid of 6, something - # has gone wrong. - raise ValueError("Ageid cannot be 6; please check for changes in the API") - elif ageid <= 9: - age_group = str(ageid - 2) - else: - age_group = id_label_map["Age"][ageid] - group = "age_" + age_group - elif sexid != 0: - group = "sex_" + id_label_map["Sex"][sexid] - elif raceid != 0: - group = "race_" + id_label_map["Race"][raceid] - - return "rate_" + group diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index f5a19f3de..c17f74a80 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -76,7 +76,7 @@ import mysql.connector # first party -from delphi.epidata.acquisition.flusurv import flusurv +from delphi.epidata.acquisition.flusurv.flusurv import FlusurvLocationFetcher import delphi.operations.secrets as secrets from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks @@ -178,10 +178,10 @@ def get_rows(cur): return num -def update(issue, location, seasonids, metadata, test_mode=False): +def update(fetcher, location, test_mode=False): """Fetch and store the currently available weekly FluSurv dataset.""" # Fetch location-specific data - data = flusurv.get_data(location, seasonids, metadata) + data = fetcher.get_data(location) # metadata epiweeks = sorted(data.keys()) @@ -301,7 +301,7 @@ def update(issue, location, seasonids, metadata, test_mode=False): # insert/update each row of data (one per epiweek) for epiweek in epiweeks: - lag = delta_epiweeks(epiweek, issue) + lag = delta_epiweeks(epiweek, fetcher.metadata.issue) if lag > MAX_AGE_TO_CONSIDER_WEEKS: # Ignore 
values older than one year, as (1) they are assumed not to # change, and (2) it would adversely affect database performance if all @@ -322,7 +322,7 @@ def update(issue, location, seasonids, metadata, test_mode=False): args_meta = { "release_date": release_date, - "issue": issue, + "issue": fetcher.metadata.issue, "epiweek": epiweek, "location": location, "lag": lag @@ -358,28 +358,19 @@ def main(): # fmt: on args = parser.parse_args() - metadata = flusurv.fetch_flusurv_metadata() - - # scrape current issue from the main page - issue = flusurv.get_current_issue(metadata) - print(f"current issue: {int(issue)}") - - # Ignore seasons with all dates older than one year - seasonids = { - season_blob["seasonid"] for season_blob in metadata["seasons"] - if delta_epiweeks(flusurv.mmwrid_to_epiweek(season_blob["endweek"]), issue) < MAX_AGE_TO_CONSIDER_WEEKS - } + fetcher = FlusurvLocationFetcher(MAX_AGE_TO_CONSIDER_WEEKS) + print(f"current issue: {int(fetcher.metadata.issue)}") # fetch flusurv data if args.location == "all": # all locations - for location in flusurv.location_to_code.keys(): - update(issue, location, seasonids, metadata, args.test) + for location in fetcher.metadata.locations: + update(fetcher, location, args.test) else: # single location - if (args.location not in flusurv.location_to_code.keys()): + if (args.location not in fetcher.metadata.locations): raise KeyError("Requested location {args.location} not available") - update(issue, args.location, seasonids, metadata, args.test) + update(fetcher, args.location, args.test) if __name__ == "__main__": From c5d8d4ecf88d0de15cc8e74298220bf38cf55072 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 2 Nov 2023 11:23:40 -0400 Subject: [PATCH 22/38] programmatically create location-networkid map; create constants file --- src/acquisition/flusurv/api.py | 62 +++++++------ src/acquisition/flusurv/constants.py | 106 ++++++++++++++++++++++ 
src/acquisition/flusurv/flusurv_update.py | 88 +----------------- 3 files changed, 141 insertions(+), 115 deletions(-) create mode 100644 src/acquisition/flusurv/constants.py diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index 5eb4dc12a..c1f93dc9b 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -48,6 +48,7 @@ # first party from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks +from constants import (MAP_REGION_NAMES_TO_ABBR, MAP_ENTIRE_NETWORK_NAMES) def fetch_json(path, payload, call_count=1, requests_impl=requests): @@ -98,8 +99,8 @@ def fetch_json(path, payload, call_count=1, requests_impl=requests): def mmwrid_to_epiweek(mmwrid): """Convert a CDC week index into an epiweek.""" - # Add the difference in IDs, which are sequential, to a reference epiweek, - # which is 2003w40 in this case. + # Add the difference in IDs, which are sequential, to a reference + # epiweek, which is 2003w40 in this case. 
epiweek_200340 = EpiDate(2003, 9, 28) mmwrid_200340 = 2179 return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() @@ -126,33 +127,38 @@ def _fetch_flusurv_metadata(self): {"appversion": "Public", "key": "", "injson": []} ) + def _location_name_to_abbr(self, geo, network): + """Find short geo name corresponding to a geo and network""" + if geo == "Entire Network": + return MAP_ENTIRE_NETWORK_NAMES[network] + else: + return MAP_REGION_NAMES_TO_ABBR[geo] + def _make_location_to_code_map(self): - # all currently available FluSurv locations and their associated codes - # the number pair represents NetworkID and CatchmentID - location_to_code = { - "CA": (2, 1), - "CO": (2, 2), - "CT": (2, 3), - "GA": (2, 4), - "IA": (3, 5), - "ID": (3, 6), - "MD": (2, 7), - "MI": (3, 8), - "MN": (2, 9), - "NM": (2, 11), - "NY_albany": (2, 13), - "NY_rochester": (2, 14), - "OH": (3, 15), - "OK": (3, 16), - "OR": (2, 17), - "RI": (3, 18), - "SD": (3, 19), - "TN": (2, 20), - "UT": (3, 21), - "network_all": (1, 22), - "network_eip": (2, 22), - "network_ihsp": (3, 22), - } + """Create a map for all currently available FluSurv locations from names to codes""" + location_to_code = dict() + for location in self.metadata["catchments"]: + # "area" is the long-form region (California, etc), and "name" is + # the network/data source type (IHSP, EIP, etc) + location_name = self._location_name_to_abbr(location["area"], location["name"]) + if location["endseasonid"] in self.seasonids: + if location_name in location_to_code.keys(): + raise Exception( + f"catchment {location_name} already seen, but " + \ + "we expect catchments to be unique" + ) + + location_to_code[location_name] = ( + location["networkid"], location["catchmentid"] + ) + else: + unseen_locations.append(location_name) + + print( + f"location(s) {unseen_locations} not included in this issue " + \ + "because they don't include sufficiently recent data" + ) + return location_to_code def fetch_location_to_code_map(self): diff 
--git a/src/acquisition/flusurv/constants.py b/src/acquisition/flusurv/constants.py new file mode 100644 index 000000000..d20cae9cc --- /dev/null +++ b/src/acquisition/flusurv/constants.py @@ -0,0 +1,106 @@ +from delphi_utils import GeoMapper + +""" +As of Sept 2023, for new data we expect to see these 23 groups, as described +in the top-level "master_lookup" element, below, of the new GRASP API +(https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response object. +See `./reference/new_grasp_result.json` for a full example response. + 'master_lookup' = [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + 
{'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True} + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ] + +All 23 strata are available starting epiweek 200935, inclusive. + +The previous version of the GRASP API +(https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) used the following age-id +mapping, as described in the top-level "ages" element, below. See +`./reference/old_grasp_result.json` for a full example response. 
+ 'ages' = [ + {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, + {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} + ] + +In addition to the new age, race, and sex breakdowns, the group id for overall +reporting has changed from 6 to 0. Age ids 1-5 and 7-9 retain the same the +same meanings; age id 6 is not reported. +""" +EXPECTED_GROUPS = ( + "rate_overall", + + "rate_age_0", + "rate_age_1", + "rate_age_2", + "rate_age_3", + "rate_age_4", + "rate_age_5", + "rate_age_6", + "rate_age_7", + + "rate_age_18t29", + "rate_age_30t39", + "rate_age_40t49", + "rate_age_5t11", + "rate_age_12t17", + "rate_age_lt18", + "rate_age_gte18", + + "rate_race_white", + "rate_race_black", + "rate_race_hisp", + "rate_race_asian", + "rate_race_natamer", + + "rate_sex_male", + "rate_sex_female" +) + + +MAX_AGE_TO_CONSIDER_WEEKS = 52 + + +gmpr = GeoMapper() +map_state_names = gmpr.get_crosswalk("state", "state") +map_state_names = map_state_names.to_dict(orient = "records") +map_state_names = {elem["state_name"]: elem["state_id"].upper() for elem in map_state_names} + +map_nonstandard_names = {"New York - Albany": "NY_albany", "New York - Rochester": "NY_rochester"} + +MAP_REGION_NAMES_TO_ABBR = {**map_state_names, **map_nonstandard_names} + +MAP_ENTIRE_NETWORK_NAMES = { + "FluSurv-Net": "network_all", + "EIP": "network_eip", + "IHSP": "network_ihsp" +} diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index c17f74a80..5246205f3 100644 --- 
a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -80,95 +80,9 @@ import delphi.operations.secrets as secrets from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks +from constants import (MAX_AGE_TO_CONSIDER_WEEKS, EXPECTED_GROUPS) -""" -As of Sept 2023, for new data we expect to see these 23 groups, as described -in the top-level "master_lookup" element, below, of the new GRASP API -(https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool) response object. -See `./reference/new_grasp_result.json` for a full example response. - 'master_lookup' = [ - {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': 
'5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True} - {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, - {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, - - {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, - {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, - {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, - {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, - - {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, - {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, - - {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, - ] - -All 23 strata are available starting epiweek 200935, inclusive. - -The previous version of the GRASP API -(https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) used the following age-id -mapping, as described in the top-level "ages" element, below. See -`./reference/old_grasp_result.json` for a full example response. 
- 'ages' = [ - {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, - {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, - {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, - {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, - {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, - {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, - {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, - {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, - {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} - ] - -In addition to the new age, race, and sex breakdowns, the group id for overall -reporting has changed from 6 to 0. Age ids 1-5 and 7-9 retain the same the -same meanings; age id 6 is not reported. -""" -EXPECTED_GROUPS = ( - "rate_overall", - - "rate_age_0", - "rate_age_1", - "rate_age_2", - "rate_age_3", - "rate_age_4", - "rate_age_5", - "rate_age_6", - "rate_age_7", - - "rate_age_18t29", - "rate_age_30t39", - "rate_age_40t49", - "rate_age_5t11", - "rate_age_12t17", - "rate_age_lt18", - "rate_age_gte18", - - "rate_race_white", - "rate_race_black", - "rate_race_hisp", - "rate_race_asian", - "rate_race_natamer", - - "rate_sex_male", - "rate_sex_female" -) -MAX_AGE_TO_CONSIDER_WEEKS = 52 - def get_rows(cur): """Return the number of rows in the `flusurv` table.""" From 9ed92d8f4d839162edd95fba86862ecb06f27377 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 2 Nov 2023 11:46:19 -0400 Subject: [PATCH 23/38] switch missing data assert to exception --- src/acquisition/flusurv/api.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index c1f93dc9b..70f2abf01 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -144,7 +144,7 @@ def _make_location_to_code_map(self): if location["endseasonid"] in 
self.seasonids: if location_name in location_to_code.keys(): raise Exception( - f"catchment {location_name} already seen, but " + \ + f"catchment {location_name} already seen, but " + "we expect catchments to be unique" ) @@ -155,7 +155,7 @@ def _make_location_to_code_map(self): unseen_locations.append(location_name) print( - f"location(s) {unseen_locations} not included in this issue " + \ + f"location(s) {unseen_locations} not included in this issue " + "because they don't include sufficiently recent data" ) @@ -259,16 +259,22 @@ def _fetch_flusurv_location(self): # If no data is returned (a given seasonid is not reported, # location codes are invalid, etc), the API returns a JSON like: - # { + # { # 'default_data': { # 'response': 'No Data' # } - # } + # } # # If data is returned, then data["default_data"] is a list # and data["default_data"]["response"] doesn't exist. - assert isinstance(result["default_data"], list) and len(result["default_data"]) > 0, \ - f"Data was not correctly returned from the API for {location}" + if (not isinstance(result["default_data"], list) or + len(result["default_data"]) == 0 or + ( + "response" in result["default_data"].keys() and + result["default_data"]["response"] == "No Data" + )): + raise Exception(f"No data was returned from the API for {location}" + + "but we expect it to be available for some recent dates") return result def _group_by_epiweek(self, data): From eb920cbbba86b6be48159efbf8a21864ef81ac92 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 6 Nov 2023 13:02:22 -0500 Subject: [PATCH 24/38] mock metadata and api objs for tests --- src/acquisition/flusurv/api.py | 30 ++--- src/acquisition/flusurv/constants.py | 2 +- src/acquisition/flusurv/flusurv_update.py | 4 +- tests/acquisition/flusurv/test_flusurv.py | 134 +++++++++++++++------- 4 files changed, 110 insertions(+), 60 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index 
70f2abf01..8054f6d59 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -48,7 +48,7 @@ # first party from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks -from constants import (MAP_REGION_NAMES_TO_ABBR, MAP_ENTIRE_NETWORK_NAMES) +from .constants import (MAP_REGION_NAMES_TO_ABBR, MAP_ENTIRE_NETWORK_NAMES) def fetch_json(path, payload, call_count=1, requests_impl=requests): @@ -110,16 +110,16 @@ class FlusurvMetadata: def __init__(self, max_age_weeks): self.metadata = self._fetch_flusurv_metadata() + self.issue = self._get_current_issue() + self.max_age_weeks = max_age_weeks + self.seasonids = self._get_recent_seasonids() + self.location_to_code = self._make_location_to_code_map() self.locations = self.location_to_code.keys() self.id_to_group = self._make_id_group_map() self.id_to_season = self._make_id_season_map() - self.issue = self._get_current_issue() - self.max_age_weeks = max_age_weeks - self.seasonids = self._get_recent_seasonids() - def _fetch_flusurv_metadata(self): """Return FluSurv JSON metadata object.""" return fetch_json( @@ -137,6 +137,8 @@ def _location_name_to_abbr(self, geo, network): def _make_location_to_code_map(self): """Create a map for all currently available FluSurv locations from names to codes""" location_to_code = dict() + unseen_locations = [] + for location in self.metadata["catchments"]: # "area" is the long-form region (California, etc), and "name" is # the network/data source type (IHSP, EIP, etc) @@ -181,7 +183,7 @@ def _get_recent_seasonids(self): # Ignore seasons with all dates older than one year seasonids = { season_blob["seasonid"] for season_blob in self.metadata["seasons"] - if delta_epiweeks(mmwrid_to_epiweek(season_blob["endweek"]), issue) < self.max_age_weeks + if delta_epiweeks(mmwrid_to_epiweek(season_blob["endweek"]), self.issue) < self.max_age_weeks } return seasonids @@ -229,7 +231,7 @@ def get_data(self, location): """ # fetch print("[fetching 
flusurv data...]") - data_in = self._fetch_flusurv_location(location, self.metadata.seasonids) + data_in = self._fetch_flusurv_location(location) # extract print("[reformatting flusurv result...]") @@ -239,7 +241,7 @@ def get_data(self, location): print(f"[successfully fetched data for {location}]") return data_out - def _fetch_flusurv_location(self): + def _fetch_flusurv_location(self, location): """Return FluSurv JSON object for a given location.""" location_code = self.metadata.location_to_code[location] @@ -267,9 +269,9 @@ def _fetch_flusurv_location(self): # # If data is returned, then data["default_data"] is a list # and data["default_data"]["response"] doesn't exist. - if (not isinstance(result["default_data"], list) or - len(result["default_data"]) == 0 or + if (len(result["default_data"]) == 0 or ( + isinstance(result["default_data"], dict) and "response" in result["default_data"].keys() and result["default_data"]["response"] == "No Data" )): @@ -331,7 +333,7 @@ def _group_by_epiweek(self, data): # Set season description. This will be overwritten every iteration, # but should always have the same value per epiweek group. 
- data_out[epiweek]["season"] = self.metadata.id_season_map[obs["seasonid"]] + data_out[epiweek]["season"] = self.metadata.id_to_season[obs["seasonid"]] rate = obs["weeklyrate"] prev_rate = data_out[epiweek][groupname] @@ -389,11 +391,11 @@ def _groupid_to_name(self, ageid, sexid, raceid): elif ageid <= 9: age_group = str(ageid - 2) else: - age_group = self.metadata.id_group_map["Age"][ageid] + age_group = self.metadata.id_to_group["Age"][ageid] group = "age_" + age_group elif sexid != 0: - group = "sex_" + self.metadata.id_group_map["Sex"][sexid] + group = "sex_" + self.metadata.id_to_group["Sex"][sexid] elif raceid != 0: - group = "race_" + self.metadata.id_group_map["Race"][raceid] + group = "race_" + self.metadata.id_to_group["Race"][raceid] return "rate_" + group diff --git a/src/acquisition/flusurv/constants.py b/src/acquisition/flusurv/constants.py index d20cae9cc..9d3ceea01 100644 --- a/src/acquisition/flusurv/constants.py +++ b/src/acquisition/flusurv/constants.py @@ -100,7 +100,7 @@ MAP_REGION_NAMES_TO_ABBR = {**map_state_names, **map_nonstandard_names} MAP_ENTIRE_NETWORK_NAMES = { - "FluSurv-Net": "network_all", + "FluSurv-NET": "network_all", "EIP": "network_eip", "IHSP": "network_ihsp" } diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 5246205f3..a960f4938 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -76,11 +76,11 @@ import mysql.connector # first party -from delphi.epidata.acquisition.flusurv.flusurv import FlusurvLocationFetcher import delphi.operations.secrets as secrets from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks -from constants import (MAX_AGE_TO_CONSIDER_WEEKS, EXPECTED_GROUPS) +from .flusurv import FlusurvLocationFetcher +from .constants import (MAX_AGE_TO_CONSIDER_WEEKS, EXPECTED_GROUPS) def get_rows(cur): diff --git a/tests/acquisition/flusurv/test_flusurv.py 
b/tests/acquisition/flusurv/test_flusurv.py index caa86d4b9..2fb6a5df9 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -4,16 +4,13 @@ import unittest from unittest.mock import (MagicMock, sentinel, patch) -import delphi.epidata.acquisition.flusurv.flusurv as flusurv +import delphi.epidata.acquisition.flusurv.api as flusurv # py3tester coverage target -__test_target__ = "delphi.epidata.acquisition.flusurv.flusurv" +__test_target__ = "delphi.epidata.acquisition.flusurv.api" -# Example location-specific return JSON from CDC GRASP API. Contains -# partial data for "network_all" location and season 49. -network_all_example_data = { - 'default_data': [ +network_all_example_data = [ {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'rate': 42, 'weeklyrate': 0.5, 'mmwrid': 2519}, @@ -29,12 +26,46 @@ {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, ] -} -# Example metadata response containing "master_lookup" element, used -# for mapping between valueids and strata descriptions, and "seasons" -# element, used for mapping between seasonids and season year spans. 
-metadata = { +metadata_result = { + # Last data update date + 'loaddatetime': 'Sep 12, 2023', + # IDs (network ID + catchment ID) specifying geos and data sources available + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 
'name': 'IHSP', 'area': 'Iowa', 'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + # "seasons" element, used for mapping between seasonids and season year spans. + 'seasons': [ + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2012-13', 'enabled': True, 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2015-16', 'enabled': True, 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'IncludeWeeklyRatesAndStrata': True}, + ], + # "master_lookup" element, used for mapping between valueids and strata descriptions 'master_lookup': [ {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': 
'5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, @@ -63,16 +94,28 @@ {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, ], - 'seasons': [ - {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, - {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, - {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, - {'description': 'Season 2012-13', 'enabled': True, 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'IncludeWeeklyRatesAndStrata': True}, - {'description': 'Season 2015-16', 'enabled': True, 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'IncludeWeeklyRatesAndStrata': True}, - ], + 'default_data': network_all_example_data, + # Mapping each mmwrid to a week number, season, and date. Could use this instead of our current epoch-based function. 
+ 'mmwr': [ + {'mmwrid': 2828, 'weekend': '2016-03-12', 'weeknumber': 10, 'weekstart': '2016-03-06', 'year': 2016, 'yearweek': 201610, 'seasonid': 55, 'label': 'Mar-12-2016', 'weekendlabel': 'Mar 12, 2016', 'weekendlabel2': 'Mar-12-2016'}, + {'mmwrid': 2885, 'weekend': '2017-04-15', 'weeknumber': 15, 'weekstart': '2017-04-09', 'year': 2017, 'yearweek': 201715, 'seasonid': 56, 'label': 'Apr-15-2017', 'weekendlabel': 'Apr 15, 2017', 'weekendlabel2': 'Apr-15-2017'}, + {'mmwrid': 2911, 'weekend': '2017-10-14', 'weeknumber': 41, 'weekstart': '2017-10-08', 'year': 2017, 'yearweek': 201741, 'seasonid': 57, 'label': 'Oct-14-2017', 'weekendlabel': 'Oct 14, 2017', 'weekendlabel2': 'Oct-14-2017'}, + {'mmwrid': 2928, 'weekend': '2018-02-10', 'weeknumber': 6, 'weekstart': '2018-02-04', 'year': 2018, 'yearweek': 201806, 'seasonid': 57, 'label': 'Feb-10-2018', 'weekendlabel': 'Feb 10, 2018', 'weekendlabel2': 'Feb-10-2018'}, + {'mmwrid': 2974, 'weekend': '2018-12-29', 'weeknumber': 52, 'weekstart': '2018-12-23', 'year': 2018, 'yearweek': 201852, 'seasonid': 58, 'label': 'Dec-29-2018', 'weekendlabel': 'Dec 29, 2018', 'weekendlabel2': 'Dec-29-2018'}, + {'mmwrid': 3031, 'weekend': '2020-02-01', 'weeknumber': 5, 'weekstart': '2020-01-26', 'year': 2020, 'yearweek': 202005, 'seasonid': 59, 'label': 'Feb-01-2020', 'weekendlabel': 'Feb 01, 2020', 'weekendlabel2': 'Feb-01-2020'}, + {'mmwrid': 3037, 'weekend': '2020-03-14', 'weeknumber': 11, 'weekstart': '2020-03-08', 'year': 2020, 'yearweek': 202011, 'seasonid': 59, 'label': 'Mar-14-2020', 'weekendlabel': 'Mar 14, 2020', 'weekendlabel2': 'Mar-14-2020'}, + {'mmwrid': 3077, 'weekend': '2020-12-19', 'weeknumber': 51, 'weekstart': '2020-12-13', 'year': 2020, 'yearweek': 202051, 'seasonid': 60, 'label': 'Dec-19-2020', 'weekendlabel': 'Dec 19, 2020', 'weekendlabel2': 'Dec-19-2020'}, + {'mmwrid': 3140, 'weekend': '2022-03-05', 'weeknumber': 9, 'weekstart': '2022-02-27', 'year': 2022, 'yearweek': 202209, 'seasonid': 61, 'label': 'Mar-05-2022', 
'weekendlabel': 'Mar 05, 2022', 'weekendlabel2': 'Mar-05-2022'}, + {'mmwrid': 3183, 'weekend': '2022-12-31', 'weeknumber': 52, 'weekstart': '2022-12-25', 'year': 2022, 'yearweek': 202252, 'seasonid': 62, 'label': 'Dec-31-2022', 'weekendlabel': 'Dec 31, 2022', 'weekendlabel2': 'Dec-31-2022'}, + ] } -# Map derived from "master_lookup" dictionary above mapping between valueids +# Example location-specific return JSON from CDC GRASP API. Contains +# partial data for "network_all" location and season 49. +location_api_result = {'default_data': network_all_example_data} + + +# Map derived from "master_lookup" dictionary above, mapping between valueids # by type and cleaned-up descriptions (no spaces or capital letters, etc) id_label_map = { "Age": { @@ -106,6 +149,12 @@ } +with patch(__test_target__ + ".fetch_json", + return_value = metadata_result) as MockFlusurvMetadata: + metadata_fetcher = flusurv.FlusurvMetadata(52) + api_fetcher = flusurv.FlusurvLocationFetcher(52) + + class FunctionTests(unittest.TestCase): """Tests each function individually.""" @@ -131,15 +180,18 @@ def test_mmwrid_to_epiweek(self): # Test epoch self.assertEqual(flusurv.mmwrid_to_epiweek(2179), 200340) - metadata = flusurv.fetch_flusurv_metadata() - for mmwr in metadata["mmwr"]: + for mmwr in metadata_result["mmwr"]: self.assertEqual(flusurv.mmwrid_to_epiweek(mmwr["mmwrid"]), mmwr["yearweek"]) - @patch(__test_target__ + ".fetch_flusurv_location") + @patch(__test_target__ + ".fetch_json") def test_get_data(self, MockFlusurvLocation): - MockFlusurvLocation.return_value = network_all_example_data + MockFlusurvLocation.return_value = location_api_result - self.assertEqual(flusurv.get_data("network_all", [30, 49], metadata), { + season_api_fetcher = api_fetcher + season_api_fetcher.metadata.seasonids = [30, 49] + season_api_fetcher.metadata.location_to_code = {"network_all": (1, 22)} + + self.assertEqual(season_api_fetcher.get_data("network_all"), { 201014: {"rate_race_white": 0.0, "rate_race_black": 
0.1, "rate_age_0": 0.5, "season": "2009-10"}, 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8, "season": "2009-10"}, 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, @@ -148,8 +200,8 @@ def test_get_data(self, MockFlusurvLocation): ) def test_group_by_epiweek(self): - input_data = network_all_example_data - self.assertEqual(flusurv.group_by_epiweek(input_data, metadata), { + input_data = metadata_result + self.assertEqual(api_fetcher._group_by_epiweek(input_data), { 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8, "season": "2009-10"}, 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, @@ -165,35 +217,31 @@ def test_group_by_epiweek(self): } with self.assertWarnsRegex(Warning, "warning: Multiple rates seen for 201014"): - flusurv.group_by_epiweek(duplicate_input_data, metadata) + api_fetcher._group_by_epiweek(duplicate_input_data) with self.assertRaisesRegex(Exception, "no data found"): - flusurv.group_by_epiweek({"default_data": []}, metadata) + api_fetcher._group_by_epiweek({"default_data": []}) @patch('builtins.print') def test_group_by_epiweek_print_msgs(self, mock_print): - input_data = network_all_example_data - flusurv.group_by_epiweek(input_data, metadata) + input_data = metadata_result + api_fetcher._group_by_epiweek(input_data) mock_print.assert_called_with("found data for 4 epiweeks") def test_get_current_issue(self): - input_data = { - 'loaddatetime': 'Sep 12, 2023' - } - self.assertEqual(flusurv.get_current_issue(input_data), 202337) + self.assertEqual(metadata_fetcher._get_current_issue(), 202337) def test_make_id_label_map(self): - self.assertEqual(flusurv.make_id_label_map(metadata), id_label_map) + self.assertEqual(metadata_fetcher._make_id_group_map(), id_label_map) def test_make_id_season_map(self): - 
self.assertEqual(flusurv.make_id_season_map(metadata), { + self.assertEqual(metadata_fetcher._make_id_season_map(), { 46: '2006-07', 43: '2003-04', 49: '2009-10', 52: '2012-13', 55: '2015-16', }) - def test_groupids_to_name(self): ids = ( (1, 0, 0), @@ -213,11 +261,11 @@ def test_groupids_to_name(self): ] for (ageid, sexid, raceid), expected in zip(ids, expected_list): - self.assertEqual(flusurv.groupids_to_name(ageid, sexid, raceid, id_label_map), expected) + self.assertEqual(api_fetcher._groupid_to_name(ageid, sexid, raceid), expected) with self.assertRaisesRegex(ValueError, "Ageid cannot be 6"): - flusurv.groupids_to_name(6, 0, 0, id_label_map) - with self.assertRaisesRegex(AssertionError, "At most one groupid can be non-zero"): - flusurv.groupids_to_name(1, 1, 0, id_label_map) - flusurv.groupids_to_name(0, 1, 1, id_label_map) - flusurv.groupids_to_name(1, 1, 1, id_label_map) + api_fetcher._groupid_to_name(6, 0, 0) + with self.assertRaisesRegex(ValueError, "Expect at least two of three group ids to be 0"): + api_fetcher._groupid_to_name(1, 1, 0) + api_fetcher._groupid_to_name(0, 1, 1) + api_fetcher._groupid_to_name(1, 1, 1) From b19160258285ee9a247d40a97871fd7833ca528f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 6 Nov 2023 17:22:27 -0500 Subject: [PATCH 25/38] stop doing recent season filtering Catchment metadata is not updated with most recent season, so doing this will stop us from fetching data. 
--- src/acquisition/flusurv/api.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index 8054f6d59..5d90e5456 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -137,30 +137,19 @@ def _location_name_to_abbr(self, geo, network): def _make_location_to_code_map(self): """Create a map for all currently available FluSurv locations from names to codes""" location_to_code = dict() - unseen_locations = [] - for location in self.metadata["catchments"]: # "area" is the long-form region (California, etc), and "name" is # the network/data source type (IHSP, EIP, etc) location_name = self._location_name_to_abbr(location["area"], location["name"]) - if location["endseasonid"] in self.seasonids: - if location_name in location_to_code.keys(): - raise Exception( - f"catchment {location_name} already seen, but " + - "we expect catchments to be unique" - ) - - location_to_code[location_name] = ( - location["networkid"], location["catchmentid"] + if location_name in location_to_code.keys(): + raise Exception( + f"catchment {location_name} already seen, but " + + "we expect catchments to be unique" ) - else: - unseen_locations.append(location_name) - - print( - f"location(s) {unseen_locations} not included in this issue " + - "because they don't include sufficiently recent data" - ) + location_to_code[location_name] = ( + location["networkid"], location["catchmentid"] + ) return location_to_code def fetch_location_to_code_map(self): @@ -333,7 +322,7 @@ def _group_by_epiweek(self, data): # Set season description. This will be overwritten every iteration, # but should always have the same value per epiweek group. 
- data_out[epiweek]["season"] = self.metadata.id_to_season[obs["seasonid"]] + data_out[epiweek]["season"] = self.metadata.id_to_season[obs["seasonid"].strip()] rate = obs["weeklyrate"] prev_rate = data_out[epiweek][groupname] From b7ba101b71c4d59c3a61160333f522baef156382 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 6 Nov 2023 17:33:19 -0500 Subject: [PATCH 26/38] warn when no location data returned --- src/acquisition/flusurv/api.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index 5d90e5456..0400cb23b 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -152,9 +152,6 @@ def _make_location_to_code_map(self): ) return location_to_code - def fetch_location_to_code_map(self): - return self.location_to_code - def _get_current_issue(self): """ Extract the current issue from the FluSurv metadata result. @@ -264,8 +261,7 @@ def _fetch_flusurv_location(self, location): "response" in result["default_data"].keys() and result["default_data"]["response"] == "No Data" )): - raise Exception(f"No data was returned from the API for {location}" + - "but we expect it to be available for some recent dates") + warn(f"No data was returned from the API for {location}") return result def _group_by_epiweek(self, data): From e1d9d06e252c1c47fc866958ac8c6330e3c13ce5 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 6 Nov 2023 18:10:42 -0500 Subject: [PATCH 27/38] test metadata attributes together --- src/acquisition/flusurv/api.py | 6 +- tests/acquisition/flusurv/test_flusurv.py | 68 ++++++++++++++++------- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index 0400cb23b..e5d8b6e25 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -148,7 +148,7 @@ def 
_make_location_to_code_map(self): ) location_to_code[location_name] = ( - location["networkid"], location["catchmentid"] + int(location["networkid"]), int(location["catchmentid"]) ) return location_to_code @@ -198,7 +198,7 @@ def _make_id_season_map(self): """Create a map from seasonid to season description, in the format "YYYY-YY" """ id_to_label = defaultdict(lambda: defaultdict(lambda: None)) for season in self.metadata["seasons"]: - id_to_label[season["seasonid"]] = season["label"] + id_to_label[season["seasonid"]] = season["label"].strip() return id_to_label @@ -318,7 +318,7 @@ def _group_by_epiweek(self, data): # Set season description. This will be overwritten every iteration, # but should always have the same value per epiweek group. - data_out[epiweek]["season"] = self.metadata.id_to_season[obs["seasonid"].strip()] + data_out[epiweek]["season"] = self.metadata.id_to_season[obs["seasonid"]] rate = obs["weeklyrate"] prev_rate = data_out[epiweek][groupname] diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index 2fb6a5df9..db4e12a37 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -59,11 +59,13 @@ ], # "seasons" element, used for mapping between seasonids and season year spans. 
'seasons': [ - {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, - {'description': 'Season 2012-13', 'enabled': True, 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'IncludeWeeklyRatesAndStrata': True}, - {'description': 'Season 2015-16', 'enabled': True, 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2021-22', 'enabled': True, 'endweek': 3170, 'label': '2021-22', 'seasonid': 61, 'startweek': 3119, 'IncludeWeeklyRatesAndStrata': False}, + {'description': 'Season 2022-23', 'enabled': True, 'endweek': 3222, 'label': '2022-23', 'seasonid': 62, 'startweek': 3171, 'IncludeWeeklyRatesAndStrata': False}, + # sic + {'description': 'Season 2023-24 ', 'enabled': True, 'endweek': 3274, 'label': '2023-24', 'seasonid': 63, 'startweek': 3223, 'IncludeWeeklyRatesAndStrata': False}, ], # "master_lookup" element, used for mapping between valueids and strata descriptions 'master_lookup': [ @@ -117,7 +119,7 @@ # Map derived from "master_lookup" dictionary above, mapping between valueids # by type and cleaned-up descriptions (no spaces or capital letters, etc) -id_label_map = { +id_group_map = { "Age": { 1: "0t4", 2: "5t17", @@ -148,6 +150,30 @@ }, } +catchment_name_map = { + "CA": (2, 1), + "CO": (2, 2), + "CT": (2, 3), + "GA": (2, 4), + "IA": (3, 5), + "ID": (3, 6), + "MD": (2, 7), + 
"MI": (3, 8), + "MN": (2, 9), + "NM": (2, 11), + "NY_albany": (2, 13), + "NY_rochester": (2, 14), + "OH": (3, 15), + "OK": (3, 16), + "OR": (2, 17), + "RI": (3, 18), + "SD": (3, 19), + "TN": (2, 20), + "UT": (3, 21), + "network_all": (1, 22), + "network_eip": (2, 22), + "network_ihsp": (3, 22), +} with patch(__test_target__ + ".fetch_json", return_value = metadata_result) as MockFlusurvMetadata: @@ -183,13 +209,31 @@ def test_mmwrid_to_epiweek(self): for mmwr in metadata_result["mmwr"]: self.assertEqual(flusurv.mmwrid_to_epiweek(mmwr["mmwrid"]), mmwr["yearweek"]) + def test_metadata_attributes(self): + self.assertEqual(metadata_fetcher.metadata, metadata_result) + self.assertEqual(metadata_fetcher.issue, 202337) + self.assertEqual(metadata_fetcher.max_age_weeks, 52) + self.assertEqual(metadata_fetcher.seasonids, {61, 62, 63}) + + self.assertEqual(metadata_fetcher.location_to_code, catchment_name_map) + self.assertEqual(metadata_fetcher.locations, catchment_name_map.keys()) + + self.assertEqual(metadata_fetcher.id_to_group, id_group_map) + self.assertEqual(metadata_fetcher.id_to_season, { + 43: '2003-04', + 46: '2006-07', + 49: '2009-10', + 61: '2021-22', + 62: '2022-23', + 63: '2023-24', + }) + @patch(__test_target__ + ".fetch_json") def test_get_data(self, MockFlusurvLocation): MockFlusurvLocation.return_value = location_api_result season_api_fetcher = api_fetcher season_api_fetcher.metadata.seasonids = [30, 49] - season_api_fetcher.metadata.location_to_code = {"network_all": (1, 22)} self.assertEqual(season_api_fetcher.get_data("network_all"), { 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, @@ -228,20 +272,6 @@ def test_group_by_epiweek_print_msgs(self, mock_print): api_fetcher._group_by_epiweek(input_data) mock_print.assert_called_with("found data for 4 epiweeks") - def test_get_current_issue(self): - self.assertEqual(metadata_fetcher._get_current_issue(), 202337) - - def test_make_id_label_map(self): - 
self.assertEqual(metadata_fetcher._make_id_group_map(), id_label_map) - - def test_make_id_season_map(self): - self.assertEqual(metadata_fetcher._make_id_season_map(), { - 46: '2006-07', - 43: '2003-04', - 49: '2009-10', - 52: '2012-13', - 55: '2015-16', - }) def test_groupids_to_name(self): ids = ( (1, 0, 0), From 772e127f87e82ba6682ea624c5cffde76107745c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 11:22:48 -0500 Subject: [PATCH 28/38] correct local module name to load classes from --- src/acquisition/flusurv/flusurv_update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index a960f4938..47be71b3a 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -79,7 +79,7 @@ import delphi.operations.secrets as secrets from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks -from .flusurv import FlusurvLocationFetcher +from .api import FlusurvLocationFetcher from .constants import (MAX_AGE_TO_CONSIDER_WEEKS, EXPECTED_GROUPS) From 991fde164d7e2fa6c84db71e6cd265f838f9fa44 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 12:08:32 -0500 Subject: [PATCH 29/38] map some long race groups to abbreviated names --- src/acquisition/flusurv/api.py | 16 ++++++++++++++-- tests/acquisition/flusurv/test_flusurv.py | 10 +++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index e5d8b6e25..2b784a7b5 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -181,7 +181,7 @@ def _make_id_group_map(self): # Skip "overall" group if group["Variable"] is None: continue - id_to_label[group["Variable"]][group["valueid"]] = group["Label"].replace( + clean_group_label = 
group["Label"].replace( " ", "" ).replace( "/", "" @@ -189,10 +189,22 @@ def _make_id_group_map(self): "-", "t" ).replace( "yr", "" + ).replace( + "<", "lt" # less than + ).replace( + ">=", "gte" # greater or equal to ).lower() - return id_to_label + if clean_group_label == "hispaniclatino": + clean_group_label = "hisp" + elif clean_group_label == "asianpacificislander": + clean_group_label = "asian" + elif clean_group_label == "americanindianalaskanative": + clean_group_label = "natamer" + id_to_label[group["Variable"]][group["valueid"]] = clean_group_label + + return id_to_label def _make_id_season_map(self): """Create a map from seasonid to season description, in the format "YYYY-YY" """ diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index db4e12a37..17fc069e6 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -134,15 +134,15 @@ 12: "40t49", 21: "5t11", 22: "12t17", - 97: "<18", - 98: ">=18", + 97: "lt18", + 98: "gte18", }, "Race": { 1: "white", 2: "black", - 3: "hispaniclatino", - 4: "asianpacificislander", - 5: "americanindianalaskanative", + 3: "hisp", + 4: "asian", + 5: "natamer", }, "Sex": { 1: "male", From 5d00dbb35bb14d1ab9891b3ecc317f82da97e2c0 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 12:10:03 -0500 Subject: [PATCH 30/38] programmatically create sql query to avoid ordering issues --- src/acquisition/flusurv/flusurv_update.py | 110 ++++------------------ 1 file changed, 16 insertions(+), 94 deletions(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 47be71b3a..b0073be4c 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -109,108 +109,30 @@ def update(fetcher, location, test_mode=False): print(f"rows before: {int(rows1)}") # SQL for insert/update - sql = """ + 
nonrelease_fields = ("issue", "epiweek", "location", "lag", "season") + EXPECTED_GROUPS + other_field_names = ", ".join( + f"`{name}`" for name in nonrelease_fields + ) + other_field_values = ", ".join( + f"%({name})s" for name in nonrelease_fields + ) + # Updates on duplicate key only for release date + signal fields, not metadata. + other_field_coalesce = ", ".join( + f"`{name}` = coalesce(%({name})s, `{name}`)" for name in EXPECTED_GROUPS + ) + + sql = f""" INSERT INTO `flusurv` ( `release_date`, - `issue`, - `epiweek`, - `location`, - `lag`, - `season`, - - `rate_overall`, - - `rate_age_0`, - `rate_age_1`, - `rate_age_2`, - `rate_age_3`, - `rate_age_4`, - `rate_age_5`, - `rate_age_6`, - `rate_age_7`, - - `rate_age_18t29`, - `rate_age_30t39`, - `rate_age_40t49`, - `rate_age_5t11`, - `rate_age_12t17`, - `rate_age_lt18`, - `rate_age_gte18`, - - `rate_race_white`, - `rate_race_black`, - `rate_race_hisp`, - `rate_race_asian`, - `rate_race_natamer`, - - `rate_sex_male`, - `rate_sex_female` + {other_field_names} ) VALUES ( %(release_date)s, - %(issue)s, - %(epiweek)s, - %(location)s, - %(lag)s, - %(season)s, - - %(rate_overall)s, - - %(rate_age_0)s, - %(rate_age_1)s, - %(rate_age_2)s, - %(rate_age_3)s, - %(rate_age_4)s, - %(rate_age_5)s, - %(rate_age_6)s, - %(rate_age_7)s, - - %(rate_age_18t29)s, - %(rate_age_30t39)s, - %(rate_age_40t49)s, - %(rate_age_5t11)s, - %(rate_age_12t17)s, - %(rate_age_<18)s, - %(rate_age_>=18)s, - - %(rate_race_white)s, - %(rate_race_black)s, - %(rate_race_hispaniclatino)s, - %(rate_race_asianpacificislander)s, - %(rate_race_americanindianalaskanative)s, - - %(rate_sex_male)s, - %(rate_sex_female)s + {other_field_values} ) ON DUPLICATE KEY UPDATE `release_date` = least(`release_date`, %(release_date)s), - `rate_overall` = coalesce(%(rate_overall)s, `rate_overall`), - - `rate_age_0` = coalesce(%(rate_age_0)s, `rate_age_0`), - `rate_age_1` = coalesce(%(rate_age_1)s, `rate_age_1`), - `rate_age_2` = coalesce(%(rate_age_2)s, `rate_age_2`), - 
`rate_age_3` = coalesce(%(rate_age_3)s, `rate_age_3`), - `rate_age_4` = coalesce(%(rate_age_4)s, `rate_age_4`), - `rate_age_5` = coalesce(%(rate_age_5)s, `rate_age_5`), - `rate_age_6` = coalesce(%(rate_age_6)s, `rate_age_6`), - `rate_age_7` = coalesce(%(rate_age_7)s, `rate_age_7`), - - `rate_age_18t29` = coalesce(%(rate_age_18t29)s, `rate_age_18t29`), - `rate_age_30t39` = coalesce(%(rate_age_30t39)s, `rate_age_30t39`), - `rate_age_40t49` = coalesce(%(rate_age_40t49)s, `rate_age_40t49`), - `rate_age_5t11` = coalesce(%(rate_age_5t11)s, `rate_age_5t11`), - `rate_age_12t17` = coalesce(%(rate_age_12t17)s, `rate_age_12t17`), - `rate_age_lt18` = coalesce(%(rate_age_<18)s, `rate_age_lt18`), - `rate_age_gte18` = coalesce(%(rate_age_>=18)s, `rate_age_gte18`), - - `rate_race_white` = coalesce(%(rate_race_white)s, `rate_race_white`), - `rate_race_black` = coalesce(%(rate_race_black)s, `rate_race_black`), - `rate_race_hisp` = coalesce(%(rate_race_hispaniclatino)s, `rate_race_hisp`), - `rate_race_asian` = coalesce(%(rate_race_asianpacificislander)s, `rate_race_asian`), - `rate_race_natamer` = coalesce(%(rate_race_americanindianalaskanative)s, `rate_race_natamer`), - - `rate_sex_male` = coalesce(%(rate_sex_male)s, `rate_sex_male`), - `rate_sex_female` = coalesce(%(rate_sex_female)s, `rate_sex_female`) + {other_field_coalesce} """ # insert/update each row of data (one per epiweek) From 5cedb30668715fe6f4c04b5a0d16f1041c06a385 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 16:20:25 -0500 Subject: [PATCH 31/38] return empty result with right format when no data available --- src/acquisition/flusurv/api.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index 2b784a7b5..fbeea9dd3 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -274,6 +274,9 @@ def _fetch_flusurv_location(self, location): 
result["default_data"]["response"] == "No Data" )): warn(f"No data was returned from the API for {location}") + # Return empty obs with right format to avoid downstream errors + return {"default_data": []} + return result def _group_by_epiweek(self, data): @@ -302,10 +305,6 @@ def _group_by_epiweek(self, data): """ data = data["default_data"] - # Sanity check the input. We expect to see some epiweeks - if len(data) == 0: - raise Exception("no data found") - # Create output object # First layer of keys is epiweeks. Second layer of keys is groups # (by id, not age in years, sex abbr, etc). @@ -345,10 +344,6 @@ def _group_by_epiweek(self, data): f"{groupname}, but previous value {prev_rate} does not " f"equal new value {rate}. Using the first value.")) - # Sanity check the input. We expect to have populated our dictionary - if len(data_out.keys()) == 0: - raise Exception("no data loaded") - print(f"found data for {len(data_out.keys())} epiweeks") return data_out From e7ad06ead618350d7ae4d715d690202eb1204be2 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 17:04:01 -0500 Subject: [PATCH 32/38] warning prefix --- src/acquisition/flusurv/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index fbeea9dd3..f615df2e8 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -273,7 +273,7 @@ def _fetch_flusurv_location(self, location): "response" in result["default_data"].keys() and result["default_data"]["response"] == "No Data" )): - warn(f"No data was returned from the API for {location}") + warn(f"warning: No data was returned from the API for {location}") # Return empty obs with right format to avoid downstream errors return {"default_data": []} From 75283d772bd4aafc806a2ae19fbb70806e55e31d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 
17:04:36 -0500 Subject: [PATCH 33/38] add tests for geo-abbr conversion and location fetching --- tests/acquisition/flusurv/test_flusurv.py | 69 ++++++++++++++++++++--- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index 17fc069e6..5ae4ef55d 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -2,6 +2,7 @@ # standard library import unittest +from collections import defaultdict from unittest.mock import (MagicMock, sentinel, patch) import delphi.epidata.acquisition.flusurv.api as flusurv @@ -228,6 +229,37 @@ def test_metadata_attributes(self): 63: '2023-24', }) + def test_geo_name_conversion(self): + geos = ( + "California", + "Utah", + "Entire Network", + "Entire Network", + "Entire Network", + "New York - Albany", + "New York - Rochester", + ) + networks = ( + "FluSurv-NET", + "FluSurv-NET", + "FluSurv-NET", + "IHSP", + "EIP", + "FluSurv-NET", + ) + expected_list = [ + "CA", + "UT", + "network_all", + "network_ihsp", + "network_eip", + "NY_albany", + "NY_rochester", + ] + + for (geo, network), expected in zip(zip(geos, networks), expected_list): + self.assertEqual(metadata_fetcher._location_name_to_abbr(geo, network), expected) + @patch(__test_target__ + ".fetch_json") def test_get_data(self, MockFlusurvLocation): MockFlusurvLocation.return_value = location_api_result @@ -237,19 +269,38 @@ def test_get_data(self, MockFlusurvLocation): self.assertEqual(season_api_fetcher.get_data("network_all"), { 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, - 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8, "season": "2009-10"}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"}, 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, - 201008: 
{"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10"}, } ) + @patch(__test_target__ + ".fetch_json") + def test_fetch_flusurv_location(self, MockFlusurvLocation): + # API returns normal result + MockFlusurvLocation.return_value = location_api_result + self.assertEqual(api_fetcher._fetch_flusurv_location("network_all"), location_api_result) + + # API returns empty result formatted normally + empty_expected_result = {"default_data": []} + MockFlusurvLocation.return_value = empty_expected_result + with self.assertWarnsRegex(Warning, "No data was returned from the API for network_all"): + empty_data_result = api_fetcher._fetch_flusurv_location("network_all") + self.assertEqual(empty_data_result, empty_expected_result) + + # API returns "no data" result + MockFlusurvLocation.return_value = {"default_data": {"response": "No Data"}} + with self.assertWarnsRegex(Warning, "No data was returned from the API for network_all"): + no_data_result = api_fetcher._fetch_flusurv_location("network_all") + self.assertEqual(no_data_result, empty_expected_result) + def test_group_by_epiweek(self): input_data = metadata_result self.assertEqual(api_fetcher._group_by_epiweek(input_data), { 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, - 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hispaniclatino": 4.8, "season": "2009-10"}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"}, 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, - 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hispaniclatino": 0.1, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10"}, } ) @@ -263,8 +314,10 @@ def 
test_group_by_epiweek(self): with self.assertWarnsRegex(Warning, "warning: Multiple rates seen for 201014"): api_fetcher._group_by_epiweek(duplicate_input_data) - with self.assertRaisesRegex(Exception, "no data found"): - api_fetcher._group_by_epiweek({"default_data": []}) + self.assertEqual( + api_fetcher._group_by_epiweek({"default_data": []}), + defaultdict(lambda: defaultdict(lambda: None)) + ) @patch('builtins.print') def test_group_by_epiweek_print_msgs(self, mock_print): @@ -285,8 +338,8 @@ def test_groupids_to_name(self): "rate_age_0", "rate_age_7", "rate_sex_female", - "rate_race_hispaniclatino", - "rate_race_americanindianalaskanative", + "rate_race_hisp", + "rate_race_natamer", "rate_overall", ] From 276944ad2605c5275cd4eb0d593617f16563fb00 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 17:09:14 -0500 Subject: [PATCH 34/38] don't error for missing groups -- some are expected --- src/acquisition/flusurv/flusurv_update.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index b0073be4c..d72fc0fe6 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -148,9 +148,13 @@ def update(fetcher, location, test_mode=False): # Remove the season description since we also store it in each epiweek obj unexpected_groups = data[epiweek].keys() - EXPECTED_GROUPS - {"season"} if len(missing_expected_groups) != 0: - raise Exception( + # Not all groups are available for all geos, e.g. UT in 2022-23 doesn't have + # sex breakdowns. 
+ warn( f"{location} {epiweek} data is missing group(s) {missing_expected_groups}" ) + for group in missing_expected_groups: + data[epiweek][group] = None if len(unexpected_groups) != 0: raise Exception( f"{location} {epiweek} data includes new group(s) {unexpected_groups}" From 9431489970f252a2c0667c1912f7e7314bab4660 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 Nov 2023 17:46:29 -0500 Subject: [PATCH 35/38] fn to add empty sex breakdowns to UT --- src/acquisition/flusurv/api.py | 17 +++++++++++++++-- src/acquisition/flusurv/constants.py | 12 ++++++++---- tests/acquisition/flusurv/test_flusurv.py | 23 +++++++++++++++++++---- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py index f615df2e8..4c4ac93eb 100644 --- a/src/acquisition/flusurv/api.py +++ b/src/acquisition/flusurv/api.py @@ -48,7 +48,8 @@ # first party from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks -from .constants import (MAP_REGION_NAMES_TO_ABBR, MAP_ENTIRE_NETWORK_NAMES) +from .constants import (MAP_REGION_NAMES_TO_ABBR, MAP_ENTIRE_NETWORK_NAMES, + SEX_GROUPS) def fetch_json(path, payload, call_count=1, requests_impl=requests): @@ -233,7 +234,9 @@ def get_data(self, location): # extract print("[reformatting flusurv result...]") - data_out = self._group_by_epiweek(data_in) + data_out = self._add_sex_breakdowns_ut( + self._group_by_epiweek(data_in), location + ) # return print(f"[successfully fetched data for {location}]") @@ -391,3 +394,13 @@ def _groupid_to_name(self, ageid, sexid, raceid): group = "race_" + self.metadata.id_to_group["Race"][raceid] return "rate_" + group + + def _add_sex_breakdowns_ut(self, data, location): + # UT doesn't have sex breakdowns available at least for 2022-23. Fill + # in to avoid downstream errors. 
+ if location == "UT": + for epiweek in data.keys(): + for group in SEX_GROUPS: + if group not in data[epiweek].keys(): + data[epiweek][group] = None + return(data) diff --git a/src/acquisition/flusurv/constants.py b/src/acquisition/flusurv/constants.py index 9d3ceea01..ad31b1cce 100644 --- a/src/acquisition/flusurv/constants.py +++ b/src/acquisition/flusurv/constants.py @@ -56,7 +56,7 @@ reporting has changed from 6 to 0. Age ids 1-5 and 7-9 retain the same the same meanings; age id 6 is not reported. """ -EXPECTED_GROUPS = ( +HISTORICAL_GROUPS = ( "rate_overall", "rate_age_0", @@ -67,7 +67,8 @@ "rate_age_5", "rate_age_6", "rate_age_7", - +) +NEW_AGE_GROUPS = ( "rate_age_18t29", "rate_age_30t39", "rate_age_40t49", @@ -75,16 +76,19 @@ "rate_age_12t17", "rate_age_lt18", "rate_age_gte18", - +) +RACE_GROUPS = ( "rate_race_white", "rate_race_black", "rate_race_hisp", "rate_race_asian", "rate_race_natamer", - +) +SEX_GROUPS = ( "rate_sex_male", "rate_sex_female" ) +EXPECTED_GROUPS = HISTORICAL_GROUPS + NEW_AGE_GROUPS + RACE_GROUPS + SEX_GROUPS MAX_AGE_TO_CONSIDER_WEEKS = 52 diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index 5ae4ef55d..a08696255 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -295,8 +295,7 @@ def test_fetch_flusurv_location(self, MockFlusurvLocation): self.assertEqual(no_data_result, empty_expected_result) def test_group_by_epiweek(self): - input_data = metadata_result - self.assertEqual(api_fetcher._group_by_epiweek(input_data), { + self.assertEqual(api_fetcher._group_by_epiweek(metadata_result), { 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"}, 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"}, 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, @@ -321,8 +320,7 @@ def test_group_by_epiweek(self): 
@patch('builtins.print')
     def test_group_by_epiweek_print_msgs(self, mock_print):
-        input_data = metadata_result
-        api_fetcher._group_by_epiweek(input_data)
+        api_fetcher._group_by_epiweek(metadata_result)
         mock_print.assert_called_with("found data for 4 epiweeks")
 
     def test_groupids_to_name(self):
@@ -352,3 +350,20 @@
         api_fetcher._groupid_to_name(1, 1, 0)
         api_fetcher._groupid_to_name(0, 1, 1)
         api_fetcher._groupid_to_name(1, 1, 1)
+
+    def test_add_sex_breakdowns_ut(self):
+        input_data = api_fetcher._group_by_epiweek(metadata_result)
+        self.assertEqual(api_fetcher._add_sex_breakdowns_ut(input_data, "network_all"), {
+                201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10"},
+                200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"},
+                201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"},
+                201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10"},
+            }
+        )
+        self.assertEqual(api_fetcher._add_sex_breakdowns_ut(input_data, "UT"), {
+                201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None},
+                200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None},
+                201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None},
+                201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10", "rate_sex_female": None, "rate_sex_male": None},
+            }
+        )

From 205c061e2e002ddd9bddcd71e5e4b6f6c7ab3d9e Mon Sep 17 00:00:00 2001
From: Nat DeFries <42820733+nmdefries@users.noreply.github.com>
Date: Tue, 7 Nov 2023 17:48:07 -0500
Subject: [PATCH 36/38] with UT sex subsets getting filled, error on missing
 groups again

---
src/acquisition/flusurv/flusurv_update.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index d72fc0fe6..b0073be4c 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -148,13 +148,9 @@ def update(fetcher, location, test_mode=False): # Remove the season description since we also store it in each epiweek obj unexpected_groups = data[epiweek].keys() - EXPECTED_GROUPS - {"season"} if len(missing_expected_groups) != 0: - # Not all groups are available for all geos, e.g. UT in 2022-23 doesn't have - # sex breakdowns. - warn( + raise Exception( f"{location} {epiweek} data is missing group(s) {missing_expected_groups}" ) - for group in missing_expected_groups: - data[epiweek][group] = None if len(unexpected_groups) != 0: raise Exception( f"{location} {epiweek} data includes new group(s) {unexpected_groups}" From 9de7d643138f7ef24ea26500c644fbcab1f3827b Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 7 May 2024 20:15:35 -0400 Subject: [PATCH 37/38] Update flusurv schema and docs with new age, sex, and race groups (#1287) * add new columns to flusurv endpoint spec * add new age, sex, race strata, and season to schema * list new signals in documentation * move new season column to end of table * add migration script to add new fields to flusurv table * add field descriptions; reformat table so don't need to match line lengths * point from acquisition script to docs and schema def to deduplicate --- docs/api/flusurv.md | 50 ++++++++----- src/acquisition/flusurv/flusurv_update.py | 39 +---------- src/ddl/fluview.sql | 70 +++++++++++++------ .../flusurv_age_sex_race_strata.sql | 18 +++++ src/server/endpoints/flusurv.py | 23 +++++- 5 files changed, 127 insertions(+), 73 deletions(-) create mode 100644 src/ddl/migrations/flusurv_age_sex_race_strata.sql diff --git 
a/docs/api/flusurv.md b/docs/api/flusurv.md index b33f5c22d..92acd2e5d 100644 --- a/docs/api/flusurv.md +++ b/docs/api/flusurv.md @@ -52,22 +52,40 @@ If neither is specified, the current issues are used. ## Response -| Field | Description | Type | -|--------------------------|-----------------------------------------------------------------|------------------| -| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | -| `epidata` | list of results | array of objects | -| `epidata[].release_date` | | string | -| `epidata[].location` | | string | -| `epidata[].issue` | | integer | -| `epidata[].epiweek` | | integer | -| `epidata[].lag` | | integer | -| `epidata[].rate_age_0` | | float | -| `epidata[].rate_age_1` | | float | -| `epidata[].rate_age_2` | | float | -| `epidata[].rate_age_3` | | float | -| `epidata[].rate_age_4` | | float | -| `epidata[].rate_overall` | | float | -| `message` | `success` or error message | string | +| Field | Description | Type | +|---|---|---| +| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | +| `epidata` | list of results | array of objects | +| `epidata[].release_date` | the date when this record was first published by the CDC | string | +| `epidata[].location` | the name of the catchment (e.g. 'network_all', 'CA', 'NY_albany' | string | +| `epidata[].issue` | the epiweek of publication (e.g. 
issue 201453 includes epiweeks up to and including 2014w53, but not 2015w01 or following) | integer | +| `epidata[].epiweek` | the epiweek during which the data was collected | integer | +| `epidata[].lag` | number of weeks between `epiweek` and `issue` | integer | +| `epidata[].rate_age_0` | hospitalization rate for ages 0-4 | float | +| `epidata[].rate_age_1` | hospitalization rate for ages 5-17 | float | +| `epidata[].rate_age_2` | hospitalization rate for ages 18-49 | float | +| `epidata[].rate_age_3` | hospitalization rate for ages 50-64 | float | +| `epidata[].rate_age_4` | hospitalization rate for ages 65+ | float | +| `epidata[].rate_overall` | overall hospitalization rate | float | +| `epidata[].rate_age_5` | hospitalization rate for ages 65-74 | float | +| `epidata[].rate_age_6` | hospitalization rate for ages 75-84 | float | +| `epidata[].rate_age_7` | hospitalization rate for ages 85+ | float | +| `epidata[].rate_age_18t29` | hospitalization rate for ages 18 to 29 | float | +| `epidata[].rate_age_30t39` | hospitalization rate for ages 30 to 39 | float | +| `epidata[].rate_age_40t49` | hospitalization rate for ages 40 to 49 | float | +| `epidata[].rate_age_5t11` | hospitalization rate for ages 5 to 11 | float | +| `epidata[].rate_age_12t17` | hospitalization rate for ages 12 to 17 | float | +| `epidata[].rate_age_lt18` | hospitalization rate for ages <18 | float | +| `epidata[].rate_age_gte18` | hospitalization rate for ages >=18 | float | +| `epidata[].rate_race_white` | hospitalization rate for white people | float | +| `epidata[].rate_race_black` | hospitalization rate for black people | float | +| `epidata[].rate_race_hisp` | hospitalization rate for Hispanic/Latino people | float | +| `epidata[].rate_race_asian` | hospitalization rate for Asian people | float | +| `epidata[].rate_race_natamer` | hospitalization rate for American Indian/Alaskan Native people | float | +| `epidata[].rate_sex_male` | hospitalization rate for males | float | +| 
`epidata[].rate_sex_female` | hospitalization rate for females | float | +| `epidata[].season` | indicates the start and end years of the winter flu season in the format YYYY-YY (e.g. 2022-23 indicates the flu season running late 2022 through early 2023) | string | +| `message` | `success` or error message | string | Notes: * The `flusurv` age groups are, in general, not the same as the ILINet diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index b0073be4c..39e963646 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -19,42 +19,9 @@ === Data Dictionary === ======================= -`flusurv` is the table where US flu hospitalization rates are stored. -+--------------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+--------------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| release_date | date | NO | MUL | NULL | | -| issue | int(11) | NO | MUL | NULL | | -| epiweek | int(11) | NO | MUL | NULL | | -| location | varchar(32) | NO | MUL | NULL | | -| lag | int(11) | NO | MUL | NULL | | -| rate_age_0 | double | YES | | NULL | | -| rate_age_1 | double | YES | | NULL | | -| rate_age_2 | double | YES | | NULL | | -| rate_age_3 | double | YES | | NULL | | -| rate_age_4 | double | YES | | NULL | | -| rate_overall | double | YES | | NULL | | -| rate_age_5 | double | YES | | NULL | | -| rate_age_6 | double | YES | | NULL | | -| rate_age_7 | double | YES | | NULL | | -+--------------+-------------+------+-----+---------+----------------+ -id: unique identifier for each record -release_date: the date when this record was first published by the CDC -issue: the epiweek of publication (e.g. 
issue 201453 includes epiweeks up to - and including 2014w53, but not 2015w01 or following) -epiweek: the epiweek during which the data was collected -location: the name of the catchment (e.g. 'network_all', 'CA', 'NY_albany') -lag: number of weeks between `epiweek` and `issue` -rate_age_0: hospitalization rate for ages 0-4 -rate_age_1: hospitalization rate for ages 5-17 -rate_age_2: hospitalization rate for ages 18-49 -rate_age_3: hospitalization rate for ages 50-64 -rate_age_4: hospitalization rate for ages 65+ -rate_overall: overall hospitalization rate -rate_age_5: hospitalization rate for ages 65-74 -rate_age_6: hospitalization rate for ages 75-84 -rate_age_7: hospitalization rate for ages 85+ +`flusurv` is the table where US flu hospitalization rates are stored. See +`strc/ddl/fluview.sql` for the `flusurv` schema. See `docs/api/flusurv.md` for +field descriptions. ================= === Changelog === diff --git a/src/ddl/fluview.sql b/src/ddl/fluview.sql index 11f10c9dc..adcddc66e 100644 --- a/src/ddl/fluview.sql +++ b/src/ddl/fluview.sql @@ -329,7 +329,7 @@ CREATE TABLE `fluview_public` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; /* -`flusurv` stores FluSurv-NET data (flu hospitaliation rates) as published by +`flusurv` stores FluSurv-NET data (flu hospitalization rates) as published by CDC. Data is public. @@ -345,25 +345,40 @@ Note that the flusurv age groups are, in general, not the same as the ILINet particular "catchment" (e.g. 'network_all', 'CA', 'NY_albany') rather than by regions and states in general. 
-+--------------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+--------------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| release_date | date | NO | MUL | NULL | | -| issue | int(11) | NO | MUL | NULL | | -| epiweek | int(11) | NO | MUL | NULL | | -| location | varchar(32) | NO | MUL | NULL | | -| lag | int(11) | NO | MUL | NULL | | -| rate_age_0 | double | YES | | NULL | | -| rate_age_1 | double | YES | | NULL | | -| rate_age_2 | double | YES | | NULL | | -| rate_age_3 | double | YES | | NULL | | -| rate_age_4 | double | YES | | NULL | | -| rate_overall | double | YES | | NULL | | -| rate_age_5 | double | YES | | NULL | | -| rate_age_6 | double | YES | | NULL | | -| rate_age_7 | double | YES | | NULL | | -+--------------+-------------+------+-----+---------+----------------+ ++-------------------+-------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+-------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| release_date | date | NO | MUL | NULL | | +| issue | int(11) | NO | MUL | NULL | | +| epiweek | int(11) | NO | MUL | NULL | | +| location | varchar(32) | NO | MUL | NULL | | +| lag | int(11) | NO | MUL | NULL | | +| rate_age_0 | double | YES | | NULL | | +| rate_age_1 | double | YES | | NULL | | +| rate_age_2 | double | YES | | NULL | | +| rate_age_3 | double | YES | | NULL | | +| rate_age_4 | double | YES | | NULL | | +| rate_overall | double | YES | | NULL | | +| rate_age_5 | double | YES | | NULL | | +| rate_age_6 | double | YES | | NULL | | +| rate_age_7 | double | YES | | NULL | | +| rate_age_18t29 | double | YES | | NULL | | +| rate_age_30t39 | double | YES | | NULL | | +| rate_age_40t49 | double | YES | | NULL | | +| rate_age_5t11 | double | YES | | NULL | | +| rate_age_12t17 | double | YES | | NULL | | +| 
rate_age_lt18 | double | YES | | NULL | | +| rate_age_gte18 | double | YES | | NULL | | +| rate_race_white | double | YES | | NULL | | +| rate_race_black | double | YES | | NULL | | +| rate_race_hisp | double | YES | | NULL | | +| rate_race_asian | double | YES | | NULL | | +| rate_race_natamer | double | YES | | NULL | | +| rate_sex_male | double | YES | | NULL | | +| rate_sex_female | double | YES | | NULL | | +| season | char(7) | YES | | NULL | | ++-------------------+-------------+------+-----+---------+----------------+ */ CREATE TABLE `flusurv` ( @@ -382,6 +397,21 @@ CREATE TABLE `flusurv` ( `rate_age_5` double DEFAULT NULL, `rate_age_6` double DEFAULT NULL, `rate_age_7` double DEFAULT NULL, + `rate_age_18t29` double DEFAULT NULL, + `rate_age_30t39` double DEFAULT NULL, + `rate_age_40t49` double DEFAULT NULL, + `rate_age_5t11` double DEFAULT NULL, + `rate_age_12t17` double DEFAULT NULL, + `rate_age_lt18` double DEFAULT NULL, + `rate_age_gte18` double DEFAULT NULL, + `rate_race_white` double DEFAULT NULL, + `rate_race_black` double DEFAULT NULL, + `rate_race_hisp` double DEFAULT NULL, + `rate_race_asian` double DEFAULT NULL, + `rate_race_natamer` double DEFAULT NULL, + `rate_sex_male` double DEFAULT NULL, + `rate_sex_female` double DEFAULT NULL, + `season` char(7) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `issue` (`issue`,`epiweek`,`location`), KEY `release_date` (`release_date`), diff --git a/src/ddl/migrations/flusurv_age_sex_race_strata.sql b/src/ddl/migrations/flusurv_age_sex_race_strata.sql new file mode 100644 index 000000000..b6717e8b0 --- /dev/null +++ b/src/ddl/migrations/flusurv_age_sex_race_strata.sql @@ -0,0 +1,18 @@ +-- Add new age, race, and sex strata, and season descriptor (YYYY-YY format) +ALTER TABLE `flusurv` ADD ( + `rate_age_18t29` double DEFAULT NULL, + `rate_age_30t39` double DEFAULT NULL, + `rate_age_40t49` double DEFAULT NULL, + `rate_age_5t11` double DEFAULT NULL, + `rate_age_12t17` double DEFAULT NULL, + `rate_age_lt18` double 
DEFAULT NULL, + `rate_age_gte18` double DEFAULT NULL, + `rate_race_white` double DEFAULT NULL, + `rate_race_black` double DEFAULT NULL, + `rate_race_hisp` double DEFAULT NULL, + `rate_race_asian` double DEFAULT NULL, + `rate_race_natamer` double DEFAULT NULL, + `rate_sex_male` double DEFAULT NULL, + `rate_sex_female` double DEFAULT NULL, + `season` char(7) DEFAULT NULL, +); diff --git a/src/server/endpoints/flusurv.py b/src/server/endpoints/flusurv.py index 08b2a14d9..1feea4834 100644 --- a/src/server/endpoints/flusurv.py +++ b/src/server/endpoints/flusurv.py @@ -19,7 +19,7 @@ def handle(): # basic query info q = QueryBuilder("flusurv", "fs") - fields_string = ["release_date", "location"] + fields_string = ["release_date", "location", "season"] fields_int = ["issue", "epiweek", "lag"] fields_float = [ "rate_age_0", @@ -28,6 +28,27 @@ def handle(): "rate_age_3", "rate_age_4", "rate_overall", + + "rate_age_5", + "rate_age_6", + "rate_age_7", + + "rate_age_18t29", + "rate_age_30t39", + "rate_age_40t49", + "rate_age_5t11", + "rate_age_12t17", + "rate_age_lt18", + "rate_age_gte18", + + "rate_race_white", + "rate_race_black", + "rate_race_hisp", + "rate_race_asian", + "rate_race_natamer", + + "rate_sex_male", + "rate_sex_female", ] q.set_fields(fields_string, fields_int, fields_float) q.set_sort_order("epiweek", "location", "issue") From 40212bb84ff3c94938f59b8b318403437a9edc94 Mon Sep 17 00:00:00 2001 From: george Date: Tue, 7 May 2024 20:20:25 -0400 Subject: [PATCH 38/38] Revert "Update flusurv schema and docs with new age, sex, and race groups" (#1426) This reverts commit 9de7d643138f7ef24ea26500c644fbcab1f3827b. 
--- docs/api/flusurv.md | 50 +++++-------- src/acquisition/flusurv/flusurv_update.py | 39 ++++++++++- src/ddl/fluview.sql | 70 ++++++------------- .../flusurv_age_sex_race_strata.sql | 18 ----- src/server/endpoints/flusurv.py | 23 +----- 5 files changed, 73 insertions(+), 127 deletions(-) delete mode 100644 src/ddl/migrations/flusurv_age_sex_race_strata.sql diff --git a/docs/api/flusurv.md b/docs/api/flusurv.md index 92acd2e5d..b33f5c22d 100644 --- a/docs/api/flusurv.md +++ b/docs/api/flusurv.md @@ -52,40 +52,22 @@ If neither is specified, the current issues are used. ## Response -| Field | Description | Type | -|---|---|---| -| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | -| `epidata` | list of results | array of objects | -| `epidata[].release_date` | the date when this record was first published by the CDC | string | -| `epidata[].location` | the name of the catchment (e.g. 'network_all', 'CA', 'NY_albany' | string | -| `epidata[].issue` | the epiweek of publication (e.g. 
issue 201453 includes epiweeks up to and including 2014w53, but not 2015w01 or following) | integer | -| `epidata[].epiweek` | the epiweek during which the data was collected | integer | -| `epidata[].lag` | number of weeks between `epiweek` and `issue` | integer | -| `epidata[].rate_age_0` | hospitalization rate for ages 0-4 | float | -| `epidata[].rate_age_1` | hospitalization rate for ages 5-17 | float | -| `epidata[].rate_age_2` | hospitalization rate for ages 18-49 | float | -| `epidata[].rate_age_3` | hospitalization rate for ages 50-64 | float | -| `epidata[].rate_age_4` | hospitalization rate for ages 65+ | float | -| `epidata[].rate_overall` | overall hospitalization rate | float | -| `epidata[].rate_age_5` | hospitalization rate for ages 65-74 | float | -| `epidata[].rate_age_6` | hospitalization rate for ages 75-84 | float | -| `epidata[].rate_age_7` | hospitalization rate for ages 85+ | float | -| `epidata[].rate_age_18t29` | hospitalization rate for ages 18 to 29 | float | -| `epidata[].rate_age_30t39` | hospitalization rate for ages 30 to 39 | float | -| `epidata[].rate_age_40t49` | hospitalization rate for ages 40 to 49 | float | -| `epidata[].rate_age_5t11` | hospitalization rate for ages 5 to 11 | float | -| `epidata[].rate_age_12t17` | hospitalization rate for ages 12 to 17 | float | -| `epidata[].rate_age_lt18` | hospitalization rate for ages <18 | float | -| `epidata[].rate_age_gte18` | hospitalization rate for ages >=18 | float | -| `epidata[].rate_race_white` | hospitalization rate for white people | float | -| `epidata[].rate_race_black` | hospitalization rate for black people | float | -| `epidata[].rate_race_hisp` | hospitalization rate for Hispanic/Latino people | float | -| `epidata[].rate_race_asian` | hospitalization rate for Asian people | float | -| `epidata[].rate_race_natamer` | hospitalization rate for American Indian/Alaskan Native people | float | -| `epidata[].rate_sex_male` | hospitalization rate for males | float | -| 
`epidata[].rate_sex_female` | hospitalization rate for females | float | -| `epidata[].season` | indicates the start and end years of the winter flu season in the format YYYY-YY (e.g. 2022-23 indicates the flu season running late 2022 through early 2023) | string | -| `message` | `success` or error message | string | +| Field | Description | Type | +|--------------------------|-----------------------------------------------------------------|------------------| +| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | +| `epidata` | list of results | array of objects | +| `epidata[].release_date` | | string | +| `epidata[].location` | | string | +| `epidata[].issue` | | integer | +| `epidata[].epiweek` | | integer | +| `epidata[].lag` | | integer | +| `epidata[].rate_age_0` | | float | +| `epidata[].rate_age_1` | | float | +| `epidata[].rate_age_2` | | float | +| `epidata[].rate_age_3` | | float | +| `epidata[].rate_age_4` | | float | +| `epidata[].rate_overall` | | float | +| `message` | `success` or error message | string | Notes: * The `flusurv` age groups are, in general, not the same as the ILINet diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 39e963646..b0073be4c 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -19,9 +19,42 @@ === Data Dictionary === ======================= -`flusurv` is the table where US flu hospitalization rates are stored. See -`strc/ddl/fluview.sql` for the `flusurv` schema. See `docs/api/flusurv.md` for -field descriptions. +`flusurv` is the table where US flu hospitalization rates are stored. 
++--------------+-------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++--------------+-------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| release_date | date | NO | MUL | NULL | | +| issue | int(11) | NO | MUL | NULL | | +| epiweek | int(11) | NO | MUL | NULL | | +| location | varchar(32) | NO | MUL | NULL | | +| lag | int(11) | NO | MUL | NULL | | +| rate_age_0 | double | YES | | NULL | | +| rate_age_1 | double | YES | | NULL | | +| rate_age_2 | double | YES | | NULL | | +| rate_age_3 | double | YES | | NULL | | +| rate_age_4 | double | YES | | NULL | | +| rate_overall | double | YES | | NULL | | +| rate_age_5 | double | YES | | NULL | | +| rate_age_6 | double | YES | | NULL | | +| rate_age_7 | double | YES | | NULL | | ++--------------+-------------+------+-----+---------+----------------+ +id: unique identifier for each record +release_date: the date when this record was first published by the CDC +issue: the epiweek of publication (e.g. issue 201453 includes epiweeks up to + and including 2014w53, but not 2015w01 or following) +epiweek: the epiweek during which the data was collected +location: the name of the catchment (e.g. 
'network_all', 'CA', 'NY_albany') +lag: number of weeks between `epiweek` and `issue` +rate_age_0: hospitalization rate for ages 0-4 +rate_age_1: hospitalization rate for ages 5-17 +rate_age_2: hospitalization rate for ages 18-49 +rate_age_3: hospitalization rate for ages 50-64 +rate_age_4: hospitalization rate for ages 65+ +rate_overall: overall hospitalization rate +rate_age_5: hospitalization rate for ages 65-74 +rate_age_6: hospitalization rate for ages 75-84 +rate_age_7: hospitalization rate for ages 85+ ================= === Changelog === diff --git a/src/ddl/fluview.sql b/src/ddl/fluview.sql index adcddc66e..11f10c9dc 100644 --- a/src/ddl/fluview.sql +++ b/src/ddl/fluview.sql @@ -329,7 +329,7 @@ CREATE TABLE `fluview_public` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; /* -`flusurv` stores FluSurv-NET data (flu hospitalization rates) as published by +`flusurv` stores FluSurv-NET data (flu hospitaliation rates) as published by CDC. Data is public. @@ -345,40 +345,25 @@ Note that the flusurv age groups are, in general, not the same as the ILINet particular "catchment" (e.g. 'network_all', 'CA', 'NY_albany') rather than by regions and states in general. 
-+-------------------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+-------------------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| release_date | date | NO | MUL | NULL | | -| issue | int(11) | NO | MUL | NULL | | -| epiweek | int(11) | NO | MUL | NULL | | -| location | varchar(32) | NO | MUL | NULL | | -| lag | int(11) | NO | MUL | NULL | | -| rate_age_0 | double | YES | | NULL | | -| rate_age_1 | double | YES | | NULL | | -| rate_age_2 | double | YES | | NULL | | -| rate_age_3 | double | YES | | NULL | | -| rate_age_4 | double | YES | | NULL | | -| rate_overall | double | YES | | NULL | | -| rate_age_5 | double | YES | | NULL | | -| rate_age_6 | double | YES | | NULL | | -| rate_age_7 | double | YES | | NULL | | -| rate_age_18t29 | double | YES | | NULL | | -| rate_age_30t39 | double | YES | | NULL | | -| rate_age_40t49 | double | YES | | NULL | | -| rate_age_5t11 | double | YES | | NULL | | -| rate_age_12t17 | double | YES | | NULL | | -| rate_age_lt18 | double | YES | | NULL | | -| rate_age_gte18 | double | YES | | NULL | | -| rate_race_white | double | YES | | NULL | | -| rate_race_black | double | YES | | NULL | | -| rate_race_hisp | double | YES | | NULL | | -| rate_race_asian | double | YES | | NULL | | -| rate_race_natamer | double | YES | | NULL | | -| rate_sex_male | double | YES | | NULL | | -| rate_sex_female | double | YES | | NULL | | -| season | char(7) | YES | | NULL | | -+-------------------+-------------+------+-----+---------+----------------+ ++--------------+-------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++--------------+-------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| release_date | date | NO | MUL | NULL | | +| issue | int(11) | NO | MUL | NULL | | +| epiweek | int(11) | NO | MUL | NULL | | +| 
location | varchar(32) | NO | MUL | NULL | | +| lag | int(11) | NO | MUL | NULL | | +| rate_age_0 | double | YES | | NULL | | +| rate_age_1 | double | YES | | NULL | | +| rate_age_2 | double | YES | | NULL | | +| rate_age_3 | double | YES | | NULL | | +| rate_age_4 | double | YES | | NULL | | +| rate_overall | double | YES | | NULL | | +| rate_age_5 | double | YES | | NULL | | +| rate_age_6 | double | YES | | NULL | | +| rate_age_7 | double | YES | | NULL | | ++--------------+-------------+------+-----+---------+----------------+ */ CREATE TABLE `flusurv` ( @@ -397,21 +382,6 @@ CREATE TABLE `flusurv` ( `rate_age_5` double DEFAULT NULL, `rate_age_6` double DEFAULT NULL, `rate_age_7` double DEFAULT NULL, - `rate_age_18t29` double DEFAULT NULL, - `rate_age_30t39` double DEFAULT NULL, - `rate_age_40t49` double DEFAULT NULL, - `rate_age_5t11` double DEFAULT NULL, - `rate_age_12t17` double DEFAULT NULL, - `rate_age_lt18` double DEFAULT NULL, - `rate_age_gte18` double DEFAULT NULL, - `rate_race_white` double DEFAULT NULL, - `rate_race_black` double DEFAULT NULL, - `rate_race_hisp` double DEFAULT NULL, - `rate_race_asian` double DEFAULT NULL, - `rate_race_natamer` double DEFAULT NULL, - `rate_sex_male` double DEFAULT NULL, - `rate_sex_female` double DEFAULT NULL, - `season` char(7) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `issue` (`issue`,`epiweek`,`location`), KEY `release_date` (`release_date`), diff --git a/src/ddl/migrations/flusurv_age_sex_race_strata.sql b/src/ddl/migrations/flusurv_age_sex_race_strata.sql deleted file mode 100644 index b6717e8b0..000000000 --- a/src/ddl/migrations/flusurv_age_sex_race_strata.sql +++ /dev/null @@ -1,18 +0,0 @@ --- Add new age, race, and sex strata, and season descriptor (YYYY-YY format) -ALTER TABLE `flusurv` ADD ( - `rate_age_18t29` double DEFAULT NULL, - `rate_age_30t39` double DEFAULT NULL, - `rate_age_40t49` double DEFAULT NULL, - `rate_age_5t11` double DEFAULT NULL, - `rate_age_12t17` double DEFAULT NULL, - `rate_age_lt18` 
double DEFAULT NULL, - `rate_age_gte18` double DEFAULT NULL, - `rate_race_white` double DEFAULT NULL, - `rate_race_black` double DEFAULT NULL, - `rate_race_hisp` double DEFAULT NULL, - `rate_race_asian` double DEFAULT NULL, - `rate_race_natamer` double DEFAULT NULL, - `rate_sex_male` double DEFAULT NULL, - `rate_sex_female` double DEFAULT NULL, - `season` char(7) DEFAULT NULL, -); diff --git a/src/server/endpoints/flusurv.py b/src/server/endpoints/flusurv.py index 1feea4834..08b2a14d9 100644 --- a/src/server/endpoints/flusurv.py +++ b/src/server/endpoints/flusurv.py @@ -19,7 +19,7 @@ def handle(): # basic query info q = QueryBuilder("flusurv", "fs") - fields_string = ["release_date", "location", "season"] + fields_string = ["release_date", "location"] fields_int = ["issue", "epiweek", "lag"] fields_float = [ "rate_age_0", @@ -28,27 +28,6 @@ def handle(): "rate_age_3", "rate_age_4", "rate_overall", - - "rate_age_5", - "rate_age_6", - "rate_age_7", - - "rate_age_18t29", - "rate_age_30t39", - "rate_age_40t49", - "rate_age_5t11", - "rate_age_12t17", - "rate_age_lt18", - "rate_age_gte18", - - "rate_race_white", - "rate_race_black", - "rate_race_hisp", - "rate_race_asian", - "rate_race_natamer", - - "rate_sex_male", - "rate_sex_female", ] q.set_fields(fields_string, fields_int, fields_float) q.set_sort_order("epiweek", "location", "issue")