### Credit to: https://github.com/alipphardt/cdc-wonder-api/blob/master/CDC%2BWONDER%2BAPI%2BExample.ipynb

In [3]:
import requests

# BeautifulSoup library facilitates parsing of XML response
import bs4 as bs

# This library facilitates 2-dimensional array operations and visualization
import pandas as pd

In [70]:
# Load the county lookup table; column '1' of this file is read further down
# as the county FIPS codes to include in the query.
county_codes = pd.read_csv('county_fips_code_by_socio.csv')

In [89]:
def createParameterList(parameterList):
    """Serialize a parameter dictionary into ``<parameter>`` XML elements.

    Every key produces one ``<parameter>`` element containing a ``<name>``
    element and one ``<value>`` element per value. Names and values are
    converted with ``str()`` and XML-escaped, so characters such as ``&``
    and ``<`` cannot produce a malformed request document.

    Args:
        parameterList: Mapping of parameter name to either a single value or
            a list/tuple of values. An empty list yields a parameter with a
            name and no ``<value>`` elements.

    Returns:
        str: The concatenated elements, one tag per line, each line ending
        with a newline. An empty mapping yields an empty string.
    """
    # Imported locally so the helper is self-contained within its cell.
    from xml.sax.saxutils import escape

    lines = []
    for name, values in parameterList.items():
        # Normalise a scalar to a one-element sequence so both cases share a path.
        if not isinstance(values, (list, tuple)):
            values = [values]

        lines.append("<parameter>")
        lines.append("<name>" + escape(str(name)) + "</name>")
        lines.extend("<value>" + escape(str(value)) + "</value>" for value in values)
        lines.append("</parameter>")

    # A single join avoids rebuilding the string on every append.
    return "".join(line + "\n" for line in lines)

In [10]:
# measures to return, the default measures plus any optional measures

# For this example, include deaths, population, and crude rate

# Each measure id N becomes the entry "M_N": "D157.MN".
m_parameters = {
    "M_" + measure: "D157.M" + measure
    for measure in (
        "1",     # Deaths, must be included
        "2",     # Population, must be included
        "3",     # Crude rate, must be included
        # Optional measures -- uncomment an id to request it:
        # "31",  # Standard error (crude rate)
        # "32",  # 95% confidence interval (crude rate)
        # "41",  # Standard error (age-adjusted rate)
        # "42",  # 95% confidence interval (age-adjusted rate)
    )
}

In [134]:
# "by-variables" or those parameters selected in the "Group Results By" and the "And By" drop-down lists 
# in the "Request Form." These "by-variables" are the cross-tabulations, stratifications or indexes 
# to the query results. Expect the results data table to show a row for each category in the by-variables, 
# and a column for each measure. For example, if you wish to compare data by sex, then "group results by" gender, 
# to get a row for females and a row for males in the output.
# M_ are measures to return, the default measures plus any optional measures.


# Group-by variables in order; the n-th entry is sent as parameter "B_n".
b_parameters = {
    "B_" + str(position): variable
    for position, variable in enumerate(
        (
            "D157.V10-level1",  # Census Region
            "D157.V1-level1",   # Year
            "D157.V7",          # Gender
            "D157.V42",         # Single Race 6
            "D157.V51",         # Five Year Age Group
        ),
        start=1,
    )
}

In [12]:
# values for non-standard age adjusted rates (see mortality online databases).

# For this example, these parameters are ignored: no non-standard age adjusted rates are requested (O_aar is "aar_none")

# Every entry is "*All*" except the final one, which is sent blank.
vm_parameters = {
    **dict.fromkeys(
        (
            "VM_D157.M6_D157.V1_S",
            "VM_D157.M6_D157.V7",
            "VM_D157.M6_D157.V17",
            "VM_D157.M6_D157.V42",
        ),
        "*All*",
    ),
    "VM_D157.M6_D157.V20": "",
}

In [94]:
# County FIPS codes to leave out of the request; compared against the raw
# values of county_codes['1'] when the code list is built.
# NOTE(review): the reason 51560 is excluded is not recorded here -- confirm.
fips_to_drop = [51560]

In [95]:
# Build the county FIPS codes as zero-padded 5-character strings, skipping
# any code listed in fips_to_drop. The values of column '1' are stringified,
# so codes that lost their leading zero(s) are restored with zfill(5); this
# covers any short code, not only those that are exactly four characters long.
county_codes_list = [
    str(code).zfill(5)
    for code in county_codes['1']
    if code not in fips_to_drop
]

In [126]:
# variable values to limit in the "where" clause of the query, found in multiple select 
# list boxes and advanced finder text entry boxes in the "Request Form."

# For this example, we limit the query to the counties in county_codes_list (V_D157.V9) and to the T40.x codes listed under V_D157.V13.
# For all other categories, include all values

# "Where"-clause values, keyed by "V_<variable>". A list sends one <value>
# per element; "*All*" and "" are passed through unchanged.
# The key "V_D157.V51" used to be listed twice with the same value; the
# redundant entry was removed (a repeated key in a dict literal silently
# keeps only the last one).
v_parameters = {
    "V_D157.V9": county_codes_list,  # State/County: the county FIPS codes built above
    "V_D157.V10": "",        # Census Regions
    "V_D157.V27": "",
    "V_D157.V30": "",
    "V_D157.V31": "",
    "V_D157.V19": "*All*",
    "V_D157.V11": "*All*",
    "V_D157.V5": "*All*",
    "V_D157.V51": "*All*",
    "V_D157.V6": "00",
    "V_D157.V7": "*All*",
    "V_D157.V17": "*All*",
    "V_D157.V42": "00",
    "V_D157.V43": "*All*",
    "V_D157.V44": "*All*",
    "V_D157.V1": "",
    "V_D157.V24": "*All*",
    "V_D157.V20": "*All*",
    "V_D157.V21": "*All*",
    "V_D157.V2": "",
    "V_D157.V4": "*All*",
    "V_D157.V12": "*All*",
    "V_D157.V22": "",
    "V_D157.V23": "*All*",
    "V_D157.V25": "",
    # Codes selected for D157.V13 (the variable named by O_mcd)
    "V_D157.V13": ["T40.0 (Opium)","T40.1 (Heroin)","T40.2 (Other opioids)","T40.3 (Methadone)",
                   "T40.4 (Other synthetic narcotics)","T40.6 (Other and unspecified narcotics)"],
    "V_D157.V13_AND": "",
    "V_D157.V15": "",
    "V_D157.V15_AND": "",
    "V_D157.V16": "",
    "V_D157.V16_AND": "",
    "V_D157.V26": "",
    "V_D157.V26_AND": "",
}

In [15]:
# values highlighted in a "Finder" control for hierarchical lists, 
# such as the "Regions/Divisions/States/Counties hierarchical" list.

# For this example, every finder list below (year/month, census regions, hhs regions, state/county
# and the ICD-10 code lists) is left at "*All*".

# Every finder variable is set to its own ["*All*"] list under "F_D157.<variable>".
f_parameters = {
    "F_D157." + variable: ["*All*"]
    for variable in (
        "V1",   # year/month
        "V10",  # Census Regions - dont change
        "V2",   # ICD-10 Codes
        "V27",  # HHS Regions - dont change
        "V9",   # State County - dont change
        "V30",
        "V31",
        "V25",
        "V13",
        "V26",
    )
}

In [136]:
# other parameters, such as radio buttons, checkboxes, and lists that are not data categories

# For this example, request no age-adjusted rates (aar_none), use five-year age groups (D157.V51), use the state/county
# location variable (D157.V9), show rates per 100,000, use urbanization category D157.V19 and use ICD-10 Codes (D157.V2) for cause of death category

# Form options (radio buttons, checkboxes, lists), in the order they are sent.
# NOTE(review): "fadv" presumably selects the advanced finder (values taken
# from the V_ parameters) and "freg" the regular finder (V_ values ignored);
# the earlier per-line comments described every mode as "regular finder" --
# confirm against the CDC WONDER API documentation.
o_parameters = {
    "O_javascript": "on",           # Set to on by default
    "O_aar": "aar_none",            # age-adjusted rates
    "O_aar_pop": "0000",            # population selection for age-adjusted rates
    "O_title": "",                  # title for data run
    "O_oc-sect1-request": "close",
    "O_rate_per": "100000",         # rates calculated per X persons
    "O_location": "D157.V9",        # select location variable to use (e.g. state/county, census, hhs regions)
    # Finder mode for each location variable
    **{
        "O_" + variable + "_fmode": "fadv"
        for variable in ("V9", "V10", "V27", "V30", "V31")
    },
    "O_urban": "D157.V19",          # select urbanization category
    "O_age": "D157.V51",            # select age-group (e.g. ten-year, five-year, single-year, infant groups)
    "O_race": "D157.V42",
    "O_V1_fmode": "freg",           # finder mode for D157.V1
    "O_ucd": "D157.V2",             # select underlying cause of death category
    "O_V2_fmode": "fadv",           # finder mode for D157.V2
    "O_V25_fmode": "freg",          # finder mode for D157.V25
    "O_mcd": "D157.V13",            # select multiple cause of death category
    # Finder mode for the remaining code lists
    **{
        "O_" + variable + "_fmode": "fadv"
        for variable in ("V13", "V15", "V16", "V26")
    },
    "O_precision": "1",             # decimal places
    "O_show_totals": "false",       # Show totals
    "O_timeout": "600",
}

In [137]:
# contents of the "Currently selected" information areas next to "Finder" controls in the "Request Form."

# For this example, include all dates, census regions, hhs regions, and states.
# The ICD-10 code selections are likewise left at all causes of death.

# "Currently selected" texts, keyed by "I_<variable>".
i_parameters = {
    # The location finders all show the same text (the first three are marked "dont change").
    **dict.fromkeys(
        (
            "I_D157.V9",   # State County - dont change
            "I_D157.V10",  # Census Regions - dont change
            "I_D157.V27",  # HHS Regions - dont change
            "I_D157.V30",
            "I_D157.V31",
        ),
        "*All* (The United States)",
    ),
    "I_D157.V1": "*All* (All Dates)",            # year/month
    "I_D157.V2": "*All* (All Causes of Death)",  # ICD-10 Codes
    "I_D157.V25": "All Causes of Death",         # ICD-10 Codes
}

In [138]:
# Miscellaneous hidden inputs/parameters usually passed by web form. These do not change.
misc_parameters = {
    "action-Send": "Send",
    # One "finder-stage-D157.<variable>" entry per finder, all set to "codeset".
    **{
        "finder-stage-D157." + variable: "codeset"
        for variable in (
            "V26", "V16", "V15", "V13", "V25", "V2",
            "V1", "V31", "V30", "V27", "V10", "V9",
        )
    },
    "stage": "request",
    # Identification of the dataset being queried
    "dataset_code": "D157",
    "dataset_label": "Multiple Cause of Death, 2018-2021, Single+Race",
    "dataset_vintage": "2021",
}

In [139]:
# Assemble the request document: the opening tag, the serialized parameter
# groups in a fixed order, then the closing tag (no trailing newline).
xml_request = "".join(
    [
        "<request-parameters>\n",
        createParameterList(b_parameters),
        createParameterList(m_parameters),
        createParameterList(f_parameters),
        createParameterList(i_parameters),
        createParameterList(o_parameters),
        createParameterList(vm_parameters),
        createParameterList(v_parameters),
        createParameterList(misc_parameters),
        "</request-parameters>",
    ]
)

In [140]:
# Name of a request XML file kept next to this notebook.
xml_request_file = "Multiple Cause of Death, 2018-2021, Single Race_1699829475331-reqV2.xml"

# Read the file inside a context manager so the handle is always closed;
# previously it was opened and left open without being read.
# NOTE: `f` is therefore a closed file object afterwards -- use
# `saved_xml_request` to inspect the file's contents.
with open(xml_request_file, "r") as f:
    saved_xml_request = f.read()

In [141]:
#xml_request

In [142]:
url = "https://wonder.cdc.gov/controller/datarequest/D157" # D157 Multiple Cause of Death

# Reset first so a failed request cannot leave a `data` value from an earlier
# run of this cell looking like a fresh result.
data = None

# POST the request document together with the data-use-restrictions flag.
# Without a timeout, requests waits indefinitely on an unresponsive server.
# NOTE(review): (connect, read) timeouts in seconds; the read limit is chosen
# to sit above the "600" sent as O_timeout -- adjust if queries run longer.
response = requests.post(
    url,
    data={"request_xml": xml_request, "accept_datause_restrictions": "true"},
    timeout=(10, 660),
)

if response.status_code == 200:
    data = response.text
else:
    # Show the status and the raw body of the failed request.
    print(response)
    print(response.content)

<Response [500]>
b'<?xml version="1.0"?>\r\n<page>\r\n<platform>prod</platform>\r\n<title>Processing Error</title>\r\n<message>Age Adjusted Rates cannot be produced when the data is grouped by Age. #Age-Adjusted Rates Hints#</message>\r\n</page>'


In [4]:
def xml2df(xml_data):
    """Turn the data table of an XML response into a list of row lists.

    The document is parsed with BeautifulSoup's "lxml" parser and every 'r'
    (row) tag is visited in order; within a row, every 'c' (cell) tag is
    handled as follows:

    * A cell with a 'v' attribute carries a value. Commas are stripped and
      the text is converted to float; if that fails, the raw text is kept.
    * A cell without 'v' carries a text label in its 'l' attribute. If it
      also has an 'r' (rowspan) attribute, the label is appended to that
      many rows, starting with the current one, creating rows as needed.

    Returns a list of lists (one inner list per row) that can be passed to
    the pandas DataFrame constructor.
    """
    soup = bs.BeautifulSoup(xml_data, "lxml")
    table = []

    for row_idx, row in enumerate(soup.find_all("r")):
        # A row may already exist if an earlier rowspan label reached it.
        if row_idx >= len(table):
            table.append([])

        for cell in row.find_all("c"):
            attrs = cell.attrs

            # Value cell: numeric when it parses, raw text otherwise.
            if "v" in attrs:
                raw = attrs["v"]
                try:
                    parsed = float(raw.replace(",", ""))
                except ValueError:
                    parsed = raw
                table[row_idx].append(parsed)
                continue

            # Plain label cell: belongs to the current row only.
            if "r" not in attrs:
                table[row_idx].append(attrs["l"])
                continue

            # Spanning label cell: repeat the label on each covered row.
            for target in range(row_idx, row_idx + int(attrs["r"])):
                if target >= len(table):
                    table.append([])
                table[target].append(attrs["l"])

    return table