In [4]:
from dotenv import load_dotenv
import os
import requests
import pandas as pd

In [5]:
class PlacesClient:
    """
    Small client for the county-level CDC PLACES datasets served from
    ``https://data.cdc.gov/api/v3/views/``.

    Parameters
    ----------
    token : string or None
        Application token sent with every request in the ``X-App-Token``
        header. When falsy (e.g. the environment variable was not set), no
        token header is registered on the session.
    timeout : float or tuple, optional
        Timeout in seconds handed to every GET request (default 30). Without
        it a stalled connection would block the notebook kernel indefinitely.
    """

    # Columns removed from every response: API metadata and footnote fields.
    _METADATA_COLS = [
        ':id', ':version', ':created_at', ':updated_at',
        'data_value_footnote_symbol', 'data_value_footnote',
    ]
    # Columns converted to numeric dtypes when present.
    _NUMERIC_COLS = [
        'data_value', 'low_confidence_limit', 'high_confidence_limit',
        'totalpopulation', 'totalpop18plus',
    ]
    # Category ids kept by this client (health outcomes, health risk behaviors).
    _CATEGORY_IDS = ['HLTHOUT', 'RISKBEH']
    # Dataset id of the data dictionary that lists the measures.
    _DATA_DICTIONARY_ID = 'm35w-spkz'
    # Dataset id of the county-level table for each supported release.
    _RELEASE_IDS = {
        '2025': 'swc5-untb',
        '2024': 'fu4u-a9bh',
        '2023': 'h3ej-a9ec',
        '2022': 'duw2-7jbt',
        '2021': 'pqpp-u99h',
        '2020': 'dv4u-3x3q',
    }

    def __init__(self, token, timeout=30):
        self.base_url = 'https://data.cdc.gov/api/v3/views/'
        self.timeout = timeout
        self.session = requests.Session()
        if token:
            self.session.headers.update({
                'X-App-Token': token
            })

    def _make_request(self, url, params=None):
        """
        Make a GET request to the API and return the decoded JSON body.

        HTTP error statuses are printed and re-raised. Any other request
        failure (connection error, timeout, ...) propagates unchanged.
        """
        try:
            response = self.session.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            print(f"API Error: {e}")
            raise

    def _json_to_df(self, data):
        """
        Transform JSON data into a pandas DataFrame.

        Metadata/footnote columns are dropped when present, and the known
        numeric columns are converted with ``pd.to_numeric`` (which raises if
        a value cannot be parsed).
        """
        df = pd.DataFrame(data).drop(columns=self._METADATA_COLS, errors='ignore')
        for col in self._NUMERIC_COLS:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col])
        return df

    def _keep_supported_categories(self, df):
        """Keep only rows of the supported categories and renumber the index."""
        kept = df[df['categoryid'].isin(self._CATEGORY_IDS)]
        return kept.reset_index(drop=True)

    @staticmethod
    def _as_value_list(values):
        """
        Normalize a filter argument to a list, or None when there is nothing
        to filter on (None, empty string, or empty collection).
        """
        if values is None:
            return None
        if isinstance(values, str):
            # A bare string is one value, not an iterable of characters.
            return [values] if values else None
        # list() avoids the ambiguous truth value of arrays and Series.
        return list(values) or None

    def get_measure_list(self):
        """
        Retrieve the key information of all available measures
        (all health outcomes and health risk behaviors measures).

        Returns
        -------
        measures_df : pandas DataFrame
            A dataframe, indexed from 0, with the following information for
            each measure:
            - id: measure identifier
            - short_name: short label
            - full_name: full descriptive name
            - category: measure category (Health Outcomes or Health Risk Behaviors)

        Examples
        --------
        >>> measures = client.get_measure_list()
        >>> measures.head()
        """
        renames = {
            'measureid': 'id',
            'measure_short_name': 'short_name',
            'measure_full_name': 'full_name',
            'category_name': 'category',
        }
        url = self.base_url + self._DATA_DICTIONARY_ID + '/query.json'

        measures_df = self._json_to_df(self._make_request(url))
        measures_df = self._keep_supported_categories(measures_df)
        return measures_df[list(renames)].rename(columns=renames)

    def get_county_data(self, release='2025'):
        """
        Retrieve county-level health risk behaviors and health outcomes data
        from the CDC PLACES API.

        Parameters
        ----------
        release : string
            The release to retrieve, one of '2020' through '2025'.

        Returns
        -------
        county_df : pandas DataFrame
            A dataframe, indexed from 0, containing the county-level PLACES
            rows whose category is health outcomes or health risk behaviors.

        Raises
        ------
        TypeError
            If ``release`` is not a string.
        ValueError
            If ``release`` is not a supported release.

        Examples
        --------
        >>> df = client.get_county_data('2023')
        >>> df.head()
        """
        if not isinstance(release, str):
            raise TypeError("The release must be a string.")
        if release not in self._RELEASE_IDS:
            raise ValueError("This release version is not supported.")

        url = self.base_url + self._RELEASE_IDS[release] + '/query.json'
        county_df = self._json_to_df(self._make_request(url))
        return self._keep_supported_categories(county_df)

    def filter_by_measures(self, df, measures=None, categories=None, measure_ids=None):
        """
        Get a subset of a PLACES DataFrame by measures or categories.
        Both the short names and ids of measures are supported. When several
        filters are given, a row must satisfy all of them.

        Parameters
        ----------
        df : pandas DataFrame
            The dataframe to subset from. It is never modified.
        measures : string or list of strings
            Short names of measures to keep (column ``short_question_text``).
        categories : string or list of strings
            Names of categories to keep (column ``category``).
        measure_ids : string or list of strings
            ids of measures to keep (column ``measureid``).

        A filter that is None or empty is ignored. A single string is treated
        as a one-element list.

        Returns
        -------
        sub_df : pandas DataFrame
            An independent copy containing only the selected measures and/or
            categories (all rows when no filter is given).

        Examples
        --------
        >>> new_df = client.filter_by_measures(df, measures=['Physical Inactivity','Current Asthma'])
        >>> new_df = client.filter_by_measures(df, categories=['Health Outcomes'])
        """
        criteria = (
            ('short_question_text', measures),
            ('category', categories),
            ('measureid', measure_ids),
        )
        sub_df = df
        for column, wanted in criteria:
            values = self._as_value_list(wanted)
            if values is not None:
                sub_df = sub_df[sub_df[column].isin(values)]
        # Copy so callers can assign to the result without touching `df` or
        # triggering SettingWithCopyWarning.
        return sub_df.copy()



In [6]:
# Load variables from a local .env file (if present) into the environment,
# so the API token never has to be hard-coded in the notebook.
load_dotenv()
# os.getenv returns None when CDC_API_TOKEN is not set.
token = os.getenv('CDC_API_TOKEN')

# Shared client instance used by all cells below.
client = PlacesClient(token=token)

In [8]:
# Smoke test for get_county_data: fetch the county-level table of the 2024 release.
places_2024 = client.get_county_data('2024')

In [9]:
# Inspect which columns remain after the client has dropped the API metadata.
places_2024.columns

Index(['year', 'stateabbr', 'statedesc', 'locationname', 'datasource',
       'category', 'measure', 'data_value_unit', 'data_value_type',
       'data_value', 'low_confidence_limit', 'high_confidence_limit',
       'totalpopulation', 'totalpop18plus', 'locationid', 'categoryid',
       'measureid', 'datavaluetypeid', 'short_question_text', 'geolocation'],
      dtype='object')

In [10]:
# Peek at the last three rows of the filtered, re-indexed table.
places_2024.tail(3)

Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,low_confidence_limit,high_confidence_limit,totalpopulation,totalpop18plus,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
100369,2021,WI,Wisconsin,Polk,BRFSS,Health Outcomes,High blood pressure among adults,%,Crude prevalence,34.3,29.9,38.8,45709,36755,55095,HLTHOUT,BPHIGH,CrdPrv,High Blood Pressure,"{'type': 'Point', 'coordinates': [-92.44127559..."
100370,2022,WI,Wisconsin,Trempealeau,BRFSS,Health Outcomes,Depression among adults,%,Age-adjusted prevalence,24.5,20.9,28.2,30899,23116,55121,HLTHOUT,DEPRESSION,AgeAdjPrv,Depression,"{'type': 'Point', 'coordinates': [-91.35842148..."
100371,2022,WY,Wyoming,Fremont,BRFSS,Health Risk Behaviors,Binge drinking among adults,%,Crude prevalence,16.4,13.5,19.5,39472,29818,56013,RISKBEH,BINGE,CrdPrv,Binge Drinking,"{'type': 'Point', 'coordinates': [-108.6304546..."


In [11]:
# Smoke test for get_measure_list(): display the measures (id, short/full name, category).
client.get_measure_list()

Unnamed: 0,id,short_name,full_name,category
0,ARTHRITIS,Arthritis,Arthritis among adults,Health Outcomes
1,BPHIGH,High Blood Pressure,High blood pressure among adults,Health Outcomes
2,CANCER,Cancer (non-skin) or melanoma,Cancer (non-skin) or melanoma among adults,Health Outcomes
3,CASTHMA,Current Asthma,Current asthma among adults,Health Outcomes
4,CHD,Coronary Heart Disease,Coronary heart disease among adults,Health Outcomes
5,COPD,COPD,Chronic obstructive pulmonary disease among ad...,Health Outcomes
6,DEPRESSION,Depression,Depression among adults,Health Outcomes
7,DIABETES,Diabetes,Diagnosed diabetes among adults,Health Outcomes
8,HIGHCHOL,High Cholesterol,High cholesterol among adults who have ever be...,Health Outcomes
9,KIDNEY,Chronic Kidney Disease,Chronic kidney disease among adults aged >=18 ...,Health Outcomes


In [12]:
# Smoke test for filter_by_measures: keep only the COPD and current-asthma rows, selected by measure id.
sub_df = client.filter_by_measures(places_2024, measure_ids=['COPD', 'CASTHMA'])
# Show the last rows of the subset; the index labels are those of places_2024.
sub_df.tail()

Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,low_confidence_limit,high_confidence_limit,totalpopulation,totalpop18plus,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
100316,2022,WI,Wisconsin,Winnebago,BRFSS,Health Outcomes,Current asthma among adults,%,Age-adjusted prevalence,11.1,9.8,12.4,170718,136750,55139,HLTHOUT,CASTHMA,AgeAdjPrv,Current Asthma,"{'type': 'Point', 'coordinates': [-88.64469436..."
100318,2022,WI,Wisconsin,Iron,BRFSS,Health Outcomes,Chronic obstructive pulmonary disease among ad...,%,Age-adjusted prevalence,6.0,5.3,6.7,6224,5259,55051,HLTHOUT,COPD,AgeAdjPrv,COPD,"{'type': 'Point', 'coordinates': [-90.24211599..."
100346,2022,WI,Wisconsin,Juneau,BRFSS,Health Outcomes,Chronic obstructive pulmonary disease among ad...,%,Crude prevalence,9.0,8.1,10.0,26866,21737,55057,HLTHOUT,COPD,CrdPrv,COPD,"{'type': 'Point', 'coordinates': [-90.11373312..."
100352,2022,WI,Wisconsin,Milwaukee,BRFSS,Health Outcomes,Chronic obstructive pulmonary disease among ad...,%,Crude prevalence,6.6,5.9,7.2,918661,702428,55079,HLTHOUT,COPD,CrdPrv,COPD,"{'type': 'Point', 'coordinates': [-87.96694912..."
100363,2022,WI,Wisconsin,Milwaukee,BRFSS,Health Outcomes,Current asthma among adults,%,Crude prevalence,11.7,10.5,12.9,918661,702428,55079,HLTHOUT,CASTHMA,CrdPrv,Current Asthma,"{'type': 'Point', 'coordinates': [-87.96694912..."
