In [1]:
from dotenv import load_dotenv
import os
import requests
import pandas as pd

In [None]:
class PlacesClient:
    """
    Small client for the CDC PLACES datasets served from data.cdc.gov.

    All requests go through one ``requests.Session`` so the app token header
    is attached once and reused by every call.

    Parameters
    ----------
    token : string
        App token sent as the ``X-App-Token`` header. When empty or None the
        header is not set.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests`` for every GET request.
    """

    # Dataset identifier of each supported county-level release.
    RELEASE_IDS = {
        '2025': 'swc5-untb',
        '2024': 'fu4u-a9bh',
        '2023': 'h3ej-a9ec',
        '2022': 'duw2-7jbt',
        '2021': 'pqpp-u99h',
        '2020': 'dv4u-3x3q'
    }
    # Dataset identifier of the measures data dictionary.
    DATA_DICTIONARY_ID = 'm35w-spkz'
    # Without a timeout a stalled connection would block the caller forever.
    DEFAULT_TIMEOUT = 30

    def __init__(self, token, timeout=DEFAULT_TIMEOUT):
        self.base_url = 'https://data.cdc.gov/api/v3/views/'
        self.timeout = timeout
        self.session = requests.Session()
        if token:
            self.session.headers.update({
                'X-App-Token': token
            })

    def _make_request(self, url, params=None):
        """
        Send a GET request through the shared session and return the decoded JSON body.

        Parameters
        ----------
        url : string
            Full URL to request.
        params : dict, optional
            Query-string parameters forwarded to ``requests``.

        Raises
        ------
        requests.exceptions.RequestException
            On an HTTP error status, a connection failure or a timeout. The
            error is printed and then re-raised unchanged.
        """
        try:
            response = self.session.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"API Error: {e}")
            raise

    def _json_to_df(self, data):
        """
        Transform JSON data into a pandas DataFrame.

        The API's metadata columns are dropped when present, and the known
        numeric columns that are present are converted with ``pd.to_numeric``
        (which raises if a value cannot be parsed).
        """
        df = pd.DataFrame(data)
        # remove the API's metadata
        df = df.drop(columns=[':id', ':version', ':created_at', ':updated_at'], errors='ignore')
        # convert numeric variables
        numeric_cols = ['data_value', 'low_confidence_limit', 'high_confidence_limit', 'totalpopulation']
        for col in numeric_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col])
        return df

    def get_measure_list(self):
        """
        Query the PLACES measures data dictionary and return key information
        about the measures this package keeps (those categorized as health
        outcomes or health risk behaviors).

        Returns
        -------
        measures_df : pandas DataFrame
            One row per kept measure, re-indexed from 0, with the columns:
            - id: measure identifier
            - short_name: short label
            - full_name: full descriptive name
            - category: measure category name

        Examples
        --------
        >>> measures = client.get_measure_list()
        >>> measures.head()
        """
        url = self.base_url + self.DATA_DICTIONARY_ID + '/query.json'
        measures_df = self._json_to_df(self._make_request(url))

        kept = measures_df['categoryid'].isin(['HLTHOUT', 'RISKBEH'])
        measures_df = measures_df.loc[
            kept, ['measureid', 'measure_short_name', 'measure_full_name', 'category_name']
        ]
        measures_df = measures_df.rename(columns={
            'measureid': 'id',
            'measure_short_name': 'short_name',
            'measure_full_name': 'full_name',
            'category_name': 'category',
        })
        # Same contiguous index that get_county_data() returns after filtering.
        return measures_df.reset_index(drop=True)

    def get_county_data(self, release='2025'):
        """
        Retrieve county-level health risk behaviors and health outcomes data
        from the CDC PLACES API.

        Parameters
        ----------
        release : string
            The release to retrieve; one of the keys of ``RELEASE_IDS``
            ('2020' through '2025').

        Returns
        -------
        county_df : pandas DataFrame
            County-level PLACES rows whose category is 'Health Outcomes' or
            'Health Risk Behaviors', re-indexed from 0.

        Raises
        ------
        TypeError
            If ``release`` is not a string.
        ValueError
            If ``release`` is not a supported release.

        Examples
        --------
        >>> df = client.get_county_data('2023')
        >>> df.head()
        """
        if not isinstance(release, str):
            raise TypeError("The release must be a string.")
        if release not in self.RELEASE_IDS:
            raise ValueError("This release version is not supported.")

        url = self.base_url + self.RELEASE_IDS[release] + '/query.json'
        county_df = self._json_to_df(self._make_request(url))

        # Filter measures categorized as health outcomes and health risk behaviors
        kept = county_df['category'].isin(['Health Outcomes', 'Health Risk Behaviors'])
        return county_df[kept].reset_index(drop=True)

    def filter_by_measures(self, df, measures=None, categories=None, measure_ids=None, cat_ids=None):
        """
        Get a subset of a PLACES DataFrame by measures or categories.
        Both short names and ids of measures and categories are supported.
        When several filters are given, a row must satisfy all of them.

        Parameters
        ----------
        df : pandas DataFrame
            The dataframe to subset from. It is never modified.
        measures : string or list of strings
            Short names of measures to keep (column 'short_question_text').
        categories : string or list of strings
            Names of categories to keep (column 'category').
        measure_ids : string or list of strings
            ids of measures to keep (column 'measureid').
        cat_ids : string or list of strings
            ids of categories to keep (column 'categoryid').

        Returns
        -------
        sub_df : pandas DataFrame
            A new dataframe containing only the selected measures and/or
            categories, keeping the row labels of ``df``. Filters left as
            None or empty are ignored; with no filter at all a copy of
            ``df`` is returned.

        Examples
        --------
        >>> new_df = client.filter_by_measures(df, measures=['Physical Inactivity','Current Asthma'])
        >>> new_df = client.filter_by_measures(df, cat_ids=['HLTHOUT'])
        """
        filters = (
            ('short_question_text', measures),
            ('category', categories),
            ('measureid', measure_ids),
            ('categoryid', cat_ids),
        )
        sub_df = df
        for column, wanted in filters:
            if not wanted:
                continue
            # A bare string is a single name; isin() only accepts list-likes.
            if isinstance(wanted, str):
                wanted = [wanted]
            sub_df = sub_df[sub_df[column].isin(wanted)]
        # Never hand the caller's own object back, so edits to the result
        # cannot leak into the input frame.
        if sub_df is df:
            sub_df = df.copy()
        return sub_df



In [65]:
# Load variables from a .env file into the environment, then read the CDC
# app token from it (None when CDC_API_TOKEN is not set).
load_dotenv()
token = os.environ.get('CDC_API_TOKEN')

# Client used by the test cells below.
client = PlacesClient(token=token)

In [62]:
# Smoke test for get_county_data(): fetch the county table of the 2023 release.
places_2023 = client.get_county_data(release='2023')

In [71]:
# Display the column labels of the 2023 county table.
places_2023.columns

Index(['year', 'stateabbr', 'statedesc', 'locationname', 'datasource',
       'category', 'measure', 'data_value_unit', 'data_value_type',
       'data_value', 'low_confidence_limit', 'high_confidence_limit',
       'totalpopulation', 'locationid', 'categoryid', 'measureid',
       'datavaluetypeid', 'short_question_text', 'geolocation'],
      dtype='object')

In [61]:
# Smoke test for get_measure_list(): display the table of measures it returns.
client.get_measure_list()

Unnamed: 0,id,short_name,full_name,category
0,ARTHRITIS,Arthritis,Arthritis among adults,Health Outcomes
1,BPHIGH,High Blood Pressure,High blood pressure among adults,Health Outcomes
2,CANCER,Cancer (non-skin) or melanoma,Cancer (non-skin) or melanoma among adults,Health Outcomes
3,CASTHMA,Current Asthma,Current asthma among adults,Health Outcomes
4,CHD,Coronary Heart Disease,Coronary heart disease among adults,Health Outcomes
5,COPD,COPD,Chronic obstructive pulmonary disease among ad...,Health Outcomes
6,DEPRESSION,Depression,Depression among adults,Health Outcomes
7,DIABETES,Diabetes,Diagnosed diabetes among adults,Health Outcomes
8,HIGHCHOL,High Cholesterol,High cholesterol among adults who have ever be...,Health Outcomes
9,KIDNEY,Chronic Kidney Disease,Chronic kidney disease among adults aged >=18 ...,Health Outcomes


In [69]:
# Smoke test for filter_by_measures(): keep only the rows whose category id
# is 'HLTHOUT', then display the last rows of the result.
sub_df = client.filter_by_measures(df=places_2023, cat_ids=['HLTHOUT'])
sub_df.tail()

Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,low_confidence_limit,high_confidence_limit,totalpopulation,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
104880,2021,UT,Utah,Millard,BRFSS,Health Outcomes,Chronic obstructive pulmonary disease among ad...,%,Age-adjusted prevalence,6.0,4.9,7.3,13164,49027,HLTHOUT,COPD,AgeAdjPrv,COPD,"{'type': 'Point', 'coordinates': [-113.133092,..."
104881,2021,RI,Rhode Island,Bristol,BRFSS,Health Outcomes,Current asthma among adults aged >=18 years,%,Crude prevalence,10.7,9.4,12.2,50818,44001,HLTHOUT,CASTHMA,CrdPrv,Current Asthma,"{'type': 'Point', 'coordinates': [-71.2866874,..."
104882,2021,TX,Texas,Brown,BRFSS,Health Outcomes,High cholesterol among adults aged >=18 years ...,%,Crude prevalence,40.4,35.3,45.5,38192,48049,HLTHOUT,HIGHCHOL,CrdPrv,High Cholesterol,"{'type': 'Point', 'coordinates': [-98.998456, ..."
104883,2021,TX,Texas,Grayson,BRFSS,Health Outcomes,Chronic kidney disease among adults aged >=18 ...,%,Age-adjusted prevalence,2.8,2.6,3.2,139336,48181,HLTHOUT,KIDNEY,AgeAdjPrv,Chronic Kidney Disease,"{'type': 'Point', 'coordinates': [-96.675693, ..."
104885,2020,WI,Wisconsin,La Crosse,BRFSS,Health Outcomes,All teeth lost among adults aged >=65 years,%,Crude prevalence,8.1,5.0,11.8,120433,55063,HLTHOUT,TEETHLOST,CrdPrv,All Teeth Lost,"{'type': 'Point', 'coordinates': [-91.1117584,..."
