# Tutorial of the places_client package

### 1. Import the package

In [1]:
from places_client.places_client import PlacesClient

### 2. Create a client for the API
`PlacesClient()` initializes a PlacesClient object with the API token you provided.

In [2]:
# Create the client from an API token stored as "CDC_API_TOKEN" in a .env file.
# NOTE(review): os.getenv returns None when the variable is not set — confirm
# how PlacesClient handles a missing token.
import os
from dotenv import load_dotenv

load_dotenv()  # load the variables defined in .env into the environment
token = os.getenv('CDC_API_TOKEN')
client = PlacesClient(token)

### 3. Get an overview of supported measures
`get_measure_list()` returns a table listing the id, short name, full name, and category (Health Outcomes or Health Risk Behaviors) of each supported measure.

In [None]:
# Fetch the table of supported measures (id, short name, full name, category).
measures = client.get_measure_list()
# End the cell with the bare expression so the table is rendered as the cell
# output; an assignment alone displays nothing on a fresh run.
measures

Unnamed: 0,id,short_name,full_name,category
0,ARTHRITIS,Arthritis,Arthritis among adults,Health Outcomes
4,CHD,Coronary Heart Disease,Coronary heart disease among adults,Health Outcomes
8,HIGHCHOL,High Cholesterol,High cholesterol among adults who have ever be...,Health Outcomes
12,TEETHLOST,All Teeth Lost,All teeth lost among adults aged >=65 years,Health Outcomes
16,SLEEP,Short Sleep Duration,Short sleep duration among adults,Health Risk Behaviors


### 4. Retrieve a certain release of filtered county-level PLACES data
`get_county_data()` gets one specific release (2020, 2021, 2022, 2023, 2024, or 2025) of county-level PLACES data, and it automatically filters the dataset to keep only “Health Outcomes” and “Health Risk Behaviors” measures.

In [4]:
# Get the 2024 release of county-level PLACES data.
places_24 = client.get_county_data(release="2024")
# Preview the last rows of the returned DataFrame.
places_24.tail()

Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,low_confidence_limit,high_confidence_limit,totalpopulation,totalpop18plus,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
100367,2022,WI,Wisconsin,Fond du Lac,BRFSS,Health Outcomes,Stroke among adults,%,Crude prevalence,3.7,3.3,4.0,103836,82265,55039,HLTHOUT,STROKE,CrdPrv,Stroke,"{'type': 'Point', 'coordinates': [-88.48834337..."
100368,2021,WI,Wisconsin,Iron,BRFSS,Health Outcomes,High cholesterol among adults who have ever be...,%,Age-adjusted prevalence,30.1,25.4,35.0,6224,5259,55051,HLTHOUT,HIGHCHOL,AgeAdjPrv,High Cholesterol,"{'type': 'Point', 'coordinates': [-90.24211599..."
100369,2021,WI,Wisconsin,Polk,BRFSS,Health Outcomes,High blood pressure among adults,%,Crude prevalence,34.3,29.9,38.8,45709,36755,55095,HLTHOUT,BPHIGH,CrdPrv,High Blood Pressure,"{'type': 'Point', 'coordinates': [-92.44127559..."
100370,2022,WI,Wisconsin,Trempealeau,BRFSS,Health Outcomes,Depression among adults,%,Age-adjusted prevalence,24.5,20.9,28.2,30899,23116,55121,HLTHOUT,DEPRESSION,AgeAdjPrv,Depression,"{'type': 'Point', 'coordinates': [-91.35842148..."
100371,2022,WY,Wyoming,Fremont,BRFSS,Health Risk Behaviors,Binge drinking among adults,%,Crude prevalence,16.4,13.5,19.5,39472,29818,56013,RISKBEH,BINGE,CrdPrv,Binge Drinking,"{'type': 'Point', 'coordinates': [-108.6304546..."


In [17]:
# Inspect the columns: names, non-null counts, and dtypes.
places_24.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100372 entries, 0 to 100371
Data columns (total 20 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   year                   100372 non-null  object 
 1   stateabbr              100372 non-null  object 
 2   statedesc              100372 non-null  object 
 3   locationname           100340 non-null  object 
 4   datasource             100372 non-null  object 
 5   category               100372 non-null  object 
 6   measure                100372 non-null  object 
 7   data_value_unit        100372 non-null  object 
 8   data_value_type        100372 non-null  object 
 9   data_value             100372 non-null  float64
 10  low_confidence_limit   100372 non-null  float64
 11  high_confidence_limit  100372 non-null  float64
 12  totalpopulation        100372 non-null  int64  
 13  totalpop18plus         100372 non-null  object 
 14  locationid             100372 non-nu

### 5. Filter Data
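`filter_by_measures()` and `filter_by_regions()` allow you to subset a PLACES DataFrame by measures or categories, and by states or counties, respectively.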
#### By measures or categories
To filter by measures, provide either the short names or the ids of measures as a list of strings. 
 
To filter by categories, provide "Health Outcomes" and/or "Health Risk Behaviors" as a list of strings.

In [5]:
# Filter by measure short names ...
sub_by_measure = client.filter_by_measures(places_24, measures=["Coronary Heart Disease", "Current Asthma"])
# ... OR by measure ids (this assignment replaces the result above)
sub_by_measure = client.filter_by_measures(places_24, measures=["CHD", "CASTHMA"])
# Check which measure ids remain after filtering
print(sub_by_measure['measureid'].unique())

['CASTHMA' 'CHD']


In [6]:
# Filter by category: keep only rows in the "Health Outcomes" category
sub_by_category = client.filter_by_measures(places_24, categories=['Health Outcomes'])
# Check which categories and measure ids remain after filtering
print(sub_by_category['category'].unique())
print(sub_by_category['measureid'].unique())

['Health Outcomes']
['CASTHMA' 'ARTHRITIS' 'STROKE' 'OBESITY' 'BPHIGH' 'DIABETES' 'DEPRESSION'
 'TEETHLOST' 'COPD' 'CHD' 'HIGHCHOL' 'CANCER']


#### By states or counties
To filter by states, provide either the state abbreviations (e.g., CA) or the state names (e.g., California) as a list of strings.

To filter by counties, provide their locationids as a list of strings. PLACES datasets use Federal Information Processing Standards (FIPS) codes as locationids; you can look up the FIPS code of a specific county here: https://www.census.gov/library/reference/code-lists/ansi.2020.html#cou

In [7]:
# Filter by state abbreviations ...
sub_by_state = client.filter_by_regions(places_24, states=['WI', 'CA'])
# ... OR by state names (this assignment replaces the result above)
sub_by_state = client.filter_by_regions(places_24, states=['Wisconsin', 'California'])
# Check which states remain after filtering
print(sub_by_state['statedesc'].unique())

['California' 'Wisconsin']


In [8]:
# Filter by county locationids (FIPS codes), passed as strings
sub_by_county = client.filter_by_regions(places_24, counties=['06071', '36001', '01009'])
# Show the names of the counties that matched
print(sub_by_county['locationname'].unique())

['San Bernardino' 'Blount' 'Albany']


### 6. Create a pivot table of measure values
`create_pivot_table()` converts the PLACES DataFrame into a wide pivot table, where each measure becomes a column. You can aggregate at the county level or at the state level.

In [9]:
# Create a county-level pivot table (one column per measure)
county_table = client.create_pivot_table(places_24, level='county')
# Preview the first rows
county_table.head()

Unnamed: 0_level_0,locationname,statedesc,ARTHRITIS,BINGE,BPHIGH,CANCER,CASTHMA,CHD,COPD,CSMOKING,DEPRESSION,DIABETES,HIGHCHOL,LPA,OBESITY,SLEEP,STROKE,TEETHLOST
locationid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1001,Autauga,Alabama,32.1,16.0,38.85,7.7,10.0,6.8,7.3,15.45,24.05,12.15,33.7,26.3,38.65,37.5,3.45,11.65
1003,Baldwin,Alabama,31.2,16.95,35.55,8.8,9.8,7.1,7.15,14.35,24.3,11.65,35.35,24.6,36.8,35.6,3.25,11.15
1005,Barbour,Alabama,34.45,12.95,46.5,7.05,11.2,9.35,10.75,21.4,23.15,17.45,36.1,35.6,43.55,43.8,5.55,27.75
1007,Bibb,Alabama,31.8,15.05,40.7,7.8,10.75,8.15,9.8,21.45,26.25,14.2,36.1,32.55,41.4,41.0,4.3,21.55
1009,Blount,Alabama,31.8,16.15,37.95,8.45,10.55,8.25,9.65,19.3,27.4,12.8,36.85,30.35,37.3,37.05,3.9,16.45


In [10]:
# Create a state-level pivot table (one column per measure)
state_table = client.create_pivot_table(places_24, level='state')
# Preview the first rows
state_table.head()

Unnamed: 0_level_0,ARTHRITIS,BINGE,BPHIGH,CANCER,CASTHMA,CHD,COPD,CSMOKING,DEPRESSION,DIABETES,HIGHCHOL,LPA,OBESITY,SLEEP,STROKE,TEETHLOST
statedesc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Alabama,33.361194,14.379851,41.770149,7.660448,10.91791,8.269403,9.447761,18.762687,24.989552,15.019403,35.558955,31.458955,41.220149,41.163433,4.558209,19.263433
Alaska,24.4,19.513333,31.181667,6.878333,11.048333,7.288333,7.333333,20.483333,20.056667,10.673333,30.106667,25.12,34.458333,37.13,3.991667,17.373333
Arizona,26.796667,17.443333,31.28,8.073333,11.096667,7.92,8.32,15.996667,22.346667,13.393333,31.673333,26.266667,34.546667,36.67,4.21,17.226667
Arkansas,32.684667,15.863333,40.826,8.058,11.316,8.808,9.908667,21.438,27.503333,14.935333,35.471333,34.180667,39.608,40.413333,4.511333,22.021333
California,23.277586,19.008621,29.368103,7.547414,10.339655,6.327586,6.391379,13.15,22.387069,11.056897,32.775862,22.715517,30.214655,34.788793,3.377586,11.65


### 7. Explore the relationship between 2 measures
`get_correlation()` calculates the correlation r between two measures and returns key summary statistics. You can provide either the short name or the measureid of the measures.

In [11]:
# Correlate two measures, identified here by their measure ids
corr = client.get_correlation(places_24, 'LPA', 'DEPRESSION')
# Display the returned summary
corr

{'corr_coef': 0.20321713670955188,
 'sample_size': 1838,
 'mean_x': 26.86089867640032,
 'mean_y': 23.600384332489686}

### 8. Get the key descriptive statistics of a measure
`summarize_measure()` returns basic descriptive statistics (mean, median, min, max, standard deviation, count, and number of missing values) for one measure.

In [12]:
# Summarize the depression measure, identified by its measure id
summary_dep = client.summarize_measure(places_24, 'DEPRESSION')
# Display the returned statistics
summary_dep

{'mean': 23.730476947535774,
 'median': 23.7,
 'min': 12.4,
 'max': 35.7,
 'std': 3.4438924987399977,
 'count': 6290.0,
 'missing_values_count': 0.0}