In [5]:
from pathlib import Path
import pandas as pd
import csv

[CDC Places Data Portal](https://chronicdata.cdc.gov/browse?category=500+Cities+%26+Places)

In [2]:
data_dir = Path(r'../DATA')
cdc_dir = data_dir / 'places'

In [3]:
%%time
dfs = []
for fobj in cdc_dir.glob('PLACES*.*'):
    print(f'loading: {fobj.name}')
    year = fobj.name.split('___')[-1].split('_')[0]
    print(f'datayear: {year}')
    df = pd.read_csv(fobj)
    n_rows, n_cols = df.shape
    print(f'loaded dataframe: {n_rows} rows, {n_cols} cols')
    df['year'] = year
    dfs.append(df)
cdc_df = pd.concat(dfs)

loading: PLACES__County_Data__GIS_Friendly_Format___2020_release.csv
datayear: 2020
loaded dataframe: 3142 rows, 118 cols
loading: PLACES__County_Data__GIS_Friendly_Format___2021_release.csv
datayear: 2021
loaded dataframe: 3142 rows, 126 cols
Wall time: 553 ms


In [23]:
SUFFIX = 'AdjPrev' ## age-adjusted prevelance

In [26]:
base_cols = ['StateAbbr', 'year', 'CountyName', 'CountyFIPS', 'Geolocation']

In [4]:
ref_dir = Path(r'../ref')

In [20]:
label_file = ref_dir / 'cdc_labels.csv'
labels = {}
with open(label_file, 'r') as csv_in:
    reader = csv.DictReader(csv_in)
    for row in reader:
        label_type = row['Type']
        if label_type not in labels.keys():
            labels[label_type] = []
        labels[label_type].append(row['Label'])

In [35]:
keep_cols = base_cols
for label in labels['Outcome']:
    if label not in ['DEPRESSION']:
        keep_cols.append(f'{label}_{SUFFIX}')

In [36]:
limited_df = cdc_df[keep_cols]

In [37]:
limited_df.head()

Unnamed: 0,StateAbbr,year,CountyName,CountyFIPS,Geolocation,ARTHRITIS_AdjPrev,BPHIGH_AdjPrev,CANCER_AdjPrev,CASTHMA_AdjPrev,CHD_AdjPrev,...,CANCER_AdjPrev.1,CASTHMA_AdjPrev.1,CHD_AdjPrev.1,COPD_AdjPrev,DIABETES_AdjPrev,HIGHCHOL_AdjPrev,KIDNEY_AdjPrev,OBESITY_AdjPrev,STROKE_AdjPrev,TEETHLOST_AdjPrev
0,AL,2020,Autauga,1001,POINT (-86.64301145 32.5350198),29.3,36.6,6.5,10.2,6.9,...,6.5,10.2,6.9,7.7,11.3,31.8,2.8,35.5,3.4,16.3
1,AL,2020,Baldwin,1003,POINT (-87.72275422 30.72811673),27.3,33.1,6.7,9.6,6.0,...,6.7,9.6,6.0,7.1,9.5,32.3,2.6,29.4,3.0,13.4
2,AL,2020,Barbour,1005,POINT (-85.39330307 31.86925212),33.2,45.8,6.1,11.7,9.0,...,6.1,11.7,9.0,10.5,16.9,34.4,3.9,40.8,5.1,27.1
3,AL,2020,Bibb,1007,POINT (-87.12653219 32.99854412),30.6,40.8,6.4,10.3,7.6,...,6.4,10.3,7.6,9.1,12.6,33.2,3.0,38.4,3.8,20.7
4,AL,2020,Blount,1009,POINT (-86.56784586 33.98070621),30.1,37.3,6.6,10.2,7.5,...,6.6,10.2,7.5,9.0,11.3,33.6,2.9,33.7,3.5,19.3


In [20]:
cdc_df['CountyName'].nunique()

1839

In [21]:
ct_df = cdc_df[cdc_df['StateAbbr'] == 'CT']

In [22]:
ct_df['CountyName'].value_counts()

Fairfield     2
Hartford      2
Litchfield    2
Middlesex     2
New Haven     2
New London    2
Tolland       2
Windham       2
Name: CountyName, dtype: int64

In [23]:
ct_df

Unnamed: 0,StateAbbr,StateDesc,CountyName,CountyFIPS,TotalPopulation,ACCESS2_CrudePrev,ACCESS2_Crude95CI,ACCESS2_AdjPrev,ACCESS2_Adj95CI,ARTHRITIS_CrudePrev,...,Geolocation,year,DEPRESSION_CrudePrev,DEPRESSION_Crude95CI,DEPRESSION_AdjPrev,DEPRESSION_Adj95CI,GHLTH_CrudePrev,GHLTH_Crude95CI,GHLTH_AdjPrev,GHLTH_Adj95CI
308,CT,Connecticut,Fairfield,9001,943823,10.0,"( 8.5, 11.6)",10.6,"( 9.0, 12.3)",23.4,...,POINT (-73.38942182 41.27166724),2020,,,,,,,,
309,CT,Connecticut,Hartford,9003,892697,9.8,"( 8.3, 11.5)",10.2,"( 8.6, 12.0)",26.5,...,POINT (-72.73276698 41.80653444),2020,,,,,,,,
310,CT,Connecticut,Litchfield,9005,181111,7.8,"( 6.3, 9.6)",8.6,"( 7.0, 10.5)",28.2,...,POINT (-73.24529195 41.79234069),2020,,,,,,,,
311,CT,Connecticut,Middlesex,9007,162682,7.3,"( 6.0, 8.9)",7.9,"( 6.4, 9.6)",28.2,...,POINT (-72.53513114 41.46333347),2020,,,,,,,,
312,CT,Connecticut,New Haven,9009,857620,10.7,"( 9.0, 12.3)",11.2,"( 9.5, 12.9)",27.3,...,POINT (-72.93263656 41.41052748),2020,,,,,,,,
313,CT,Connecticut,New London,9011,266784,9.3,"( 7.8, 10.8)",9.8,"( 8.3, 11.5)",28.5,...,POINT (-72.10184166 41.48674009),2020,,,,,,,,
314,CT,Connecticut,Tolland,9013,150921,7.5,"( 6.1, 9.3)",7.9,"( 6.4, 9.5)",23.6,...,POINT (-72.33645479 41.8545849),2020,,,,,,,,
315,CT,Connecticut,Windham,9015,117027,9.6,"( 7.9, 11.5)",10.1,"( 8.4, 12.1)",31.2,...,POINT (-71.98753068 41.82994523),2020,,,,,,,,
164,CT,Connecticut,Fairfield,9001,943332,13.5,"(11.3, 16.0)",14.2,"(11.8, 16.8)",20.2,...,POINT (-73.38942182 41.27166724),2021,13.4,"(12.7, 14.2)",13.5,"(12.8, 14.3)",14.0,"(12.1, 15.9)",13.3,"(11.6, 15.2)"
749,CT,Connecticut,New Haven,9009,854757,13.3,"(11.4, 15.3)",13.8,"(11.7, 15.9)",23.7,...,POINT (-72.93263656 41.41052748),2021,16.1,"(15.3, 16.9)",16.2,"(15.4, 17.1)",16.7,"(14.8, 18.7)",16.2,"(14.4, 18.1)"
