### Merging food access and health data

In [None]:
import pandas as pd

In [None]:
# Load the cleaned food-access workbook, using its first column as the row index.
food = pd.read_excel(
    'clean_food_access.xlsx',
    index_col=0,
)
food.head()

In [None]:
# List the distinct years present in the food-access table.
# NOTE(review): the saved output shows 2007 sitting between 2016 and 2018 —
# presumably a mislabelled 2017 in the source file; verify upstream.
food.year.unique()

array([2010, 2011, 2012, 2013, 2014, 2015, 2016, 2007, 2018], dtype=int64)

In [None]:
# Load the cleaned health workbook (mortality figures per county, year and disease).
# No index column is given, so the frame gets a default integer index.
health = pd.read_excel(
    'health_data_clean.xlsx',
)
health.head()

Unnamed: 0,FIPS,state,county,year,mortality,lower_95,upper_95,disease
0,1001,Alabama,Autauga County,1980,552.68,521.37,586.71,Cardiovascular diseases
1,1003,Alabama,Baldwin County,1980,445.67,423.09,467.49,Cardiovascular diseases
2,1005,Alabama,Barbour County,1980,515.53,484.52,547.21,Cardiovascular diseases
3,1007,Alabama,Bibb County,1980,548.69,513.57,584.89,Cardiovascular diseases
4,1009,Alabama,Blount County,1980,473.26,445.19,501.18,Cardiovascular diseases


In [None]:
# Rename the health table's 'FIPS' column to 'fips' so the join key
# has the same name in both dataframes.
health = health.rename({'FIPS': 'fips'}, axis='columns')
health.head()

Unnamed: 0,fips,state,county,year,mortality,lower_95,upper_95,disease
0,1001,Alabama,Autauga County,1980,552.68,521.37,586.71,Cardiovascular diseases
1,1003,Alabama,Baldwin County,1980,445.67,423.09,467.49,Cardiovascular diseases
2,1005,Alabama,Barbour County,1980,515.53,484.52,547.21,Cardiovascular diseases
3,1007,Alabama,Bibb County,1980,548.69,513.57,584.89,Cardiovascular diseases
4,1009,Alabama,Blount County,1980,473.26,445.19,501.18,Cardiovascular diseases


In [None]:
# List the distinct years present in the health table.
health.year.unique()

array([1980, 1985, 1990, 1995, 2000, 2005, 2010, 2014], dtype=int64)

In [None]:
# The food and health tables only overlap in 2010 and 2014,
# so keep just those two years of health data.
new_health = health.loc[health['year'].isin([2010, 2014])]
new_health.head()

Unnamed: 0,fips,state,county,year,mortality,lower_95,upper_95,disease
18852,1001,Alabama,Autauga County,2010,316.82,299.06,333.37,Cardiovascular diseases
18853,1003,Alabama,Baldwin County,2010,279.98,268.54,291.74,Cardiovascular diseases
18854,1005,Alabama,Barbour County,2010,264.63,249.05,280.85,Cardiovascular diseases
18855,1007,Alabama,Bibb County,2010,374.27,352.26,398.64,Cardiovascular diseases
18856,1009,Alabama,Blount County,2010,307.45,291.0,324.49,Cardiovascular diseases


In [None]:
# Merge the food-access and health tables on the shared 'fips' and 'year' columns.
# how='outer' keeps rows from either side that have no partner, filling the
# other side's columns with NaN on those rows.
# NOTE(review): `food` is not restricted to 2010/2014 before merging, so the
# outer join also retains food rows from its other years — confirm that this
# is intended for a file labelled 2010_2014.
df = food.merge(new_health, how='outer', on=['fips', 'year'])

# Preview a random sample; the fixed seed keeps the displayed rows the same
# on every Restart & Run All.
df.sample(20, random_state=0)

Unnamed: 0,fips,state_x,county_x,indicator,value,year,state_y,county_y,mortality,lower_95,upper_95,disease
609877,51683,Virginia,Manassas city,FARM_TO_SCHOOL,0.5,2014,,,7.69,5.75,9.97,Atrial fibrillation & flutter
90726,13103,Georgia,Effingham County,FMRKT,0.0,2014,,,12.15,10.39,14.24,Other cardiovascular
65170,9013,Connecticut,Tolland County,LACCESS_LOWI,12233.122366,2014,,,2.39,1.91,2.92,Rheumatic heart disease
799928,17067,,,PCT_LACCESS_HHNV,3.86713,2010,,,12.45,11.25,13.79,Other cardiovascular
352322,31081,Nebraska,Hamilton County,PCT_LACCESS_SNAP,0.597901,2014,,,3.57,2.62,4.86,Aortic aneurysm
38098,5147,Arkansas,Woodruff County,FMRKT,0.0,2014,,,5.09,3.17,7.77,Atrial fibrillation & flutter
1208696,46029,,,PCT_LACCESS_LOWI,7.343803,2010,,,6.7,4.55,9.62,Atrial fibrillation & flutter
1344841,53011,,,LACCESS_POP,88355.908553,2010,,,34.43,32.68,36.52,Ischemic stroke
628695,54065,West Virginia,Morgan County,METRO,0.0,2014,,,159.66,142.63,176.83,Ischemic heart disease
321621,29129,Missouri,Mercer County,LACCESS_HHNV,112.376837,2014,,,3.08,2.11,4.42,Rheumatic heart disease


In [None]:
# Glossary for the food-access indicators documented so far
# (the `indicator` column holds the upper-case form of each name):
#
#   fmrkt            = # of farmers markets
#   fmrktpth         = # of farmers markets / 1000 pop
#   laccess_hhnv     = households, no car and low access to store
#   laccess_lowi     = low income and low access to store
#   laccess_pop      = population low access to store
#   laccess_snap     = SNAP households (Supplemental Nutrition Assistance Program)
#   medhhinc         = median household income
#   metro            = metro/non metro counties
#   pct_laccess_hhnv = % households, no car and low access to store
#   pct_laccess_lowi = % low income and low access to store
#   pct_laccess_pop  = % population low access to store
#   pct_laccess_snap = % SNAP households, low access to store
#   povrate          = poverty rate

# Show every distinct indicator that survived the merge.
df['indicator'].unique()

array(['CHILDPOVRATE', 'FARM_TO_SCHOOL', 'FMRKT', 'FMRKTPTH',
       'LACCESS_HHNV', 'LACCESS_LOWI', 'LACCESS_POP', 'LACCESS_SNAP',
       'MEDHHINC', 'METRO', 'PCT_LACCESS_HHNV', 'PCT_LACCESS_LOWI',
       'PCT_LACCESS_POP', 'PCT_LACCESS_SNAP', 'POVRATE',
       'Population_Estimate', 'Census_Population', 'PCT_NHWHITE',
       'PCT_NHBLACK', 'PCT_HISP', 'PCT_NHASIAN', 'PCT_NHNA', 'PCT_NHPI',
       'PCT_65OLDER', 'PCT_18YOUNGER', 'PERPOV', 'PERCHLDPOV', 'POPLOSS',
       nan], dtype=object)

In [None]:
# Persist the merged table to CSV; the row index is written as the first column.
df.to_csv('food_health_merged_2010_2014.csv', index=True)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a805a7fe-fe3d-44c2-a936-f271a2a4c12f' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>