In [9]:
# Import packages/libs
import pandas as pd
import numpy as np

In [10]:
# Read in food access data
food_df = pd.read_csv('FoodAccessResearchAtlasData2019.csv')

# Drop census tracts that are missing a value in any column.
# .copy() gives an independent frame so the column assignments below
# do not trigger SettingWithCopyWarning.
food_df = food_df.dropna().copy()

# Coerce the count columns to floats. Values that cannot be parsed as numbers
# become NaN (errors='coerce') instead of raising, and the affected tracts are
# then dropped, so only rows with usable numeric counts remain.
keys = ['poverty_count', 'la_kids_count', 'la_novehicle_count', 'la_snap_count']
for key in keys:
    food_df[key] = pd.to_numeric(food_df[key], errors='coerce').astype(float)
food_df = food_df.dropna(subset=keys)

# Display first 5 rows
food_df.head()

Unnamed: 0,state,county,name,area_name,population,poverty_count,la_kids_count,la_novehicle_count,la_snap_count
0,AL,Autauga,"Autauga, AL",Autauga County,1912,216.06,507.0,5.0,92.0
1,AL,Autauga,"Autauga, AL",Autauga County,2170,388.43,606.0,93.0,161.0
2,AL,Autauga,"Autauga, AL",Autauga County,3373,505.95,771.0,39.0,139.0
3,AL,Autauga,"Autauga, AL",Autauga County,4386,122.81,847.0,19.0,84.0
4,AL,Autauga,"Autauga, AL",Autauga County,10766,1636.43,2309.0,164.0,235.0


In [21]:
# Group census tracts by county and sum up data.
# The string alias 'sum' is used rather than the Python builtin `sum`:
# passing the builtin to groupby.agg is deprecated in recent pandas.
count_cols = ['poverty_count', 'la_kids_count', 'la_novehicle_count', 'la_snap_count']
food_df_2 = food_df.groupby(['name', 'area_name'], as_index=False).agg(
    {col: 'sum' for col in ['population'] + count_cols}
)

# Calculate population-based percentages: each '<x>_count' column becomes
# '<x>_%' = count / population * 100.
# NOTE: a county whose summed population is 0 yields NaN or inf here.
for count_col in count_cols:
    pct_col = count_col.replace('_count', '_%')
    food_df_2[pct_col] = food_df_2[count_col] / food_df_2['population'] * 100

# Drop count data (not needed anymore)
food_df_2 = food_df_2.drop(columns=count_cols)

# Display first 5 rows
food_df_2.head()

Unnamed: 0,name,area_name,population,poverty_%,la_kids_%,la_novehicle_%,la_snap_%
0,"Abbeville, SC",Abbeville County,25417,22.331943,20.702679,3.58815,7.365149
1,"Acadia, LA",Acadia Parish,61773,24.331844,19.438913,1.622068,4.354653
2,"Accomack, VA",Accomack County,33164,19.809492,19.54529,4.212399,5.590399
3,"Ada, ID",Ada County,388468,11.162155,20.634647,0.842798,1.891533
4,"Adair, IA",Adair County,7682,9.22377,15.842229,1.210622,3.709971


In [22]:
# Load the county centroid table (one row per county, with lat/lon columns)
centroid_df = pd.read_csv(filepath_or_buffer='county_centroids.csv')

# Preview the first 5 rows
centroid_df.head(5)

Unnamed: 0,state,county,name,lat,lon
0,AL,Autauga,"Autauga, AL",32.536382,-86.64449
1,AL,Baldwin,"Baldwin, AL",30.659218,-87.746067
2,AL,Barbour,"Barbour, AL",31.87067,-85.405456
3,AL,Bibb,"Bibb, AL",33.015893,-87.127148
4,AL,Blount,"Blount, AL",33.977448,-86.567246


In [23]:
# Merge centroids with food insecurity data.
# The join key 'name' is given once. pd.merge defaults to an inner join, so
# counties that are missing from either table are not carried into `df`.
# 'county' is dropped afterwards, leaving 'name' and 'area_name' as the
# county identifiers.
df = pd.merge(centroid_df, food_df_2, on='name').drop(columns='county')
df.head()

Unnamed: 0,state,name,lat,lon,area_name,population,poverty_%,la_kids_%,la_novehicle_%,la_snap_%
0,AL,"Autauga, AL",32.536382,-86.64449,Autauga County,54571,15.137619,24.337102,1.914937,4.284327
1,AL,"Baldwin, AL",30.659218,-87.746067,Baldwin County,182265,10.947884,21.001289,1.194963,2.760815
2,AL,"Barbour, AL",31.87067,-85.405456,Barbour County,27457,29.354554,18.002695,2.702407,8.143643
3,AL,"Bibb, AL",33.015893,-87.127148,Bibb County,22915,13.833472,21.051713,1.924504,3.805368
4,AL,"Blount, AL",33.977448,-86.567246,Blount County,57322,14.623862,23.695963,1.434004,3.42277


In [27]:
# Write the merged per-county table to disk, once as CSV and once as a JSON
# array with one object per row (orient="records").
df.to_csv(path_or_buf="./food_insecurity_counties.csv")
df.to_json(path_or_buf="./food_insecurity_counties.json", orient="records")

In [25]:
### Export stats as individual factors for mapbox tileset use

def _export_factor(frame, column, path):
    """Write the 'lat', 'lon' and `column` columns of `frame` to a CSV at `path`.

    Returns the three-column subset so the caller can keep a reference to it.
    """
    subset = frame.loc[:, ['lat', 'lon', column]]
    subset.to_csv(path)
    return subset


# One CSV per factor, each holding only the coordinates and that factor.
poverty_df = _export_factor(df, 'poverty_%', "./poverty_%.csv")
la_kids_df = _export_factor(df, 'la_kids_%', "./la_kids_%.csv")
la_novehicle_df = _export_factor(df, 'la_novehicle_%', "./la_novehicle_%.csv")
la_snap_df = _export_factor(df, 'la_snap_%', "./la_snap_%.csv")