# Mapping out Grocery Stores

## Procedure:
- Import census data
- Import local grocery store data
- Use Folium to assign data to geographic regions and map it!


In [1]:
import pandas as pd
import numpy as np
import folium

### Import census data

In [2]:
# food stamp census data
# (the first two CSV rows are both read as header rows, giving two-level column labels)
food_stamp_csv = "ACS_14_5YR_S2201/ACS_14_5YR_S2201_with_ann.csv"
food_df = pd.read_csv(
    food_stamp_csv,
    header=[0, 1],
    dtype={0: str, 1: str},
)

In [3]:
# household income census data
# (the first two CSV rows are both read as header rows, giving two-level column labels)
household_income_csv = "ACS_14_5YR_S1901/ACS_14_5YR_S1901_with_ann.csv"
income_df = pd.read_csv(
    household_income_csv,
    header=[0, 1],
    dtype={0: str, 1: str},
)

In [4]:
# Remove the secondary column label: keep only the second header level as the
# flat column names of income_df.
#
# `MultiIndex.labels` was renamed to `codes` and has since been removed from
# pandas, so the per-column names are read through `get_level_values(1)`
# instead of indexing `levels[1]` with the label codes by hand.
#
# The isinstance guard makes this cell safe to re-run: once the columns are
# already flat there is no second level left to select.
if isinstance(income_df.columns, pd.MultiIndex):
    income_df.columns = income_df.columns.get_level_values(1)

In [5]:
# preview the first five rows of the household income table
income_df.head(n=5)

Unnamed: 0,Id,Id2,Geography,Households; Estimate; Total,Households; Margin of Error; Total,Families; Estimate; Total,Families; Margin of Error; Total,Married-couple families; Estimate; Total,Married-couple families; Margin of Error; Total,Nonfamily households; Estimate; Total,...,Nonfamily households; Estimate; PERCENT IMPUTED - Family income in the past 12 months,Nonfamily households; Margin of Error; PERCENT IMPUTED - Family income in the past 12 months,Households; Estimate; PERCENT IMPUTED - Nonfamily income in the past 12 months,Households; Margin of Error; PERCENT IMPUTED - Nonfamily income in the past 12 months,Families; Estimate; PERCENT IMPUTED - Nonfamily income in the past 12 months,Families; Margin of Error; PERCENT IMPUTED - Nonfamily income in the past 12 months,Married-couple families; Estimate; PERCENT IMPUTED - Nonfamily income in the past 12 months,Married-couple families; Margin of Error; PERCENT IMPUTED - Nonfamily income in the past 12 months,Nonfamily households; Estimate; PERCENT IMPUTED - Nonfamily income in the past 12 months,Nonfamily households; Margin of Error; PERCENT IMPUTED - Nonfamily income in the past 12 months
0,1400000US04019000100,4019000100,"Census Tract 1, Pima County, Arizona",319,50,48,38,34,31,271,...,(X),(X),(X),(X),(X),(X),(X),(X),13.7,(X)
1,1400000US04019000200,4019000200,"Census Tract 2, Pima County, Arizona",1916,189,914,182,452,145,1002,...,(X),(X),(X),(X),(X),(X),(X),(X),26.7,(X)
2,1400000US04019000300,4019000300,"Census Tract 3, Pima County, Arizona",680,86,244,54,109,54,436,...,(X),(X),(X),(X),(X),(X),(X),(X),22.5,(X)
3,1400000US04019000400,4019000400,"Census Tract 4, Pima County, Arizona",1719,97,395,101,253,78,1324,...,(X),(X),(X),(X),(X),(X),(X),(X),27.5,(X)
4,1400000US04019000500,4019000500,"Census Tract 5, Pima County, Arizona",1544,119,309,98,158,64,1235,...,(X),(X),(X),(X),(X),(X),(X),(X),30.8,(X)


In [6]:
# Select the household median income column and summarise it.
# At this point the column is still object-typed, so describe() reports
# count / unique / top / freq rather than numeric statistics.
median_income_label = "Households; Estimate; Median income (dollars)"
median_income = income_df[median_income_label]
median_income.describe()

count       241
unique      240
top       27472
freq          2
Name: Households; Estimate; Median income (dollars), dtype: object

### Convert median_income to float

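The null values have to be filtered out first (here, by keeping only tracts whose household total is greater than zero), because they cannot be converted to float.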

In [7]:
# Keep only the tracts that report at least one household, then convert their
# median income values from object dtype to float and summarise them.
#
# `.ix` has been removed from pandas; `.loc` performs the same boolean-mask row
# selection combined with column-label selection.
has_households = income_df["Households; Estimate; Total"] > 0
median_income_float = income_df.loc[
    has_households, "Households; Estimate; Median income (dollars)"
].astype(float)
median_income_float.describe()

count       240.000000
mean      50339.804167
std       21997.961866
min       13193.000000
25%       32084.750000
50%       45885.000000
75%       65922.500000
max      112596.000000
Name: Households; Estimate; Median income (dollars), dtype: float64

### Read in grocery store data

In [8]:
# load the grocery store locations and preview the first rows
grocery_csv = "grocery_stores.csv"
supermarkets = pd.read_csv(grocery_csv)
supermarkets.head()

Unnamed: 0,lat,lon,name,addr
0,32.229253,-110.873651,Kimpo Market,5595 E 5th St
1,32.220195,-110.807966,Walmart Neighborhood Market,8640 E Broadway Blvd
2,32.118384,-110.798278,Safeway,9050 E Valencia Rd
3,32.25693,-110.943687,India Dukaan,2754 N Campbell Ave
4,32.193137,-110.841855,Walmart Neighborhood Market,2550 S Kolb Rd


In [9]:
# folium stuff
#
# NOTE(review): `Map.geo_json`, `Map.circle_marker` and `Map.create_map` belong
# to the legacy folium API. Newer folium releases appear to replace them with
# `folium.Choropleth`, `folium.Circle` / `folium.CircleMarker` and `Map.save`;
# confirm against the installed folium version before upgrading.

# geojson shape file of Arizona census tracts
state_geo = "arizona.json"

# initialize map
mp = folium.Map(location=[32.2, -110.94], zoom_start=11)

# Keep only the tracts that report at least one household.
# `.ix` has been removed from pandas; `.loc` does the same boolean-mask selection.
has_households = income_df["Households; Estimate; Total"] > 0
income_mapped = income_df.loc[has_households]

# six log-spaced colour breakpoints between 15,000 and 125,000 dollars
income_thresholds = np.logspace(np.log10(15000), np.log10(125000), 6).tolist()

# map data to geo_json
# NOTE(review): the "Id2" values must match `feature.properties.GEOID` in
# arizona.json exactly (including any leading zero) for tracts to be coloured;
# verify the two key formats agree.
mp.geo_json(
    geo_path=state_geo,
    data=income_mapped,
    data_out="median_income.json",
    columns=["Id2", "Households; Estimate; Median income (dollars)"],
    key_on="feature.properties.GEOID",
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    threshold_scale=income_thresholds,
    legend_name='Median Income',
)

# plot the supermarkets on the map, one red circle per store with its name as popup
for _, store in supermarkets.iterrows():
    mp.circle_marker(
        location=[str(store["lat"]), str(store["lon"])],
        popup=store["name"],
        radius=100,
        fill_color="red",
    )

# generate the HTML/Javascript
mp.create_map(path='arizona.html')