# Data Setup For Maps



In [1]:
import os.path
from census import Census
from us import states
import geopandas as gpd
import pandas as pd
import zipfile

In [2]:
def build_bg_fips(record):
    fips_code = record['state'] + record['county'] + record['tract'] + record['block group']
    return str(fips_code)

def build_tract_fips(record):
    fips_code = record['state'] + record['county'] + record['tract']
    return str(fips_code)

def census_to_dataframe(var_list, state_code, county_codes):
    fips_codes = []
    all_records = []
    
    for county in county_codes:        
        census_data = c.acs.get(var_list, {'for': 'block group:*', 'in': 'state:{0} county:{1}'.format(state_code, county)})
        
        for idx, record in enumerate(census_data):
            # Build fips codes
            fips_code = build_bg_fips(record)
            census_data[idx]["fips"] = fips_code

            # Eliminate original code components
            key_list = ['state', 'county', 'tract', 'block group']
            for key in key_list:
                if key in census_data[idx]: 
                    del census_data[idx][key]
        
        all_records.extend(census_data)
        
    census_df = pd.DataFrame(all_records)
    census_df = census_df.set_index("fips")
                
    return census_df

def checkDirExist(filepath):
    if not os.path.exists(filepath):
        os.makedirs(filepath)
        
def census_bg_to_dataframe(var_list, state_code, county_codes):
    fips_codes = []
    all_records = []
    
    for county in county_codes:        
        census_data = c.acs.get(var_list, {'for': 'tract:*', 'in': 'state:{0}'.format(state_code)})
        
        for idx, record in enumerate(census_data):
            # Build fips codes
            fips_code = build_bg_fips(record)
            census_data[idx]["fips"] = fips_code

            # Eliminate original code components
            key_list = ['state', 'county', 'tract', 'block group']
            for key in key_list:
                if key in census_data[idx]: 
                    del census_data[idx][key]
        
        all_records.extend(census_data)
        
    census_df = pd.DataFrame(all_records)
    census_df = census_df.set_index("fips")
                
    return census_df

def census_tracts_to_dataframe(var_list, state_codes):
    fips_codes = []
    all_records = []
    
    for state_id in state_codes:
        census_data = c.acs.get(var_list, {'for': 'tract:*', 'in': 'state:{0}'.format(state_id)})

        for idx, record in enumerate(census_data):

            # Build fips codes
            fips_code = build_tract_fips(record)
            census_data[idx]["fips"] = fips_code

            # Eliminate original code components
            key_list = ['state', 'county', 'tract']
            for key in key_list:
                if key in census_data[idx]: 
                    del census_data[idx][key]
        
        all_records.extend(census_data)
      
    census_df = pd.DataFrame(all_records)
    census_df = census_df.set_index("fips")
                
    return census_df

In [16]:
# Specify state and county to download (select one)
loc_name, state_code, county_codes = "balt_city", states.MD.fips, list([510]) # Baltimore

# Create county list (string representation of county IDs)
county_list = ["{:03d}".format(county_id) for county_id in county_codes]

# CENSUS API Stuff
CENSUS_API = 'fe55211c8b3f0350fcb040c07321a129a3d6e266' # Your key here
c = Census(CENSUS_API) # Initialize census class with API key

# Generate codes for census variables of interest
var_ids = ["B19001_0{:02d}E".format(x) for x in range(2, 18)] # Household income over 12 months

# TIGER Stuff
TIGER_BASE_URL = 'http://www2.census.gov/geo/tiger/TIGER2013/'
TIGER_TRACT_DIR = 'TRACT/'
TIGER_BLOCKGROUP_DIR = 'BG/'
TIGER_WATER_DIR = 'AREAWATER/'

# LOCAL DATA DIR FOR STORING THE DATA FROM CENSUS.GOV
LOCAL_DATA_DIR = './data/'
GEO_SUB_DIR = 'geo/'

# GET FILE BY STATE_CODE
tiger_zip_file = 'tl_2013_{0}_bg.zip'.format(state_code)
tiger_shape_file = 'tl_2013_{0}_bg.shp'.format(state_code)

ATTR_FILE_END = '_census_data.csv'
attr_outfile = LOCAL_DATA_DIR + loc_name + ATTR_FILE_END

GEO_FILE_END = '_geo_data.json'
geo_outfile = LOCAL_DATA_DIR + loc_name + GEO_FILE_END

In [4]:
print "Maryland state code is: ", state_code
print county_list
print var_ids
print tiger_zip_file
print tiger_shape_file

Maryland state code is:  24
['510']
['B19001_002E', 'B19001_003E', 'B19001_004E', 'B19001_005E', 'B19001_006E', 'B19001_007E', 'B19001_008E', 'B19001_009E', 'B19001_010E', 'B19001_011E', 'B19001_012E', 'B19001_013E', 'B19001_014E', 'B19001_015E', 'B19001_016E', 'B19001_017E']
tl_2013_24_bg.zip
tl_2013_24_bg.shp


# Get TIGER data (shape data) From Census API

In [5]:
import requests

FULL_TIGER_URL = TIGER_BASE_URL + TIGER_BLOCKGROUP_DIR + tiger_zip_file
print FULL_TIGER_URL

# Check if file is in directory, else download it
if os.path.isfile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file):
    print "Already had the file.  Great."
else:
    r = requests.get(FULL_TIGER_URL)
    
    if r.status_code == requests.codes.ok:
        print "Got files, copying to disk..."
        checkDirExist(LOCAL_DATA_DIR + GEO_SUB_DIR)
        with open(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file, 'wb') as f:
            f.write(r.content)
    else:
        print "Error with getting the data. Status code: ".format(r.status_code)

http://www2.census.gov/geo/tiger/TIGER2013/BG/tl_2013_24_bg.zip
Already had the file.  Great.


In [6]:
# Unzip file, extract contents
zfile = zipfile.ZipFile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file)
zfile.extractall(LOCAL_DATA_DIR + GEO_SUB_DIR)

# Load to GeoDataFrame the shape file
shapes = gpd.GeoDataFrame.from_file(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_shape_file)

print shapes.shape

(3926, 13)


In [7]:
# Only keep counties that we are interested in
shapes = shapes[shapes["COUNTYFP"].isin(county_list)]
print shapes.shape
print shapes.columns

(653, 13)
Index([   u'ALAND',   u'AWATER', u'BLKGRPCE', u'COUNTYFP', u'FUNCSTAT',
          u'GEOID', u'INTPTLAT', u'INTPTLON',    u'MTFCC', u'NAMELSAD',
        u'STATEFP',  u'TRACTCE', u'geometry'],
      dtype='object')


In [8]:
# Check if file is in directory, else download it
for county in county_list:
    tiger_water_zip_file = "tl_2013_{0}{1}_areawater.zip".format(state_code, county)

    if os.path.isfile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_water_zip_file):
        print("Already had the file.  Great.")
    else:
        r = requests.get(TIGER_BASE_URL + TIGER_WATER_DIR + tiger_water_zip_file)

        if r.status_code == requests.codes.ok:
            print("Got the file! Copying to disk.")
            with open(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_water_zip_file, "wb") as f:
                f.write(r.content)
        else:
            print("Something went wrong. Status code: ".format(r.status_code))
    
    # Unzip file, extract contents
    zfile = zipfile.ZipFile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_water_zip_file)
    zfile.extractall(LOCAL_DATA_DIR + GEO_SUB_DIR)
    

Already had the file.  Great.


In [9]:
small_shapes = gpd.GeoDataFrame()
small_shapes["geometry"] = shapes["geometry"].simplify(tolerance=0.0001) # Simplify geometry to reduce file size
small_shapes["fips"] = shapes["GEOID"]
small_shapes = small_shapes.set_index("fips") # set index to the geo IDs of polygons

small_json = small_shapes.to_json()

# Write to file
print "Writing to ", geo_outfile
with open(geo_outfile, 'w') as f:
    f.write(small_json)

Writing to  ./data/balt_city_geo_data.json


In [10]:
loc_name, state_codes, county_codes = 'maryland', states.MD.fips, None
loc_name, state_codes, county_codes = 'delmarva', [states.MD.fips, states.DE.fips, states.VA.fips], None

if county_codes is not None:
    county_list = ["{:03d}".format(county_id) for county_id in county_codes]
else:
    county_list = None
    
print county_list

# CENSUS API Stuff
CENSUS_API = 'fe55211c8b3f0350fcb040c07321a129a3d6e266' # Your key here
c = Census(CENSUS_API) # Initialize census class with API key

# Generate codes for census variables of interest
var_ids = ["B19001_0{:02d}E".format(x) for x in range(2, 18)] # Household income over 12 months

# TIGER Stuff
TIGER_BASE_URL = 'http://www2.census.gov/geo/tiger/TIGER2013/'
TIGER_TRACT_DIR = 'TRACT/'
TIGER_BLOCKGROUP_DIR = 'BG/'

TIGER_WATER_DIR = 'AREAWATER/'

tiger_zip_file = 'tl_2013_{0}_tract.zip'.format(state_code)
tiger_shape_file = 'tl_2013_{0}_tract.shp'.format(state_code)

FULL_TIGER_URL = TIGER_BASE_URL + TIGER_TRACT_DIR + tiger_zip_file

# Local Storage Parameters
LOCAL_DATA_DIR = './data/'
GEO_SUB_DIR = 'geo/'

ATTR_FILE_END = '_census_data.csv'
attr_outfile = LOCAL_DATA_DIR + loc_name + ATTR_FILE_END

GEO_FILE_END = '_geo_data.json'
geo_outfile = LOCAL_DATA_DIR + loc_name + GEO_FILE_END

None


In [11]:
census_data = c.acs.get(var_ids, {'for': 'tract:*', 'in': 'state:{0}'.format(state_code)})
census_df = pd.DataFrame(census_data)
census_df.head()

Unnamed: 0,B19001_002E,B19001_003E,B19001_004E,B19001_005E,B19001_006E,B19001_007E,B19001_008E,B19001_009E,B19001_010E,B19001_011E,B19001_012E,B19001_013E,B19001_014E,B19001_015E,B19001_016E,B19001_017E,county,state,tract
0,132,84,80,98,93,83,80,99,68,215,165,139,134,17,14,0,1,24,100
1,69,92,78,43,80,80,75,61,24,108,127,141,99,30,65,17,1,24,200
2,51,164,29,109,42,165,80,65,10,87,92,113,60,79,0,8,1,24,300
3,87,70,52,32,104,115,120,39,82,77,59,51,149,51,8,45,1,24,400
4,354,134,72,53,32,50,59,14,38,97,35,27,26,32,13,0,1,24,500


In [12]:
# This segment of code will get household income estimates for each block group in Baltimore city
print var_ids
print state_codes
census_df = census_tracts_to_dataframe(var_ids, state_codes)
census_df.to_csv(attr_outfile)

['B19001_002E', 'B19001_003E', 'B19001_004E', 'B19001_005E', 'B19001_006E', 'B19001_007E', 'B19001_008E', 'B19001_009E', 'B19001_010E', 'B19001_011E', 'B19001_012E', 'B19001_013E', 'B19001_014E', 'B19001_015E', 'B19001_016E', 'B19001_017E']
[u'24', u'10', u'51']


# GET TIGER DATA (shapes)

In [24]:
for state_id in state_codes:
    print state_id
    tiger_zip_file = 't1_2013_{0}_tract.zip'.format(state_id)
    
    FULL_TIGER_URL = TIGER_BASE_URL + TIGER_TRACT_DIR + tiger_zip_file
    print FULL_TIGER_URL
    # Check if file is in directory, else download it
    if os.path.isfile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file):
        print("Already had the file.  Great.")
    else:
        r = requests.get(FULL_TIGER_URL)

        if r.status_code == requests.codes.ok:
            print("Got the file! Copying to disk.")
            with open(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file, "wb") as f:
                f.write(r.content)
        else:
            print "Try at this link, ", TIGER_BASE_URL+TIGER_TRACT_DIR
            print r.reason
            print("Something went wrong. Status code: ".format(r.status_code))

24
http://www2.census.gov/geo/tiger/TIGER2013/TRACT/t1_2013_24_tract.zip
Try at this link,  http://www2.census.gov/geo/tiger/TIGER2013/TRACT/
Not Found
Something went wrong. Status code: 
10
http://www2.census.gov/geo/tiger/TIGER2013/TRACT/t1_2013_10_tract.zip
Try at this link,  http://www2.census.gov/geo/tiger/TIGER2013/TRACT/
Not Found
Something went wrong. Status code: 
51
http://www2.census.gov/geo/tiger/TIGER2013/TRACT/t1_2013_51_tract.zip
Try at this link,  http://www2.census.gov/geo/tiger/TIGER2013/TRACT/
Not Found
Something went wrong. Status code: 


In [26]:
state_shapes = []
for idx, state_id in enumerate(state_codes):
    tiger_zip_file = 'tl_2013_{0}_tract.zip'.format(state_id)
    tiger_shape_file = 'tl_2013_{0}_tract.shp'.format(state_id)

    # Unzip file, extract contents
    zfile = zipfile.ZipFile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file)
    zfile.extractall(LOCAL_DATA_DIR + GEO_SUB_DIR)

    # Load to GeoDataFrame
    state_shape = gpd.GeoDataFrame.from_file(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_shape_file)
    
    state_shapes.append(state_shape)
    
    # Only keep counties that we are interested in
    if county_list is not None:
        shapes = shapes[shapes["COUNTYFP"].isin(county_list)]

shapes = gpd.GeoDataFrame( pd.concat(state_shapes, ignore_index=True) )

In [30]:
print shapes.shape
print county_list

(3531, 13)
['510']


# Eliminate Unnecessary Attributes -> geojson


In [None]:
small_shapes = gpd.GeoDataFrame()
small_shapes["geometry"] = shapes["geometry"].simplify(tolerance=0.001) # Simplify geometry to reduce file size
small_shapes["fips"] = shapes["GEOID"]
small_shapes = small_shapes.set_index("fips")
small_json = small_shapes.to_json()

# Write to file
with open(geo_outfile, 'w') as f:
    f.write(small_json)