# Shape Data For Maps

Building a map takes two steps:
1. Finding data to visualize on the map
2. Drawing the map from geographic shapes (covered HERE)

We use geographic boundary data to draw the shapes. For US Census data, the format is TIGER/Line shapefiles.

https://www2.census.gov/geo/tiger/TIGER2017/2017_TL_Shapefiles_File_Name_Definitions.pdf

In [1]:
import geopandas as gpd
import pandas as pd
import us
# from us states
import requests
import zipfile
import os
import sys

sys.path.append('../datasets/')
import geo
from geo import GeoShapes
from IPython.display import display

%load_ext autoreload
%autoreload 2


In [24]:

# Lookup table: full state name -> two-letter postal abbreviation.
# Covers the 50 states plus the District of Columbia (listed last).
us_state_abbrev = dict([
    ('Alabama', 'AL'),
    ('Alaska', 'AK'),
    ('Arizona', 'AZ'),
    ('Arkansas', 'AR'),
    ('California', 'CA'),
    ('Colorado', 'CO'),
    ('Connecticut', 'CT'),
    ('Delaware', 'DE'),
    ('Florida', 'FL'),
    ('Georgia', 'GA'),
    ('Hawaii', 'HI'),
    ('Idaho', 'ID'),
    ('Illinois', 'IL'),
    ('Indiana', 'IN'),
    ('Iowa', 'IA'),
    ('Kansas', 'KS'),
    ('Kentucky', 'KY'),
    ('Louisiana', 'LA'),
    ('Maine', 'ME'),
    ('Maryland', 'MD'),
    ('Massachusetts', 'MA'),
    ('Michigan', 'MI'),
    ('Minnesota', 'MN'),
    ('Mississippi', 'MS'),
    ('Missouri', 'MO'),
    ('Montana', 'MT'),
    ('Nebraska', 'NE'),
    ('Nevada', 'NV'),
    ('New Hampshire', 'NH'),
    ('New Jersey', 'NJ'),
    ('New Mexico', 'NM'),
    ('New York', 'NY'),
    ('North Carolina', 'NC'),
    ('North Dakota', 'ND'),
    ('Ohio', 'OH'),
    ('Oklahoma', 'OK'),
    ('Oregon', 'OR'),
    ('Pennsylvania', 'PA'),
    ('Rhode Island', 'RI'),
    ('South Carolina', 'SC'),
    ('South Dakota', 'SD'),
    ('Tennessee', 'TN'),
    ('Texas', 'TX'),
    ('Utah', 'UT'),
    ('Vermont', 'VT'),
    ('Virginia', 'VA'),
    ('Washington', 'WA'),
    ('West Virginia', 'WV'),
    ('Wisconsin', 'WI'),
    ('Wyoming', 'WY'),
    ('District of Columbia', 'DC'),
])

In [73]:
# Census vintage to download. Defined first because both the remote directory
# and the file names below are derived from it.
year = 2017

# Remote TIGER location. The directory name embeds the vintage, so it is built
# from `year` to keep it from drifting out of sync with the file names.
TIGER_BASE_URL = 'http://www2.census.gov/geo/tiger/TIGER{0}/'.format(year)
TIGER_TRACT_DIR = 'TRACT/'

# Local Storage Parameters
LOCAL_DATA_DIR = './data/'
GEO_SUB_DIR = 'geozip/'
GEO_DIR = 'geo'


def localtigerfile(tiger_zip_file):
    """Return the local path under which a downloaded TIGER zip archive is stored."""
    return os.path.join(LOCAL_DATA_DIR, GEO_SUB_DIR, tiger_zip_file)


def localshapefile(tiger_shape_file):
    """Return the local path of an extracted TIGER shapefile."""
    return os.path.join(LOCAL_DATA_DIR, GEO_DIR, tiger_shape_file)


# create geo json file
county_list = None # Extract all counties
GEO_FILE_END = '_geo_data.json'


def geo_outfile(state_id):
    """Return the local path of the GeoJSON output file for `state_id` (a string)."""
    return os.path.join(LOCAL_DATA_DIR, state_id + GEO_FILE_END)


# FULL TIGER URL
def gettigerurl(tiger_zip_file):
    """Return the full download URL for a TIGER tract zip archive.

    URLs always use '/' as separator, so the parts are joined explicitly
    instead of with os.path.join, whose result depends on the platform and on
    whether each part happens to carry a trailing slash.
    """
    return '/'.join([
        TIGER_BASE_URL.rstrip('/'),
        TIGER_TRACT_DIR.strip('/'),
        tiger_zip_file.lstrip('/'),
    ])


def gettigerzipfile(state_id):
    """Return the TIGER tract zip file name for a state id, using the current `year`."""
    return 'tl_{0}_{1}_tract.zip'.format(year, state_id)


def gettigershapefile(state_id):
    """Return the TIGER tract shapefile name for a state id, using the current `year`."""
    return 'tl_{0}_{1}_tract.shp'.format(year, state_id)

# Default selection: every state object exposed by the `us` package.
STATES = us.STATES
# Override: restrict the run to California. Remove this line to process all of
# the states selected above.
STATES = [us.states.CA.name]

# Abbreviation -> FIPS lookup. It is the same for every state, so build it once
# instead of on every loop iteration.
fips_mapping = us.states.mapping('abbr', 'fips')

# initialize list to store all the fips codes
state_fips = []
# loop through states
for state in STATES:
    # str() lets an entry be either a plain name or an object whose string
    # form is the state name, which is the key used by us_state_abbrev
    state_abbrv = us_state_abbrev[str(state)]
    # translate the abbreviation into the FIPS code for this state
    state_fips.append(fips_mapping[state_abbrv])

In [74]:
# One GeoDataFrame per state, in the order of state_fips
state_shapes = []

# download the state tiger-tract file by year 
for state_id in state_fips:
    tiger_zip_file = gettigerzipfile(state_id)
    tiger_shape_file = gettigershapefile(state_id)
    FULL_TIGER_URL = gettigerurl(tiger_zip_file)
    local_zip_path = localtigerfile(tiger_zip_file)
    print(us.states.lookup(state_id), FULL_TIGER_URL)
    
    # Check if file is in directory, else download it
    if os.path.isfile(local_zip_path):
        print("Already had the file.  Great.")
    else:
        # A timeout keeps the cell from hanging forever on a stalled connection
        r = requests.get(FULL_TIGER_URL, timeout=60)

        # Stop here on a failed download: continuing would only fail later
        # with a less helpful error when the missing archive is opened.
        if r.status_code != requests.codes.ok:
            raise RuntimeError(
                "Something went wrong. Status code: {0}".format(r.status_code)
            )

        print("Got the file! Copying to disk.")
        # The zip directory may not exist yet on a fresh checkout
        os.makedirs(os.path.dirname(local_zip_path), exist_ok=True)
        with open(local_zip_path, "wb") as f:
            f.write(r.content)
            
    # Unzip file, extract contents (context manager closes the archive handle)
    with zipfile.ZipFile(local_zip_path) as zfile:
        zfile.extractall(os.path.join(LOCAL_DATA_DIR,GEO_DIR))

    # Load to GeoDataFrame
    state_shape = gpd.GeoDataFrame.from_file(localshapefile(tiger_shape_file))
    state_shapes.append(state_shape)

California http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_06_tract.zip
Already had the file.  Great.


In [75]:
# Stack the per-state frames into a single GeoDataFrame
shapes = gpd.GeoDataFrame( pd.concat(state_shapes, ignore_index=True) )

# Only keep counties that we are interested in
if county_list is not None:
    print("removing counties")
    shapes = shapes[shapes["COUNTYFP"].isin(county_list)]
    
# Slimmed-down copy holding only what gets exported: geometry plus an id column
small_shapes = gpd.GeoDataFrame()
small_shapes["geometry"] = shapes["geometry"].simplify(tolerance=0.0001) # Simplify geometry to reduce file size
small_shapes["fips"] = shapes["GEOID"]
small_json = small_shapes.to_json()

# Write to file
# The output is named after the last state in state_fips. Reading it from the
# list (rather than from a loop variable left over from the download cell)
# makes that dependency explicit. Note that when several states are loaded,
# the single combined file still carries only the last state's id.
out_state_id = state_fips[-1]
with open(geo_outfile(out_state_id), 'w') as f:
    f.write(small_json)

In [71]:
# Preview the first rows. display() is required because the notebook only
# renders a bare expression automatically when it is the last line of the cell.
display(shapes.head())
# Row/column counts of the full frame and of the slimmed-down export frame
print(shapes.shape)
print(small_shapes.shape)

(8057, 13)
(8057, 2)


# Using Geo Class Module I Wrote

In [9]:
# Vintage handed to the loader. This rebinds the notebook-level `year` that the
# file-name helpers defined earlier also read.
year = 2017
# GeoShapes comes from the local `geo` module imported at the top of the notebook.
# NOTE(review): its constructor behavior is not visible here — see geo.GeoShapes.
geoloader = GeoShapes(year=year)

In [11]:
# run download functions
# NOTE(review): judging by the recorded output below, loadtractbyfips() walks
# through the states and fetches/loads each TIGER tract archive — confirm in geo.GeoShapes.
geoloader.loadtractbyfips()
# presumably converts the loaded shapes to a JSON string and stores it on the
# loader as `small_json` — verify against geo.GeoShapes
geoloader.convertgpdtojson()
geo_json = geoloader.small_json

Alabama http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_01_tract.zip
Already had the file.  Great.
Alaska http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_02_tract.zip
Already had the file.  Great.
Arizona http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_04_tract.zip
Already had the file.  Great.
Arkansas http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_05_tract.zip
Already had the file.  Great.
California http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_06_tract.zip
Already had the file.  Great.
Colorado http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_08_tract.zip
Already had the file.  Great.
Connecticut http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_09_tract.zip
Already had the file.  Great.
Delaware http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_10_tract.zip
Already had the file.  Great.
District of Columbia http://www2.census.gov/geo/tiger/TIGER2017/TRACT/tl_2017_11_tract.zip
Already had the file.  Great.
Florid