In [10]:
#import necessary libraries

import requests
import pandas as pd
import geopandas as gpd
import numpy as np
import geojson
import folium
import os



In [2]:
#These are all of the main variables used to build a call url to the the census api website

HOST = "https://api.census.gov/data"
year = "2018"
#dataset = "acs/acs5/subject"
dataset = "acs/acs5"
api_key = "f9e79198302081250c07d556f35d8a81cdae528a"
base_url = "/".join([HOST, year, dataset,])

In [3]:
#These 'predicates' help build the more complex query to the query. 
#Notice that the variables (columns) are specified here, the column names for our new dataframe
#are also created here. The predicate dictionary keys are then assigned values, which will be passed
#into the request.get function. 

In [4]:
# ###Setting up request for Table B08006: Sex of Workers by Means of Transportation to Work
predicates_transpo_mode = {}
get_vars_transpo_mode = ["NAME","B08006_001E","B08006_002E","B08006_003E", "B08006_004E",
                         "B08006_008E","B08006_014E","B08006_015E","B08006_016E", 
                         "B08006_017E", "GEO_ID"
                        ]
col_names_transpo_mode = ["place_name", "total","total_car_truck_van",
                          "car_truck_van_drove_alone","car_truck_van_carpooled",
                          "public_transportation", "bike", "walk", "taxi_moto_other",
                          "work_from_home","geoid","state_code", "county", "tract"
                         ]
predicates_transpo_mode["key"] = api_key
predicates_transpo_mode["get"] = ",".join(get_vars_transpo_mode)
predicates_transpo_mode["for"] = "tract:*"
predicates_transpo_mode["in"] = "state:06"
transpo_mode = requests.get(base_url, params=predicates_transpo_mode)

In [5]:
#Setting up Means of Transportation to Workdata frame, getting rid of first header row
df_transpo_mode = pd.DataFrame(columns=col_names_transpo_mode, data=transpo_mode.json()[1:])

In [6]:
# ###Now we join the census data to Tigerline census tract geometries. 

In [37]:
#A shapefile containing all census tracts in California 
#has been manually pulled from the census ftp site, unzipped, and saved to a folder
#Census Tract Tigerline California Census Tracts location (ftp url included in notes below)

#for personal_laptop
#census_tracts_shp = "../../Users/calvindechicago/PycharmProjects/AltaWork/automated_census_mapping/tl_2018_06_tract/tl_2018_06_tract.shp"

#for jupyter_binder : See: https://discourse.jupyter.org/t/what-is-with-the-weird-jovyan-user/1673
census_tracts_shp = "/home/jovyan/data/tl_2018_06_simplified.shp"

In [38]:
print(os.getcwd() + "\n")

/Users/calvindechicago



In [39]:
os.path.exists(census_tracts_shp)

False

In [14]:
#This reads the census tracts shapefile into a geodataframe
gdf = gpd.read_file(census_tracts_shp)

In [15]:
#the column names are all caps. We want to make them lowercase. This maps the lower function to the column names
gdf.columns = map(str.lower, gdf.columns)


In [16]:
#The geoid field in the df_transpo_mode table does not match the Tigerlines geoid field. 
#This slices the the right 11 most digits, which match the geoid codes in the TigerLine file. 
#(... these are state ('06') for California, followed by county, followed by census tract)
df_transpo_mode.insert(14, "geoid_join",df_transpo_mode['geoid'].str.slice(-11), True) 

In [17]:
#split the place_name to get human known county names
#str.split splits on comma  (',') delimiter. .str[1] selects the second element in the list (the county name) 
df_transpo_mode.insert(1, "county_name",df_transpo_mode['place_name'].str.split(',').str[1].str.strip(), True)

In [18]:

study_counties = ['Los Angeles County']
tracts_select_counties = df_transpo_mode.loc[df_transpo_mode['county_name'].isin(study_counties)]

In [19]:
#JOIN the transpo_mode table with the tigerline table 
#Note:gdf must be left table, the table that merge method is run on, so that a geodataframe (not a dataframe)
#is returned. 
df_transpo_mode_with_geom = gdf.merge(tracts_select_counties,left_on='geoid',right_on='geoid_join')

In [20]:
df_transpo_mode_with_geom['bike'] = df_transpo_mode_with_geom['bike'].astype(int)

In [21]:
df_transpo_mode_with_geom['total'] = df_transpo_mode_with_geom['total'].astype(int)

In [22]:
# Create a Geo-id which is needed by the Folium (it needs to have a unique identifier for each row)
# We do not want the GeoJson object created earlier. Use original df_transpo_mode_with_geom data.
#census_tracts_gjson = folium.features.GeoJson(df_transpo_mode_with_geom, name="census tracts")
df_transpo_mode_with_geom['geoid'] = df_transpo_mode_with_geom.index.astype(str)



In [23]:
#calculate percentage of people that bike to work in each tract
a = (df_transpo_mode_with_geom['bike'] / df_transpo_mode_with_geom['total'])*100

In [24]:
df_transpo_mode_with_geom.insert(2,'pct_bike',a, True)

In [25]:
# Select only needed columns
choropleth_data = df_transpo_mode_with_geom[['geoid', 'bike', 'pct_bike', 'geometry']]

# Convert to geojson (not needed for the simple coropleth map!)
#pop_json = data.to_json()

#check data
choropleth_data.head()

Unnamed: 0,geoid,bike,pct_bike,geometry
0,0,0,0.0,"POLYGON ((-118.58119 34.14318, -118.58099 34.1..."
1,1,0,0.0,"POLYGON ((-118.60573 34.14585, -118.60561 34.1..."
2,2,0,0.0,"POLYGON ((-118.53082 34.18024, -118.52952 34.1..."
3,3,93,3.497555,"POLYGON ((-118.37899 34.15409, -118.37888 34.1..."
4,4,51,1.373922,"POLYGON ((-118.39648 34.15761, -118.39538 34.1..."


In [26]:
choropleth_data['geoid'] = choropleth_data.index.astype(str)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [27]:
bounds = df_transpo_mode_with_geom.total_bounds
a = np.mean(bounds[0:3:2]).round(3)
b = np.mean(bounds[1:4:2]).round(3)
data_centroid = [b,a]
print(data_centroid)

[33.787, -118.299]


In [33]:
# Create a Map instance
m = folium.Map(location=data_centroid, tiles = 'cartodbpositron', zoom_start=10, control_scale=True)

#Plot a choropleth map
#Notice: 'geoid' column that we created earlier needs to be assigned always as the first column
folium.Choropleth(
    geo_data=choropleth_data,
    name='Percentage of Cyclists',
    data=choropleth_data,
    columns=['geoid', 'pct_bike'],
    key_on='feature.id',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    line_color='white',
    line_weight=0,
    highlight=False,
    smooth_factor=1.0,
    #threshold_scale=[1, 2, 3, 4, 5],
    legend_name= 'Percentage of workers that bike to work').add_to(m)

ValueError: All values are expected to fall into one of the provided bins (or to be Nan). Please check the `bins` parameter and/or your data.

In [29]:
# Convert points to GeoJson
# This creates interactive labels
folium.features.GeoJson(choropleth_data,
                        name='Labels',
                        style_function=lambda x: {'color':'transparent','fillColor':'transparent','weight':0},
                        tooltip=folium.features.GeoJsonTooltip(fields=['pct_bike'],
                                                               # aliases = ['Population'],
                                                                labels=True,
                                                                sticky=False
                                                                            )
                       ).add_to(m)


<folium.features.GeoJson at 0x7faabfb4ae90>

In [31]:
#SOURCES
#https://www.w3schools.com/tags/ref_urlencode.ASP
#https://www.census.gov/content/dam/Census/data/developers/api-user-guide/api-guide.pdf
#https://api.census.gov/data/2018/acs/acs5/variables.html
#https://api.census.gov/data/2018/acs/acs5/subject/variables.html
#https://www.youtube.com/watch?v=Wi0_Mb0e4JM
#https://atcoordinates.info/2019/09/24/examples-of-using-the-census-bureaus-api-with-python/
#--downloading tigerline from zip -- 
#http://andrewgaidus.com/Dot_Density_County_Maps/
#--Python for reading zip tigerline shpfile --
#http://andrewgaidus.com/Reading_Zipped_Shapefiles/
#--Aaron's ATP Data Mining Project would also be useful--
#https://github.com/AltaPlanning/GIS-notebooks/tree/master/2020-000%20ATP%20Data%20Mining
#https://automating-gis-processes.github.io/site/notebooks/L5/interactive-map-folium.html

#-- geographies and summary levels --
#https://censusreporter.org/topics/geography/
#geo_ids=140|04000US06  --> this should be a all tracts in California

#There is no great way to use the api to return census tract geometries: the geography api functions 
#only seem to allow calling a specific geoid. One option would be to loop through geoids and call census reporter
#to request geography for each geoid, but that would involve a lot of calls. 
#https://api.censusreporter.org/1.0/data/show/latest?table_ids=B01001&geo_ids=140|04000US06
#error"You requested 8057 geoids. The maximum is 3500. Please contact us for bulk data."



# MEDIAN EARNINGS IN THE PAST 12 MONTHS (IN 2018 INFLATION-ADJUSTED DOLLARS) BY MEANS OF TRANSPORTATION TO WORK
# Survey/Program: American Community Survey
# Universe: Workers 16 years and over with earnings
# Year: 2018
# Estimates: 1-Year
# Table ID: B08121



In [None]:
#Show map
m

In [32]:
# ### OTHER NOTES
#I was a bit confused about obtaining the Census Tiger boundaries. The Tigerweb REST service seemed geared towards
#delivering Web Map Service (WMS) map images. We want the spatial data! Hopefully the stack exchange post linked below
#clears some of the confusion up.
#https://gis.stackexchange.com/questions/269650/how-to-bring-the-tiger-census-reporter-api-to-geopandas
#--->I'm not sure if these geometries still exist on the census api. 
#zipfiles can be downloaded at the ftp site below. I am using blog and aaron's atp data mining python as examples. 
#For now I am just going to manually unzip census geometry, but a link to a tutorial is included below to automate
#download, unzipping, and processing the geometry. 

#ftp://ftp2.census.gov/geo/tiger/TIGER2018/TRACT/tl_2018_06_tract.zip