In [264]:
#import packages
import pandas as pd
import geopandas as gpd
import geojson
from bokeh.io import show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, 
                          GeoJSONDataSource, HoverTool, BoxZoomTool, PolySelectTool,
                          WheelZoomTool,ResetTool,SaveTool,PanTool,
                          ZoomInTool, ZoomOutTool,
                          LinearColorMapper, Slider, LogColorMapper, CategoricalColorMapper,
                          FixedTicker, BasicTickFormatter, LogTicker, FuncTickFormatter,
                          PrintfTickFormatter, BasicTicker, Legend, LegendItem)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer
from bokeh.plotting import figure, output_file, save
from bokeh.resources import CDN
from bokeh.embed import file_html

In [265]:
#import Affordable Housing file
housing = pd.read_csv("Housing.csv",sep=",")

housing.head()

Unnamed: 0,Community Area Name,Community Area Number,Property Type,Property Name,Address,Zip Code,Phone Number,Management Company,Units,X Coordinate,Y Coordinate,Latitude,Longitude,Location
0,Portage Park,15,ARO,4812-15 W. Montrose Apts.,4812-15 W. Montrose Ave.,60641,630-694-6968,@properties,2,,,,,
1,West Englewood,67,Multifamily,New West Englewood Homes,2109 W. 63rd St.,60636,773-434-4929,Interfaith Housing Corp.,12,,,,,
2,Englewood,68,Multifamily,Antioch Homes II,301 W. Marquette Road,60621,773-994-4546,"Universal Management Service, Inc.",69,1175445.0,1860492.0,41.772564,-87.632419,"(41.7725637689, -87.6324193182)"
3,Washington Park,40,Senior HUD 202,St. Edmund's Corners,5556 S. Michigan Ave.,60637,773-667-7583,St. Edmund's Redevelopment Corp.,53,1178070.0,1867952.0,41.792975,-87.622569,"(41.7929745219, -87.6225685185)"
4,Humboldt Park,23,Multifamily,Nelson Mandela Apts.,526 N. Troy St.,60624,773-227-6332,Bickerdike Apts.,6,1155238.0,1903559.0,41.891173,-87.705338,"(41.8911727354, -87.7053383382)"


In [266]:
#Group housing data by neighborhood and count number of addresses 

housing_grouped = housing.groupby(["Community Area Name"]).count()[["Address"]].reset_index()

housing_grouped

Unnamed: 0,Community Area Name,Address
0,Albany Park,3
1,Ashburn,1
2,Auburn Gresham,6
3,Austin,11
4,Avalon Park,1
...,...,...
58,West Lawn,1
59,West Pullman,3
60,West Ridge,5
61,West Town,23


In [267]:
#import shapefile with neighborhood boundaries
map_nbhoods = gpd.read_file('Neighborhoods.shp')

map_nbhoods

Unnamed: 0,pri_neigh,sec_neigh,shape_area,shape_len,geometry
0,Grand Boulevard,BRONZEVILLE,4.849250e+07,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816..."
1,Printers Row,PRINTERS ROW,2.162138e+06,6864.247156,"POLYGON ((-87.62761 41.87437, -87.62760 41.873..."
2,United Center,UNITED CENTER,3.252051e+07,23101.363745,"POLYGON ((-87.66707 41.88885, -87.66707 41.888..."
3,Sheffield & DePaul,SHEFFIELD & DEPAUL,1.048259e+07,13227.049745,"POLYGON ((-87.65833 41.92166, -87.65835 41.922..."
4,Humboldt Park,HUMBOLDT PARK,1.250104e+08,46126.751351,"POLYGON ((-87.74060 41.88782, -87.74060 41.887..."
...,...,...,...,...,...
93,Belmont Cragin,"BELMONT CRAGIN,HERMOSA",1.090994e+08,43311.706886,"POLYGON ((-87.74143 41.91698, -87.74141 41.916..."
94,Austin,AUSTIN,1.700378e+08,55473.345911,"POLYGON ((-87.75620 41.91547, -87.75588 41.915..."
95,Gold Coast,GOLD COAST,7.165706e+06,13685.479377,"POLYGON ((-87.62646 41.91147, -87.62640 41.911..."
96,Boystown,BOYSTOWN,3.365779e+06,9780.268985,"POLYGON ((-87.64878 41.93999, -87.64927 41.939..."


In [268]:
map_nbhoods.dtypes #check to be sure it's a geodataframe

pri_neigh       object
sec_neigh       object
shape_area     float64
shape_len      float64
geometry      geometry
dtype: object

In [269]:
'''Find Mismatched Neighorhoods'''

#merge neighborhood data with housing data based on neighborhood name
housing_merged = pd.merge(map_nbhoods,housing_grouped,
                         left_on='pri_neigh',right_on='Community Area Name',
                         how='outer',indicator=True)



housing_merged

Unnamed: 0,pri_neigh,sec_neigh,shape_area,shape_len,geometry,Community Area Name,Address,_merge
0,Grand Boulevard,BRONZEVILLE,4.849250e+07,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816...",Grand Boulevard,27.0,both
1,Printers Row,PRINTERS ROW,2.162138e+06,6864.247156,"POLYGON ((-87.62761 41.87437, -87.62760 41.873...",,,left_only
2,United Center,UNITED CENTER,3.252051e+07,23101.363745,"POLYGON ((-87.66707 41.88885, -87.66707 41.888...",,,left_only
3,Sheffield & DePaul,SHEFFIELD & DEPAUL,1.048259e+07,13227.049745,"POLYGON ((-87.65833 41.92166, -87.65835 41.922...",,,left_only
4,Humboldt Park,HUMBOLDT PARK,1.250104e+08,46126.751351,"POLYGON ((-87.74060 41.88782, -87.74060 41.887...",Humboldt Park,36.0,both
...,...,...,...,...,...,...,...,...
102,,,,,,Near North Side,11.0,right_only
103,,,,,,Near West Side,17.0,right_only
104,,,,,,South Lawndale,3.0,right_only
105,,,,,,West Englewood,3.0,right_only


In [270]:
#find housing units with mismatched neighborhoods
right_only = housing_merged.loc[housing_merged['_merge'] == 'right_only']

right_only

Unnamed: 0,pri_neigh,sec_neigh,shape_area,shape_len,geometry,Community Area Name,Address,_merge
98,,,,,,East Garfield Park,10.0,right_only
99,,,,,,East Garfiled Park,1.0,right_only
100,,,,,,Greater Grand Crossing,2.0,right_only
101,,,,,,Lakeview,9.0,right_only
102,,,,,,Near North Side,11.0,right_only
103,,,,,,Near West Side,17.0,right_only
104,,,,,,South Lawndale,3.0,right_only
105,,,,,,West Englewood,3.0,right_only
106,,,,,,West Garfield Park,3.0,right_only


In [271]:
'''Clean Housing Data'''

#fix typos
housing.replace({'East Garfiled Park':'East Garfield Park'},inplace=True)
housing.replace({'Lakeview':'Lake View'},inplace=True)

#match up mismatched neighborhoods
housing.replace({'West Englewood':'Englewood',
                    'Near West Side':'Little Italy, UIC',
                    'Near North Side':'River North',
                    'East Garfield Park':'Garfield Park',
                    'West Garfield Park':'Garfield Park',
                    'Greater Grand Crossing':'Grand Crossing',
                    'South Lawndale':'Little Village'},inplace=True)

#drop rows with non-affordable housing
luxury = housing[(housing["Property Type"] == 'ARO')].index
housing.drop(luxury, inplace=True)

In [272]:
#Group cleaned housing data by neighborhood and count number of addresses 

housing_cleaned = housing.groupby(["Community Area Name"]).count()[["Address"]].reset_index()

In [273]:
#merge cleaned neighborhood data with housing data based on neighborhood name
housing_nbhoods = pd.merge(map_nbhoods,housing_cleaned,
                         left_on='pri_neigh',right_on='Community Area Name',
                         how='outer',indicator=True)

In [274]:
#replace NaN (neighborhoods with no matching housing units) with 0
housing_nbhoods['Address'] = housing_nbhoods['Address'].fillna(0)
housing_nbhoods['Address'] = housing_nbhoods['Address'].astype(int)
housing_nbhoods

Unnamed: 0,pri_neigh,sec_neigh,shape_area,shape_len,geometry,Community Area Name,Address,_merge
0,Grand Boulevard,BRONZEVILLE,4.849250e+07,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816...",Grand Boulevard,27,both
1,Printers Row,PRINTERS ROW,2.162138e+06,6864.247156,"POLYGON ((-87.62761 41.87437, -87.62760 41.873...",,0,left_only
2,United Center,UNITED CENTER,3.252051e+07,23101.363745,"POLYGON ((-87.66707 41.88885, -87.66707 41.888...",,0,left_only
3,Sheffield & DePaul,SHEFFIELD & DEPAUL,1.048259e+07,13227.049745,"POLYGON ((-87.65833 41.92166, -87.65835 41.922...",,0,left_only
4,Humboldt Park,HUMBOLDT PARK,1.250104e+08,46126.751351,"POLYGON ((-87.74060 41.88782, -87.74060 41.887...",Humboldt Park,36,both
...,...,...,...,...,...,...,...,...
93,Belmont Cragin,"BELMONT CRAGIN,HERMOSA",1.090994e+08,43311.706886,"POLYGON ((-87.74143 41.91698, -87.74141 41.916...",Belmont Cragin,4,both
94,Austin,AUSTIN,1.700378e+08,55473.345911,"POLYGON ((-87.75620 41.91547, -87.75588 41.915...",Austin,11,both
95,Gold Coast,GOLD COAST,7.165706e+06,13685.479377,"POLYGON ((-87.62646 41.91147, -87.62640 41.911...",,0,left_only
96,Boystown,BOYSTOWN,3.365779e+06,9780.268985,"POLYGON ((-87.64878 41.93999, -87.64927 41.939...",,0,left_only


In [275]:
#read dataframe as geodataframe

gdf = gpd.GeoDataFrame(housing_nbhoods, geometry='geometry')

#convert geodataframe to geojson
geosource = GeoJSONDataSource(geojson=gdf.to_json())

In [276]:
'''Create Affordable Housing by Neighborhood Map'''

# Define color palettes
palette = brewer['BuGn'][6]
palette = palette[::-1] # reverse order of colors so higher values have darker colors

# Instantiate LogColorMapper that exponentially maps numbers in a range, into a sequence of colors.
color_mapper = LogColorMapper(palette = palette, low = 0, high = 40)

# Define custom tick labels for color bar.
tick_labels = {1.35:'0',2.5:'1-2',4.6: '3-5',8.5: '6-10', 16:'11-19',
 30:'20+'}

# Create color bar
color_bar = ColorBar(title = 'Number of Housing Units',
                     color_mapper = color_mapper, 
                     label_standoff = 6,
                     width = 500, height = 20,
                     border_line_color = None,
                     location = (0,0),
                     orientation = 'horizontal',
                     ticker=FixedTicker(num_minor_ticks=0,
                                        ticks=[1.35,2.5,4.6,8.5,16,30]),
                     major_label_overrides = tick_labels,
                     major_tick_line_color = None,
                     major_label_text_align = 'center')

# Create figure object
p = figure(title = 'Affordable Housing per Neighborhood in Chicago')


# Add patch renderer to figure
neighborhoods = p.patches('xs','ys', source = geosource,
                   fill_color = {'field' :'Address',
                                 'transform' : color_mapper},
                   line_color = "gray", 
                   line_width = 0.25, 
                   fill_alpha = 1)

# Create hover tool
p.add_tools(HoverTool(renderers = [neighborhoods],
                      tooltips = [('Neighborhood','@pri_neigh'),
                                  ('No. of Housing Units','@Address')]))

#remove axes, axis labels, and grid lines
p.xaxis.major_tick_line_color = None
p.xaxis.minor_tick_line_color = None
p.yaxis.major_tick_line_color = None
p.yaxis.minor_tick_line_color = None
p.xaxis.major_label_text_font_size = '0pt'
p.yaxis.major_label_text_font_size = '0pt'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Specify layout
p.add_layout(color_bar, 'below')

show(p)

In [277]:
#read L_stops data
L_Stops = pd.read_csv('L_Stops.csv',sep=",")

L_Stops.head()

Unnamed: 0,STOP_ID,DIRECTION_ID,STOP_NAME,STATION_NAME,STATION_DESCRIPTIVE_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location
0,30162,W,18th (54th/Cermak-bound),18th,18th (Pink Line),40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)"
1,30161,E,18th (Loop-bound),18th,18th (Pink Line),40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)"
2,30022,N,35th/Archer (Loop-bound),35th/Archer,35th/Archer (Orange Line),40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)"
3,30023,S,35th/Archer (Midway-bound),35th/Archer,35th/Archer (Orange Line),40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)"
4,30214,S,35-Bronzeville-IIT (63rd-bound),35th-Bronzeville-IIT,35th-Bronzeville-IIT (Green Line),41120,True,False,False,True,False,False,False,False,False,False,"(41.831677, -87.625826)"


In [278]:
#Need to convert Location to Latitude and Longitude columns
new = L_Stops["Location"].str.split(",", n = 1, expand = True) 

#remove parentheses
new[0] = new[0].str.replace("(","") 
new[1] = new[1].str.replace(")","")

#convert type from string to float
new[0]= new[0].astype(float) 
new[1]= new[1].astype(float)

#split into 2 columns
L_Stops["Latitude"] = new[0]
L_Stops["Longitude"] = new[1]

In [279]:
#convert Latitude and Longitude to geometry datatype

GeoStops = gpd.GeoDataFrame(
    L_Stops, geometry=gpd.points_from_xy(L_Stops.Longitude, L_Stops.Latitude))

GeoStops.head()

Unnamed: 0,STOP_ID,DIRECTION_ID,STOP_NAME,STATION_NAME,STATION_DESCRIPTIVE_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location,Latitude,Longitude,geometry
0,30162,W,18th (54th/Cermak-bound),18th,18th (Pink Line),40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)",41.857908,-87.669147,POINT (-87.66915 41.85791)
1,30161,E,18th (Loop-bound),18th,18th (Pink Line),40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)",41.857908,-87.669147,POINT (-87.66915 41.85791)
2,30022,N,35th/Archer (Loop-bound),35th/Archer,35th/Archer (Orange Line),40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)",41.829353,-87.680622,POINT (-87.68062 41.82935)
3,30023,S,35th/Archer (Midway-bound),35th/Archer,35th/Archer (Orange Line),40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)",41.829353,-87.680622,POINT (-87.68062 41.82935)
4,30214,S,35-Bronzeville-IIT (63rd-bound),35th-Bronzeville-IIT,35th-Bronzeville-IIT (Green Line),41120,True,False,False,True,False,False,False,False,False,False,"(41.831677, -87.625826)",41.831677,-87.625826,POINT (-87.62583 41.83168)


In [280]:
'''Add counter for number of connecting lines per station'''

#Convert boolean to int
GeoStops["RED"] = GeoStops["RED"].astype(int)
GeoStops["BLUE"] = GeoStops["BLUE"].astype(int)
GeoStops["G"] = GeoStops["G"].astype(int)
GeoStops["Y"] = GeoStops["Y"].astype(int)
GeoStops["Pexp"] = GeoStops["Pexp"].astype(int)
GeoStops["Pnk"] = GeoStops["Pnk"].astype(int)
GeoStops["O"] = GeoStops["O"].astype(int)
GeoStops["BRN"] = GeoStops["BRN"].astype(int)

#add a column summing the number of lines that connect at each stop
GeoStops['Num_Lines'] = GeoStops[{"RED","BLUE","G","BRN","Pexp","Y","Pnk","O"}].sum(axis=1)
GeoStops_Lines = GeoStops.copy()

GeoStops_Lines.head()


Unnamed: 0,STOP_ID,DIRECTION_ID,STOP_NAME,STATION_NAME,STATION_DESCRIPTIVE_NAME,MAP_ID,ADA,RED,BLUE,G,...,P,Pexp,Y,Pnk,O,Location,Latitude,Longitude,geometry,Num_Lines
0,30162,W,18th (54th/Cermak-bound),18th,18th (Pink Line),40830,True,0,0,0,...,False,0,0,1,0,"(41.857908, -87.669147)",41.857908,-87.669147,POINT (-87.66915 41.85791),1
1,30161,E,18th (Loop-bound),18th,18th (Pink Line),40830,True,0,0,0,...,False,0,0,1,0,"(41.857908, -87.669147)",41.857908,-87.669147,POINT (-87.66915 41.85791),1
2,30022,N,35th/Archer (Loop-bound),35th/Archer,35th/Archer (Orange Line),40120,True,0,0,0,...,False,0,0,0,1,"(41.829353, -87.680622)",41.829353,-87.680622,POINT (-87.68062 41.82935),1
3,30023,S,35th/Archer (Midway-bound),35th/Archer,35th/Archer (Orange Line),40120,True,0,0,0,...,False,0,0,0,1,"(41.829353, -87.680622)",41.829353,-87.680622,POINT (-87.68062 41.82935),1
4,30214,S,35-Bronzeville-IIT (63rd-bound),35th-Bronzeville-IIT,35th-Bronzeville-IIT (Green Line),41120,True,0,0,1,...,False,0,0,0,0,"(41.831677, -87.625826)",41.831677,-87.625826,POINT (-87.62583 41.83168),1


In [281]:
#drop rows with an extra direction at the ends of lines

end_lines = GeoStops_Lines[(GeoStops_Lines["STOP_ID"] == 30077) | #Forest Park end of Blue Line
                           (GeoStops_Lines["STOP_ID"] == 30171) | #O'Hare end of Blue Line
                           (GeoStops_Lines["STOP_ID"] == 30249) | #End of Brown Line
                           (GeoStops_Lines["STOP_ID"] == 30182) | #End of Orange Line
                           (GeoStops_Lines["STOP_ID"] == 30203) | #End of Purple Line
                           (GeoStops_Lines["STOP_ID"] == 30089) | #95th end of Red Line
                           (GeoStops_Lines["STOP_ID"] == 30173) | #Howard end of Red Line
                           (GeoStops_Lines["STOP_ID"] == 30176) | #Howard end of Yellow Line
                           (GeoStops_Lines["STOP_ID"] == 30026) | #end of Yellow Line
                           (GeoStops_Lines["STOP_ID"] == 30139) | #Cottage Grove end of Green Line
                           (GeoStops_Lines["STOP_ID"] == 30057) | #Ashland end of Green Line
                           (GeoStops_Lines["STOP_ID"] == 30114) | #end of Pink Line
                           (GeoStops_Lines["STOP_ID"] == 30004)].index #Harlem end of Green Line

GeoStops_Lines.drop(end_lines, inplace=True)

In [282]:
#Group the number of lines by directions per station
Grouped_Stops = GeoStops_Lines.groupby(["STATION_NAME","MAP_ID","Latitude","Longitude"]).sum()[["Num_Lines"]].reset_index()

#must manually fix a few connectivity calculations 
Grouped_Stops.at[73, 'Num_Lines'] = 4 #fixing Howard - it was undercounted since only Purple Exp included in calculation
Grouped_Stops.at[61, 'Num_Lines'] = 3 # fixing Garfield - it branches and both branches weren't counted
Grouped_Stops.head()

Unnamed: 0,STATION_NAME,MAP_ID,Latitude,Longitude,Num_Lines
0,18th,40830,41.857908,-87.669147,2
1,35th-Bronzeville-IIT,41120,41.831677,-87.625826,2
2,35th/Archer,40120,41.829353,-87.680622,2
3,43rd,41270,41.816462,-87.619021,2
4,47th,41080,41.809209,-87.618826,2


In [283]:
'''Create Affordable Housing by Neighborhood with L Stops Map'''

#define neighborhoods/boundaries data source
geosource1 = GeoJSONDataSource(geojson=gdf.to_json())

# Define color palettes
palette = brewer['BuGn'][6]
palette = palette[::-1] # reverse order of colors so higher values have darker colors

# Instantiate LogColorMapper that exponentially maps numbers in a range, into a sequence of colors.
color_mapper = LogColorMapper(palette = palette, low = 0, high = 40)

# Define custom tick labels for color bar.
tick_labels = {1.35:'0',2.5:'1-2',4.6: '3-5',8.5: '6-10', 16:'11-19',
 30:'20+'}


# Create color bar.
color_bar = ColorBar(title = 'Number of Affordable Housing Units',
                     color_mapper = color_mapper, 
                     label_standoff = 6,
                     width = 500, height = 20,
                     border_line_color = None,
                     location = (0,0),
                     orientation = 'horizontal',
                     ticker=FixedTicker(num_minor_ticks=0,
                                        ticks=[1.35,2.5,4.6,8.5,16,30]),
                     major_label_overrides = tick_labels,
                     major_tick_line_color = None,
                     major_label_text_align = 'center')

# Convert stops dataframe to a ColumnDataSource
source_stops = ColumnDataSource(data=dict(
                        x=list(Grouped_Stops['Longitude']), 
                        y=list(Grouped_Stops['Latitude']),
                        sizes=list(Grouped_Stops['Num_Lines']),
                        #scaled so differences b/w number of lines/station is visible
                        circle_sizes=list(Grouped_Stops['Num_Lines']*2.5), 
                        stationname=list(Grouped_Stops['STATION_NAME'])))

#Create hover tool for stops
hover = HoverTool(names = ['hoverhere'],tooltips=[
    ("Stop", "@stationname"),
    ("Connectivity", "@sizes")],attachment="right")

# Create figure object.
p = figure(tools=[hover],title = 'Affordable Housing Rapid Transit Access in Chicago')



# Add patch renderer for neighborhood boundaries
neighborhoods = p.patches('xs','ys', source = geosource1,
                   fill_color = {'field' :'Address',
                                 'transform' : color_mapper},
                   line_color = "gray", 
                   line_width = 0.25, 
                   fill_alpha = 1)



# Add patch renderer for stops
stops = p.scatter(x='x',y='y', source=source_stops,
                  size='circle_sizes', 
                  line_color="#FF0000", 
                  fill_color="#FF0000",
                  fill_alpha=0.05,
                  name = 'hoverhere')


#Add Legend that varies by size
legend1 = Legend(items=[
    LegendItem(label='1 connection', renderers=[stops])
    ],glyph_height=19, glyph_width=6, location=(22,85), border_line_color = None, label_standoff=16,
                title='L Stations',title_text_align="left", title_standoff=15)

legend2 = Legend(items=[
    LegendItem(label='4 connections', renderers=[stops])],glyph_height=25, glyph_width=25,
                      location=(13,50), border_line_color = None, label_standoff=7)

legend3 = Legend(items=[
    LegendItem(label='8 connections', renderers=[stops])],glyph_height=50, glyph_width=50,
                      location=(1,1), border_line_color = None, label_standoff=-5)

p.add_layout(legend1)
p.add_layout(legend2)
p.add_layout(legend3)


#remove axes, axis labels, and grid lines
p.xaxis.major_tick_line_color = None
p.xaxis.minor_tick_line_color = None
p.yaxis.major_tick_line_color = None
p.yaxis.minor_tick_line_color = None
p.xaxis.major_label_text_font_size = '0pt'
p.yaxis.major_label_text_font_size = '0pt'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None


# Specify layout
p.add_layout(color_bar, 'below')

# Add Pan, Zoom, Reset, and Save Tools
p.add_tools(PanTool(),BoxZoomTool(),ZoomInTool(),ZoomOutTool(),ResetTool(),SaveTool())

show(p)

In [284]:
#export map as html file
output_file("transit-housing-bokeh.html")
save(p)

'/Users/annasienagrumman/Desktop/Chicago/GitHub/Housing/transit-housing-bokeh.html'

In [285]:
#make a copy of dataframe with L stops that has shapely geometry datatype
Stops_Geom = GeoStops_Lines.copy()
Stops_Geom.drop_duplicates(subset='MAP_ID', keep="last", inplace=True) #remove dups so there's one stop / station
Stops_Geom = Stops_Geom[["MAP_ID","geometry"]]
Stops_Geom.head()

Unnamed: 0,MAP_ID,geometry
1,40830,POINT (-87.66915 41.85791)
3,40120,POINT (-87.68062 41.82935)
5,41120,POINT (-87.62583 41.83168)
7,41270,POINT (-87.61902 41.81646)
9,40130,POINT (-87.61849 41.80209)


In [288]:
#join Stops and Grouped Stops
Full_Stops = pd.merge(Stops_Geom, Grouped_Stops, on="MAP_ID")
Full_Stops = Full_Stops[["MAP_ID","geometry","STATION_NAME","Num_Lines"]]
Full_Stops.head()

Unnamed: 0,MAP_ID,geometry,STATION_NAME,Num_Lines
0,40830,POINT (-87.66915 41.85791),18th,2
1,40120,POINT (-87.68062 41.82935),35th/Archer,2
2,41120,POINT (-87.62583 41.83168),35th-Bronzeville-IIT,2
3,41270,POINT (-87.61902 41.81646),43rd,2
4,40130,POINT (-87.61849 41.80209),51st,2


In [289]:
#spatial join on geometry to match stops with neighborhoods
spatial_joined = gpd.sjoin(housing_nbhoods, Full_Stops, how='left',op='contains')
spatial_joined = spatial_joined[["pri_neigh","Address","MAP_ID","STATION_NAME","Num_Lines"]]
spatial_joined.head()

  "(%s != %s)" % (left_df.crs, right_df.crs)


Unnamed: 0,pri_neigh,Address,MAP_ID,STATION_NAME,Num_Lines
0,Grand Boulevard,27,40130.0,51st,2.0
0,Grand Boulevard,27,41080.0,47th,2.0
0,Grand Boulevard,27,41270.0,43rd,2.0
0,Grand Boulevard,27,40300.0,Indiana,2.0
1,Printers Row,0,,,


In [290]:
#look at neighborhoods with no L stops
spatial_joined_null = spatial_joined[spatial_joined['MAP_ID'].isnull()]
spatial_joined_null

Unnamed: 0,pri_neigh,Address,MAP_ID,STATION_NAME,Num_Lines
1,Printers Row,0,,,
4,Humboldt Park,36,,,
9,Avalon Park,1,,,
10,Burnside,0,,,
11,Hermosa,0,,,
14,Calumet Heights,0,,,
15,East Side,0,,,
16,West Pullman,3,,,
18,New City,4,,,
21,Ashburn,1,,,


In [291]:
#replace neighborhoods with no L stops with 0
spatial_joined['MAP_ID'] = spatial_joined['MAP_ID'].fillna(0)
spatial_joined['Num_Lines'] = spatial_joined['Num_Lines'].fillna(0)
spatial_joined.head()

Unnamed: 0,pri_neigh,Address,MAP_ID,STATION_NAME,Num_Lines
0,Grand Boulevard,27,40130.0,51st,2.0
0,Grand Boulevard,27,41080.0,47th,2.0
0,Grand Boulevard,27,41270.0,43rd,2.0
0,Grand Boulevard,27,40300.0,Indiana,2.0
1,Printers Row,0,0.0,,0.0


In [292]:
#Count number of stops per neighborhood
stops_nbhoods = spatial_joined.groupby(["pri_neigh","Address","Num_Lines"]).count()[["MAP_ID"]].reset_index()
stops_nbhoods.tail(50)

Unnamed: 0,pri_neigh,Address,Num_Lines,MAP_ID
63,Millenium Park,0,0.0,1
64,Montclare,1,0.0,1
65,Morgan Park,0,0.0,1
66,Mount Greenwood,0,0.0,1
67,Museum Campus,0,0.0,1
68,Near South Side,7,2.0,2
69,Near South Side,7,4.0,1
70,New City,4,0.0,1
71,North Center,1,2.0,2
72,North Lawndale,33,2.0,4


In [293]:
#Add new metric that measures number of stops*connectivity / neighborhood
stops_nbhoods["conn_dist"] = stops_nbhoods["Num_Lines"]*stops_nbhoods["MAP_ID"]
stops_nbhoods.head()

Unnamed: 0,pri_neigh,Address,Num_Lines,MAP_ID,conn_dist
0,Albany Park,3,1.0,1,1.0
1,Albany Park,3,2.0,2,4.0
2,Andersonville,0,0.0,1,0.0
3,Archer Heights,0,0.0,1,0.0
4,Armour Square,0,2.0,1,2.0


In [294]:
#Take summed conn_dist per neighborhood
stops_nbhoods = stops_nbhoods.groupby(["pri_neigh","Address"]).sum()[["conn_dist"]].reset_index()
stops_nbhoods.head(20)

Unnamed: 0,pri_neigh,Address,conn_dist
0,Albany Park,3,5.0
1,Andersonville,0,0.0
2,Archer Heights,0,0.0
3,Armour Square,0,2.0
4,Ashburn,1,0.0
5,Auburn Gresham,6,0.0
6,Austin,11,10.0
7,Avalon Park,1,0.0
8,Avondale,1,2.0
9,Belmont Cragin,4,0.0


In [295]:
#Adjust conn_dist to weight its score equally with housing
stops_nbhoods["adj_conn_dist"] = (stops_nbhoods["conn_dist"]+1)*4.5
stops_nbhoods.head()

Unnamed: 0,pri_neigh,Address,conn_dist,adj_conn_dist
0,Albany Park,3,5.0,27.0
1,Andersonville,0,0.0,4.5
2,Archer Heights,0,0.0,4.5
3,Armour Square,0,2.0,13.5
4,Ashburn,1,0.0,4.5


In [296]:
#Calculate housing-transit score
stops_nbhoods["housing_transit_score"] = ((stops_nbhoods["Address"]+1)*(stops_nbhoods["adj_conn_dist"]))-4.5
stops_nbhoods.head()

Unnamed: 0,pri_neigh,Address,conn_dist,adj_conn_dist,housing_transit_score
0,Albany Park,3,5.0,27.0,103.5
1,Andersonville,0,0.0,4.5,0.0
2,Archer Heights,0,0.0,4.5,0.0
3,Armour Square,0,2.0,13.5,9.0
4,Ashburn,1,0.0,4.5,4.5


In [297]:
#Merge neighborhoods back with geometry so data can be mapped in Bokeh
scores = pd.merge(stops_nbhoods,housing_nbhoods, on="pri_neigh")
scores = scores[["pri_neigh","geometry","Address_x","conn_dist","housing_transit_score"]]
scores["Address_x"] = scores["Address_x"].astype(int)
scores["conn_dist"] = scores["conn_dist"].astype(int)
scores["housing_transit_score"] = scores["housing_transit_score"].astype(int)
scores.tail(45)

Unnamed: 0,pri_neigh,geometry,Address_x,conn_dist,housing_transit_score
53,Lower West Side,"POLYGON ((-87.63516 41.85772, -87.63532 41.857...",13,8,562
54,Magnificent Mile,"POLYGON ((-87.62373 41.88985, -87.62401 41.888...",0,0,0
55,Mckinley Park,"POLYGON ((-87.65611 41.82331, -87.65630 41.823...",0,2,9
56,Millenium Park,"POLYGON ((-87.61749 41.88092, -87.61825 41.880...",0,0,0
57,Montclare,"POLYGON ((-87.78871 41.91721, -87.78873 41.917...",1,0,4
58,Morgan Park,"POLYGON ((-87.64215 41.68508, -87.64249 41.685...",0,0,0
59,Mount Greenwood,"POLYGON ((-87.69646 41.70714, -87.69644 41.706...",0,0,0
60,Museum Campus,"POLYGON ((-87.61235 41.86747, -87.61235 41.867...",0,0,0
61,Near South Side,"POLYGON ((-87.61666 41.85779, -87.61657 41.857...",7,8,319
62,New City,"POLYGON ((-87.63546 41.79448, -87.63599 41.794...",4,0,18


In [298]:
#Rename columns and create table of the 10 best neighborhoods ranked by housing-transit score
scores_table = scores[["pri_neigh","Address_x","conn_dist","housing_transit_score"]]
scores_table.rename({'pri_neigh':'Neighborhood','Address_x':'Housing Score','conn_dist':'Transit Score',},axis='columns',inplace=True)
scores_ranked = scores_table.sort_values(by="housing_transit_score", ascending=False)
top_10_scores = scores_ranked.head(10)
top_10_scores.reset_index(drop=True, inplace=True)
top_10_scores

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Neighborhood,Housing Score,Transit Score,housing_transit_score
0,North Lawndale,33,8,1372
1,Grand Boulevard,27,8,1129
2,Garfield Park,14,12,873
3,Austin,11,10,589
4,Uptown,13,8,562
5,Lower West Side,13,8,562
6,Loop,1,58,526
7,River North,8,12,522
8,West Town,11,6,373
9,Lake View,3,20,373


In [299]:

'''Create Map that scores neighborhoods by housing and transit'''

#read dataframe as geodataframe

gdf_scores = gpd.GeoDataFrame(scores, geometry='geometry')

#convert geodataframe to geojson
geosource_scores = GeoJSONDataSource(geojson=gdf_scores.to_json())


# Define color palettes
palette = brewer['Reds'][6]
palette = palette[::-1] # reverse order of colors so higher values have darker colors

# Instantiate LogColorMapper that exponentially maps numbers in a range, into a sequence of colors.
color_mapper = LogColorMapper(palette = palette, low = 0, high = 1400)

# Define custom tick labels for color bar.
tick_labels = {1.8:'0-5',6.2:'5-20',21: '20-45',70: '45-100', 225:'100-300', 760:'300+'}

# Create color bar
color_bar = ColorBar(title = 'Housing-Transit Score',
                     color_mapper = color_mapper, 
                     label_standoff = 6,
                     width = 500, height = 20,
                     border_line_color = None,
                     location = (0,0),
                     orientation = 'horizontal',
                     major_tick_line_color = None,
                     ticker=FixedTicker(num_minor_ticks=0,ticks=[1.8,6.2,21,70,225,760]),
                     major_label_overrides = tick_labels,
                     major_label_text_align = 'center')


                     

# Create figure object
fig = figure(title = 'Optimal Neighborhoods in Chicago by Housing and Transit Access')


# Add patch renderer to figure
neighborhoods = fig.patches('xs','ys', source = geosource_scores, 
                          fill_color = {'field' :'housing_transit_score','transform' : color_mapper},
                          line_color = "gray", 
                          line_width = 0.25, 
                          fill_alpha = 1)

# Create hover tool
fig.add_tools(HoverTool(renderers = [neighborhoods],
                      tooltips = [('Neighborhood','@pri_neigh'),
                                  ('Score','@housing_transit_score'),
                                  ('No. of Housing Units','@Address_x'),
                                  ('Transit Score','@conn_dist')]))

#remove axes, axis labels, and grid lines
fig.xaxis.major_tick_line_color = None
fig.xaxis.minor_tick_line_color = None
fig.yaxis.major_tick_line_color = None
fig.yaxis.minor_tick_line_color = None
fig.xaxis.major_label_text_font_size = '0pt'
fig.yaxis.major_label_text_font_size = '0pt'
fig.xgrid.grid_line_color = None
fig.ygrid.grid_line_color = None

# Specify layout
fig.add_layout(color_bar, 'below')

show(fig)

In [None]:
#export map as html file
output_file("transit-housing-score.html")
save(fig)