# Map Neighborhood
In this notebook, I use San Francisco development pipeline data to create online maps of residential and non-residential construction by Zillow neighborhood.


In [108]:
#import packages
import pandas as pd
import numpy as np
import re as re
import json    # library for working with JSON-formatted text strings
import requests  # library for accessing content from web URLs
import pprint  # library for making Python data structures readable
pp = pprint.PrettyPrinter()
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
from geopy.distance import great_circle
# magic command to display matplotlib plots inline within the ipython notebook
%matplotlib inline

pd.options.display.float_format = '{:.2f}'.format #display series descriptions in normal format (i.e. not scientific notation)

In [109]:
#specify file paths
# NOTE(review): both paths are absolute and specific to one machine, so the notebook
# only runs unchanged there.
# Both values end in "/" while later cells append names that start with "/", so the
# joined paths contain a doubled slash.
import_path = "/Users/briangoggin/Dropbox/CP 255/SF Development Project/Intermediate Files/"
code_path = "/Users/briangoggin/Dropbox/CP 255/SF Development Project/Code/Maps/"

# Section 1: Create polygons with recently completed development

## Determine constructed units over time

First, I assume that units were constructed in the latest quarter for which the project had "construction" as a project status. I identify these observations.

In [110]:
# Load the residential pipeline table. Later cells read its status, lot_number,
# quarter_order, quarter, lon, lat, net_units and net_affordable_units columns.
full_df = pd.read_csv(import_path+"/pipeline.csv")

In [111]:
# For every lot, find the last quarter in which it was reported with status
# "CONSTRUCTION", then keep only the pipeline rows from that quarter.
in_construction = full_df.loc[full_df['status'] == "CONSTRUCTION"]
cons_end = (
    in_construction
    .groupby(['lot_number'], as_index=False)['quarter_order']
    .max()
    .rename(columns={'quarter_order': 'consdate'})
)
# attach each lot's last construction quarter to all of its pipeline rows
merged = full_df.merge(cons_end, how="outer", on='lot_number')
is_last_construction_quarter = merged['consdate'] == merged['quarter_order']
# rows from quarter 18 are excluded
# (presumably the most recent quarter, where construction may still be ongoing - TODO confirm)
not_quarter_18 = merged['quarter_order'] != 18
full_df2 = merged[is_last_construction_quarter & not_quarter_18]

## Create Point Data

In [112]:
# One shapely Point per completed-project row (x = lon, y = lat), attached as the
# geometry of a GeoDataFrame labelled EPSG:4326.
crs = {'init' :'epsg:4326'}
geometry = [Point(lon, lat) for lon, lat in zip(full_df2.lon, full_df2.lat)]
construction = GeoDataFrame(full_df2, geometry=geometry, crs=crs)
#construction.plot();

## Import Zillow Neighborhood Boundaries

In [113]:
# Read the Zillow neighborhood shapefile from the raw-data folder.
root = '/Users/briangoggin/Dropbox/CP 255/SF Development Project/Raw Data'
shapefile_path = root+'/ZillowNeighborhoods-CA/ZillowNeighborhoods-CA.shp'
boundaries = gpd.read_file(shapefile_path)

In [114]:
#keep only SF neighborhoods (rows whose COUNTY is 'San Francisco')
in_sf_county = boundaries['COUNTY'] == 'San Francisco'
boundaries = boundaries[in_sf_county]
#boundaries.plot();

In [115]:
#put boundaries into the same geographic coordinate system as the points (EPSG:4326)
# Assigning to `.crs` only relabels a layer; it never moves coordinates. So the layer is
# labelled only when it carries no CRS at all, and reprojected with to_crs() otherwise.
# NOTE(review): confirm which CRS the Zillow shapefile declares in its .prj file.
target_crs = {'init' :'epsg:4326'}
if boundaries.crs:
    boundaries = boundaries.to_crs(target_crs)
else:
    boundaries.crs = target_crs

## Combine Layers

In [116]:
#First, spatial join between points and neighborhood boundaries. Set 'how' to 'right' to preserve polygon geometries.
# Each project point is matched to the neighborhood polygon it lies within; the result
# carries the polygon (not the point) as its geometry.
# NOTE(review): newer geopandas releases spell the `op` argument `predicate` - confirm
# against the installed version.
nbcum = gpd.sjoin(construction, boundaries, how = 'right', op='within')

In [117]:
#Next, dissolve by neighborhoods to get sum of units
# dissolve() collapses all rows sharing (NAME, REGIONID) into one row per neighborhood
# and sums the remaining numeric columns.
nbcum['REGIONID'] = nbcum['REGIONID'].astype(int)
nbcum = nbcum[['NAME','REGIONID', 'geometry', 'net_units', 'net_affordable_units']]
nb_map = nbcum.dissolve(by=['NAME', 'REGIONID'], aggfunc='sum')

# Missing sums become 0 and the totals are stored as integers. The result is assigned
# back instead of using fillna(inplace=True) on a column selection, which is not
# guaranteed to write through to nb_map.
for col in ('net_units', 'net_affordable_units'):
    nb_map[col] = nb_map[col].fillna(0).astype(int)

# Copy the two index levels into ordinary string columns for the exported GeoJSON.
# They are read straight from the index rather than parsed out of str(tuple), which
# mangled names containing an apostrophe or comma and left a leading space on RegionID.
# NOTE(review): RegionID is now e.g. '272885' rather than ' 272885' - confirm that the
# consumers of the exported files do not rely on the leading space.
nb_map['name'] = nb_map.index.get_level_values('NAME').astype(str)
nb_map['RegionID'] = nb_map.index.get_level_values('REGIONID').astype(str)
#nb_map.head(20)

In [118]:
#Define function to create categories for javascript maps. Each category will be separate dot color
def cats(value):
    """Return the map color category (0-4) for a row's net unit count.

    Parameters
    ----------
    value : mapping or pandas row
        Must support ``value['net_units']`` and yield a number.

    Returns
    -------
    int
        0 for up to 50 units, 1 for 51-200, 2 for 201-500, 3 for 501-2000,
        4 for anything larger.

    Notes
    -----
    Negative totals fall in category 0. The previous version let them fall through
    to the final ``else`` and reported them as category 4, the same bucket as
    neighborhoods with more than 2000 units.
    NaN compares false against every bound and therefore still yields 4.
    Later cells of this notebook redefine ``cats`` with different thresholds, so
    this definition is only in effect until then.
    """
    units = value['net_units']
    if units <= 50:
        return 0
    if units <= 200:
        return 1
    if units <= 500:
        return 2
    if units <= 2000:
        return 3
    return 4


# Call cats once per row (axis = 1) and store the returned category code in 'unitcat'.
nb_map['unitcat'] = nb_map.apply(cats, axis = 1)

In [119]:
#export to geojson object, wrapped in a JavaScript variable assignment
export_path = code_path
# Serialize before opening the file: open(..., 'w') truncates the target immediately,
# so a failure inside to_json() would otherwise leave an empty nb_recent.js behind.
js_source = 'var dataset1 = {};'.format(nb_map.to_json())
with open(export_path+'/Neighborhood Maps/nb_recent.js', 'w') as f:
    f.write(js_source)

In [120]:
#export to csv for affordability analysis
# The CSV goes to the intermediate-files folder. export_path is now the variable that is
# actually used; before, it was assigned and then ignored in favor of import_path.
export_path = import_path
nb_map.to_csv(export_path+'/completed.csv')

In [121]:
# Total of net_units across all neighborhood rows of nb_map.
nb_map['net_units'].sum()

27467

# Section 2: Create polygons with currently proposed development only

In [122]:
#isolate currently proposed development: the rows reported for quarter 'Q4-2016'
is_q4_2016 = full_df['quarter'] == 'Q4-2016'
current = full_df[is_q4_2016]

In [123]:
#create geodataframe for current development
# one Point per row, x = lon and y = lat, labelled EPSG:4326
crs = {'init' :'epsg:4326'}
geometry = list(map(Point, current.lon, current.lat))
current_geo = GeoDataFrame(current, geometry=geometry, crs=crs)

In [124]:
#First, spatial join between points and neighborhood boundaries. Set 'how' to 'right' to preserve polygon geometries.
# Each currently proposed project point is matched to the neighborhood polygon it lies
# within; the result carries the polygon as its geometry.
# NOTE(review): newer geopandas releases spell the `op` argument `predicate` - confirm
# against the installed version.
final_geo = gpd.sjoin(current_geo, boundaries, how = 'right', op='within')

In [125]:
#Next, dissolve by neighborhoods to get sum of units
# dissolve() collapses all rows sharing (NAME, REGIONID) into one row per neighborhood
# and sums the remaining numeric columns.
# Note: this cell overwrites final_geo with the dissolved result, whose NAME/REGIONID live
# in the index, so it can only be re-run after re-running the spatial join cell.
final_geo['REGIONID'] = final_geo['REGIONID'].astype(int)
final_geo = final_geo[['NAME','REGIONID', 'geometry', 'net_units', 'net_affordable_units']]
final_geo = final_geo.dissolve(by=['NAME', 'REGIONID'], aggfunc='sum')

# Missing sums become 0 and the totals are stored as integers. The result is assigned
# back instead of using fillna(inplace=True) on a column selection, which is not
# guaranteed to write through to final_geo.
for col in ('net_units', 'net_affordable_units'):
    final_geo[col] = final_geo[col].fillna(0).astype(int)

# Copy the two index levels into ordinary string columns for the exported GeoJSON.
# They are read straight from the index rather than parsed out of str(tuple), which
# mangled names containing an apostrophe or comma and left a leading space on RegionID.
# NOTE(review): RegionID is now e.g. '272885' rather than ' 272885' - confirm that the
# consumers of the exported files do not rely on the leading space.
final_geo['name'] = final_geo.index.get_level_values('NAME').astype(str)
final_geo['RegionID'] = final_geo.index.get_level_values('REGIONID').astype(str)

In [126]:
#Define function to create categories for javascript maps. Each category will be separate dot color
def cats(value):
    """Return the map color category (0-4) for a row's net unit count.

    Parameters
    ----------
    value : mapping or pandas row
        Must support ``value['net_units']`` and yield a number.

    Returns
    -------
    int
        0 for up to 50 units, 1 for 51-200, 2 for 201-500, 3 for 501-2000,
        4 for anything larger.

    Notes
    -----
    Negative totals fall in category 0. The previous version let them fall through
    to the final ``else`` and reported them as category 4, the same bucket as
    neighborhoods with more than 2000 units.
    NaN compares false against every bound and therefore still yields 4.
    ``cats`` is defined several times in this notebook with different thresholds;
    whichever definition ran last is the one in effect.
    """
    units = value['net_units']
    # inclusive upper bound of categories 0..3, in ascending order
    for category, upper_bound in enumerate((50, 200, 500, 2000)):
        if units <= upper_bound:
            return category
    return 4


# Call cats once per row (axis = 1) and store the returned category code in 'unitcat'.
final_geo['unitcat'] = final_geo.apply(cats, axis = 1)

In [127]:
#export to geojson object, wrapped in a JavaScript variable assignment
export_path = code_path
# Serialize before opening the file: open(..., 'w') truncates the target immediately,
# so a failure inside to_json() would otherwise leave an empty nb_current.js behind.
js_source = 'var dataset2 = {};'.format(final_geo.to_json())
with open(export_path+'/Neighborhood Maps/nb_current.js', 'w') as f:
    f.write(js_source)

# Section 3: Create polygons with recently completed non-residential development

In [128]:
#import commercial development data
# Later cells read its status, lot_number, quarter_order, quarter, lon, lat,
# comm_sqft and comm_sqft_net columns.
nr_df = pd.read_csv(import_path+"/pipeline_com.csv")

In [129]:
#some initial cleaning: treat missing commercial square footage as 0
# The column list was previously bound to the name `vars`, which shadowed the Python
# builtin of the same name for the rest of the notebook.
sqft_cols = ['comm_sqft', 'comm_sqft_net']
nr_df[sqft_cols] = nr_df[sqft_cols].fillna(0)

In [130]:
#create dataframe for construction ending over time. This is a method to identify where construction ended,
#at which point the data drops out of the pipeline
under_construction_nr = nr_df.loc[nr_df['status'] == "CONSTRUCTION"]
cons_end_nr = (
    under_construction_nr
    .groupby(['lot_number'], as_index=False)['quarter_order']
    .max()
    .rename(columns={'quarter_order': 'consdate'})
)
# attach each lot's last construction quarter to all of its pipeline rows
merged_nr = nr_df.merge(cons_end_nr, how="outer", on='lot_number')
is_last_construction_quarter = merged_nr['consdate'] == merged_nr['quarter_order']
# rows from quarter 18 are excluded
# (presumably the most recent quarter, where construction may still be ongoing - TODO confirm)
not_quarter_18 = merged_nr['quarter_order'] != 18
nr_df2 = merged_nr[is_last_construction_quarter & not_quarter_18]

In [131]:
#create geodataframe for the non-residential rows kept in nr_df2
# one Point per row, x = lon and y = lat, labelled EPSG:4326
crs = {'init' :'epsg:4326'}
geometry = [Point(lon, lat) for lon, lat in zip(nr_df2.lon, nr_df2.lat)]
nr_geo = GeoDataFrame(nr_df2, geometry=geometry, crs=crs)

In [132]:
#First, spatial join between points and neighborhood boundaries. Set 'how' to 'right' to preserve polygon geometries.
# Each non-residential project point is matched to the neighborhood polygon it lies
# within; the result carries the polygon as its geometry.
# NOTE(review): newer geopandas releases spell the `op` argument `predicate` - confirm
# against the installed version.
final_geo = gpd.sjoin(nr_geo, boundaries, how = 'right', op='within')

In [133]:
# Preview the joined rows: project columns alongside the neighborhood columns and polygon.
final_geo.head()

Unnamed: 0_level_0,Unnamed: 0,address,affordable_units,cie,cie_net,comm_sqft,comm_sqft_net,desc,ind,ind_net,...,lat,quarter_order,consdate,index_left,CITY,COUNTY,NAME,REGIONID,STATE,geometry
index_right,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
789,739.0,101 Donahue St,,,,0.0,0.0,,,,...,37.72,6.0,6.0,4201.0,San Francisco,San Francisco,Bayview,272885.0,CA,"POLYGON ((-122.380496615061 37.7507156475919, ..."
789,654.0,421 Hudson Av,,,,0.0,0.0,,,,...,37.73,6.0,6.0,4216.0,San Francisco,San Francisco,Bayview,272885.0,CA,"POLYGON ((-122.380496615061 37.7507156475919, ..."
789,745.0,198 Coleman St,,,,0.0,0.0,,,,...,37.73,6.0,6.0,4219.0,San Francisco,San Francisco,Bayview,272885.0,CA,"POLYGON ((-122.380496615061 37.7507156475919, ..."
789,725.0,201 Donahue St,,,,0.0,0.0,,,,...,37.73,6.0,6.0,4225.0,San Francisco,San Francisco,Bayview,272885.0,CA,"POLYGON ((-122.380496615061 37.7507156475919, ..."
789,741.0,50 Jerrold Av,,,,0.0,0.0,,,,...,37.73,6.0,6.0,4237.0,San Francisco,San Francisco,Bayview,272885.0,CA,"POLYGON ((-122.380496615061 37.7507156475919, ..."


In [134]:
#Next, dissolve by neighborhoods to get sum of commercial square footage
# dissolve() collapses all rows sharing (NAME, REGIONID) into one row per neighborhood
# and sums the remaining numeric columns.
# Note: this cell overwrites final_geo with the dissolved result, whose NAME/REGIONID live
# in the index, so it can only be re-run after re-running the spatial join cell.
final_geo['REGIONID'] = final_geo['REGIONID'].astype(int)
final_geo = final_geo[['NAME','REGIONID', 'geometry', 'comm_sqft_net', 'comm_sqft']]
final_geo = final_geo.dissolve(by=['NAME', 'REGIONID'], aggfunc='sum')

# Missing sums become 0 and the totals are stored as integers. The result is assigned
# back instead of using fillna(inplace=True) on a column selection, which is not
# guaranteed to write through to final_geo.
for col in ('comm_sqft_net', 'comm_sqft'):
    final_geo[col] = final_geo[col].fillna(0).astype(int)

# Copy the two index levels into ordinary string columns for the exported GeoJSON.
# They are read straight from the index rather than parsed out of str(tuple), which
# mangled names containing an apostrophe or comma and left a leading space on RegionID.
# NOTE(review): RegionID is now e.g. '272885' rather than ' 272885' - confirm that the
# consumers of the exported files do not rely on the leading space.
final_geo['name'] = final_geo.index.get_level_values('NAME').astype(str)
final_geo['RegionID'] = final_geo.index.get_level_values('REGIONID').astype(str)

In [135]:
# Preview the dissolved result: one row per (NAME, REGIONID) neighborhood.
final_geo.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,geometry,comm_sqft_net,comm_sqft,name,RegionID
NAME,REGIONID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bayview,272885,"POLYGON ((-122.380496615061 37.7507156475919, ...",122240,99840,Bayview,272885
Bernal Heights,268020,"POLYGON ((-122.403862539662 37.7494769720709, ...",-1410,0,Bernal Heights,268020
Castro-Upper Market,276241,"POLYGON ((-122.426029676707 37.7697778521009, ...",-21008,61298,Castro-Upper Market,276241
Chinatown,114291,"POLYGON ((-122.41020215338 37.7974876723953, -...",0,0,Chinatown,114291
Crocker Amazon,273404,"POLYGON ((-122.454085201694 37.7082065558492, ...",5240,5240,Crocker Amazon,273404


In [136]:
# Distribution of net commercial square footage across neighborhoods, at the listed percentiles.
final_geo['comm_sqft_net'].describe(percentiles = [.05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95])

count        34.00
mean     131730.71
std      414654.51
min      -34895.00
5%       -12289.55
10%       -4236.50
20%        -152.00
30%           0.00
40%           0.00
50%           0.00
60%        1598.20
70%        4026.80
80%        8979.20
90%      268676.50
95%      994280.40
max     1720114.00
Name: comm_sqft_net, dtype: float64

In [137]:
#Define function to create categories for javascript maps. Each category will be separate dot color
def cats(value):
    """Return the map color category (0-5) for a row's net commercial square footage.

    ``value`` must support ``value['comm_sqft_net']``. Categories:
    0 below -5000, 1 for [-5000, 0), 2 for [0, 5000), 3 for [5000, 10000),
    4 for [10000, 25000), 5 for everything else (25000 and above, and NaN).
    """
    sqft = value['comm_sqft_net']
    # Bounds are tested in ascending order, so each test only needs the upper edge.
    if sqft < -5000:
        return 0
    if sqft < 0:
        return 1
    if sqft < 5000:
        return 2
    if sqft < 10000:
        return 3
    if sqft < 25000:
        return 4
    return 5


# Call cats once per row (axis = 1) and store the returned category code in 'unitcat'.
final_geo['unitcat'] = final_geo.apply(cats, axis = 1)

In [138]:
#make variables into a string before export
# After this cell comm_sqft_net holds strings, so the numeric comparisons in cats can no
# longer be applied to final_geo; re-run the earlier cells first if 'unitcat' has to be
# recomputed.
final_geo['comm_sqft_net'] = final_geo['comm_sqft_net'].astype(str)

In [139]:
#export to geojson object, wrapped in a JavaScript variable assignment
export_path = code_path
# Serialize before opening the file: open(..., 'w') truncates the target immediately,
# so a failure inside to_json() would otherwise leave an empty nb_comm.js behind.
js_source = 'var dataset3 = {};'.format(final_geo.to_json())
with open(export_path+'/Neighborhood Maps/nb_comm.js', 'w') as f:
    f.write(js_source)

# Section 4: Create polygons with currently proposed non-residential development

In [140]:
#isolate currently proposed development: the non-residential rows reported for quarter 'Q4-2016'
is_q4_2016 = nr_df['quarter'] == 'Q4-2016'
current = nr_df[is_q4_2016]

In [141]:
#create geodataframe for current development
# one Point per row, x = lon and y = lat, labelled EPSG:4326
crs = {'init' :'epsg:4326'}
geometry = list(map(Point, current.lon, current.lat))
current_geo = GeoDataFrame(current, geometry=geometry, crs=crs)

In [142]:
#First, spatial join between points and neighborhood boundaries. Set 'how' to 'right' to preserve polygon geometries.
# Each currently proposed non-residential project point is matched to the neighborhood
# polygon it lies within; the result carries the polygon as its geometry.
# NOTE(review): newer geopandas releases spell the `op` argument `predicate` - confirm
# against the installed version.
final_geo = gpd.sjoin(current_geo, boundaries, how = 'right', op='within')

In [143]:
#Next, dissolve by neighborhoods to get sum of commercial square footage
# dissolve() collapses all rows sharing (NAME, REGIONID) into one row per neighborhood
# and sums the remaining numeric columns.
# Note: this cell overwrites final_geo with the dissolved result, whose NAME/REGIONID live
# in the index, so it can only be re-run after re-running the spatial join cell.
final_geo['REGIONID'] = final_geo['REGIONID'].astype(int)
final_geo = final_geo[['NAME','REGIONID', 'geometry', 'comm_sqft_net', 'comm_sqft']]
final_geo = final_geo.dissolve(by=['NAME', 'REGIONID'], aggfunc='sum')

# Missing sums become 0 and the totals are stored as integers. The result is assigned
# back instead of using fillna(inplace=True) on a column selection, which is not
# guaranteed to write through to final_geo.
for col in ('comm_sqft_net', 'comm_sqft'):
    final_geo[col] = final_geo[col].fillna(0).astype(int)

# Copy the two index levels into ordinary string columns for the exported GeoJSON.
# They are read straight from the index rather than parsed out of str(tuple), which
# mangled names containing an apostrophe or comma and left a leading space on RegionID.
# NOTE(review): RegionID is now e.g. '272885' rather than ' 272885' - confirm that the
# consumers of the exported files do not rely on the leading space.
final_geo['name'] = final_geo.index.get_level_values('NAME').astype(str)
final_geo['RegionID'] = final_geo.index.get_level_values('REGIONID').astype(str)

In [144]:
#Define function to create categories for javascript maps. Each category will be separate dot color
def cats(value):
    """Return the map color category (0-5) for a row's net commercial square footage.

    ``value`` must support ``value['comm_sqft_net']``. A value gets the position of
    the first bound it is strictly below: 0 below -5000, 1 below 0, 2 below 5000,
    3 below 10000, 4 below 25000. Anything not below 25000 (including NaN) gets 5.
    """
    sqft = value['comm_sqft_net']
    # exclusive upper bound of categories 0..4, in ascending order
    upper_bounds = (-5000, 0, 5000, 10000, 25000)
    for category, bound in enumerate(upper_bounds):
        if sqft < bound:
            return category
    return 5


# Call cats once per row (axis = 1) and store the returned category code in 'unitcat'.
final_geo['unitcat'] = final_geo.apply(cats, axis = 1)

In [145]:
#make variables into a string before export
# After this cell comm_sqft_net holds strings, so the numeric comparisons in cats can no
# longer be applied to final_geo; re-run the earlier cells first if 'unitcat' has to be
# recomputed.
final_geo['comm_sqft_net'] = final_geo['comm_sqft_net'].astype(str)

In [146]:
#export to geojson object, wrapped in a JavaScript variable assignment
export_path = code_path
# Serialize before opening the file: open(..., 'w') truncates the target immediately,
# so a failure inside to_json() would otherwise leave an empty nb_comm_curr.js behind.
js_source = 'var dataset4 = {};'.format(final_geo.to_json())
with open(export_path+'/Neighborhood Maps/nb_comm_curr.js', 'w') as f:
    f.write(js_source)