# Map Neighborhood
In this notebook, I use San Francisco development pipeline data to create online maps of residential and non-residential construction by neighborhood, using the 41 neighborhood boundaries imported below. 


In [None]:
#import packages
import pandas as pd
import numpy as np
import re as re
import json    # library for working with JSON-formatted text strings
import requests  # library for accessing content from web URLs
import pprint  # library for making Python data structures readable
pp = pprint.PrettyPrinter()
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
from geopy.distance import great_circle
# magic command to display matplotlib plots inline within the ipython notebook
%matplotlib inline

pd.options.display.float_format = '{:.2f}'.format #display series descriptions in normal format (i.e. not scientific notation)

In [None]:
#input directory for the pipeline CSVs and output directory for the JavaScript map data
import_path = "../../Intermediate Files"
code_path = "../../Code/Maps/"

# Section 1: Create polygons with recently completed development

## Determine constructed units over time

First, I assume that units were constructed in the latest quarter for which the project had "construction" as a project status. I identify these observations.

In [None]:
#read the pipeline extract from the intermediate-files directory
pipeline_csv = import_path + "/pipeline.csv"
full_df = pd.read_csv(pipeline_csv)

In [None]:
#(03/02/2016) Hunters Point Shipyard is excluded until a more robust solution exists.
#See Github issues for further explanation
is_hunters_point = full_df['address'] == 'HUNTERS POINT SHIPYARD, PHASE II'
full_df = full_df[~is_hunters_point]

In [None]:
#Find, for every lot, the last quarter in which it was reported as under construction.
#After that quarter the project drops out of the pipeline, so it is treated as completed then.
under_construction = full_df[full_df['status'] == "CONSTRUCTION"]
cons_end = (
    under_construction
    .groupby(['lot_number'], as_index=False)['quarter_order']
    .max()
    .rename(columns={'quarter_order': 'consdate'})
)

#attach the completion quarter to every pipeline row, then keep only the row from that quarter
full_df2 = full_df.merge(cons_end, on='lot_number', how="outer")
is_completion_row = full_df2['consdate'] == full_df2['quarter_order']
#quarter_order 19 is excluded -- presumably the most recent quarter, where construction
#may still be ongoing; TODO confirm and update when new quarters are added
not_quarter_19 = full_df2['quarter_order'] != 19
full_df2 = full_df2[is_completion_row & not_quarter_19]

## Create Point Data

In [None]:
#turn each completed project into a lon/lat point (EPSG:4326)
crs = {'init': 'epsg:4326'}
geometry = [Point(lon, lat) for lon, lat in zip(full_df2.lon, full_df2.lat)]
construction = GeoDataFrame(full_df2, crs=crs, geometry=geometry)

In [None]:
#preview the first rows of the construction point layer
construction.head()

## Import 41 Neighborhood Boundaries

In [None]:
#read the 41-neighborhood boundary shapefile from the raw-data directory
root = '../../Raw Data'
neighborhood_shp = root + '/41_neighborhoods/41_neighborhoods.shp'
boundaries = gpd.read_file(neighborhood_shp)

In [None]:
#dropping Treasure Island for now
is_treasure_island = boundaries['NHOOD'] == 'Treasure Island'
boundaries = boundaries[~is_treasure_island]

In [None]:
#quick visual check of the boundaries in the CRS they were loaded with
boundaries.plot()

In [None]:
#reproject the polygons to the geographic coordinate system (EPSG:4326) used by the points,
#so that the spatial join compares like with like
geographic_crs = {'init': 'epsg:4326'}
boundaries = boundaries.to_crs(geographic_crs)

In [None]:
#plot the boundaries again after the conversion to geographic coordinates
boundaries.plot()

In [None]:
#preview the boundary attributes
boundaries.head()

## Combine Layers

In [None]:
#Spatial join: match every construction point to the neighborhood polygon it falls within.
#how='right' keeps the polygon geometries (and every neighborhood, matched or not).
nbcum = gpd.sjoin(
    construction,
    boundaries,
    how='right',
    op='within',
)

In [None]:
#Next, dissolve by neighborhoods to get sum of units
nbcum = nbcum[['NHOOD', 'geometry', 'net_units', 'net_affordable_units']]
nb_map = nbcum.dissolve(by=['NHOOD'], aggfunc='sum')

#Replace missing totals with 0 and store whole units.
#The result is assigned back explicitly: calling fillna(..., inplace=True) on nb_map[col]
#acts on a temporary Series and is not guaranteed to update nb_map (it is a no-op under
#pandas Copy-on-Write), which would leave NaN in place for the integer cast.
for unit_col in ('net_units', 'net_affordable_units'):
    nb_map[unit_col] = nb_map[unit_col].fillna(0).astype(int)

#dissolve moves NHOOD into the index; copy it into a regular column as well
nb_map['name'] = nb_map.index

In [None]:
#Define function to create categories for javascript maps. Each category will be a separate color
def cats(value):
    """Return the map category (0-4) for a row's net residential unit total.

    Parameters
    ----------
    value : row-like
        Anything indexable by ``'net_units'`` (for example a DataFrame row
        passed by ``apply(..., axis=1)``).

    Returns
    -------
    int
        0 for totals up to 50 (including net losses, i.e. negative totals),
        1 for totals above 50 up to 200, 2 up to 500, 3 up to 2000,
        and 4 for anything larger.
    """
    units = value['net_units']

    # Ascending upper bounds leave no gaps: negative totals land in the lowest
    # bucket rather than falling through to the top one, and fractional values
    # such as 50.5 go to the next bucket up.
    if units <= 50:
        return 0
    if units <= 200:
        return 1
    if units <= 500:
        return 2
    if units <= 2000:
        return 3
    # Everything above 2000; NaN also ends here because every comparison is False.
    return 4


#axis=1 passes each neighborhood row to cats; the result is stored as the category column
nb_map['unitcat'] = nb_map.apply(cats, axis = 1)

In [None]:
#Format the unit totals as comma-separated strings before export.
#The column holds strings afterwards, so this has to run after the numeric categories are assigned.
nb_map['net_units'] = (
    nb_map['net_units']
    .astype(int)
    .map('{:,}'.format, na_action='ignore')
)

In [None]:
#preview the neighborhood layer before export
nb_map.head()

In [None]:
#write the layer as GeoJSON wrapped in a JavaScript variable assignment
export_path = code_path
recent_js_path = export_path+'/Neighborhood Maps/nb41_recent.js'
with open(recent_js_path, 'w') as js_file:
    js_source = 'var dataset5 = {};'.format(nb_map.to_json())
    js_file.write(js_source)

# Section 2: Create polygons with currently proposed development only

In [None]:
#isolate currently proposed development: rows reported in quarter Q1-2017
in_current_quarter = full_df['quarter'] == 'Q1-2017'
current = full_df[in_current_quarter]

In [None]:
#build a lon/lat point layer (EPSG:4326) from the current-quarter projects
crs = {'init': 'epsg:4326'}
geometry = [Point(lon, lat) for lon, lat in zip(current.lon, current.lat)]
current_geo = GeoDataFrame(current, crs=crs, geometry=geometry)

In [None]:
#Spatial join: match every current project point to the neighborhood polygon it falls within.
#how='right' keeps the polygon geometries (and every neighborhood, matched or not).
final_geo = gpd.sjoin(
    current_geo,
    boundaries,
    how='right',
    op='within',
)

In [None]:
#Next, dissolve by neighborhoods to get sum of units
final_geo = final_geo[['NHOOD', 'geometry', 'net_units', 'net_affordable_units']]
final_geo = final_geo.dissolve(by=['NHOOD'], aggfunc='sum')

#Replace missing totals with 0 and store whole units.
#The result is assigned back explicitly: calling fillna(..., inplace=True) on final_geo[col]
#acts on a temporary Series and is not guaranteed to update final_geo (it is a no-op under
#pandas Copy-on-Write), which would leave NaN in place for the integer cast.
for unit_col in ('net_units', 'net_affordable_units'):
    final_geo[unit_col] = final_geo[unit_col].fillna(0).astype(int)

#dissolve moves NHOOD into the index; copy it into a regular column as well
final_geo['name'] = final_geo.index

In [None]:
#Define function to create categories for javascript maps. Each category will be a separate color
def cats(value):
    """Return the map category (0-4) for a row's net residential unit total.

    Parameters
    ----------
    value : row-like
        Anything indexable by ``'net_units'`` (for example a DataFrame row
        passed by ``apply(..., axis=1)``).

    Returns
    -------
    int
        0 for totals up to 50 (including net losses, i.e. negative totals),
        1 for totals above 50 up to 200, 2 up to 500, 3 up to 2000,
        and 4 for anything larger.
    """
    units = value['net_units']

    # Ascending upper bounds leave no gaps: negative totals land in the lowest
    # bucket rather than falling through to the top one, and fractional values
    # such as 50.5 go to the next bucket up.
    if units <= 50:
        return 0
    if units <= 200:
        return 1
    if units <= 500:
        return 2
    if units <= 2000:
        return 3
    # Everything above 2000; NaN also ends here because every comparison is False.
    return 4


#axis=1 passes each neighborhood row to cats; the result is stored as the category column
final_geo['unitcat'] = final_geo.apply(cats, axis = 1)

In [None]:
#Format the unit totals as comma-separated strings before export.
#The column holds strings afterwards, so this has to run after the numeric categories are assigned.
final_geo['net_units'] = (
    final_geo['net_units']
    .astype(int)
    .map('{:,}'.format, na_action='ignore')
)

In [None]:
#write the layer as GeoJSON wrapped in a JavaScript variable assignment
export_path = code_path
current_js_path = export_path+'/Neighborhood Maps/nb41_current.js'
with open(current_js_path, 'w') as js_file:
    js_source = 'var dataset6 = {};'.format(final_geo.to_json())
    js_file.write(js_source)

# Section 3. Create Polygons with Non-residential development - recently completed

In [None]:
#read the commercial (non-residential) pipeline extract from the intermediate-files directory
commercial_csv = import_path + "/pipeline_com.csv"
nr_df = pd.read_csv(commercial_csv)

In [None]:
#initial data cleaning: treat missing commercial square footage as 0
#(the column list is deliberately not called `vars`, which would shadow the Python builtin
#for the rest of the kernel session)
sqft_cols = ['comm_sqft', 'comm_sqft_net']
for sqft_col in sqft_cols:
    nr_df[sqft_col] = nr_df[sqft_col].fillna(0)

In [None]:
#Find, for every lot, the last quarter in which it was reported as under construction.
#After that quarter the project drops out of the pipeline, so it is treated as completed then.
nr_under_construction = nr_df[nr_df['status'] == "CONSTRUCTION"]
cons_end_nr = (
    nr_under_construction
    .groupby(['lot_number'], as_index=False)['quarter_order']
    .max()
    .rename(columns={'quarter_order': 'consdate'})
)

#attach the completion quarter to every pipeline row, then keep only the row from that quarter
nr_df2 = nr_df.merge(cons_end_nr, on='lot_number', how="outer")
is_completion_row = nr_df2['consdate'] == nr_df2['quarter_order']
#quarter_order 19 is excluded -- presumably the most recent quarter, where construction
#may still be ongoing; TODO confirm and update when new quarters are added
not_quarter_19 = nr_df2['quarter_order'] != 19
nr_df2 = nr_df2[is_completion_row & not_quarter_19]

In [None]:
#build a lon/lat point layer (EPSG:4326) from the completed non-residential projects
crs = {'init': 'epsg:4326'}
geometry = [Point(lon, lat) for lon, lat in zip(nr_df2.lon, nr_df2.lat)]
nr_geo = GeoDataFrame(nr_df2, crs=crs, geometry=geometry)

In [None]:
#Spatial join: match every non-residential point to the neighborhood polygon it falls within.
#how='right' keeps the polygon geometries (and every neighborhood, matched or not).
final_geo = gpd.sjoin(
    nr_geo,
    boundaries,
    how='right',
    op='within',
)

In [None]:
#Next, dissolve by neighborhoods to get the sum of commercial square footage
final_geo = final_geo[['NHOOD', 'geometry', 'comm_sqft_net', 'comm_sqft']]
final_geo = final_geo.dissolve(by=['NHOOD'], aggfunc='sum')

#Replace missing totals with 0 and store whole square feet.
#The result is assigned back explicitly: calling fillna(..., inplace=True) on final_geo[col]
#acts on a temporary Series and is not guaranteed to update final_geo (it is a no-op under
#pandas Copy-on-Write), which would leave NaN in place for the integer cast.
for sqft_col in ('comm_sqft', 'comm_sqft_net'):
    final_geo[sqft_col] = final_geo[sqft_col].fillna(0).astype(int)

#dissolve moves NHOOD into the index; copy it into a regular column as well
final_geo['name'] = final_geo.index

In [None]:
#distribution of net commercial square footage across neighborhoods
#(presumably used to choose the category cut-offs below -- confirm)
final_geo['comm_sqft_net'].describe(percentiles = [.05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95])

In [None]:
#Define function to create categories for javascript maps. Each category will be a separate color
def cats(value):
    """Return the map category (0-5) for a row's net commercial square footage.

    Parameters
    ----------
    value : row-like
        Anything indexable by ``'comm_sqft_net'`` (for example a DataFrame row
        passed by ``apply(..., axis=1)``).

    Returns
    -------
    int
        0 for a net loss of more than 5,000 sq ft, 1 for a smaller net loss,
        2 for 0 up to 5,000, 3 for above 5,000 up to 10,000,
        4 for above 10,000 up to 50,000, and 5 for anything larger.
    """
    sqft = value['comm_sqft_net']

    # Ascending bounds leave no gaps between buckets, so a fractional value
    # such as 5000.5 goes to the next bucket up, not the top category.
    if sqft < -5000:
        return 0
    if sqft < 0:
        return 1
    if sqft <= 5000:
        return 2
    if sqft <= 10000:
        return 3
    if sqft <= 50000:
        return 4
    # Everything above 50,000; NaN also ends here because every comparison is False.
    return 5


#axis=1 passes each neighborhood row to cats; the result is stored as the category column
final_geo['unitcat'] = final_geo.apply(cats, axis = 1)

In [None]:
#Format the net square footage as comma-separated strings before export.
#The column holds strings afterwards, so this has to run after the numeric categories are assigned.
final_geo['comm_sqft_net'] = (
    final_geo['comm_sqft_net']
    .astype(int)
    .map('{:,}'.format, na_action='ignore')
)

In [None]:
#write the layer as GeoJSON wrapped in a JavaScript variable assignment
export_path = code_path
comm_js_path = export_path+'/Neighborhood Maps/nb41_comm.js'
with open(comm_js_path, 'w') as js_file:
    js_source = 'var dataset7 = {};'.format(final_geo.to_json())
    js_file.write(js_source)

# Section 4. Create Polygons with Non-residential development - currently proposed

In [None]:
#isolate currently proposed non-residential development: rows reported in quarter Q1-2017
in_current_quarter = nr_df['quarter'] == 'Q1-2017'
current = nr_df[in_current_quarter]

In [None]:
#build a lon/lat point layer (EPSG:4326) from the current-quarter non-residential projects
crs = {'init': 'epsg:4326'}
geometry = [Point(lon, lat) for lon, lat in zip(current.lon, current.lat)]
current_geo = GeoDataFrame(current, crs=crs, geometry=geometry)

In [None]:
#Spatial join: match every current non-residential point to the neighborhood polygon it falls within.
#how='right' keeps the polygon geometries (and every neighborhood, matched or not).
final_geo = gpd.sjoin(
    current_geo,
    boundaries,
    how='right',
    op='within',
)

In [None]:
#Next, dissolve by neighborhoods to get the sum of commercial square footage
final_geo = final_geo[['NHOOD', 'geometry', 'comm_sqft_net', 'comm_sqft']]
final_geo = final_geo.dissolve(by=['NHOOD'], aggfunc='sum')

#Replace missing totals with 0 and store whole square feet.
#The result is assigned back explicitly: calling fillna(..., inplace=True) on final_geo[col]
#acts on a temporary Series and is not guaranteed to update final_geo (it is a no-op under
#pandas Copy-on-Write), which would leave NaN in place for the integer cast.
for sqft_col in ('comm_sqft', 'comm_sqft_net'):
    final_geo[sqft_col] = final_geo[sqft_col].fillna(0).astype(int)

#dissolve moves NHOOD into the index; copy it into a regular column as well
final_geo['name'] = final_geo.index

In [None]:
#Define function to create categories for javascript maps. Each category will be a separate color
def cats(value):
    """Return the map category (0-5) for a row's net commercial square footage.

    Parameters
    ----------
    value : row-like
        Anything indexable by ``'comm_sqft_net'`` (for example a DataFrame row
        passed by ``apply(..., axis=1)``).

    Returns
    -------
    int
        0 for a net loss of more than 5,000 sq ft, 1 for a smaller net loss,
        2 for 0 up to 5,000, 3 for above 5,000 up to 10,000,
        4 for above 10,000 up to 50,000, and 5 for anything larger.
    """
    sqft = value['comm_sqft_net']

    # Ascending bounds leave no gaps between buckets, so a fractional value
    # such as 5000.5 goes to the next bucket up, not the top category.
    if sqft < -5000:
        return 0
    if sqft < 0:
        return 1
    if sqft <= 5000:
        return 2
    if sqft <= 10000:
        return 3
    if sqft <= 50000:
        return 4
    # Everything above 50,000; NaN also ends here because every comparison is False.
    return 5


#axis=1 passes each neighborhood row to cats; the result is stored as the category column
final_geo['unitcat'] = final_geo.apply(cats, axis = 1)

In [None]:
#Format the net square footage as comma-separated strings before export.
#The column holds strings afterwards, so this has to run after the numeric categories are assigned.
final_geo['comm_sqft_net'] = (
    final_geo['comm_sqft_net']
    .astype(int)
    .map('{:,}'.format, na_action='ignore')
)

In [None]:
#write the layer as GeoJSON wrapped in a JavaScript variable assignment
export_path = code_path
comm_curr_js_path = export_path+'/Neighborhood Maps/nb41_comm_curr.js'
with open(comm_curr_js_path, 'w') as js_file:
    js_source = 'var dataset8 = {};'.format(final_geo.to_json())
    js_file.write(js_source)

In [None]:
#preview the layer that was just exported
final_geo.head()