# Final Project Map 1
## Brian Goggin
In this assignment, I use San Francisco's development pipeline data to create an online map of residential construction by Zillow neighborhood. 


In [142]:
#import packages
import pandas as pd
import numpy as np
import re as re
import json    # library for working with JSON-formatted text strings
import requests  # library for accessing content from web URLs
import pprint  # library for making Python data structures readable
pp = pprint.PrettyPrinter()
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
from geopy.distance import great_circle
from scipy import ndimage
import pysal #packaged required for changing mapping schemes (e.g. quantile versus equal interval)
import os
# magic command to display matplotlib plots inline within the ipython notebook
%matplotlib inline

# Section 1: Create polygons with recently completed development

## Determine constructed units over time

First, I assume that a project's units were completed in the latest quarter in which the project's status was "CONSTRUCTION". I identify those observations below.

In [143]:
# Directory holding the project's intermediate files (absolute, machine-specific path).
# It already ends in "/", so the joined file name below contains "//".
import_path = "/Users/briangoggin/Dropbox/CP 255/SF Development Project/Intermediate Files/"
# Load the pipeline table. Later cells read its status, quarter, quarter_order,
# lot_number, lon/lat, net_units and net_affordable_units columns.
full_df = pd.read_csv(import_path+"/pipeline.csv")

In [144]:
#For every lot, find the last quarter in which it was reported with status "CONSTRUCTION"
cons_end = (
    full_df.loc[full_df['status'] == "CONSTRUCTION"]
    .groupby(['lot_number'], as_index=False)['quarter_order']
    .max()
    .rename(columns={'quarter_order': 'consdate'})
)

#Attach that quarter to every pipeline row, then keep only the row observed in it.
#Rows for lots that never reached construction have a missing consdate and drop out
#at the first filter.
full_df2 = (
    full_df.merge(cons_end, on='lot_number', how="outer")
    .loc[lambda rows: rows['consdate'] == rows['quarter_order']]
    #NOTE(review): 16 is presumably the quarter_order of the most recent quarter, where
    #a project still under construction cannot yet be counted as built - confirm.
    .loc[lambda rows: rows['quarter_order'] != 16]
    #keep only those projects that have nonmissing net units
    .loc[lambda rows: rows['net_units'].notnull()]
)

## Create Point Data

In [145]:
#Turn each remaining project's lon/lat pair into a point geometry labelled as EPSG:4326
crs = dict(init='epsg:4326')
geometry = [Point(lon, lat) for lon, lat in zip(full_df2.lon, full_df2.lat)]
construction = GeoDataFrame(full_df2, geometry=geometry, crs=crs)
#construction.plot();

## Import Zillow Neighborhood Boundaries

In [146]:
# Directory holding the raw inputs (absolute, machine-specific path).
root = '/Users/briangoggin/Dropbox/CP 255/SF Development Project/Raw Data'
# Read the Zillow neighborhood polygons from the "-CA" shapefile; the following cell
# narrows them to San Francisco via the COUNTY column.
boundaries = gpd.read_file(root+'/ZillowNeighborhoods-CA/ZillowNeighborhoods-CA.shp')

In [147]:
#Restrict the Zillow polygons to those whose COUNTY is San Francisco
boundaries = boundaries.loc[boundaries['COUNTY'] == 'San Francisco']
#boundaries.plot();

In [148]:
#set boundaries into same geographic coordinate system as points
target_crs = {'init' :'epsg:4326'}
if boundaries.crs:
    #The shapefile declares its own CRS: reproject, so the coordinates are actually
    #transformed rather than merely relabelled (assigning .crs never moves a vertex).
    boundaries = boundaries.to_crs(target_crs)
else:
    #No CRS metadata to reproject from: label the layer as EPSG:4326, as before.
    boundaries.crs = target_crs

## Combine Layers

In [149]:
#First, spatial join between points and neighborhood boundaries. Set 'how' to 'right' to preserve polygon geometries.
#The result carries the polygon geometry plus the point attributes (net_units,
#net_affordable_units) that the next cell sums per neighborhood.
#NOTE(review): newer geopandas releases appear to rename `op` to `predicate` - confirm
#against the installed version before upgrading.
nbcum = gpd.sjoin(construction, boundaries, how = 'right', op='within')

In [150]:
#Next, dissolve by neighborhoods to get sum of units
nbcum['REGIONID'] = nbcum['REGIONID'].astype(int)
nbcum = nbcum[['NAME','REGIONID', 'geometry', 'net_units', 'net_affordable_units']]
nb_map = nbcum.dissolve(by=['NAME', 'REGIONID'], aggfunc='sum')

#Neighborhoods with no matched project can come out with missing sums; count them as 0.
#The filled column is assigned back rather than using fillna(inplace=True) on a column
#selection, which can operate on a temporary copy and leave nb_map unchanged.
for unit_col in ['net_units', 'net_affordable_units']:
    nb_map[unit_col] = nb_map[unit_col].fillna(0).astype(int)

#Expose the dissolve keys as ordinary string columns for the exported map.
#They are read straight from the index levels instead of being parsed out of the
#tuple's text representation, so names containing commas or apostrophes survive intact
#and RegionID no longer carries a stray leading space.
nb_map['name'] = [str(nb_name) for nb_name in nb_map.index.get_level_values('NAME')]
nb_map['RegionID'] = [str(int(region_id)) for region_id in nb_map.index.get_level_values('REGIONID')]

In [151]:
#Define function to create categories for javascript maps. Each category will be separate dot color
def cats(value):
    """Bin a row's ``net_units`` total into a map colour category.

    Parameters
    ----------
    value : mapping or pandas.Series
        A row exposing a numeric ``'net_units'`` entry.

    Returns
    -------
    int
        0 for totals up to 50 (including negative totals), 1 for totals above 50
        up to 200, 2 above 200 up to 500, 3 above 500 up to 2000, and 4 for
        anything larger.

    Notes
    -----
    Bins are defined by ascending upper bounds, so there are no gaps between
    them. Previously a negative total (or a value falling between two integer
    bounds) matched no range and was reported as category 4, the largest bin.
    A NaN total still falls through every comparison and yields 4.
    """
    units = value['net_units']

    if units <= 50:
        return 0
    if units <= 200:
        return 1
    if units <= 500:
        return 2
    if units <= 2000:
        return 3
    return 4


#Apply cats row by row (axis = 1) to store each neighborhood's category in 'unitcat'
nb_map['unitcat'] = nb_map.apply(cats, axis = 1)

In [152]:
#export to geojson object
#The GeoJSON text of nb_map is wrapped in a JavaScript assignment to `dataset`
#and written to nb_recent.js.
export_path = "/Users/briangoggin/Dropbox/CP 255/SF Development Project/Code/Pipeline Map"
with open(export_path+'/Neighborhood Maps/nb_recent.js', 'w') as f:
    f.write('var dataset = {};'.format(nb_map.to_json()))

In [153]:
#Total net units summed over all neighborhood polygons (shown as the cell output)
nb_map['net_units'].sum()

13950

In [154]:
#export data to csv
#export_path is re-pointed at the intermediate-files directory for this export; it
#already ends in "/", so the joined file name contains "//".
export_path = "/Users/briangoggin/Dropbox/CP 255/SF Development Project/Intermediate Files/"
nb_map.to_csv(export_path+'/completed.csv')

# Section 2: Create polygons with currently proposed development only

In [155]:
#Keep only the pipeline rows from the quarter labelled 'Q22016', treated here as the
#currently proposed development
current = full_df.loc[full_df['quarter'] == 'Q22016']

In [156]:
#Build a point layer for the current-quarter projects from their lon/lat columns,
#labelled as EPSG:4326
crs = dict(init='epsg:4326')
geometry = [Point(lon, lat) for lon, lat in zip(current.lon, current.lat)]
current_geo = GeoDataFrame(current, geometry=geometry, crs=crs)

In [157]:
#First, spatial join between points and neighborhood boundaries. Set 'how' to 'right' to preserve polygon geometries.
#The result carries the polygon geometry plus the point attributes (net_units,
#net_affordable_units) that the next cell sums per neighborhood.
#NOTE(review): newer geopandas releases appear to rename `op` to `predicate` - confirm
#against the installed version before upgrading.
final_geo = gpd.sjoin(current_geo, boundaries, how = 'right', op='within')

In [158]:
#Next, dissolve by neighborhoods to get sum of units
final_geo['REGIONID'] = final_geo['REGIONID'].astype(int)
final_geo = final_geo[['NAME','REGIONID', 'geometry', 'net_units', 'net_affordable_units']]
final_geo = final_geo.dissolve(by=['NAME', 'REGIONID'], aggfunc='sum')

#Neighborhoods with no matched project can come out with missing sums; count them as 0.
#The filled column is assigned back rather than using fillna(inplace=True) on a column
#selection, which can operate on a temporary copy and leave final_geo unchanged.
for unit_col in ['net_units', 'net_affordable_units']:
    final_geo[unit_col] = final_geo[unit_col].fillna(0).astype(int)

#Expose the dissolve keys as ordinary string columns for the exported map.
#They are read straight from the index levels instead of being parsed out of the
#tuple's text representation, so names containing commas or apostrophes survive intact
#and RegionID no longer carries a stray leading space.
final_geo['name'] = [str(nb_name) for nb_name in final_geo.index.get_level_values('NAME')]
final_geo['RegionID'] = [str(int(region_id)) for region_id in final_geo.index.get_level_values('REGIONID')]

In [159]:
#Define function to create categories for javascript maps. Each category will be separate dot color
#NOTE(review): this repeats the `cats` definition from Section 1; consider defining it once.
def cats(value):
    """Bin a row's ``net_units`` total into a map colour category.

    Parameters
    ----------
    value : mapping or pandas.Series
        A row exposing a numeric ``'net_units'`` entry.

    Returns
    -------
    int
        0 for totals up to 50 (including negative totals), 1 for totals above 50
        up to 200, 2 above 200 up to 500, 3 above 500 up to 2000, and 4 for
        anything larger.

    Notes
    -----
    Bins are defined by ascending upper bounds, so there are no gaps between
    them. Previously a negative total (or a value falling between two integer
    bounds) matched no range and was reported as category 4, the largest bin.
    A NaN total still falls through every comparison and yields 4.
    """
    units = value['net_units']

    if units <= 50:
        return 0
    if units <= 200:
        return 1
    if units <= 500:
        return 2
    if units <= 2000:
        return 3
    return 4


#Apply cats row by row (axis = 1) to store each neighborhood's category in 'unitcat'
final_geo['unitcat'] = final_geo.apply(cats, axis = 1)

In [160]:
#export to geojson object
#The GeoJSON text of final_geo is wrapped in a JavaScript assignment to `dataset2`
#and written to nb_current.js.
export_path = "/Users/briangoggin/Dropbox/CP 255/SF Development Project/Code/Pipeline Map"
with open(export_path+'/Neighborhood Maps/nb_current.js', 'w') as f:
    f.write('var dataset2 = {};'.format(final_geo.to_json()))