# Final Project Map 1
## Brian Goggin
In this assignment, I use San Francisco's development pipeline data to create an online map of residential construction by Zillow neighborhood. 


In [44]:
#import packages
import pandas as pd
import numpy as np
import re as re
import json    # library for working with JSON-formatted text strings
import requests  # library for accessing content from web URLs
import pprint  # library for making Python data structures readable
pp = pprint.PrettyPrinter()
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
from geopy.distance import great_circle
from scipy import ndimage
import pysal #packaged required for changing mapping schemes (e.g. quantile versus equal interval)
import os

## Determine constructed units over time

First, I assume that units were constructed in the latest quarter for which the project had "construction" as a project status. I identify these observations.

In [45]:
# Directory that contains pipeline.csv.
import_path = "/Users/briangoggin/Dropbox/CP 255/Permit Project/Intermediate Files/"
# import_path already ends with a separator, so plain concatenation with
# "/pipeline.csv" produced a doubled "/"; os.path.join builds a clean path.
full_df = pd.read_csv(os.path.join(import_path, "pipeline.csv"))

In [46]:
# For every lot, find the latest quarter in which it carried the
# "CONSTRUCTION" status; that quarter is stored as `consdate`.
under_construction = full_df['status'] == "CONSTRUCTION"
cons_end = (
    full_df[under_construction]
    .groupby(['lot_number'], as_index=False)['quarter_order']
    .max()
    .rename(columns={'quarter_order': 'consdate'})
)

# Attach `consdate` to every pipeline record, then keep only the records that
# (a) fall in the lot's final construction quarter,
# (b) are not in quarter 16, and
# (c) have a non-missing net unit count.
# NOTE(review): 16 is presumably the most recent quarter in the data, where
# construction may still be ongoing -- confirm and consider naming the constant.
full_df2 = full_df.merge(cons_end, on='lot_number', how="outer")
keep_rows = (
    (full_df2['consdate'] == full_df2['quarter_order'])
    & (full_df2['quarter_order'] != 16)
    & full_df2['net_units'].notnull()
)
full_df2 = full_df2[keep_rows]

# Create Point Data

In [47]:
# Turn each record's (lon, lat) pair into a shapely Point and wrap the table
# in a GeoDataFrame tagged with EPSG:4326.
crs = {'init' :'epsg:4326'}
geometry = list(map(Point, zip(full_df2.lon, full_df2.lat)))
construction = GeoDataFrame(full_df2, geometry=geometry, crs=crs)
#construction.plot();

# Import Zillow Neighborhood Boundaries

In [48]:
# Location of the raw project data and of the Zillow neighborhood shapefile
# inside it.
root = '/Users/briangoggin/Dropbox/CP 255/Permit Project/Raw Data'
shapefile = root+'/ZillowNeighborhoods-CA/ZillowNeighborhoods-CA.shp'

boundaries = gpd.read_file(shapefile)

In [49]:
# Keep only the neighborhoods whose COUNTY is San Francisco.
in_sf = boundaries['COUNTY'] == 'San Francisco'
boundaries = boundaries[in_sf]
#boundaries.plot();

In [50]:
# Preview the first five San Francisco neighborhood records.
boundaries.head(5)

Unnamed: 0,CITY,COUNTY,NAME,REGIONID,STATE,geometry
789,San Francisco,San Francisco,Bayview,272885.0,CA,"POLYGON ((-122.380496615061 37.7507156475919, ..."
790,San Francisco,San Francisco,Bernal Heights,268020.0,CA,"POLYGON ((-122.403862539662 37.7494769720709, ..."
791,San Francisco,San Francisco,Castro-Upper Market,276241.0,CA,"POLYGON ((-122.426029676707 37.7697778521009, ..."
792,San Francisco,San Francisco,Chinatown,114291.0,CA,"POLYGON ((-122.41020215338 37.7974876723953, -..."
793,San Francisco,San Francisco,Crocker Amazon,273404.0,CA,"POLYGON ((-122.454085201694 37.7082065558492, ..."


In [51]:
# Tag the neighborhood polygons with the same coordinate reference system
# (EPSG:4326) that the construction points use.
# NOTE(review): assigning to `.crs` presumably only relabels the coordinates
# rather than reprojecting them; if the shapefile ships in a different CRS,
# `to_crs` would be the transforming call -- confirm against the geopandas docs.
wgs84 = {'init' :'epsg:4326'}
boundaries.crs = wgs84

# Combine Layers

In [52]:
# Spatially join each construction point to the neighborhood polygon it falls
# within. how='right' keeps the polygon geometries in the result.
nbcum = gpd.sjoin(
    left_df=construction,
    right_df=boundaries,
    how='right',
    op='within',
)

In [53]:
# Dissolve the joined rows by neighborhood NAME, summing the unit counts so
# each neighborhood becomes one polygon with its total units.
nbcum = nbcum[['NAME', 'geometry', 'net_units', 'net_affordable_units']]
nb_map = nbcum.dissolve(by='NAME', aggfunc='sum')

# Missing totals are treated as zero units so the columns can be stored as ints.
# The result is assigned back instead of calling fillna(..., inplace=True) on a
# column selection: that form is chained assignment, which warns in recent
# pandas and does not modify nb_map when Copy-on-Write is enabled.
for unit_col in ('net_units', 'net_affordable_units'):
    nb_map[unit_col] = nb_map[unit_col].fillna(0).astype(int)

# Expose the neighborhood name (the dissolve index) as a regular column.
nb_map['name'] = nb_map.index
#nb_map.head(20)
#nb_map.head(20)

In [54]:
# Serialize the neighborhood layer to GeoJSON and write it out as a JavaScript
# file that assigns the GeoJSON object to a `dataset` variable.
export_path = "/Users/briangoggin/Dropbox/CP 255/Permit Project/Code/Pipeline Map"
with open(export_path+'/nb_map.js', 'w') as js_file:
    geojson_text = nb_map.to_json()
    js_file.write('var dataset = {};'.format(geojson_text))

In [55]:
# Summary statistics of total net units per neighborhood.
nb_map.loc[:, 'net_units'].describe()

count      34.000000
mean      410.294118
std      1228.877772
min         0.000000
25%        13.500000
50%        30.500000
75%       176.000000
max      6920.000000
Name: net_units, dtype: float64