# Final Project Map 2
## Brian Goggin
In this assignment, I use the development pipeline data of SF development in order to create an online map of residential construction at the point level. 


In [24]:
#import packages
import pandas as pd
import numpy as np
import re as re
import json    # library for working with JSON-formatted text strings
import requests  # library for accessing content from web URLs
import pprint  # library for making Python data structures readable
pp = pprint.PrettyPrinter()
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
from geopy.distance import great_circle
from scipy import ndimage
import pysal #packaged required for changing mapping schemes (e.g. quantile versus equal interval)
import os

## Determine constructed unites over time

First, I assume that units were constructed in the latest quarter for which the project had "construction" as a project status. I identify these observations.

In [25]:
import_path = "/Users/briangoggin/Dropbox/CP 255/SF Development Project/Intermediate Files/"
full_df = pd.read_csv(import_path+"/pipeline.csv")

In [26]:
vars = ['net_units', 'net_affordable_units']
#do some initial data cleaning
for item in vars:
    full_df[item] = full_df[item].fillna(0)

# Section 1. Create Recent Completions Geojson File

In [27]:
#create dataframes for line graph of construction, BP, and BI starts over time
cons_end = full_df[full_df['status'] == "CONSTRUCTION"].groupby(['lot_number'], as_index=False)['quarter_order'].max()
cons_end.rename(columns = {'quarter_order': 'consdate'}, inplace = True)
#merge data together to identify quarter that projects were built
full_df2 = full_df.merge(cons_end, on = 'lot_number', how = "outer")
full_df2 = full_df2[full_df2['consdate'] == full_df2['quarter_order']]
full_df2 = full_df2[full_df2['quarter_order'] != 16]
full_df2 = full_df2[full_df2['net_units'].notnull()] #keep only those projects that have nonmissing net units

In [28]:
#Define function to create categories for javascript maps. Each category will be separate dot color
def cats(value):
    if value['net_units'] <0: 
        field = 0
        
    elif (value['net_units'] >=0) & (value['net_units']<=15):
        field = 1
        
    elif (value['net_units'] >=16) & (value['net_units']<=30):
        field = 2
        
    elif (value['net_units'] >=31) & (value['net_units']<=50):
        field = 3
        
    elif (value['net_units'] >=51) & (value['net_units']<=100):
        field = 4  
        
    else: 
        field = 5
        
    return field


full_df2['unitcat'] = full_df2.apply(cats, axis = 1)

## Create Point Data

In [29]:
#write function for dataframe
def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    # create a new python dict to contain our geojson data, using geojson format
    geojson = {'type':'FeatureCollection', 'features':[]}

    # loop through each row in the dataframe and convert each row to geojson format
    for _, row in df.iterrows():
        # create a feature template to fill in
        feature = {'type':'Feature',
                   'properties':{},
                   'geometry':{'type':'Point',
                               'coordinates':[]}}

        # fill in the coordinates
        feature['geometry']['coordinates'] = [row[lon],row[lat]]

        # for each column, get the value and add it as a new feature property
        for prop in properties:
            feature['properties'][prop] = row[prop]
        
        # add this feature (aka, converted dataframe row) to the list of features inside our dict
        geojson['features'].append(feature)
    
    return geojson

In [30]:
#finally, create dataframe
cols = ['net_units', 'net_affordable_units', 'address', 'quarter', 'zone', 'unitcat']
geojson = df_to_geojson(full_df2, cols, 'lat', 'lon')

In [31]:
# save the geojson result to a file
folder = '/Users/briangoggin/Dropbox/CP 255/SF Development Project/Intermediate Files/'
output_filename = folder+'/recent_data.js'
with open(output_filename, 'w') as output_file:
    output_file.write('var dataset = {};'.format(json.dumps(geojson, indent=4)))

# Section 2. Create Geojson for Current Development Projects in the Pipeline

In [32]:
current = full_df[full_df['quarter']=='Q22016']

In [33]:
current['unitcat'] = current.apply(cats, axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [34]:
#finally, create dataframe
cols = ['net_units', 'net_affordable_units', 'address', 'quarter', 'zone', 'unitcat', 'status']
geojson = df_to_geojson(current, cols, 'lat', 'lon')

In [35]:
# save the geojson result to a file
folder = '/Users/briangoggin/Dropbox/CP 255/SF Development Project/Intermediate Files/'
output_filename = folder+'/current_data.js'
with open(output_filename, 'w') as output_file:
    output_file.write('var dataset2 = {};'.format(json.dumps(geojson, indent=4)))