In [1]:
import pandas as pd
import logging
import dateutil
from dateutil import parser
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
%matplotlib inline
# Let pandas display up to 500 columns so wide project tables are not truncated in cell output.
pd.set_option('display.max_columns', 500)

In [2]:
# Load the final cleaned projects table from the "data" folder.
# Paths are relative to the notebook's working directory.
file = "../../../data/cleaned/final_cleaned_projects.csv"
# Output directory for the "Entire Sample" results (not used in the cells shown here).
output="../../Output/Entire Sample/"
df = pd.read_csv(file)

# Merge in Geographic Information and Make Big Dataframe

In [3]:
# Turn the project table into a GeoDataFrame of points built from its x / y columns
# (x holds longitude, y holds latitude).
# The coordinates are declared as EPSG:4326, the same CRS the neighborhood boundaries
# are converted to before the spatial join.
crs = {'init' :'epsg:4326'}
geometry = [Point(xy) for xy in zip(df.x, df.y)]
# No to_crs() call follows: the frame is created directly in EPSG:4326, so reprojecting
# it to EPSG:4326 again would not change any coordinate.
devs = GeoDataFrame(df, crs=crs, geometry=geometry)

In [4]:
# Read the neighborhood boundary shapefile into a GeoDataFrame.
# NOTE(review): the folder name suggests 41 neighborhoods — confirm against the data.
neighborhoods = gpd.read_file('../../../data/gis/41_neighborhoods/41_neighborhoods.shp')

In [5]:
# Reproject the boundaries to EPSG:4326 so they share the CRS declared for the
# development points; both layers must be in the same CRS for the spatial join.
neighborhoods = neighborhoods.to_crs({'init': 'epsg:4326'}) 

In [6]:
# Spatial join between the development points and the neighborhood boundaries.
# NOTE(review): how='inner' keeps only points that fall within a neighborhood polygon.
# The previous comment here described how='left' (preserve all developments), which is
# not what the code does — confirm which behavior is intended.
# The joined result is bound to `df`, replacing the table loaded from the CSV.
df = gpd.sjoin(devs, neighborhoods, how = 'inner', op='within')
df.shape

(2474, 44)

In [7]:
# Big-projects dataframe: developments with at least 10 units.
# .copy() makes df_big an independent frame rather than a slice of df, so columns can be
# added to it later without SettingWithCopyWarning or ambiguity about which frame changes.
# NOTE(review): df_big is built before permit_time / bp_time / con_time are added to df
# further down, so it does not carry those columns.
df_big = df[df['units'] >= 10].copy()
df_big['project_time_years'].describe()

count    137.000000
mean       6.255254
std        3.453810
min        0.750685
25%        3.854795
50%        5.945205
75%        8.273973
max       24.128767
Name: project_time_years, dtype: float64

In [8]:
# The five big projects with the smallest project_time_years (shortest first).
df_big.sort_values('project_time_years').head(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,BP_date,address_x,aff,affnet,apn,best_date,best_stat,comp_date,con_date,dbi_permit,dropped_out,first_date,first_project_record_date,firstfiled,latest_project_record_date,latest_project_status,project_dates,project_duration_days,project_statuses,report_quarter,report_year,status,units,unitsnet,x,y,zoning,zoning_simplified,bp_duplicates,project_time_years,aff_address,address_y,Low Income Units,Placed in Service (PIS) Date,iz_address,Total BMRs in this Building or Phase,Planning Approval Date,Completion Date,_merge,geometry,index_right,nhood
913,913,0,,1155 MARKET ST,0.0,0.0,3702054,2016-02-23,CONSTRUCTION,04/01/2016,2016-02-23,201507020526,False,2015-07-02,2016-02-23,2015-07-02,2016-02-23,CONSTRUCTION,"('2016-02-23',)",274.0,"('CONSTRUCTION',)",1,2016,Under Construction,11.0,11.0,-122.413409,37.779223,C-3-G,C-3-G,False,0.750685,,,,,,,,,left_only,POINT (-122.413409 37.7792227726),33,South of Market
1237,1237,0,2015-07-24,555 POST ST,0.0,0.0,306020,2016-02-29,CONSTRUCTION,04/01/2016,2016-02-29,201504224344,False,2015-04-22,2015-07-24,2015-04-22,2016-02-29,CONSTRUCTION,"('2015-07-24', '2015-07-24', '2016-01-04', '20...",345.0,"('BP ISSUED', 'BP ISSUED', 'BP ISSUED', 'CONST...",1,2016,Under Construction,17.0,17.0,-122.410943,37.787683,C-3-G,C-3-G,False,0.945205,,,,,,,,,left_only,POINT (-122.410943 37.787682827),35,Tenderloin
2256,2256,0,2013-07-23,1816 EDDY ST,,0.0,1127064,2014-02-07,CONSTRUCTION,07/01/2014,2014-02-07,201304265571,False,2013-04-26,2013-07-23,2013-04-26,2014-02-07,CONSTRUCTION,"('2013-07-23', '2013-07-23', '2014-02-07', '20...",431.0,"('BP ISSUED', 'BP ISSUED', 'CONSTRUCTION', 'CO...",2,2014,Under Construction,19.0,19.0,-122.43726,37.780796,RM-3,RM-3,False,1.180822,,,,,,,,,left_only,POINT (-122.43726 37.780796),40,Western Addition
64,64,0,2011-08-16,350 GOLDEN GATE AV,69.0,,346005,2012-02-13,CONSTRUCTION,04/01/2012,2012-01-13,201011014101,False,2010-11-01,2010-11-01,2010-11-01,2012-02-13,CONSTRUCTION,"('2010-11-01', '2010-11-01', '2010-11-01', '20...",517.0,"('BP FILED', 'BP FILED', 'BP FILED', 'BP ISSUE...",1,2012,Under Construction,19.0,19.0,-122.416512,37.781546,C-3-G,C-3-G,False,1.416438,350 GOLDEN GATE AVENUE,350 GOLDEN GATE AVENUE,69.0,2012-08-01,,,,,left_only,POINT (-122.4165125 37.781546),35,Tenderloin
1123,1123,0,2016-01-22,1300 04TH ST,0.0,0.0,8711021,2016-10-25,CONSTRUCTION,01/01/2017,2016-10-25,201507272485,False,2015-07-27,2015-07-27,2015-07-27,2016-10-25,CONSTRUCTION,"('2015-07-27', '2015-07-27', '2016-01-22', '20...",524.0,"('BP FILED', 'BP FILED', 'BP ISSUED', 'BP ISSU...",4,2016,Under Construction,143.0,143.0,-122.391772,37.771402,MB-RA,MB-RA,False,1.435616,,,,,,,,,left_only,POINT (-122.391771649 37.77140196040001),19,Mission Bay


In [9]:
# To use as much data as possible, no rows are dropped for missing dates: each duration
# is computed only where both of its dates are non-null, and is NaN otherwise.
def _years_between(value, start_col, end_col):
    """Return the time between two date fields of one row, in 365-day years.

    Parameters
    ----------
    value : mapping-like row (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
        Must contain ``start_col`` and ``end_col``.
    start_col, end_col : str
        Names of the fields holding the start and end dates. Values may be date
        strings or already-parsed datetimes.

    Returns
    -------
    float
        Whole days between the two dates divided by 365, or ``np.nan`` when either
        date is missing. Negative if the end date precedes the start date.
    """
    start, end = value[start_col], value[end_col]
    if pd.isnull(start) or pd.isnull(end):
        return np.nan
    # pd.to_datetime accepts both strings and datetime-like values.
    return (pd.to_datetime(end) - pd.to_datetime(start)).days / 365


def permit_time(value):
    """Years from ``first_date`` to ``BP_date`` (presumably the building-permit date)."""
    return _years_between(value, 'first_date', 'BP_date')


def bp_time(value):
    """Years from ``BP_date`` to ``con_date`` (presumably the construction date)."""
    return _years_between(value, 'BP_date', 'con_date')


def con_time(value):
    """Years from ``con_date`` to ``comp_date`` (presumably the completion date)."""
    return _years_between(value, 'con_date', 'comp_date')
    
# Stage durations in years, computed row by row (NaN where a required date is missing).
df['permit_time'] = df.apply(permit_time, axis='columns')
df['bp_time'] = df.apply(bp_time, axis='columns')
df['con_time'] = df.apply(con_time, axis='columns')

# Per-unit variables: each stage duration divided by the project's unit count.
df['ptime_per_unit'] = df['permit_time'].div(df['units'])
df['bptime_per_unit'] = df['bp_time'].div(df['units'])
df['contime_per_unit'] = df['con_time'].div(df['units'])

## Export Polygon Shapefile

In [10]:
# Spatial join used for the polygon export: match development points to neighborhood boundaries.
# NOTE(review): how='right' keeps the rows of the right-hand frame (neighborhoods); the
# dissolved output further down shows POLYGON geometries. The previous comment here
# described how='left' (preserve all developments), which is not what the code does —
# confirm which behavior is intended.
nbdf = gpd.sjoin(devs, neighborhoods, how = 'right', op='within')

In [11]:
# Add the stage-duration columns (in years) to the neighborhood-joined frame.
# permit_time, bp_time and con_time are the row-wise helpers defined earlier in this
# notebook; they are reused here rather than declared a second time. Each returns NaN
# when either of the dates it needs is missing, so no rows are dropped.
nbdf['permit_time']=nbdf.apply(permit_time, axis=1)
nbdf['bp_time']=nbdf.apply(bp_time, axis=1)
nbdf['con_time']=nbdf.apply(con_time, axis=1)

In [12]:
# Per-unit measures: total project time and permit time, each divided by the unit count.
nbdf['years_per_unit'] = nbdf['project_time_years'].div(nbdf['units'])
nbdf['ptime_per_unit'] = nbdf['permit_time'].div(nbdf['units'])

In [13]:
# Keep only the neighborhood name, geometry and the two per-unit measures, then dissolve
# by neighborhood, taking the median of each measure (not a sum of units).
# Note: `nbdf` is overwritten with the 4-column subset, so the cell above that reads
# 'project_time_years', 'units' and 'permit_time' cannot be re-run after this one
# without first re-creating nbdf.
nbdf = nbdf[['nhood', 'geometry', 'years_per_unit', 'ptime_per_unit']]
nbdf_med = nbdf.dissolve(by=['nhood'], aggfunc='median')
nbdf_med.head()

Unnamed: 0_level_0,geometry,years_per_unit,ptime_per_unit
nhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bayview Hunters Point,POLYGON ((-122.3815777424142 37.75307043091241...,0.868493,0.59726
Bernal Heights,"POLYGON ((-122.403612999828 37.74933700015653,...",2.90411,1.716438
Castro/Upper Market,POLYGON ((-122.4265550005568 37.76948499984702...,2.928767,0.912329
Chinatown,POLYGON ((-122.4062259995664 37.79755900029376...,3.443836,0.364384
Excelsior,POLYGON ((-122.4239820002333 37.73155199975518...,1.615068,0.632877


In [14]:
# Directory (relative to the notebook) that receives the exported shapefile.
output2 = "../../Output/"
# Copy the neighborhood name from the index into a regular column, presumably so that
# it is written out as an attribute of each polygon.
nbdf_med['nbhood'] = nbdf_med.index
nbdf_med.to_file(output2 + "nbds.shp", driver='ESRI Shapefile')

In [15]:
# Preview the exported frame. The 'nbhood' column was already copied from the index in
# the export cell above, so it is not assigned again here.
nbdf_med.head()

Unnamed: 0_level_0,geometry,years_per_unit,ptime_per_unit,nbhood
nhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bayview Hunters Point,POLYGON ((-122.3815777424142 37.75307043091241...,0.868493,0.59726,Bayview Hunters Point
Bernal Heights,"POLYGON ((-122.403612999828 37.74933700015653,...",2.90411,1.716438,Bernal Heights
Castro/Upper Market,POLYGON ((-122.4265550005568 37.76948499984702...,2.928767,0.912329,Castro/Upper Market
Chinatown,POLYGON ((-122.4062259995664 37.79755900029376...,3.443836,0.364384,Chinatown
Excelsior,POLYGON ((-122.4239820002333 37.73155199975518...,1.615068,0.632877,Excelsior
