In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx

In [2]:
sf_sites = pd.read_csv('./data/clean_data/sf_sites.csv')
sf_new_construction_all = pd.read_csv('./data/clean_data/sf_all_construction.csv')
sf_new_construction_post_2015 = pd.read_csv('./data/clean_data/sf_construction_post_2015.csv')

In [3]:
date_cols = [c for c in sf_new_construction_all.columns if 'Date' in c]
sf_new_construction_all[date_cols] = sf_new_construction_all[date_cols].apply(pd.to_datetime)
sf_new_construction_post_2015[date_cols] = sf_new_construction_post_2015[date_cols].apply(pd.to_datetime)

## Make sure the `relcapcty` column is what we want:

In [4]:
test_df = pd.DataFrame({
    'maybe_density': sf_sites['allowden'] * sf_sites['locacres'],
    'capacity': sf_sites['relcapcty'].astype(float)
}).dropna()
test_df['maybe_density_int'] = test_df['maybe_density'].round(1)
test_df['capacity_int'] = test_df['capacity'].round(1)

In [5]:
(test_df['maybe_density_int'] == test_df['capacity_int']).mean()

1.0

## Now let's do some stats:

In [6]:
sf_sites['apn'].isin(sf_new_construction_all['apn']).mean()

0.08379343942838584

In [7]:
sf_sites['apn'].isin(sf_new_construction_post_2015['apn']).mean()

0.044982137057486195

In [8]:
sf_new_construction_all['apn'].isin(sf_sites['apn']).mean()

0.09545454545454546

In [9]:
sf_new_construction_post_2015['apn'].isin(sf_sites['apn']).mean()

0.10060734090308952

## Number of units in HE:

In [10]:
sf_sites['relcapcty'].sum()

47209.0

## Overall number of units completed:

In [11]:
n_filed_ever = sf_new_construction_all[sf_new_construction_all['Filed Date'].notnull()]['Proposed Units'].sum()
n_filed_ever

172275.0

In [12]:
n_issued_ever = sf_new_construction_all[sf_new_construction_all['Issued Date'].notnull()]['Proposed Units'].sum()
n_issued_ever

123073.0

In [13]:
n_started_ever = sf_new_construction_all[
    sf_new_construction_all['First Construction Document Date'].notnull()
]['Proposed Units'].sum()
n_started_ever

31116.0

In [14]:
n_completed_ever = sf_new_construction_all[
    sf_new_construction_all['Completed Date'].notnull()
]['Proposed Units'].sum()
n_completed_ever

66284.0

In [15]:
n_filed_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['Filed Date'].notnull()
]['Proposed Units'].sum()
n_filed_post_15

95144.0

In [16]:
n_issued_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['Issued Date'].notnull()
]['Proposed Units'].sum()
n_issued_post_15

95144.0

In [17]:
n_started_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['First Construction Document Date'].notnull()
]['Proposed Units'].sum()
n_started_post_15

25660.0

In [18]:
n_completed_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['Completed Date'].notnull()
]['Proposed Units'].sum()
n_completed_post_15

44172.0

In [19]:
# started before 2015, completed after 2015
sf_new_construction_all[
    sf_new_construction_all['Completed Date'] >= '2015-01-01'
]['Proposed Units'].sum()

57865.0

In [20]:
n_completed_ever / n_issued_ever

0.5385746670675128

In [21]:
n_completed_ever / 5

13256.8

In [22]:
sf_new_construction_all[
    (sf_new_construction_all['Completed Date'] >= '2015-01-01')
    & (sf_new_construction_all['Issued Date'] < '2015-01-01')
]

Unnamed: 0,Permit Number,Permit Type,Permit Type Definition,Permit Creation Date,Block,Lot,Street Number,Street Number Suffix,Street Name,Street Suffix,...,Current Police Districts,Current Supervisor Districts,Analysis Neighborhoods,DELETE - Zip Codes,DELETE - Fire Prevention Districts,DELETE - Supervisor Districts,DELETE - Current Police Districts,DELETE - Supervisorial_Districts_Waterline_data_from_7pkg_wer3,apn,new_units
38,201406138386,1,new construction,2014-06-13,0281,003,832,,Sutter,St,...,6.0,3.0,21.0,28858.0,5.0,10.0,1.0,2.0,0281/003,20.0
40,201308204717,1,new construction,2013-08-20,4624,031,142,,West Point,Rd,...,2.0,9.0,1.0,58.0,10.0,8.0,3.0,6.0,4624/031,50.0
71,201307051190,1,new construction,2013-07-05,0811,031,101,,Polk,St,...,4.0,10.0,36.0,28852.0,7.0,9.0,6.0,3.0,0811/031,162.0
72,201404042522,1,new construction,2014-04-04,8711,031,588,,Mission Bay Blvd North,Bl,...,1.0,10.0,4.0,310.0,14.0,9.0,3.0,3.0,8711/031,200.0
85,201301319232,1,new construction,2013-01-31,3509,043,104,,09th,St,...,1.0,10.0,34.0,28853.0,8.0,9.0,2.0,3.0,3509/043,160.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7148,201407080672,8,otc alterations permit,2014-07-08,4146,019,2830,,22nd,St,...,3.0,2.0,20.0,28859.0,2.0,7.0,7.0,7.0,4146/019,1.0
7150,201407070539,8,otc alterations permit,2014-07-07,6349,021,717,V,Paris,St,...,9.0,1.0,7.0,28861.0,9.0,6.0,4.0,8.0,6349/021,1.0
7151,201407030495,8,otc alterations permit,2014-07-03,4591C,045,451,,Hudson,Av,...,2.0,9.0,1.0,58.0,10.0,8.0,3.0,6.0,4591C/045,9.0
7152,201407030493,8,otc alterations permit,2014-07-03,4591C,068,421,,Hudson,Av,...,2.0,9.0,1.0,58.0,10.0,8.0,3.0,6.0,4591C/068,9.0


# By capacity

In [23]:
sf_sites.columns

Index(['objectid', 'apn', 'locapn', 'genplan', 'zoning', 'gacres', 'locacres',
       'allowden', 'allowhigh', 'relcapcty', 'sitetype', 'pdaparcel',
       'existuse', 'Shape__Are', 'Shape__Len', 'geometry'],
      dtype='object')

In [24]:
sf_sites.shape

(6158, 16)

In [25]:
sf_sites['relcapcty'].value_counts()[:10]

1.0     1103
2.0      959
0.0      904
3.0      687
4.0      463
5.0      291
6.0      227
7.0      194
8.0      139
9.0      117
10.0     102
Name: relcapcty, dtype: int64

In [26]:
(sf_sites['relcapcty'] > 10).sum()

972

In [27]:
for n in range(1, 11):
    print(n)
    print(
        sf_sites[
            sf_sites['relcapcty'] == n
        ]['apn'].isin(sf_new_construction_all['apn']).mean()
    )
    print()

1
0.11332728921124206

2
0.08446298227320125

3
0.056768558951965066

4
0.10583153347732181

5
0.09278350515463918

6
0.08370044052863436

7
0.10824742268041238

8
0.09352517985611511

9
0.10256410256410256

10
0.0392156862745098



In [28]:
sf_sites[
    sf_sites['relcapcty'] > 10
]['apn'].isin(sf_new_construction_all['apn']).mean()

0.11625514403292181

# Merging the datasets

In [29]:
merged_df = sf_sites.merge(
    sf_new_construction_all,
    on='apn',
    how='left',
    indicator=True
)

In [37]:
merged_df['zoning_use'] = merged_df['zoning'].str.slice(0, 1)

In [38]:
merged_df['zoning_use'].value_counts()

N    2332
R    2238
P     855
M     370
C     282
U     212
S      64
T       4
Name: zoning_use, dtype: int64

In [57]:
merged_df['increase'] = merged_df['new_units'] > merged_df['relcapcty']

In [64]:
merged_df['sq_feet'] = merged_df['locacres'] * 43560
merged_df['lot_equivalents'] = merged_df['sq_feet'] / 3000

In [58]:
merged_df.groupby('zoning_use').apply(lambda g: (g['_merge'] == 'both').mean())

zoning_use
C    0.159574
M    0.110811
N    0.090909
P    0.008187
R    0.155496
S    0.015625
T    0.500000
U    0.278302
dtype: float64

In [59]:
merged_df.groupby('zoning_use').apply(lambda g: g['increase'].sum() / (g['_merge'] == 'both').sum())

zoning_use
C    0.822222
M    0.878049
N    0.433962
P    0.714286
R    0.247126
S    1.000000
T    0.000000
U    0.864407
dtype: float64

In [63]:
merged_df.groupby('zoning_use').apply(lambda g: (g['_merge'] == 'both').sum())

zoning_use
C     45
M     41
N    212
P      7
R    348
S      1
T      2
U     59
dtype: int64

In [50]:
merged_df.columns

Index(['objectid', 'apn', 'locapn', 'genplan', 'zoning', 'gacres', 'locacres',
       'allowden', 'allowhigh', 'relcapcty', 'sitetype', 'pdaparcel',
       'existuse', 'Shape__Are', 'Shape__Len', 'geometry', 'Permit Number',
       'Permit Type', 'Permit Type Definition', 'Permit Creation Date',
       'Block', 'Lot', 'Street Number', 'Street Number Suffix', 'Street Name',
       'Street Suffix', 'Unit', 'Unit Suffix', 'Description', 'Current Status',
       'Current Status Date', 'Filed Date', 'Issued Date', 'Completed Date',
       'First Construction Document Date', 'Structural Notification',
       'Number of Existing Stories', 'Number of Proposed Stories',
       'Voluntary Soft-Story Retrofit', 'Fire Only Permit',
       'Permit Expiration Date', 'Estimated Cost', 'Revised Cost',
       'Existing Use', 'Existing Units', 'Proposed Use', 'Proposed Units',
       'Plansets', 'TIDF Compliance', 'Existing Construction Type',
       'Existing Construction Type Description', 'Proposed C

In [65]:
merged_df[
    (merged_df['zoning_use'] == 'C')
    & (merged_df['_merge'] == 'both')
][['zoning', 'locacres', 'sq_feet', 'lot_equivalents', 'Street Number', 'Street Name', 'genplan', 'relcapcty', 'new_units', 'Proposed Units']]

Unnamed: 0,zoning,locacres,sq_feet,lot_equivalents,Street Number,Street Name,genplan,relcapcty,new_units,Proposed Units
15,C-2,0.036468,1588.550996,0.529517,1327.0,Columbus,Northeast,0.0,1.0,1.0
18,C-2,0.075416,3285.10081,1.095034,400.0,Bay,Northeast,1.0,13.0,13.0
139,C-3-G,0.125115,5450.01392,1.816671,240.0,Van Ness,Downtown,3.0,109.0,109.0
141,C-3-G,0.142183,6193.474875,2.064492,101.0,Hayes,Market Octavia,25.0,431.0,431.0
142,C-3-G,0.073063,3182.635341,1.060878,131.0,Hayes,Market Octavia,12.0,431.0,431.0
143,C-3-G,0.098014,4269.486785,1.423162,125.0,Hayes,Market Octavia,17.0,431.0,431.0
232,C-3-O(SD),0.21472,9353.22055,3.11774,526.0,Mission,TB Combo,16.0,156.0,156.0
502,C-2,0.133433,5812.352731,1.937451,240.0,Pacific,Northeast,1.0,33.0,33.0
519,CRNC,0.035325,1538.775453,0.512925,823.0,Pacific,Northeast,5.0,1.0,1.0
520,CRNC,0.035325,1538.775453,0.512925,821.0,Pacific,Northeast,5.0,1.0,1.0


In [80]:
# from https://sf-planning.org/sites/default/files/FileCenter/Documents/5358-Residential%20Standards%20Summary%20Table.pdf
max_densities_sq_ft_per_unit = {
    'RH-1': 3000,
    'RH-1(D)': 3000,
    'RH-1(S)': 3000,
    'RH-2': 1500,
    'RH-3': 1000,
    'RM-1': 800,
    'RM-2': 600,
    'RM-3': 400,
    'RM-4': 200,
    'RC-3': 400,
    'RC-4': 200,
    'RTO': 600,
}

merged_df['max_density'] = merged_df['sq_feet'] / merged_df['zoning'].map(max_densities_sq_ft_per_unit)

In [98]:
interesting_columns = [
    'zoning', 'locacres', 'sq_feet', 'lot_equivalents', 
    'Street Number', 'Street Name', 'genplan', 'max_density', 'relcapcty', 'new_units', 'Proposed Units'
]

In [92]:
merged_df.loc[46]['Description']

'revision to pa# 2013/12/03/3256-s (site permit). exterior building facade revisions to document exterior building material changes & balcony locations.'

In [94]:
merged_df.loc[45]['Description']

'to erect 12 stories, 2 basement, 103 residential with retail and parking.'

In [93]:
merged_df[
    (merged_df['zoning_use'] == 'R')
    & (merged_df['_merge'] == 'both')
    & (merged_df['new_units'] > merged_df['relcapcty'])
    & (merged_df['Street Name'].str.contains('Pine'))
][interesting_columns]

Unnamed: 0,zoning,locacres,sq_feet,lot_equivalents,max_density,Street Number,Street Name,genplan,relcapcty,new_units,Proposed Units
45,RC-4,0.134077,5840.400346,1.9468,29.202002,1545.0,Pine,Northeast,29.0,103.0,103.0
46,RC-4,0.134077,5840.400346,1.9468,29.202002,1545.0,Pine,Northeast,29.0,100.0,100.0


In [99]:
merged_df[
    (merged_df['zoning_use'] == 'R')
    & (merged_df['_merge'] == 'both')
    & (merged_df['new_units'] > merged_df['relcapcty'])
][interesting_columns].sample(30)

Unnamed: 0,zoning,locacres,sq_feet,lot_equivalents,Street Number,Street Name,genplan,max_density,relcapcty,new_units,Proposed Units
523,RC-4,0.181818,7919.996178,2.639999,555.0,Golden Gate,Downtown,39.599981,39.0,55.0,55.0
1042,RED,0.044738,1948.802676,0.649601,5.0,Hallam,WSoMa,,4.0,6.0,6.0
442,RH-3,0.168733,7349.998597,2.45,2675.0,Folsom,Mission,7.349999,7.0,117.0,117.0
4762,RC-3,4.490346,195599.478719,65.199826,250.0,Executive Park,Executive Park,488.998697,67.0,265.0,265.0
4772,RC-3,3.046416,132701.902594,44.233968,150.0,Executive Park,Executive Park,331.754756,45.0,305.0,305.0
785,RH-3,0.068872,3000.061056,1.00002,204.0,Union,Northeast,3.000061,3.0,9.0,9.0
1300,RM-1,0.07107,3095.818148,1.031939,1050.0,Le Conte,"BVHP Area A,B",3.869773,2.0,3.0,4.0
3890,RH-1,0.032391,1410.957064,0.470319,21.0,Joy,Bernal Heights,0.470319,0.0,2.0,2.0
4759,RC-3,4.664773,203197.502948,67.732501,5.0,Thomas Mellon,Executive Park,507.993757,69.0,174.0,174.0
5981,RH-2,0.0707,3079.671059,1.026557,456.0,27th,Central,2.053114,1.0,2.0,2.0


In [85]:
merged_df[
    (merged_df['zoning_use'] == 'R')
    & (merged_df['_merge'] == 'both')
][interesting_columns].sample(30)

Unnamed: 0,zoning,locacres,sq_feet,lot_equivalents,max_density,Street Number,Street Name,genplan,relcapcty,new_units,Proposed Units
1919,RH-1,0.066106,2879.561908,0.959854,0.959854,313.0,Cumberland,Central,1.0,1.0,1.0
2279,RH-2,0.057392,2500.008682,0.833336,1.666672,715.0,Kirkham,Inner Sunset,2.0,2.0,2.0
3636,RH-1(D),0.104146,4536.579786,1.512193,1.512193,156.0,Kensington,Inner Sunset,1.0,1.0,1.0
4654,RH-1(S),0.057392,2500.005338,0.833335,0.833335,909.0,Innes,Other S Bayshore,1.0,2.0,2.0
4184,RH-1,0.06357,2769.108243,0.923036,0.923036,1014.0,Gilman,"BVHP Area A,B",1.0,1.0,1.0
2474,RM-3,0.090651,3948.776887,1.316259,9.871942,5024.0,Fulton,Richmond,7.0,1.0,3.0
5003,RH-1,0.039783,1732.956904,0.577652,0.577652,332.0,Bradford,Bernal Heights,1.0,1.0,1.0
3212,RM-1,0.06887,2999.996782,0.999999,3.749996,473.0,24th,Richmond,2.0,1.0,2.0
6352,RH-1,0.042242,1840.043498,0.613348,0.613348,95.0,Santa Cruz,"South Central, Other",1.0,2.0,2.0
4222,RH-1,0.056998,2482.814,0.827605,0.827605,20.0,Crane,Other S Bayshore,1.0,1.0,1.0


In [82]:
merged_df[
    (merged_df['zoning_use'] == 'R')
    & (merged_df['_merge'] == 'both')
].eval('relcapcty > max_density').mean()

0.3563218390804598

In [86]:
merged_df[
    (merged_df['zoning_use'] == 'R')
    & (merged_df['_merge'] == 'both')
].query('relcapcty > max_density')[interesting_columns]

Unnamed: 0,zoning,locacres,sq_feet,lot_equivalents,max_density,Street Number,Street Name,genplan,relcapcty,new_units,Proposed Units
98,RTO,0.066908,2914.500867,0.971500,4.857501,1785.0,15th,Mission,6.0,1.0,9.0
99,RTO,0.066908,2914.500867,0.971500,4.857501,1785.0,15th,Mission,6.0,9.0,9.0
261,RH-2,0.057475,2503.618907,0.834540,1.669079,939.0,Kansas,Showpl/Potrero,2.0,1.0,1.0
262,RH-2,0.057478,2503.752181,0.834584,1.669168,935.0,Kansas,Showpl/Potrero,2.0,2.0,2.0
263,RH-2,0.057478,2503.752181,0.834584,1.669168,937.0,Kansas,Showpl/Potrero,2.0,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...
6190,RH-2,0.065082,2834.984542,0.944995,1.889990,3287.0,Mission,Bernal Heights,4.0,2.0,2.0
6251,RH-1,0.916387,39917.805659,13.305935,13.305935,65.0,Ocean,"South Central, Other",66.0,193.0,193.0
6313,RH-1,0.057709,2513.824860,0.837942,0.837942,119.0,Vernon,"Ingleside, Other",1.0,1.0,1.0
6328,RH-1,0.057955,2524.518518,0.841506,0.841506,62.0,Vernon,"Ingleside, Other",1.0,1.0,1.0


In [30]:
merged_df['Lot Size Sq Ft'] = merged_df['locacres'] * 43560.

In [31]:
increase_df = merged_df[
    merged_df['Proposed Units'] > merged_df['relcapcty']
]

In [32]:
pd.set_option('max_rows', 100)

In [33]:
increase_df[
    increase_df['relcapcty'] == 0
]

Unnamed: 0,objectid,apn,locapn,genplan,zoning,gacres,locacres,allowden,allowhigh,relcapcty,...,Current Supervisor Districts,Analysis Neighborhoods,DELETE - Zip Codes,DELETE - Fire Prevention Districts,DELETE - Supervisor Districts,DELETE - Current Police Districts,DELETE - Supervisorial_Districts_Waterline_data_from_7pkg_wer3,new_units,_merge,Lot Size Sq Ft
15,66781,0024/020,0024/020,Northeast,C-2,0.036,0.036468,0.0,0,0.0,...,6.0,32.0,28858.0,5.0,1.0,1.0,11.0,1.0,both,1588.550996
783,70115,0106/002,0106/002,Northeast,C-2,0.061,0.061228,0.0,0,0.0,...,3.0,23.0,308.0,3.0,10.0,1.0,2.0,9.0,both,2667.093866
1597,70919,3530/048,3530/048,Mission,PDR-1-G,0.14,0.140383,0.0,0,0.0,...,2.0,20.0,28853.0,8.0,7.0,7.0,7.0,1.0,both,6115.074681
1760,71104,3552/012,3552/012,Mission,PDR-1-G,0.666,0.666168,0.0,0,0.0,...,2.0,20.0,28853.0,8.0,7.0,7.0,7.0,143.0,both,29018.271842
2540,71866,5463/001G,5463/001G,Other S Bayshore,RH-1,0.034,0.033752,0.0,0,0.0,...,9.0,1.0,58.0,10.0,8.0,3.0,6.0,1.0,both,1470.234583
3333,72643,1269/167,1269/167,Buena Vista,RH-2,0.033,0.03264,0.0,0,0.0,...,11.0,3.0,29492.0,15.0,11.0,8.0,10.0,1.0,both,1421.818049
3664,72967,2627/005,2627/005,Buena Vista,RH-2,0.033,0.033129,0.0,0,0.0,...,5.0,5.0,28862.0,15.0,5.0,8.0,1.0,2.0,both,1443.086027
3665,72967,2627/005,2627/005,Buena Vista,RH-2,0.033,0.033129,0.0,0,0.0,...,5.0,5.0,28862.0,15.0,5.0,8.0,1.0,2.0,both,1443.086027
3889,73183,5577/006,5577/006,Bernal Heights,RH-1,0.032,0.032391,0.0,0,0.0,...,2.0,2.0,28859.0,2.0,7.0,4.0,7.0,1.0,both,1410.957064
3890,73183,5577/006,5577/006,Bernal Heights,RH-1,0.032,0.032391,0.0,0,0.0,...,2.0,2.0,28859.0,2.0,7.0,4.0,7.0,2.0,both,1410.957064
