In [9]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx

In [77]:
sf_sites = pd.read_csv('./clean_data/sf_sites.csv')
sf_new_construction_all = pd.read_csv('./clean_data/sf_all_construction.csv')
sf_new_construction_post_2015 = pd.read_csv('./clean_data/sf_construction_post_2015.csv')

In [78]:
date_cols = [c for c in sf_new_construction_all.columns if 'Date' in c]
sf_new_construction_all[date_cols] = sf_new_construction_all[date_cols].apply(pd.to_datetime)
sf_new_construction_post_2015[date_cols] = sf_new_construction_post_2015[date_cols].apply(pd.to_datetime)

## Make sure the `relcapcty` column is what we want:

In [79]:
test_df = pd.DataFrame({
    'maybe_density': sf_sites['allowden'] * sf_sites['locacres'],
    'capacity': sf_sites['relcapcty'].astype(float)
}).dropna()
test_df['maybe_density_int'] = test_df['maybe_density'].round(1)
test_df['capacity_int'] = test_df['capacity'].round(1)

In [80]:
(test_df['maybe_density_int'] == test_df['capacity_int']).mean()

1.0

## Now let's do some stats:

In [81]:
sf_sites['apn'].isin(sf_new_construction_all['apn']).mean()

0.08379343942838584

In [82]:
sf_sites['apn'].isin(sf_new_construction_post_2015['apn']).mean()

0.044982137057486195

In [83]:
sf_new_construction_all['apn'].isin(sf_sites['apn']).mean()

0.09545454545454546

In [84]:
sf_new_construction_post_2015['apn'].isin(sf_sites['apn']).mean()

0.10060734090308952

## Number of units in HE:

In [85]:
sf_sites['relcapcty'].sum()

47209.0

## Overall number of units completed:

In [86]:
n_filed_ever = sf_new_construction_all[sf_new_construction_all['Filed Date'].notnull()]['Proposed Units'].sum()
n_filed_ever

172275.0

In [87]:
n_issued_ever = sf_new_construction_all[sf_new_construction_all['Issued Date'].notnull()]['Proposed Units'].sum()
n_issued_ever

123073.0

In [88]:
n_started_ever = sf_new_construction_all[
    sf_new_construction_all['First Construction Document Date'].notnull()
]['Proposed Units'].sum()
n_started_ever

31116.0

In [89]:
n_completed_ever = sf_new_construction_all[
    sf_new_construction_all['Completed Date'].notnull()
]['Proposed Units'].sum()
n_completed_ever

66284.0

In [90]:
n_filed_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['Filed Date'].notnull()
]['Proposed Units'].sum()
n_filed_post_15

95144.0

In [91]:
n_issued_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['Issued Date'].notnull()
]['Proposed Units'].sum()
n_issued_post_15

95144.0

In [92]:
n_started_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['First Construction Document Date'].notnull()
]['Proposed Units'].sum()
n_started_post_15

25660.0

In [93]:
n_completed_post_15 = sf_new_construction_post_2015[
    sf_new_construction_post_2015['Completed Date'].notnull()
]['Proposed Units'].sum()
n_completed_post_15

44172.0

In [94]:
# started before 2015, completed after 2015
sf_new_construction_all[
    sf_new_construction_all['Completed Date'] >= '2015-01-01'
]['Proposed Units'].sum()

57865.0

In [95]:
n_completed_ever / n_issued_ever

0.5385746670675128

In [96]:
n_completed_ever / 5

13256.8

In [97]:
sf_new_construction_all[
    (sf_new_construction_all['Completed Date'] >= '2015-01-01')
    & (sf_new_construction_all['Issued Date'] < '2015-01-01')
]

Unnamed: 0,Permit Number,Permit Type,Permit Type Definition,Permit Creation Date,Block,Lot,Street Number,Street Number Suffix,Street Name,Street Suffix,...,Current Police Districts,Current Supervisor Districts,Analysis Neighborhoods,DELETE - Zip Codes,DELETE - Fire Prevention Districts,DELETE - Supervisor Districts,DELETE - Current Police Districts,DELETE - Supervisorial_Districts_Waterline_data_from_7pkg_wer3,apn,new_units
38,201406138386,1,new construction,2014-06-13,0281,003,832,,Sutter,St,...,6.0,3.0,21.0,28858.0,5.0,10.0,1.0,2.0,0281/003,20.0
40,201308204717,1,new construction,2013-08-20,4624,031,142,,West Point,Rd,...,2.0,9.0,1.0,58.0,10.0,8.0,3.0,6.0,4624/031,50.0
71,201307051190,1,new construction,2013-07-05,0811,031,101,,Polk,St,...,4.0,10.0,36.0,28852.0,7.0,9.0,6.0,3.0,0811/031,162.0
72,201404042522,1,new construction,2014-04-04,8711,031,588,,Mission Bay Blvd North,Bl,...,1.0,10.0,4.0,310.0,14.0,9.0,3.0,3.0,8711/031,200.0
85,201301319232,1,new construction,2013-01-31,3509,043,104,,09th,St,...,1.0,10.0,34.0,28853.0,8.0,9.0,2.0,3.0,3509/043,160.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7148,201407080672,8,otc alterations permit,2014-07-08,4146,019,2830,,22nd,St,...,3.0,2.0,20.0,28859.0,2.0,7.0,7.0,7.0,4146/019,1.0
7150,201407070539,8,otc alterations permit,2014-07-07,6349,021,717,V,Paris,St,...,9.0,1.0,7.0,28861.0,9.0,6.0,4.0,8.0,6349/021,1.0
7151,201407030495,8,otc alterations permit,2014-07-03,4591C,045,451,,Hudson,Av,...,2.0,9.0,1.0,58.0,10.0,8.0,3.0,6.0,4591C/045,9.0
7152,201407030493,8,otc alterations permit,2014-07-03,4591C,068,421,,Hudson,Av,...,2.0,9.0,1.0,58.0,10.0,8.0,3.0,6.0,4591C/068,9.0


# By capacity

In [98]:
sf_sites.columns

Index(['objectid', 'apn', 'locapn', 'genplan', 'zoning', 'gacres', 'locacres',
       'allowden', 'allowhigh', 'relcapcty', 'sitetype', 'pdaparcel',
       'existuse', 'Shape__Are', 'Shape__Len', 'geometry'],
      dtype='object')

In [99]:
sf_sites.shape

(6158, 16)

In [100]:
sf_sites['relcapcty'].value_counts()[:10]

1.0     1103
2.0      959
0.0      904
3.0      687
4.0      463
5.0      291
6.0      227
7.0      194
8.0      139
9.0      117
10.0     102
Name: relcapcty, dtype: int64

In [101]:
(sf_sites['relcapcty'] > 10).sum()

972

In [102]:
for n in range(1, 11):
    print(n)
    print(
        sf_sites[
            sf_sites['relcapcty'] == n
        ]['apn'].isin(sf_new_construction_all['apn']).mean()
    )
    print()

1
0.11332728921124206

2
0.08446298227320125

3
0.056768558951965066

4
0.10583153347732181

5
0.09278350515463918

6
0.08370044052863436

7
0.10824742268041238

8
0.09352517985611511

9
0.10256410256410256

10
0.0392156862745098



In [103]:
sf_sites[
    sf_sites['relcapcty'] > 10
]['apn'].isin(sf_new_construction_all['apn']).mean()

0.11625514403292181

# Merging the datasets

In [104]:
merged_df = sf_sites.merge(
    sf_new_construction_all,
    on='apn',
    how='left',
    indicator=True
)

In [105]:
merged_df['Lot Size Sq Ft'] = merged_df['locacres'] * 43560.

In [106]:
increase_df = merged_df[
    merged_df['Proposed Units'] > merged_df['relcapcty']
]

In [107]:
pd.set_option('max_rows', 100)

In [108]:
increase_df[
    increase_df['relcapcty'] == 0
]

Unnamed: 0,objectid,apn,locapn,genplan,zoning,gacres,locacres,allowden,allowhigh,relcapcty,...,Current Supervisor Districts,Analysis Neighborhoods,DELETE - Zip Codes,DELETE - Fire Prevention Districts,DELETE - Supervisor Districts,DELETE - Current Police Districts,DELETE - Supervisorial_Districts_Waterline_data_from_7pkg_wer3,new_units,_merge,Lot Size Sq Ft
15,66781,0024/020,0024/020,Northeast,C-2,0.036,0.036468,0.0,0,0.0,...,6.0,32.0,28858.0,5.0,1.0,1.0,11.0,1.0,both,1588.550996
783,70115,0106/002,0106/002,Northeast,C-2,0.061,0.061228,0.0,0,0.0,...,3.0,23.0,308.0,3.0,10.0,1.0,2.0,9.0,both,2667.093866
1597,70919,3530/048,3530/048,Mission,PDR-1-G,0.14,0.140383,0.0,0,0.0,...,2.0,20.0,28853.0,8.0,7.0,7.0,7.0,1.0,both,6115.074681
1760,71104,3552/012,3552/012,Mission,PDR-1-G,0.666,0.666168,0.0,0,0.0,...,2.0,20.0,28853.0,8.0,7.0,7.0,7.0,143.0,both,29018.271842
2540,71866,5463/001G,5463/001G,Other S Bayshore,RH-1,0.034,0.033752,0.0,0,0.0,...,9.0,1.0,58.0,10.0,8.0,3.0,6.0,1.0,both,1470.234583
3333,72643,1269/167,1269/167,Buena Vista,RH-2,0.033,0.03264,0.0,0,0.0,...,11.0,3.0,29492.0,15.0,11.0,8.0,10.0,1.0,both,1421.818049
3664,72967,2627/005,2627/005,Buena Vista,RH-2,0.033,0.033129,0.0,0,0.0,...,5.0,5.0,28862.0,15.0,5.0,8.0,1.0,2.0,both,1443.086027
3665,72967,2627/005,2627/005,Buena Vista,RH-2,0.033,0.033129,0.0,0,0.0,...,5.0,5.0,28862.0,15.0,5.0,8.0,1.0,2.0,both,1443.086027
3889,73183,5577/006,5577/006,Bernal Heights,RH-1,0.032,0.032391,0.0,0,0.0,...,2.0,2.0,28859.0,2.0,7.0,4.0,7.0,1.0,both,1410.957064
3890,73183,5577/006,5577/006,Bernal Heights,RH-1,0.032,0.032391,0.0,0,0.0,...,2.0,2.0,28859.0,2.0,7.0,4.0,7.0,2.0,both,1410.957064


In [109]:
increase_df['relcapcty'].value_counts().sort_index()

0.0      15
1.0      61
2.0      25
3.0      30
4.0      22
5.0      14
6.0      13
7.0      17
8.0       4
9.0      11
10.0      2
11.0      7
12.0      5
13.0      3
14.0      5
15.0      3
16.0      2
17.0      2
18.0      3
19.0      2
20.0      4
21.0      4
22.0      3
23.0      4
24.0     15
25.0      1
26.0      4
27.0      4
29.0      2
33.0      1
34.0      1
35.0      1
36.0      4
38.0      2
39.0      2
40.0      1
42.0      1
43.0      2
45.0      3
46.0      2
47.0      1
53.0      1
56.0      2
57.0      2
58.0      1
59.0      2
65.0      3
66.0      1
67.0      4
69.0      6
72.0      1
73.0      2
75.0      3
77.0      1
78.0      1
83.0      3
88.0      3
97.0      1
108.0     3
118.0     1
122.0     3
141.0     1
154.0     1
Name: relcapcty, dtype: int64