In [3]:
import numpy as np
import pandas as pd
import geopandas as gpd

## Load and clean sites data

In [4]:
# Read the Bay Area Housing Opportunity Sites Inventory shapefile.
# The path is relative, so the notebook must be run from the directory that contains `data/`.
sites_df = gpd.read_file(
    './data/raw_data/housing_sites/xn--Bay_Area_Housing_Opportunity_Sites_Inventory__20072023_-it38a.shp'
)

In [5]:
# Restrict the inventory to San Jose's RHNA5 rows; the copy keeps later edits off `sites_df`.
sj_sites = (
    sites_df
    .query('jurisdict == "San Jose" and rhnacyc == "RHNA5"')
    .copy()
)
# Replace missing values with NaN before the numeric casts below.
sj_sites.fillna(value=np.nan, inplace=True)
# Cast allowed density and realistic capacity to float in a single step.
sj_sites = sj_sites.astype({'allowden': float, 'relcapcty': float})

In [6]:
# A column counts as constant when every row equals the first row's value.
# NaN never compares equal to NaN, so all-missing columns are not flagged here.
is_constant = sj_sites.eq(sj_sites.iloc[0]).all()
constant_cols = is_constant.index[is_constant].values
constant_cols

array(['rhnacyc', 'rhnayrs', 'county', 'jurisdict', 'currunits',
       'allowlow', 'num_vl', 'num_l', 'num_m', 'num_am'], dtype=object)

In [7]:
# Constant columns carry no information within this subset, so remove them.
sj_sites.drop(columns=constant_cols, inplace=True)

In [8]:
# Remove columns that contain no data at all.
sj_sites.dropna(axis='columns', how='all', inplace=True)

In [9]:
# (rows, columns) of the cleaned San Jose RHNA5 sites table.
sj_sites.shape

(599, 18)

## Load building permits data

In [10]:
# This data was pulled on 2021-03-08
# San Jose building permits come as two shapefiles: expired permits and active permits.
expired_permits = gpd.read_file('data/raw_data/san_jose/sj_expired_building_permits.shp')
active_permits = gpd.read_file('data/raw_data/san_jose/sj_active_building_permits.shp')

In [11]:
# Drop columns that are entirely missing from each permit table (in place).
expired_permits.dropna(how='all', axis='columns', inplace=True)
active_permits.dropna(how='all', axis='columns', inplace=True)

In [12]:
# Parse the three date columns of the expired permits into datetimes.
expired_permits = expired_permits.assign(
    **{
        date_col: pd.to_datetime(expired_permits[date_col])
        for date_col in ('ISSUEDATE', 'ISSUEDATEU', 'LASTUPDATE')
    }
)

In [13]:
# Number of expired permits per 10-year bucket of ISSUEDATE.
expired_permits.groupby(pd.Grouper(key='ISSUEDATE', freq='10Y')).size()

ISSUEDATE
1938-12-31        1
1948-12-31        1
1958-12-31        0
1968-12-31        1
1978-12-31       11
1988-12-31       28
1998-12-31       47
2008-12-31    12541
2018-12-31    33210
2028-12-31       27
Freq: 10A-DEC, dtype: int64

In [14]:
# Parse the three date columns of the active permits into datetimes.
active_permits = active_permits.assign(
    **{
        date_col: pd.to_datetime(active_permits[date_col])
        for date_col in ('ISSUEDATE', 'ISSUEDATEU', 'LASTUPDATE')
    }
)

In [15]:
# Number of active permits per 10-year bucket of ISSUEDATE.
active_permits.groupby(pd.Grouper(key='ISSUEDATE', freq='10Y')).size()

ISSUEDATE
1997-12-31        2
2007-12-31     1421
2017-12-31     2901
2027-12-31    17625
Freq: 10A-DEC, dtype: int64

In [16]:
# Inspect one record (the second row) to see which fields a permit carries.
expired_permits.iloc[1]

OBJECTID                                              2.0
APPLICANT                                            NONE
CONTRACTOR                                           None
FACILITYID                                              2
INTID                                                 2.0
ADDRESS        1490  REDMOND AV  , SAN JOSE CA 95120-4858
APN                                              57528005
WORKDESC                            Additions/Alterations
SUBDESC                                     Single-Family
PERMITAPPR                                           None
ISSUEDATE                             1994-03-16 16:00:00
ISSUEDATEU                            1994-03-17 00:00:00
FINALDATE                                            None
FINALDATEU                                           None
DWELLINGUN                                            0.0
PERMITVALU                                            0.0
SQUAREFOOT                                            0.0
FOLDERNUM     

In [17]:
# Keep only the expired permits whose work description is new construction.
expired_new_construction = expired_permits.loc[
    expired_permits['WORKDESC'].eq('New Construction')
]

In [18]:
# Breakdown of expired new-construction permits by sub-description.
expired_new_construction['SUBDESC'].value_counts()

Single-Family                   207
Apt/Condo/Townhouse             184
2nd Unit Added                   75
Retail                           42
Office                           19
Mixed Use                        14
Townhouse                        14
Condo                            14
Undefined                        12
Duplex                           10
Hotel/Motel                      10
School/Daycare                   10
Warehouse/Storage                 7
Open Public Parking Garage        6
Restaurant                        6
Manufactured Home                 5
Industrial Plant                  5
Tilt Up Warehouse                 3
Recreation Building               2
Assembly                          2
Medical/Dental Clinic             2
Church                            2
Service Station                   2
Multiple Use                      2
Manufacturing                     2
Closed Public Parking Garage      1
Apartment                         1
Bank                        

In [19]:
# Distribution of dwelling-unit counts on expired new-construction permits, ordered by unit count.
expired_new_construction['DWELLINGUN'].value_counts().sort_index()

0.0      246
1.0      261
2.0        8
3.0        5
4.0       14
5.0       14
6.0       22
7.0       30
8.0       18
9.0       11
10.0       4
11.0       1
12.0       5
13.0       1
14.0       1
17.0       1
22.0       5
40.0       1
45.0       1
48.0       1
80.0       1
86.0       1
109.0      1
113.0      1
117.0      1
148.0      1
165.0      1
205.0      1
243.0      1
271.0      1
330.0      1
357.0      1
403.0      1
Name: DWELLINGUN, dtype: int64

In [20]:
# Look up the permit with the largest unit count seen above (403 units).
expired_new_construction.loc[
    expired_new_construction['DWELLINGUN'].eq(403)
]

Unnamed: 0,OBJECTID,APPLICANT,CONTRACTOR,FACILITYID,INTID,ADDRESS,APN,WORKDESC,SUBDESC,PERMITAPPR,...,FINALDATE,FINALDATEU,DWELLINGUN,PERMITVALU,SQUAREFOOT,FOLDERNUM,LASTUPDATE,LASTEDITOR,ENTERPRISE,geometry
34475,34507.0,HANOVER RS CONSTRUCTION LLC,HANOVER RS CONSTRUCTION LLC,34503,34503.0,"415 E TAYLOR ST , SAN JOSE CA 95112-3136",24909001,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,,403.0,0.0,390695.0,2016-132495-MF,2021-01-09 02:28:09,FME,PLN-PBEX-0000034503,POINT (6157176.189 1954347.642)


In [21]:
# Keep only the active permits whose work description is new construction.
active_new_construction = active_permits.loc[
    active_permits['WORKDESC'].eq('New Construction')
]

In [22]:
# Breakdown of active new-construction permits by sub-description.
active_new_construction['SUBDESC'].value_counts()

2nd Unit Added                   541
Single-Family                    457
Apt/Condo/Townhouse              225
Retail                            43
Office                            38
Mixed Use                         30
Warehouse/Storage                 19
Hotel/Motel                       19
Condo                             12
Closed Public Parking Garage       9
Recreation Building                8
Apartment                          7
Restaurant                         5
Duplex                             5
Townhouse                          3
Service Station                    3
Open Public Parking Garage         3
Medical/Dental Clinic              3
Data Center                        2
Undefined                          2
High Rise                          2
School/Daycare                     2
Assembly                           2
Industrial Plant                   2
Address Assignment                 1
Health Club                        1
Commercial/Industrial              1
C

In [23]:
# Show up to 70 rows so the long value_counts() tables are not truncated.
# The fully qualified key is required: the bare pattern 'max_rows' matches more than one
# option in recent pandas releases (e.g. 'styler.render.max_rows') and raises OptionError.
pd.set_option('display.max_rows', 70)

In [25]:
# Fraction of expired new-construction permits whose APN also appears among the active ones.
expired_new_construction['APN'].isin(active_new_construction['APN']).mean()

0.1975867269984917

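Okay, so there is some duplication: about 20% of the expired new-construction permits share an APN with an active one. When a parcel has both, we keep the active permit over the expired one.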

In [26]:
# Stack expired and active permits, tagging each row with its source.
# Active rows are appended last so that `keep='last'` prefers them when an APN repeats.
combined_df = pd.concat(
    [
        expired_new_construction.assign(state='expired'),
        active_new_construction.assign(state='active'),
    ],
    ignore_index=True,
)
# NOTE(review): this keeps a single row per APN, so several permits on the same parcel
# (even within one source) collapse to the last one as well — confirm that is intended.
combined_df = combined_df.drop_duplicates(subset='APN', keep='last')

In [146]:
# Sub-description counts after de-duplicating to one permit per APN.
combined_df['SUBDESC'].value_counts()

2nd Unit Added                   607
Single-Family                    131
Apt/Condo/Townhouse               79
Retail                            47
Office                            28
Mixed Use                         27
Warehouse/Storage                 18
Hotel/Motel                       15
Duplex                             9
Condo                              9
Townhouse                          7
School/Daycare                     7
Recreation Building                6
Closed Public Parking Garage       6
Industrial Plant                   6
Apartment                          6
Medical/Dental Clinic              5
Restaurant                         5
Undefined                          5
Service Station                    4
Open Public Parking Garage         4
Assembly                           3
Data Center                        2
Manufactured Home                  2
Church                             2
Commercial/Industrial              1
Antenna/Cell Site                  1
A

In [149]:
# Sub-description counts restricted to permits that add at least one dwelling unit.
combined_df.loc[combined_df['DWELLINGUN'] > 0, 'SUBDESC'].value_counts()

2nd Unit Added                  607
Single-Family                   119
Apt/Condo/Townhouse              56
Duplex                            8
Townhouse                         6
Condo                             6
Apartment                         5
Manufactured Home                 2
Hotel/Motel                       2
Industrial Plant                  1
Office                            1
Retail                            1
Closed Public Parking Garage      1
Tilt Up Warehouse                 1
Name: SUBDESC, dtype: int64

In [160]:
# New-construction permits issued since 2015 that add dwelling units.
# Every mask is built from `combined_df`: referring to `recent_new_construction_df` inside
# its own definition raises NameError on a fresh kernel (Restart & Run All).
recent_new_construction_df = combined_df[
    (combined_df['WORKDESC'] == 'New Construction')
    & (combined_df['DWELLINGUN'] > 0)
    & (combined_df['ISSUEDATE'] >= '2015-01-01')
    & ~combined_df['SUBDESC'].isin(['2nd Unit Added', 'Single-Family'])  # let's ignore ADUs and SFH rebuilds for now
]

In [161]:
# Display the filtered permits.
recent_new_construction_df

Unnamed: 0,OBJECTID,APPLICANT,CONTRACTOR,FACILITYID,INTID,ADDRESS,APN,WORKDESC,SUBDESC,PERMITAPPR,...,FINALDATEU,DWELLINGUN,PERMITVALU,SQUAREFOOT,FOLDERNUM,LASTUPDATE,LASTEDITOR,ENTERPRISE,geometry,state
459,25993.0,KB HOME SO BAY INC,KB HOME SO BAY INC,25975,25975.0,"3086 MANUEL ST 1 , SAN JOSE CA 95125-0000",46701008,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,5.0,0.0,11466.0,2015-013646-MF,2021-01-09 02:27:03,FME,PLN-PBEX-0000025975,POINT (6167628.299 1928795.341),expired
477,27423.0,DIABLO VENTURES INC JOSIAH DENMARK,,27409,27409.0,"646 WILLOW ST , SAN JOSE CA 95125-5714",43403093,New Construction,Manufactured Home,"B-Complete, E-Complete, P-Complete",...,,1.0,0.0,2637.0,2015-015210-RS,2021-01-09 02:27:15,FME,PLN-PBEX-0000027409,POINT (6156854.105 1939466.964),expired
507,30442.0,TNHC HW S J LLC,,30430,30430.0,"966 ARROWLEAF PL , SAN JOSE CA 95131-2431",23703080,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,7.0,0.0,15030.0,2015-031887-MF,2021-01-09 02:27:38,FME,PLN-PBEX-0000030430,POINT (6155354.261 1964082.169),expired
524,32982.0,LIN CONNIE,,32976,32976.0,"886 ALMADEN AV , SAN JOSE CA 95110-3038",26436054,New Construction,Duplex,"B-Complete, E-Complete, M-Complete, P-Complete",...,,2.0,0.0,5448.0,2016-138942-RS,2021-01-09 02:27:56,FME,PLN-PBEX-0000032976,POINT (6159379.567 1942349.576),expired
539,34507.0,HANOVER RS CONSTRUCTION LLC,HANOVER RS CONSTRUCTION LLC,34503,34503.0,"415 E TAYLOR ST , SAN JOSE CA 95112-3136",24909001,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,403.0,0.0,390695.0,2016-132495-MF,2021-01-09 02:28:09,FME,PLN-PBEX-0000034503,POINT (6157176.189 1954347.642),expired
549,36191.0,PULTE HOME CORPORATION,PULTE HOME CORPORATION,36202,36202.0,"2002 MAHURON CL , SAN JOSE CA 95133-1702",25404080,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,13.0,0.0,26339.0,2016-142298-MF,2021-01-09 02:28:23,FME,PLN-PBEX-0000036202,POINT (6165316.583 1959376.410),expired
583,40804.0,PULTE HOME CORPORATION,PULTE HOME CORPORATION,40907,40907.0,"651 BLACKBURY LN A , SAN JOSE CA 95133-0000",25404079,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,12.0,0.0,26310.0,2017-025823-MF,2021-01-09 02:29:00,FME,PLN-PBEX-0000040907,POINT (6164680.350 1958707.292),expired
625,45548.0,PULTE HOME CORP,,39032,39032.0,"1901 NEVETS LN , SAN JOSE CA 95133-0000",25455006,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,10.0,0.0,23113.0,2017-031913-MF,2021-01-09 02:28:45,FME,PLN-PBEX-0000039032,POINT (6164688.226 1959336.416),expired
659,45894.0,LAZARI DESIGN EDICK LAZARI,,39978,39978.0,"68 N 33RD ST , SAN JOSE CA 95116-1213",48113051,New Construction,Duplex,"B-Complete, E-Complete, M-Complete, P-Complete",...,,1.0,0.0,3019.0,2017-028720-RS,2021-01-30 02:28:58,FME,PLN-PBEX-0000039978,POINT (6166322.600 1953816.604),expired
932,16954.0,"TJGA, LLC DBA AR CONSTRUCTION","TJGA, LLC DBA AR CONSTRUCTION",19939,19939.0,"1151 RANCHERO WY , SAN JOSE CA",29916016,New Construction,Apt/Condo/Townhouse,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,9.0,0.0,15690.0,2016-133177-MF,2021-01-09 02:22:53,FME,PLN-PBAC-0000019939,POINT (6133073.505 1936796.297),active


In [162]:
# Fraction of these permits whose APN is listed in the sites inventory (`locapn`).
recent_new_construction_df['APN'].isin(sj_sites['locapn']).mean()

0.11428571428571428

In [164]:
# Total dwelling units across the filtered permits.
recent_new_construction_df['DWELLINGUN'].sum()

1416.0

In [167]:
# Dwelling units on permits whose parcel is an inventory site.
recent_new_construction_df.loc[
    recent_new_construction_df['APN'].isin(sj_sites['locapn']), 'DWELLINGUN'
].sum()

712.0

In [169]:
# Share of recently permitted units that sit on inventory sites.
# Computed from the frames rather than from numbers copied out of earlier outputs,
# so the ratio stays correct when upstream filters or data change.
(
    recent_new_construction_df.loc[
        recent_new_construction_df['APN'].isin(sj_sites['locapn']), 'DWELLINGUN'
    ].sum()
    / recent_new_construction_df['DWELLINGUN'].sum()
)

0.5028248587570622

In [155]:
# Among permits since 2015 that add no dwelling units, the fraction located on inventory sites.
# The date comparison needs its own closing parenthesis; without it the cell is a SyntaxError.
combined_df[
    (combined_df['DWELLINGUN'] == 0)
    & (combined_df['ISSUEDATE'] >= '2015-01-01')
]['APN'].isin(sj_sites['locapn']).mean()

0.1

In [156]:
# Permits issued since 2015 that add no dwelling units.
combined_df.loc[
    combined_df['DWELLINGUN'].eq(0)
    & (combined_df['ISSUEDATE'] >= pd.Timestamp('2015-01-01'))
]

Unnamed: 0,OBJECTID,APPLICANT,CONTRACTOR,FACILITYID,INTID,ADDRESS,APN,WORKDESC,SUBDESC,PERMITAPPR,...,FINALDATEU,DWELLINGUN,PERMITVALU,SQUAREFOOT,FOLDERNUM,LASTUPDATE,LASTEDITOR,ENTERPRISE,geometry,state
461,26017.0,TYPOGRAFIKS INC ELIE HABIB,,25999,25999.0,"2317 STEVENS CREEK BL , SAN JOSE CA 95128-1609",27457008,New Construction,Mixed Use,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,0.0,0.0,3884.0,2015-013690-CI,2021-01-09 02:27:03,FME,PLN-PBEX-0000025999,POINT (6144396.316 1943438.743),expired
464,26309.0,SAVIDGE CONSTRUCTION INC,SAVIDGE CONSTRUCTION INC,26293,26293.0,"5422 CAMDEN AV , SAN JOSE CA 95124-5825",56916016,New Construction,Service Station,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,0.0,0.0,6662.0,2014-045061-CI,2021-01-09 02:27:07,FME,PLN-PBEX-0000026293,POINT (6153094.228 1913140.937),expired
495,28911.0,LUSARDI CONSTRUCTION CO,LUSARDI CONSTRUCTION CO,28898,28898.0,"6280 AMERICA CENTER DR , SAN JOSE CA",01545037,New Construction,Office,"B-2. Fnd only, E-2. Fnd only, P-2. Fnd only",...,,0.0,0.0,0.0,2015-037613-CI,2021-01-09 02:27:25,FME,PLN-PBEX-0000028898,POINT (6132283.829 1978526.065),expired
509,30576.0,VER CONSULTANTS VINCENT RIVERO,SAN JOSE CONSTRUCTION CO INC,30563,30563.0,"708 RIDDER PARK DR 22 , SAN JOSE CA 95131-0000",23705016,New Construction,Mixed Use,"B-2. Fnd only, E-4. Complete, P-4. Complete",...,,0.0,0.0,162500.0,2016-118310-CI,2021-01-09 02:27:39,FME,PLN-PBEX-0000030563,POINT (6154540.656 1962144.342),expired
516,31890.0,"M.A.K. ASSOCIATES, INC RICHARD MACDERMOTT","M.A.K. ASSOCIATES, INC",31881,31881.0,"585 W ALMA AV , SAN JOSE CA 95125-1743",43414075,New Construction,School/Daycare,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,0.0,0.0,5100.0,2016-100195-CI,2021-01-09 02:27:48,FME,PLN-PBEX-0000031881,POINT (6158130.170 1938051.886),expired
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1968,27270.0,B & T SERVICE STATION CONTRACTOR,B & T SERVICE STATION CONTRACTOR,21086,21086.0,"1433 BERRYESSA RD , SAN JOSE CA 95133",24104022,New Construction,Service Station,"B-4. Complete, E-4. Complete, P-4. Complete, M...",...,,0.0,0.0,8920.0,2020-148965-CI,2021-01-09 02:23:15,FME,PLN-PBAC-0000021086,POINT (6160614.082 1959720.896),active
1986,27581.0,DANIELLE WILSON,,21424,21424.0,"6402 SANTA TERESA BL 4 , SAN JOSE CA 95119",70603016,New Construction,Warehouse/Storage,B-4. Complete,...,,0.0,0.0,991.0,2020-149161-CI,2021-01-09 02:23:17,FME,PLN-PBAC-0000021424,POINT (6185553.311 1911667.306),active
2021,28839.0,DURAIRAJ SUNDAR AND SUNDAR JASMINE N TRU,,4231,4231.0,"1114 HUNTINGDON DR , SAN JOSE CA 95129-3123",37811014,New Construction,Address Assignment,,...,,0.0,0.0,0.0,2021-011589-IR,2021-01-15 02:20:47,FME,PLN-PBAC-0000004231,POINT (6124665.085 1937077.492),active
2076,30538.0,REPUBLIC URBAN PROPERTIES LLC,,2509,2509.0,"1096 LINCOLN AV , SAN JOSE CA 95125-3110",26456082,New Construction,Mixed Use,"B-2. Fnd only, E-2. Fnd only, P-2. Fnd only, M...",...,,0.0,0.0,0.0,2021-015100-CI,2021-03-05 02:20:36,FME,PLN-PBAC-0000002509,POINT (6154244.413 1937988.522),active


In [27]:
# Sanity check: after de-duplication every APN should appear exactly once.
combined_df.groupby('APN').size().value_counts()

1    1050
dtype: int64

In [106]:
# Permits issued from 2015-01-01 up to, but not including, 2021-01-01.
recent_combined_df = combined_df.loc[
    (combined_df['ISSUEDATE'] >= pd.Timestamp('2015-01-01'))
    & (combined_df['ISSUEDATE'] < pd.Timestamp('2021-01-01'))
]

In [107]:
# (rows, columns) of the 2015–2020 permit subset.
recent_combined_df.shape

(726, 23)

In [108]:
# Permits issued from 2015-01-01 up to, but not including, 2020-01-01.
recent_combined_df_through_2019 = combined_df.loc[
    (combined_df['ISSUEDATE'] >= pd.Timestamp('2015-01-01'))
    & (combined_df['ISSUEDATE'] < pd.Timestamp('2020-01-01'))
]

In [110]:
# (rows, columns) of the 2015–2019 permit subset.
recent_combined_df_through_2019.shape

(398, 23)

In [137]:
# Fraction of permits issued in 2020 whose parcel is an inventory site.
recent_combined_df.loc[
    recent_combined_df['ISSUEDATE'] >= pd.Timestamp('2020-01-01'), 'APN'
].isin(sj_sites['locapn']).mean()

0.003048780487804878

In [142]:
# Fraction of all 2015–2020 permits whose parcel is an inventory site.
recent_combined_df['APN'].isin(sj_sites['locapn']).mean()

0.04269972451790634

## Merge DataFrames

In [85]:
# How many APNs occur once, twice, ... in the permit table; all should occur once before merging.
recent_combined_df['APN'].value_counts().value_counts()

1    819
Name: APN, dtype: int64

In [87]:
# Same check for the sites table; counts above 1 mean a parcel is listed more than once.
sj_sites['locapn'].value_counts().value_counts()

1    581
2      6
3      2
Name: locapn, dtype: int64

In [90]:
# Keep one row per parcel: the one with the highest realistic capacity.
# `na_position='first'` matters: by default NaN sorts last, so `keep='last'` would pick a
# duplicate with a missing capacity over one that has a real value.
sj_sites_deduped = (
    sj_sites
    .sort_values('relcapcty', na_position='first')
    .drop_duplicates('locapn', keep='last')
)

In [91]:
# Verify the de-duplication: every parcel number should now occur exactly once.
sj_sites_deduped['locapn'].value_counts().value_counts()

1    589
Name: locapn, dtype: int64

In [111]:
# Left-join the 2015–2020 permits onto the sites by parcel number.
# Every site is kept; permit columns (including `state`) are missing for sites with no permit.
merged_df = sj_sites_deduped.merge(
    recent_combined_df,
    left_on='locapn',
    right_on='APN',
    how='left'
)

In [113]:
# Share of inventory sites with a matching permit (`state` comes from the permit side of the join).
merged_df['state'].notnull().mean()

0.05263157894736842

In [141]:
# NOTE(review): 8/6 presumably rescales the 6 permit years covered (2015–2020) to an
# 8-year RHNA cycle — confirm, and consider naming the two constants.
8/6 * merged_df['state'].notnull().mean()

0.07017543859649122

In [115]:
# Compare planned figures with permitted units for the first matched sites.
merged_df.loc[
    merged_df['state'].notnull(),
    ['ADDRESS', 'allowden', 'allowhigh', 'relcapcty', 'DWELLINGUN', 'zoning'],
].head(12)

Unnamed: 0,ADDRESS,allowden,allowhigh,relcapcty,DWELLINGUN,zoning
24,"734 N 12TH ST , SAN JOSE CA 95112",8.0,8,1.0,1.0,R-2
64,"2842 MOORPARK AV , SAN JOSE CA 95128-3156",4.9,5,2.0,1.0,A(PD)
108,"1364 TIFFANY CANYON CT , SAN JOSE CA 95120-0000",0.2,0,2.0,1.0,A(PD)
135,"370 NEILSON CT , SAN JOSE CA 95111-3813",2.3,2,3.0,1.0,A(PD)
146,"222 FLOYD ST , SAN JOSE CA 95110-3344",11.8,12,3.0,1.0,A(PD)
147,"646 WILLOW ST , SAN JOSE CA 95125-5714",8.2,8,3.0,1.0,A(PD)
253,"423 DOUGLAS PL , SAN JOSE CA 95126-3221",14.0,14,6.0,1.0,A(PD)
254,"1097 JAYDEN LN , SAN JOSE CA 95120-1506",6.9,7,6.0,1.0,A(PD)
259,"6462 ALMADEN RD , SAN JOSE CA 95120-1901",8.0,8,7.0,1.0,R-1-8
278,"7020 LIVERY LN , SAN JOSE CA",1.2,1,7.0,1.0,A(PD)


Okay, it looks like the permit data isn't complete enough (for example, `DWELLINGUN` is 1 for every matched site shown above) to compare the planned densities with the densities that were actually built.
We can still look at development probabilities, though.

In [116]:
# Confirm the left join did not duplicate any site.
merged_df['locapn'].value_counts().value_counts()

1    589
Name: locapn, dtype: int64

In [117]:
# Zoning codes of the sites that matched a permit.
merged_df.dropna(subset=['state'])['zoning'].value_counts()

A(PD)    20
DC        4
LI        2
CP        2
R-2       1
HI        1
R-1-8     1
Name: zoning, dtype: int64

* A(PD) means Agricultural, but with a Planned Development overlay
* DC means Downtown Primary Commercial District
* CP means Commercial Pedestrian
* LI means Light Industrial
* R-1-8 means residential, 8 dwelling units/acre
* R-2 is residential, higher density than R-1
* HI means Heavy Industrial

In [118]:
# Maps a zoning-code fragment to a broad land-use category.
# Order matters: get_zone_mapping() tests these keys as substrings, in insertion order, and
# returns the first hit — so specific keys such as 'A(PD)' must precede short ones such as 'A'.
# NOTE(review): 'A' matches any code containing a capital "A" that no earlier key caught —
# confirm that is intended.
zone_mappings = {
    'A(PD)': 'Agricultural',
    'R-1': 'Residential',
    'R-2': 'Residential',
    'R-M': 'Residential',
    'DC': 'Commercial',
    'CG': 'Commercial',
    'CP': 'Commercial',
    'CN': 'Commercial',
    'MS': 'Commercial',
    'CO': 'Commercial',
    'OS': 'Open Space',
    'LI': 'Industrial',
    'IP': 'Industrial',
    'HI': 'Industrial',
    'A': 'Agricultural',
    'County': 'County/Other',
}

In [119]:
def get_zone_mapping(zone, mappings=None):
    """Return the broad land-use category for a zoning code.

    Keys of the mapping are tested as substrings of ``zone`` in insertion order,
    and the category of the first key found is returned.

    Parameters
    ----------
    zone : str or missing value
        Zoning code, e.g. ``'R-1-8'``. Non-string input (``None``, ``NaN``)
        is treated as "no zoning".
    mappings : dict, optional
        Fragment-to-category table. Defaults to the module-level ``zone_mappings``.

    Returns
    -------
    str or None
        The matched category, or ``None`` when nothing matches or ``zone``
        is not a string.
    """
    # Series.map passes NaN/None through to the function; `fragment in NaN` would raise.
    if not isinstance(zone, str):
        return None
    if mappings is None:
        mappings = zone_mappings
    for zone_match_string, zone_type in mappings.items():
        if zone_match_string in zone:
            return zone_type
    return None

# Tag each site with its broad zoning category; `assign` works on a copy, so `merged_df` is untouched.
merged_df_with_types = merged_df.assign(
    zoning_type=merged_df['zoning'].map(get_zone_mapping)
)

In [120]:
# Zoning codes that did not get a category; an empty result means every site was labeled.
merged_df_with_types.loc[
    merged_df_with_types['zoning_type'].isnull(), 'zoning'
].value_counts()

Series([], Name: zoning, dtype: int64)

Nice, so we labeled all of them.

In [122]:
# Per zoning category: share of sites with a matching 2015–2020 permit.
merged_df_with_types.groupby('zoning_type').apply(lambda g: g['state'].notnull().mean())

zoning_type
Agricultural    0.114943
Commercial      0.035714
County/Other    0.000000
Industrial      0.058824
Open Space      0.000000
Residential     0.013158
dtype: float64

In [138]:
# NOTE(review): the 8/6 factor presumably rescales 6 observed permit years (2015–2020) to an
# 8-year RHNA cycle — confirm.
8/6 * merged_df_with_types.groupby('zoning_type').apply(lambda g: g['state'].notnull().mean())

zoning_type
Agricultural    0.153257
Commercial      0.047619
County/Other    0.000000
Industrial      0.078431
Open Space      0.000000
Residential     0.017544
dtype: float64

## Same analysis, through 2019

In [123]:
# Same left join as above, but using only permits issued through the end of 2019.
# Sites with no permit keep missing values in the permit columns (including `state`).
merged_df_through_2019 = sj_sites_deduped.merge(
    recent_combined_df_through_2019,
    left_on='locapn',
    right_on='APN',
    how='left'
)

In [124]:
# Share of inventory sites with a matching permit issued 2015–2019.
merged_df_through_2019['state'].notnull().mean()

0.050933786078098474

In [140]:
# NOTE(review): 8/5 presumably rescales the 5 permit years covered (2015–2019) to an
# 8-year RHNA cycle — confirm.
8/5 * merged_df_through_2019['state'].notnull().mean()

0.08149405772495756

In [129]:
# Tag each site in the through-2019 merge with its broad zoning category (on a copy).
merged_df_through_2019_with_types = merged_df_through_2019.assign(
    zoning_type=merged_df_through_2019['zoning'].map(get_zone_mapping)
)

In [130]:
# Per zoning category: share of sites with a matching 2015–2019 permit.
merged_df_through_2019_with_types.groupby('zoning_type').apply(lambda g: g['state'].notnull().mean())

zoning_type
Agricultural    0.109195
Commercial      0.035714
County/Other    0.000000
Industrial      0.058824
Open Space      0.000000
Residential     0.013158
dtype: float64

In [139]:
# NOTE(review): the 8/5 factor presumably rescales 5 observed permit years (2015–2019) to an
# 8-year RHNA cycle — confirm.
8/5 * merged_df_through_2019_with_types.groupby('zoning_type').apply(lambda g: g['state'].notnull().mean())

zoning_type
Agricultural    0.174713
Commercial      0.057143
County/Other    0.000000
Industrial      0.094118
Open Space      0.000000
Residential     0.021053
dtype: float64