In [132]:
import numpy as np
import pandas as pd
import geopandas as gpd
from datetime import timedelta  


## Load and clean the site permit data

In [227]:
# Load the raw Los Altos permit export. Note that the file is read as
# tab-separated (sep='\t') even though its name ends in .csv.
los_altos = pd.read_csv('../data/raw_data/los_altos/los_altos_permits.csv', sep='\t')

In [228]:
# Give every column a short, single-word, lower-case name.
short_names = {
    'Permit Number': 'id',
    'Permit Type': 'type',
    'Permit Subtype': 'subtype',
    "Site Address": 'address',
    'Site APN': 'apn',
    'Applied Date': 'applied',
    'Issued Date': 'issued',
    'Finaled Date': 'final',
}
los_altos.rename(columns=short_names, inplace=True)
# Columns that are not in the mapping keep their name and are only lower-cased.
los_altos.columns = [name.lower() for name in los_altos.columns]

In [229]:
# Preview the first rows to check the renamed columns.
los_altos.head()

Unnamed: 0,id,type,subtype,status,description,address,apn,applied,issued,final
0,1995-627536,DEMOLITION,,ISSUED,,382 FIRST ST,16741005,3/22/1995,3/22/1995,
1,1995-627599,DEMOLITION,,ISSUED,PERMIT.,300 SPAGNOLI CT,16724033,4/4/1995,1/1/1980,
2,1995-627667,DEMOLITION,,FINALED,Int Remodel -111 & 113,300 SECOND ST,16740043,4/17/1995,4/17/1995,4/24/1994
3,1995-627692,DEMOLITION,,FINALED,SFD,170 LOS ALTOS AV,16735074,4/20/1995,9/19/1995,4/11/1996
4,1995-627762,DEMOLITION,,FINALED,Demo,422 CHERRY AV,16727051,5/2/1995,5/2/1995,6/5/1995


In [230]:
# Tally the permits by permit type.
los_altos.type.value_counts()

DEMOLITION      2128
PHOTOVOLTAIC    2002
SFD              630
MULTI FAMILY     232
ADU               84
MIXED USE         17
Name: type, dtype: int64

Remove non-housing permits

In [231]:
# Drop the PHOTOVOLTAIC permits, which are not housing permits.
# .copy() makes the filtered result an independent frame, so the column
# assignments performed on `los_altos` in later cells do not raise
# SettingWithCopyWarning or risk writing to a temporary object.
los_altos = los_altos.query('type != "PHOTOVOLTAIC"').copy()

### Correct data types

In [232]:
# Convert the three permit date columns to pandas datetimes, one column at a time.
date_cols = ['applied', 'issued', 'final']
for col in date_cols:
    los_altos[col] = pd.to_datetime(los_altos[col])

In [233]:
# Strip letters and the punctuation characters - | . + , ; : / from the APN
# values, then convert what is left to a numeric dtype.
apn_stripped = los_altos['apn'].str.replace(r"[\-a-zA-Z|.+,;:/]", '', regex=True)
los_altos['apn'] = pd.to_numeric(apn_stripped)

In [234]:
# Demolition permits whose `issued` date is in 2013 or later.
demolitions = los_altos.query('(issued >= "2013" and type == "DEMOLITION")')

In [235]:
# Housing (non-demolition) permits whose `issued` date is in 2015 or later.
# The year is written as a quoted string, like the demolitions filter, so the
# date comparison is explicit. .copy() detaches the result from `los_altos`
# so that adding or updating columns on `homes` later does not raise
# SettingWithCopyWarning.
homes = los_altos.query('issued >= "2015" and type != "DEMOLITION"').copy()

### Flag rebuilds

In [236]:
# Start with no permit flagged as a rebuild. assign() returns a new frame, so
# this never writes into a filtered slice of `los_altos` (which would raise
# SettingWithCopyWarning), and re-running the cell simply resets the flag.
homes = homes.assign(rebuild=False)

In [237]:
def find_rebuild(demolition, homes):
    """Return the index label of the earliest-applied rebuild for a demolition.

    A row of ``homes`` is a rebuild candidate when it shares the demolition's
    ``apn`` or ``address``, has a different ``id``, is not itself a
    DEMOLITION permit, and its ``applied`` date is less than two years
    (730 days) after the demolition's ``issued`` date. No lower bound is
    placed on ``applied``.

    Parameters
    ----------
    demolition : row-like object exposing ``apn``, ``address``, ``id`` and
        ``issued`` as attributes (e.g. a row yielded by ``iterrows``).
    homes : DataFrame with ``apn``, ``address``, ``id``, ``type`` and
        ``applied`` columns.

    Returns
    -------
    The index label in ``homes`` of the candidate with the smallest
    ``applied`` date, or ``False`` when there is no candidate. An index label
    of 0 is falsy, so callers should test ``is not False`` rather than rely
    on truthiness.
    """
    apn = demolition.apn
    address = demolition.address
    pid = demolition.id
    by_date = demolition.issued + timedelta(days=365*2)
    # The @-prefixed names in the query refer to the locals defined above.
    candidates = homes.query(
        '(apn == @apn or address == @address) and id != @pid and type != "DEMOLITION" and applied < @by_date'
    ).sort_values('applied')
    if len(candidates.index) == 0:
        return False
    return candidates.index[0]

In [238]:
# Size of the housing-permit table as (rows, columns).
homes.shape

(358, 11)

In [239]:
# For every demolition permit, flag the matching rebuild permit in `homes`.
for _, demolition in demolitions.iterrows():
    idx = find_rebuild(demolition, homes)
    # find_rebuild returns False when nothing matches. Compare by identity so
    # that a legitimate index label of 0 (which is falsy) is not skipped.
    if idx is not False:
        # Use a single .loc assignment rather than the chained form
        # homes.rebuild[idx] = True, which may write to a temporary copy and
        # leave `homes` unchanged (and always does under copy-on-write).
        homes.loc[idx, 'rebuild'] = True

In [240]:
# Share of the flagged rebuilds whose permit type is SFD.
sfd_rebuilds = homes.loc[homes['type'] == 'SFD', 'rebuild'].sum()
sfd_rebuilds / homes['rebuild'].sum()

0.9421487603305785

In [243]:
# Write the housing permits (including the rebuild flag) to the clean-data
# folder; index=False leaves the DataFrame index out of the file.
homes.to_csv('../data/clean_data/los_altos_permits.csv', index=False)