In [2]:
import arcpy
from arcpy import env
import os
from arcgis import GIS
from arcgis.features import GeoAccessor
import pandas as pd
import numpy as np

# Geoprocessing environment settings (via the `env` object imported above):
# overwrite existing outputs and set the parallel processing factor to "90%".
env.overwriteOutput = True
env.parallelProcessingFactor = "90%"

# show all columns
pd.set_option("display.max_columns", None)

# Reminder snippets for moving between feature classes and DataFrames:
# pd.DataFrame.spatial.from_featureclass(???)
# df.spatial.to_featureclass(location=???,sanitize_columns=False)

In [3]:
# Inputs
# Parcel feature class inside the base-year file geodatabase, read into a
# spatially enabled DataFrame.
parcels = r".\Inputs\remm_base_year_2019.gdb\parcels_2019"
parcels_sdf = GeoAccessor.from_featureclass(parcels)

# Table that is left-joined onto the parcels by 'parcel_id_REMM' when the
# buildings table is assembled.
job_spaces = pd.read_csv(filepath_or_buffer=r".\Inputs\Job_Spaces_by_Parcel.csv")

In [4]:
# Display the column index of the parcel table loaded above, as a reference
# for the column names used by the export cells that follow.
parcels_sdf.columns

Index(['OBJECTID', 'parcel_id_REMM', 'CO_NAME', 'TAZID', 'COUNTY_ID',
       'PARCEL_ID', 'TOTAL_MKT_VALUE', 'land_value', 'residential_units',
       'building_sqft', 'FLOORS_CNT', 'year_built', 'EFFBUILT_YR', 'IS_OUG',
       'max_height', 'type1', 'type2', 'type3', 'type4', 'type5', 'type6',
       'type7', 'type8', 'agriculture', 'basebldg', 'NoBuild',
       'redev_friction', 'building_type_id', 'x', 'y', 'parcel_acres', 'Split',
       'Split_Factor', 'parcel_id_REMM_old', 'building_type',
       'residential_price', 'non_residential_price', 'non_residential_sqft',
       'TAZID_832', 'TAZID_900', 'max_dua', 'max_far', 'SHAPE'],
      dtype='object')

## agriculture

In [5]:
# Export the parcels whose 'agriculture' flag equals 1, keeping only the parcel
# id and the flag itself under the output column names.
ag_parcels = (
    parcels_sdf
    .loc[parcels_sdf['agriculture'] == 1, ['parcel_id_REMM', 'agriculture']]
    .copy()
    .rename(columns={'parcel_id_REMM': 'parcel_id', 'agriculture': 'Agriculture'})
)
ag_parcels.to_csv(r'.\Outputs\agriculture.csv', index=False)

## zoning_baseline

In [6]:
# Build the zoning baseline table: parcel id, density limits and the eight
# type columns. Only the id column is renamed on the way out.
res_type_cols = ['type1', 'type2']
non_res_type_cols = ['type3', 'type4', 'type5', 'type6', 'type7', 'type8']

zb = (
    parcels_sdf[['parcel_id_REMM', 'max_dua', 'max_far', 'max_height']
                + res_type_cols + non_res_type_cols]
    .copy()
    .rename(columns={'parcel_id_REMM': 'parcel_id'})
)

# The parcel-level max_height is replaced by a constant for every row.
zb['max_height'] = 999

# set res types allowed to false if no max dua is present - may not be necessary
no_max_dua = zb['max_dua'].isna() | zb['max_dua'].eq(0)
zb.loc[no_max_dua, res_type_cols] = 'f'

# set non res types allowed to false if no max far is present - may not be necessary
no_max_far = zb['max_far'].isna() | zb['max_far'].eq(0)
zb.loc[no_max_far, non_res_type_cols] = 'f'

zb.to_csv(r'.\Outputs\zoning_baseline.csv', index=False)

## Buildings

In [11]:
# Derive price / sqft attributes on the parcel table.
# Improvement value = total market value minus land value. For building types
# 1 and 2 it is stored as the residential price; for every other row it is
# stored as the non-residential price, and building_sqft is copied to
# non_residential_sqft.
# NOTE: these assignments update parcels_sdf in place (existing behaviour, kept).
is_type_1_or_2 = parcels_sdf['building_type_id'].isin([1, 2])
improvement_value = parcels_sdf['TOTAL_MKT_VALUE'] - parcels_sdf['land_value']
parcels_sdf.loc[is_type_1_or_2, 'residential_price'] = improvement_value
parcels_sdf.loc[~is_type_1_or_2, 'non_residential_price'] = improvement_value
parcels_sdf.loc[~is_type_1_or_2, 'non_residential_sqft'] = parcels_sdf['building_sqft']

# One building record per parcel row, with job spaces left-joined by parcel id
# (parcels without a match keep NaN in the joined columns).
buildings = parcels_sdf[['parcel_id_REMM', 'building_sqft', 'building_type_id', 'non_residential_sqft',
                         'FLOORS_CNT', 'residential_units', 'year_built', 'TOTAL_MKT_VALUE',
                         'residential_price']]
buildings = buildings.merge(job_spaces, on='parcel_id_REMM', how='left')

buildings['note'] = 'base'
# A building_sqft of 0 produces +/-inf here; the following cell replaces inf with 0.
buildings['res_price_per_sqft'] = buildings['residential_price'] / buildings['building_sqft']
buildings['unit_price_non_residential'] = np.nan
# The parcel id doubles as the building id.
buildings['building_id'] = buildings['parcel_id_REMM']

# Missing floor counts default to 1, then counts are rounded to whole numbers.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form is deprecated and does not update the
# frame when pandas Copy-on-Write is active.
buildings['FLOORS_CNT'] = buildings['FLOORS_CNT'].fillna(1).round(0)

# Keep the output columns in export order, then switch to the output names
# ('parcel_id_REMM' -> 'parcel_id', 'FLOORS_CNT' -> 'stories').
buildings = buildings[['building_id', 'building_sqft', 'building_type_id', 'non_residential_sqft', 'note',
                       'parcel_id_REMM', 'residential_units', 'FLOORS_CNT', 'unit_price_non_residential',
                       'year_built', 'res_price_per_sqft', 'job_spaces']].copy()

buildings.columns = ['building_id', 'building_sqft', 'building_type_id', 'non_residential_sqft', 'note',
                     'parcel_id', 'residential_units', 'stories', 'unit_price_non_residential',
                     'year_built', 'res_price_per_sqft', 'job_spaces']

In [12]:
# Lookup from the source building_type_id to the output building_type_id.
# Ids mapped to None (and any id missing from the table) become missing values
# after .map() and are removed by the subset step at the end of this cell.
buildings_lu = {0: None,
                1: 1,
                2: 2,
                3: 3,
                4: 4,
                5: 5,
                6: 6,
                7: 7,
                8: 8,
                9: 6,
                10: 8,
                11: 8,
                12: None,
                13: 5,
                14: None,
                15: None,
                16: None,
                99: None}

# remap building types
buildings['building_type_id'] = buildings['building_type_id'].map(buildings_lu)

######################################
# fill in some fake square footage and years (comment out later)
######################################

# buildings.loc[(buildings['year_built'].isna() ==True) | (buildings['year_built'] == 0), 'year_built'] =  np.random.randint(1900, 2019, buildings[(buildings['year_built'].isna() ==True) | (buildings['year_built'] == 0)].shape[0])

# buildings.loc[((buildings['building_sqft'].isna() ==True) | (buildings['building_sqft'] == 0)) & (buildings['building_type_id'] == 3), 
#               'building_sqft'] =  np.random.randint(10000, 200000, buildings[((buildings['building_sqft'].isna() ==True) | (buildings['building_sqft'] == 0)) & (buildings['building_type_id'] == 3)].shape[0])

# buildings.loc[((buildings['building_sqft'].isna() ==True) | (buildings['building_sqft'] == 0)) & (buildings['building_type_id'] != 3), 
#               'building_sqft'] =  np.random.randint(1400, 14000, buildings[((buildings['building_sqft'].isna() ==True) | (buildings['building_sqft'] == 0)) & (buildings['building_type_id'] != 3)].shape[0])

# buildings.loc[((buildings['non_residential_sqft'].isna() ==True) | (buildings['non_residential_sqft'] == 0)) & (buildings['building_type_id'].isin([1,2]) == False), 
#               'non_residential_sqft'] = buildings['building_sqft']

# buildings.loc[((buildings['non_residential_sqft'].isna() ==True) | (buildings['non_residential_sqft'] == 0)) & (buildings['building_type_id'].isin([1,2]) == True), 
#               'non_residential_sqft'] = 0

# buildings.loc[(buildings['residential_units'].isna() ==True), 
#               'residential_units'] =  0

# buildings.loc[(buildings['unit_price_non_residential'].isna() ==True), 
#               'unit_price_non_residential'] =  0

# mean = buildings['res_price_per_sqft'].mean()
# buildings.loc[((buildings['res_price_per_sqft'].isna() ==True) | (buildings['res_price_per_sqft'] == 0)) & (buildings['building_type_id'].isin([1,2]) == True), 
#               'res_price_per_sqft'] = mean

# buildings.loc[((buildings['res_price_per_sqft'].isna() ==True) | (buildings['res_price_per_sqft'] == 0)) & (buildings['building_type_id'].isin([1,2]) == False), 
#               'res_price_per_sqft'] = 0

##################################################################

# Replace +/-inf (e.g. from the price-per-sqft division by a zero sqft) with 0.
buildings = buildings.replace([np.inf, -np.inf], 0)

# subset to buildings with a building type and year_built
# Each comparison needs its own parentheses: `&` binds tighter than `>=` / `>`,
# and the original line also had an unbalanced ")" that made the cell a
# SyntaxError.
has_building_type = buildings['building_type_id'] >= 1
has_year_built = buildings['year_built'] > 0
buildings = buildings[has_building_type & has_year_built]
buildings.columns

Index(['building_id', 'building_sqft', 'building_type_id',
       'non_residential_sqft', 'note', 'parcel_id', 'residential_units',
       'stories', 'unit_price_non_residential', 'year_built',
       'res_price_per_sqft', 'job_spaces'],
      dtype='object')

In [9]:
# Write the buildings table to CSV without the DataFrame index.
# NOTE(review): this cell's execution counter (In [9]) is lower than the cells
# that build and filter `buildings` (In [11], In [12]) - re-run it after those
# cells so the file on disk reflects the current table.
buildings.to_csv(r'.\Outputs\buildings_20220513.csv', index=False)