In [100]:
import arcpy
from arcpy import env
import os
from arcgis import GIS
from arcgis.features import GeoAccessor
import pandas as pd
import numpy as np

# Geoprocessing environment: let tools overwrite existing outputs and
# set the parallel processing factor to "80%".
env.overwriteOutput = True
env.parallelProcessingFactor = "80%"

# Do not truncate wide DataFrames when they are displayed.
pd.set_option("display.max_columns", None)

# Reference snippets for moving between feature classes and DataFrames:
# pd.DataFrame.spatial.from_featureclass(???)
# df.spatial.to_featureclass(location=???,sanitize_columns=False)

In [101]:
# Output workspace: a file geodatabase inside the Outputs folder of the
# working directory. `outputs` holds [folder, geodatabase name]; `gdb` is the
# joined path used by the export step.
outputs = ['.\\Outputs', "job_spaces_2019_v2.gdb"]
gdb = os.path.join(outputs[0], outputs[1])

# Create the folder from the same value the geodatabase is built from, so the
# two can never drift apart. exist_ok=True makes re-running this cell harmless.
os.makedirs(outputs[0], exist_ok=True)

# Only create the geodatabase on the first run; later runs reuse it.
if not arcpy.Exists(gdb):
    arcpy.CreateFileGDB_management(outputs[0], outputs[1])

In [102]:
# Input locations. Every Windows path is a raw string so that backslashes are
# never interpreted as escape sequences.
# NOTE(review): `parcels` and `dws_pts` are absolute paths on the E: drive, so
# the notebook only runs on a machine with that layout.
parcels = r"E:\Projects\REMM-Input-Data-Prep-2019\Parcels\2020-WFRC\Outputs\remm_base_year_2019.gdb\parcels_2019"
dws_pts = r'E:\Projects\REMM-Input-Data-Prep-2019\Job-Space-Distribution\Inputs\RegionDWS_REMM.gdb\RegionDWS_Employment'
taz = r".\Inputs\WF_v9.0_TAZ\TAZ.shp"
# Was a non-raw literal: "\I" and "\J" are invalid escape sequences, which
# Python warns about. The resulting string value is unchanged.
jobs_spreadsheet = r'.\Inputs\Job Space Calculation 20220405.xlsx'

# Part 1: Distribute Job Spaces

In [103]:
# Load the parcel feature class into a spatially enabled DataFrame.
parcels_df = pd.DataFrame.spatial.from_featureclass(parcels)

# Job-space tally columns start at zero.
parcels_df['job_spaces'] = 0
parcels_df['ind_job_spaces'] = 0
parcels_df['non_ind_job_spaces'] = 0

# Collapse building_type_id into two classes. Ids outside both groups are left
# without a building_type2 value.
non_ind_type_ids = [4, 5, 6, 7, 8, 9, 10, 11, 13]
ind_type_id = 3
is_non_ind_type = parcels_df['building_type_id'].isin(non_ind_type_ids)
is_ind_type = parcels_df['building_type_id'] == ind_type_id
parcels_df.loc[is_non_ind_type, 'building_type2'] = 'non_ind'
parcels_df.loc[is_ind_type, 'building_type2'] = 'ind'

# Parcel count per class.
parcels_df['building_type2'].value_counts()

non_ind    32601
ind        11305
Name: building_type2, dtype: int64

In [104]:
# TAZ shapefile -> spatially enabled DataFrame.
taz_df = pd.DataFrame.spatial.from_featureclass(taz)

# Job-space figures live on the workbook's EXPORT sheet; its first column
# becomes the DataFrame index.
js_df = pd.read_excel(
    jobs_spreadsheet,
    sheet_name='EXPORT',
    index_col=0,
)
# quick look at the populated rows:
# js_df[js_df['job_spaces']>0].head()

In [105]:
# Attach the spreadsheet's job-space figures to the TAZ geometry. The left
# join keeps every TAZ, including those with no spreadsheet row.
taz_jobs = taz_df.merge(js_df, left_on='TAZID', right_on='TAZID', how='left')

# Keep the identifiers, the four space-by-building-type columns, the total and
# the geometry.
taz_jobs = taz_jobs[['TAZID','CO_NAME','ind_space_in_ind_bldg','ind_space_in_non_ind_bldg','non_ind_space_in_ind_bldg', 'non_ind_space_in_non_ind_bldg', 'job_spaces','SHAPE']].copy()

# Missing values become 0 (applied to every selected column), then the
# largest TAZs are listed first.
taz_jobs = taz_jobs.fillna(0)
taz_jobs = taz_jobs.sort_values('job_spaces', ascending=False)

# Flag columns, initialised to 0.
taz_jobs['non_ind_jobs_no_sqft'] = 0
taz_jobs['ind_jobs_no_sqft'] = 0

# Only the key and the total are cast to int. The four space columns are
# deliberately left fractional here; rounding happens at export time.
taz_jobs['TAZID'] = taz_jobs['TAZID'].astype(int)
taz_jobs['job_spaces'] = taz_jobs['job_spaces'].round(0).astype(int)

# Optional list of TAZ ids for a subset run; None means "process and export
# everything" (checked in the export cell).
taz_ids = None

# preview
taz_jobs.head(10)

Unnamed: 0,TAZID,CO_NAME,ind_space_in_ind_bldg,ind_space_in_non_ind_bldg,non_ind_space_in_ind_bldg,non_ind_space_in_non_ind_bldg,job_spaces,SHAPE,non_ind_jobs_no_sqft,ind_jobs_no_sqft
622,1051,SALT LAKE,0.0,0.0,0.0,22770.0,22770,"{'rings': [[[429438.48000000045, 4512160.47000...",0,0
2739,2939,UTAH,0.0,0.0,0.0,17594.5,17594,"{'rings': [[[445297.9031999996, 4456446.1358],...",0,0
549,978,SALT LAKE,3097.1,0.0,809.0,10908.0,14814,"{'rings': [[[415774.7999999998, 4513909.199999...",0,0
578,1007,SALT LAKE,0.0,0.0,0.0,13123.0,13123,"{'rings': [[[429055.63999999966, 4513895.88000...",0,0
598,1027,SALT LAKE,0.0,1113.0,0.0,10680.1,11793,"{'rings': [[[423832.5, 4513789.4], [423832.400...",0,0
536,965,SALT LAKE,6877.0,0.0,0.0,2850.0,9727,"{'rings': [[[418959.4748999998, 4515209.6987],...",0,0
1397,1826,SALT LAKE,0.0,112.0,0.0,8990.0,9102,"{'rings': [[[431769.5999999996, 4498209.699999...",0,0
673,1102,SALT LAKE,102.0,186.0,0.0,7879.2,8167,"{'rings': [[[430426.0800000001, 4513256.390000...",0,0
3099,2978,UTAH,0.0,12.0,0.0,7615.0,7627,"{'rings': [[[443697.31319999974, 4455780.1458]...",0,0
1205,1634,SALT LAKE,113.0,0.0,30.0,7251.0,7394,"{'rings': [[[424913.2999999998, 4501714.199999...",0,0


In [106]:
# One record per TAZ, with fields in this order:
# (TAZID, CO_NAME, ind_space_in_ind_bldg, ind_space_in_non_ind_bldg,
#  non_ind_space_in_ind_bldg, non_ind_space_in_non_ind_bldg, job_spaces)
taz_tuple_fields = [
    'TAZID',
    'CO_NAME',
    'ind_space_in_ind_bldg',
    'ind_space_in_non_ind_bldg',
    'non_ind_space_in_ind_bldg',
    'non_ind_space_in_non_ind_bldg',
    'job_spaces',
]
taz_tuples = list(taz_jobs[taz_tuple_fields].to_records(index=False))

In [107]:
# subset for testing. comment this out for full run
# taz_tuples = taz_tuples[0:50]
# taz_ids = [x[0] for x in taz_tuples]

In [108]:
# Distribute each TAZ's job spaces across its parcels in proportion to building
# square footage, separately for industrial and non-industrial buildings.
#
# Writes to parcels_df: non_ind_factor, ind_factor, non_ind_spaces, ind_spaces.
# Writes to taz_jobs:   total_taz_nonind_sqft, total_taz_ind_sqft,
#                       ind_spaces_distributed, non_ind_spaces_distributed,
#                       ind_space_diff, non_ind_space_diff.
#
# NOTE(review): the non_ind_jobs_no_sqft / ind_jobs_no_sqft flag columns on
# taz_jobs are not updated by this loop - confirm whether they should be.

# The building-type masks do not depend on the TAZ, so build them once instead
# of on every pass through the loop.
non_ind_rows = parcels_df['building_type2'] == 'non_ind'
ind_rows = parcels_df['building_type2'] == 'ind'

for t in taz_tuples:

    # get values from tuple (t[1] = CO_NAME and t[6] = job_spaces are not used here)
    tazid = t[0]
    ind_spaces_ind_bldg = t[2]
    ind_spaces_non_ind_bldg = t[3]

    non_ind_spaces_ind_bldg = t[4]
    non_ind_spaces_non_ind_bldg = t[5]

    # row selections for this TAZ, computed once and reused below
    in_taz = parcels_df['TAZID_900'] == tazid
    non_ind_in_taz = non_ind_rows & in_taz
    ind_in_taz = ind_rows & in_taz
    taz_row = taz_jobs['TAZID'] == tazid

    # total building square footage per building class within the TAZ
    total_taz_nonind_sqft = parcels_df.loc[non_ind_in_taz, 'building_sqft'].sum()
    total_taz_ind_sqft = parcels_df.loc[ind_in_taz, 'building_sqft'].sum()

    taz_jobs.loc[taz_row, 'total_taz_nonind_sqft'] = total_taz_nonind_sqft
    taz_jobs.loc[taz_row, 'total_taz_ind_sqft'] = total_taz_ind_sqft

    # each parcel's share of its class's square footage in the TAZ
    parcels_df.loc[non_ind_in_taz, 'non_ind_factor'] = parcels_df['building_sqft'] / total_taz_nonind_sqft
    parcels_df.loc[ind_in_taz, 'ind_factor'] = parcels_df['building_sqft'] / total_taz_ind_sqft

    # Non-industrial buildings receive both kinds of space by non_ind_factor.
    # Values stay fractional here.
    parcels_df.loc[non_ind_in_taz, 'non_ind_spaces'] = non_ind_spaces_non_ind_bldg * parcels_df['non_ind_factor']
    parcels_df.loc[non_ind_in_taz, 'ind_spaces'] = ind_spaces_non_ind_bldg * parcels_df['non_ind_factor']

    # Industrial buildings receive both kinds of space by ind_factor.
    parcels_df.loc[ind_in_taz, 'ind_spaces'] = ind_spaces_ind_bldg * parcels_df['ind_factor']
    parcels_df.loc[ind_in_taz, 'non_ind_spaces'] = non_ind_spaces_ind_bldg * parcels_df['ind_factor']

    # Count how many job spaces were actually placed. When a TAZ has no parcels
    # of a building class, nothing is assigned for that class and the shortfall
    # shows up in the *_space_diff columns.
    non_ind_spaces_distributed = parcels_df.loc[in_taz, 'non_ind_spaces'].sum()
    ind_spaces_distributed = parcels_df.loc[in_taz, 'ind_spaces'].sum()
    taz_jobs.loc[taz_row, 'ind_spaces_distributed'] = ind_spaces_distributed
    taz_jobs.loc[taz_row, 'non_ind_spaces_distributed'] = non_ind_spaces_distributed

    # track stray job spaces: requested minus distributed, per kind
    taz_jobs.loc[taz_row, 'ind_space_diff'] = (ind_spaces_ind_bldg + ind_spaces_non_ind_bldg) - ind_spaces_distributed
    taz_jobs.loc[taz_row, 'non_ind_space_diff'] = (non_ind_spaces_ind_bldg + non_ind_spaces_non_ind_bldg) - non_ind_spaces_distributed

In [119]:
# Export
# Parcel-level columns written to the output feature class.
export_fields = [
    'parcel_id_REMM', 'CO_NAME', 'TAZID_900', 'building_sqft',
    'building_type_id', 'building_type', 'building_type2',
    'non_ind_factor', 'non_ind_spaces', 'ind_factor', 'ind_spaces', 'SHAPE',
]
result = parcels_df[export_fields].copy()

# Round the distributed spaces to whole numbers for the exported table.
space_fields = ['non_ind_spaces', 'ind_spaces']
result[space_fields] = result[space_fields].round(0)
export = True

if export:
    if taz_ids:
        # Subset run: keep only the selected TAZs and write to the *_SUBSET outputs.
        print('exporting subset')
        result = result[result['TAZID_900'].isin(taz_ids)].copy()
        taz_jobs = taz_jobs[taz_jobs['TAZID'].isin(taz_ids)].copy()
        parcels_fc = '_01_parcels_jobs_2019_SUBSET'
        taz_fc = '_01_taz_jobs_2019_SUBSET'
    else:
        print('exporting full result')
        parcels_fc = '_01_parcels_jobs_2019'
        taz_fc = '_01_taz_jobs_2019'

    result.spatial.to_featureclass(location=os.path.join(gdb, parcels_fc), sanitize_columns=False)
    taz_jobs.spatial.to_featureclass(location=os.path.join(gdb, taz_fc), sanitize_columns=False)

exporting full result
