In [16]:
import arcpy
from arcpy import env
import os
import numpy as np
from arcgis import GIS
from arcgis.features import GeoAccessor
from arcgis.features import GeoSeriesAccessor
import pandas as pd
import glob

# Geoprocessing environment (same settings object as arcpy.env, reached through
# the `env` name imported above): allow outputs to be overwritten and set the
# parallel processing factor to 90%.
env.overwriteOutput = True
env.parallelProcessingFactor = "90%"

# Render every column when a DataFrame is displayed.
pd.options.display.max_columns = None

# Reference snippets for spatially enabled DataFrames (intentionally commented out):
#   pd.DataFrame.spatial.from_featureclass(???)
#   df.spatial.to_featureclass(location=???,sanitize_columns=False)
#
#   gsa = arcgis.features.GeoSeriesAccessor(df['SHAPE'])
#   df['AREA'] = gsa.area  # KNOW YOUR UNITS

In [17]:
# fill NA values in Spatially enabled dataframes (ignores SHAPE column)
def fill_na_sedf(df_with_shape_column, fill_value=0):
    """Fill missing values in every column except 'SHAPE'.

    The input frame is not modified. The returned frame has the same rows and
    index as the input, with the attribute columns in their original order and
    the untouched 'SHAPE' column placed last.

    Parameters
    ----------
    df_with_shape_column : pandas.DataFrame
        Frame that contains a 'SHAPE' column.
    fill_value : scalar, default 0
        Value used to replace missing entries in the non-SHAPE columns.

    Returns
    -------
    pandas.DataFrame
        New frame with filled attribute columns followed by 'SHAPE'.

    Raises
    ------
    ValueError
        If the frame has no 'SHAPE' column.
    """
    if 'SHAPE' not in df_with_shape_column.columns:
        raise ValueError("Dataframe does not include 'SHAPE' column")

    attribute_columns = [c for c in df_with_shape_column.columns if c != 'SHAPE']
    filled = df_with_shape_column[attribute_columns].fillna(fill_value)
    # Re-attach SHAPE by direct column assignment instead of an index merge:
    # a merge on the index multiplies rows whenever index labels repeat.
    filled['SHAPE'] = df_with_shape_column['SHAPE']
    return filled

In [18]:
# Folder that receives the averaged progression-metric pickles.
# The literal is kept exactly as written (doubled backslash and "progession"
# spelling included) so files already stored under this folder still resolve.
outputs = r'.\\Outputs\progession_metrics_average'
# exist_ok=True replaces the separate existence check: no check-then-create
# race, and nothing happens when the folder is already there.
os.makedirs(outputs, exist_ok=True)

In [19]:
# Parcel equivalency table; collect the parcel ids whose CENTER_NAME is filled in.
eq = pd.read_csv(r".\Inputs\parcel_eq_v5.csv")
centers_eq_ids = eq.loc[~eq['CENTER_NAME'].isna(), 'parcel_id'].to_list()

# Centers shapefile, loaded through the `spatial` DataFrame accessor.
centers_sdf = pd.DataFrame.spatial.from_featureclass(r".\Inputs\WC_2050_Centers.shp")

In [20]:
# Root folder of each REMM run whose progression metrics are averaged below
# (one variable per run).
# NOTE(review): all six entries currently hold the same path, so the six
# tables read per year would be identical — confirm each one points at its
# own run before relying on the averages.
remm_folder_1 = r'E:\Projects\REMM2_For_Python3_Internal_Use'
remm_folder_2 = r'E:\Projects\REMM2_For_Python3_Internal_Use'
remm_folder_3 = r'E:\Projects\REMM2_For_Python3_Internal_Use'
remm_folder_4 = r'E:\Projects\REMM2_For_Python3_Internal_Use'
remm_folder_5 = r'E:\Projects\REMM2_For_Python3_Internal_Use'
remm_folder_6 = r'E:\Projects\REMM2_For_Python3_Internal_Use'

In [21]:
# Run root folders, in run order.
remm_folders = [remm_folder_1, remm_folder_2, remm_folder_3, remm_folder_4, remm_folder_5, remm_folder_6]
# Progression-metrics sub-folder of every run. The components are joined
# separately because the single literal 'REMMRun\Progression_Metrics' relied on
# "\P", an invalid escape sequence in a non-raw string.
remm_progression_folders = [os.path.join(x, 'REMMRun', 'Progression_Metrics') for x in remm_folders]

In [22]:
def get_table_ignore_base(path, year):
    """Read the parcel progression-metrics CSV for `year` found in `path`.

    Files are matched with the pattern
    ``run_*_year_{year}_parcel_progression_metrics.csv``; any file whose *name*
    contains 'base' is skipped. When several files remain, a warning is printed
    and the first one in sorted order is read.

    Parameters
    ----------
    path : str
        Folder to search.
    year : int or str
        Year interpolated into the file-name pattern.

    Returns
    -------
    pandas.DataFrame
        Contents of the selected CSV.

    Raises
    ------
    FileNotFoundError
        If no non-base table matches the pattern.
    """
    pattern = os.path.join(path, f'run_*_year_{year}_parcel_progression_metrics.csv')
    # Test only the file name: a folder such as "database" in `path` must not
    # filter out every table. Sorting makes "the first" match reproducible,
    # since glob returns results in arbitrary order.
    csvs = sorted(csv for csv in glob.glob(pattern) if 'base' not in os.path.basename(csv))
    if not csvs:
        raise FileNotFoundError(f'no non-base progression metrics table matches {pattern!r}')
    if len(csvs) > 1:
        print('warning multiple tables were globbed; only the first will be returned')
    return pd.read_csv(csvs[0])

In [28]:
def prepare_df(df, year):
    """Derive the per-parcel metric columns used for averaging across runs.

    The input frame is not modified. The result is indexed by ``parcel_id`` and
    holds exactly these columns, with missing values replaced by 0:
    sf_units, mf_units, households, job_spaces, industrial_jobs, retail_jobs,
    office_jobs, vacant_acres, vacant_devacres.

    Parameters
    ----------
    df : pandas.DataFrame
        One progression-metrics table containing the columns listed in
        ``required`` below.
    year : int
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by parcel_id with the nine metric columns.

    Raises
    ------
    KeyError
        If any required input column is absent.
    """
    required = [
        'parcel_id', 'is_sf', 'is_mf', 'residential_units',
        'jobs_wholesale', 'jobs_manuf', 'jobs_retail', 'jobs_accom_food',
        'jobs_office', 'jobs_gov_edu', 'jobs_health', 'jobs_other',
        'has_buildings', 'developable', 'parcel_acres',
        'households_count', 'job_spaces',
    ]
    missing = [column for column in required if column not in df.columns]
    if missing:
        # Report every missing column at once instead of failing on the first.
        raise KeyError(f'progression metrics table is missing columns: {missing}')

    parcels = df.set_index('parcel_id')

    # Residential units split by the is_sf / is_mf flags; rows where the flag
    # is not 1 stay NaN here and become 0 in the final fillna.
    parcels.loc[parcels['is_sf'] == 1, 'sf_units'] = parcels['residential_units']
    parcels.loc[parcels['is_mf'] == 1, 'mf_units'] = parcels['residential_units']

    # Job sectors collapsed into three groups.
    parcels['industrial_jobs'] = parcels['jobs_wholesale'] + parcels['jobs_manuf']
    parcels['retail_jobs'] = parcels['jobs_retail'] + parcels['jobs_accom_food']
    parcels['office_jobs'] = (
        parcels['jobs_office'] + parcels['jobs_gov_edu']
        + parcels['jobs_health'] + parcels['jobs_other']
    )

    # Acreage of parcels without buildings, and the developable subset of it.
    no_buildings = parcels['has_buildings'] != 1
    parcels.loc[no_buildings, 'vacant_acres'] = parcels['parcel_acres']
    parcels.loc[no_buildings & (parcels['developable'] == 1), 'vacant_devacres'] = parcels['parcel_acres']

    parcels['households'] = parcels['households_count']

    metric_columns = [
        'sf_units', 'mf_units', 'households', 'job_spaces', 'industrial_jobs',
        'retail_jobs', 'office_jobs', 'vacant_acres', 'vacant_devacres',
    ]
    # A single fillna on the selected columns covers the vacant_* columns too,
    # so no chained in-place fillna on individual columns is needed.
    return parcels[metric_columns].fillna(0)

In [29]:
# base = centers_sdf[['CenterName', 'DEVACRES', 'SHAPE']].copy()
# For each year: read that year's table from every run folder, prepare it,
# average the runs per parcel, and write the result to a pickle in `outputs`.
# NOTE(review): range(2019,2050) ends at 2049 — confirm that 2050 is not expected.
for year in range(2019,2050):
    dfs_current_year = [get_table_ignore_base(f, year) for f in remm_progression_folders]
    dfs_processed = [prepare_df(df, year) for df in dfs_current_year] 

    # stack average the 6 runs together
    data_stack = pd.concat(dfs_processed)
    # Group on the index (parcel_id set by prepare_df), take the mean of each
    # metric, then round and cast everything (parcel_id included) to int.
    average = data_stack.groupby(data_stack.index).mean().reset_index().round().astype(int)
    # Totals are summed from the already-rounded component columns.
    average['residential_units'] = average['sf_units'] + average['mf_units']
    average['total_jobs'] = average['office_jobs'] + average['retail_jobs'] + average['industrial_jobs']
    # One pickle per year, named after the year.
    average.to_pickle(os.path.join(outputs, f'averaged_parcel_progression_metrics_{year}.pkl'))



In [31]:
# test: reload the pickle written for year 2035 and preview its first five rows
unpickled_df = pd.read_pickle(r".\Outputs\progession_metrics_average\averaged_parcel_progression_metrics_2035.pkl")
unpickled_df.head(5)

Unnamed: 0,parcel_id,sf_units,mf_units,households,job_spaces,industrial_jobs,retail_jobs,office_jobs,vacant_acres,vacant_devacres,residential_units,total_jobs
0,1,0,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,1122,1,409,712,0,0,0,1122
2,3,0,0,0,7,4,0,2,0,0,0,6
3,4,0,0,0,14,5,3,6,0,0,0,14
4,5,0,0,0,28,22,0,6,0,0,0,28
