In [44]:
import pandas as pd 
import os
import numpy as np
import arcpy
from arcgis import GIS
from arcgis.features import GeoAccessor, GeoSeriesAccessor
# Allow geoprocessing tools to replace outputs that already exist.
arcpy.env.overwriteOutput = True
# Never truncate the column list when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

## Create output GDB

In [45]:
# Create the output folder and the file geodatabase that later cells write to.
outputs = '.\\Outputs'
gdb_name = "REMM_SpatialOutputs.gdb"
gdb = os.path.join(outputs, gdb_name)

# CreateFileGDB needs an existing parent folder, and a later cell also writes a
# CSV into this folder, so make sure it exists before anything else runs.
os.makedirs(outputs, exist_ok=True)

# Only create the geodatabase on the first run; re-runs reuse the existing one.
if not arcpy.Exists(gdb):
    arcpy.CreateFileGDB_management(outputs, gdb_name)

## Assign tables and geospatial data to variables

In [46]:
# Paths to the REMM run CSVs: one file per table type and model year.
_remm_run_dir = ".\\Inputs\\REMMRun\\"
_model_years = (2015, 2020, 2030, 2040, 2050)

# County-level indicators
(county_ind_2015, county_ind_2020, county_ind_2030,
 county_ind_2040, county_ind_2050) = [
    f"{_remm_run_dir}county_indicators_59_{yr}.csv" for yr in _model_years
]

# Zone-level indicators
(zone_ind_2015, zone_ind_2020, zone_ind_2030,
 zone_ind_2040, zone_ind_2050) = [
    f"{_remm_run_dir}zone_indicators_5_{yr}.csv" for yr in _model_years
]

# All-buildings tables
(all_buildings_2015, all_buildings_2020, all_buildings_2030,
 all_buildings_2040, all_buildings_2050) = [
    f"{_remm_run_dir}run5year{yr}allbuildings.csv" for yr in _model_years
]

# Zonal variables tables
(zonal_variables_2015, zonal_variables_2020, zonal_variables_2030,
 zonal_variables_2040, zonal_variables_2050) = [
    f"{_remm_run_dir}run5year{yr}zonalvariables.csv" for yr in _model_years
]

In [47]:
# Load every REMM run CSV into its own DataFrame, in the same order as before.

# County-level indicators
(county_ind_2015_df, county_ind_2020_df, county_ind_2030_df,
 county_ind_2040_df, county_ind_2050_df) = [
    pd.read_csv(csv_path)
    for csv_path in (county_ind_2015, county_ind_2020, county_ind_2030,
                     county_ind_2040, county_ind_2050)
]

# Zone-level indicators
(zone_ind_2015_df, zone_ind_2020_df, zone_ind_2030_df,
 zone_ind_2040_df, zone_ind_2050_df) = [
    pd.read_csv(csv_path)
    for csv_path in (zone_ind_2015, zone_ind_2020, zone_ind_2030,
                     zone_ind_2040, zone_ind_2050)
]

# All-buildings tables
(all_buildings_2015_df, all_buildings_2020_df, all_buildings_2030_df,
 all_buildings_2040_df, all_buildings_2050_df) = [
    pd.read_csv(csv_path)
    for csv_path in (all_buildings_2015, all_buildings_2020, all_buildings_2030,
                     all_buildings_2040, all_buildings_2050)
]

# Zonal variables tables
(zonal_variables_2015_df, zonal_variables_2020_df, zonal_variables_2030_df,
 zonal_variables_2040_df, zonal_variables_2050_df) = [
    pd.read_csv(csv_path)
    for csv_path in (zonal_variables_2015, zonal_variables_2020, zonal_variables_2030,
                     zonal_variables_2040, zonal_variables_2050)
]

## Add a year field to the DataFrames
The year field records which model year each row came from, so the tables of different years can be combined into one file.

In [48]:
# Tag each county indicator table with its model year (the column is added in place).
county_tables_df = [county_ind_2015_df, county_ind_2020_df, county_ind_2030_df,
                    county_ind_2040_df, county_ind_2050_df]
for model_year, county_table in zip((2015, 2020, 2030, 2040, 2050), county_tables_df):
    county_table["year"] = model_year

# Stack the per-year tables into one DataFrame; each table keeps its own row index.
all_county_ind_df = pd.concat(county_tables_df)

In [49]:
# Tag each zone indicator table with its model year (the column is added in place).
zone_inds_df = [zone_ind_2015_df, zone_ind_2020_df, zone_ind_2030_df,
                zone_ind_2040_df, zone_ind_2050_df]
for model_year, zone_table in zip((2015, 2020, 2030, 2040, 2050), zone_inds_df):
    zone_table["year"] = model_year

# Stack the per-year tables into one DataFrame; each table keeps its own row index.
all_zone_inds_df = pd.concat(zone_inds_df)


In [50]:
# Tag each all-buildings table with its model year (the column is added in place).
all_buildings_df_list = [all_buildings_2015_df, all_buildings_2020_df,
                         all_buildings_2030_df, all_buildings_2040_df,
                         all_buildings_2050_df]
for model_year, buildings_table in zip((2015, 2020, 2030, 2040, 2050), all_buildings_df_list):
    buildings_table["year"] = model_year

# Stack the per-year tables into one DataFrame; each table keeps its own row index.
all_buildings_df = pd.concat(all_buildings_df_list)

In [51]:
# Tag each zonal variables table with its model year (the column is added in place).
zonal_variables_list = [zonal_variables_2015_df, zonal_variables_2020_df,
                        zonal_variables_2030_df, zonal_variables_2040_df,
                        zonal_variables_2050_df]
for model_year, zonal_table in zip((2015, 2020, 2030, 2040, 2050), zonal_variables_list):
    zonal_table["year"] = model_year

# Stack the per-year tables into one DataFrame; each table keeps its own row index.
all_zone_vars_df = pd.concat(zonal_variables_list)

In [52]:
# Check the (rows, columns) shape of the combined county and zone indicator tables.
# all_buildings_df is not included in this check.
for combined_df in (all_county_ind_df, all_zone_inds_df):
    print(combined_df.shape)

(20, 28)
(13625, 30)


## Import shapefiles

In [53]:
# Boundary shapefiles: counties, REMM parcels and TAZ polygons.
_boundaries_dir = ".\\Inputs\\Boundaries\\"
counties_shp = _boundaries_dir + "Counties.shp"
REMM_Parcels_shp = _boundaries_dir + "REMM_Parcels_2015.shp"
TAZ_shp = _boundaries_dir + "TAZ_WFRC_MAG.shp"

## Convert shapefiles to spatially enabled DataFrames

In [54]:
# Read each boundary shapefile into a spatially enabled DataFrame.
# NOTE(review): REMM_Parcels_sdf is not referenced again in the cells visible here;
# confirm whether it is still needed.
counties_sdf, REMM_Parcels_sdf, TAZ_sdf = [
    pd.DataFrame.spatial.from_featureclass(shp_path)
    for shp_path in (counties_shp, REMM_Parcels_shp, TAZ_shp)
]

In [55]:
# Keep only the join key and geometry of each boundary layer, as independent copies.
county_keep_cols = ["FIPS", "SHAPE"]
taz_keep_cols = ["TAZID", "SHAPE"]

counties_sdf = counties_sdf[county_keep_cols].copy()
TAZ_sdf = TAZ_sdf[taz_keep_cols].copy()

## Merge the tables with the spatial dataframe

In [56]:
# Attach geometry to the combined tables with inner joins on the boundary IDs.
county_join = dict(left_on="FIPS", right_on="county_id", how="inner")
taz_join = dict(left_on="TAZID", right_on="zone_id", how="inner")

all_county_ind_sdf = counties_sdf.merge(all_county_ind_df, **county_join)
all_zone_inds_sdf = TAZ_sdf.merge(all_zone_inds_df, **taz_join)
all_zone_vars_sdf = TAZ_sdf.merge(all_zone_vars_df, **taz_join)

# Print the (rows, columns) shape of each merged result.
for merged_sdf in (all_county_ind_sdf, all_zone_inds_sdf, all_zone_vars_sdf):
    print(merged_sdf.shape)

(20, 30)
(13625, 32)
(13625, 95)


## Convert the building tables to point features (XY to point)

In [57]:
# Target feature class in the output geodatabase, and the intermediate CSV path.
out_featureclass = os.path.join(gdb, "buildings_ALL")
all_buildings_csv = os.path.join(outputs, "all_buildings.csv")

# Write the combined buildings DataFrame to CSV so the tool below can read it.
# NOTE(review): index=True also writes the concatenated row index as an unnamed
# first column; that index appears to repeat across years. Confirm it is wanted.
all_buildings_df.to_csv(all_buildings_csv, index=True)

# Reuse the spatial reference of the existing parcels shapefile.
spatial_ref = arcpy.Describe(REMM_Parcels_shp).spatialReference

# Build point features from the "utmxi"/"utmyi" columns (no z field).
arcpy.management.XYTableToPoint(
    in_table=all_buildings_csv,
    out_feature_class=out_featureclass,
    x_field="utmxi",
    y_field="utmyi",
    z_field="",
    coordinate_system=spatial_ref,
)

## Export the joined spatial DataFrames to the output GDB

In [58]:
# Destination feature classes inside the output geodatabase.
county_indicators_fc = os.path.join(gdb, "county_indicators_ALL")
zone_indicators_fc = os.path.join(gdb, "zone_indicators_ALL")
zonal_variables_fc = os.path.join(gdb, "zonal_variables_ALL")

# Write each merged spatial DataFrame to its feature class.
all_county_ind_sdf.spatial.to_featureclass(location=county_indicators_fc)

all_zone_inds_sdf.spatial.to_featureclass(location=zone_indicators_fc)

# Left as the last expression so the cell still displays the returned path.
all_zone_vars_sdf.spatial.to_featureclass(location=zonal_variables_fc)


'C:\\Projects\\REMM-Analysis-Tools\\Spatial_Outputs\\Outputs\\REMM_SpatialOutputs.gdb\\zonal_variables_ALL'