In [30]:
import os
from arcgis import GIS
from arcgis.features import GeoAccessor
import pandas as pd
import arcpy

# Display every column when a dataframe is previewed (None = no column limit)
pd.set_option("display.max_columns", None)

# Reference: templates for reading from / writing to a feature class
# pd.DataFrame.spatial.from_featureclass(???)
# df.spatial.to_featureclass(location=???,sanitize_columns=False)

## Store paths to input data

In [31]:
# Non-spatial lookup table (CSV), keyed by parcel id
parcel_taz_lu = ".\\Inputs\\parcel_id_taz_lu_table.csv"

# Two feature classes stored in the same file geodatabase (Parcels_Subset.gdb)
parcels_zoning = ".\\Inputs\\Parcels_Subset.gdb\\parcels_zoning"
parcels_buildings = ".\\Inputs\\Parcels_Subset.gdb\\parcels_buildings"

## Setup Outputs environment

In [32]:
# Create the output folder if it doesn't exist.
# exist_ok=True makes the call a no-op when the folder is already there, and the
# same `output_folder` value is used for every path below, so the folder that is
# created is always the folder that is written to.
output_folder = '.\\Outputs'
os.makedirs(output_folder, exist_ok=True)

# Create the output file geodatabase inside the output folder if it doesn't exist.
# The name is stored once so the existence check and the create call cannot drift apart.
gdb_name = 'results.gdb'
gdb = os.path.join(output_folder, gdb_name)
if not arcpy.Exists(gdb):
    arcpy.CreateFileGDB_management(output_folder, gdb_name)

## Load feature classes and csvs into Pandas Dataframes

In [33]:
# Feature classes -> spatially enabled dataframes.
# GeoAccessor is the class registered as the `.spatial` accessor, so calling
# its from_featureclass directly is the same call as pd.DataFrame.spatial.from_featureclass.
parcels_buildings_df = GeoAccessor.from_featureclass(parcels_buildings)
parcels_zoning_df = GeoAccessor.from_featureclass(parcels_zoning)

# Plain CSV -> regular (non-spatial) dataframe
parcel_taz_lu_df = pd.read_csv(parcel_taz_lu)

## Inspect dataframe

In [34]:
# look at the first rows of the table (head() returns 5 rows by default)
parcels_buildings_df.head()

Unnamed: 0,OBJECTID,parcel_id_REMM,TOTAL_MKT_VALUE,land_value,residential_units,building_sqft,FLOORS_CNT,year_built,building_type_id,parcel_acres,building_type,SHAPE
0,1,27303,224100.0,167900.0,0.0,2210,1.0,1977.0,10.0,0.170335,Church,"{""rings"": [[[425482.0663999999, 4511535.4605],..."
1,2,27304,35000.0,35000.0,0.0,0,0.0,0.0,10.0,0.231649,Church,"{""rings"": [[[425812.89329999965, 4511714.13130..."
2,3,27305,1224900.0,339000.0,0.0,11780,0.0,1983.0,10.0,2.225053,Church,"{""rings"": [[[425827.5959999999, 4511814.5721],..."
3,4,27306,1689300.0,80600.0,0.0,280,0.0,1987.0,10.0,0.531271,Church,"{""rings"": [[[425968.0197999999, 4511774.556399..."
4,5,27307,13700.0,13700.0,0.0,0,0.0,0.0,10.0,0.093493,Church,"{""rings"": [[[425968.0197999999, 4511774.556399..."


In [35]:
# list the column names to see which fields are available
# (parcel_id_REMM is the one used as the join key further down)
parcels_buildings_df.columns

Index(['OBJECTID', 'parcel_id_REMM', 'TOTAL_MKT_VALUE', 'land_value',
       'residential_units', 'building_sqft', 'FLOORS_CNT', 'year_built',
       'building_type_id', 'parcel_acres', 'building_type', 'SHAPE'],
      dtype='object')

In [36]:
# check the size of the table as (# of rows, # of columns);
# the row count is the baseline to compare against after joining
parcels_buildings_df.shape

(3664, 12)

In [37]:
# Remove unneeded columns.
# drop() returns a new dataframe instead of mutating in place, and errors='ignore'
# skips columns that are already gone, so this cell can be re-run without a KeyError.
parcels_buildings_df = parcels_buildings_df.drop(columns=['OBJECTID'], errors='ignore')

# The buildings table already carries 'SHAPE', so the zoning table's copy is dropped
# as well; otherwise the join would produce two geometry columns (SHAPE_x / SHAPE_y).
parcels_zoning_df = parcels_zoning_df.drop(columns=['OBJECTID', 'SHAPE'], errors='ignore')

## Join tables

`DataFrame.merge` joins two tables together. Its arguments are:  
- dataframe to join with
- left_on: column to join on from base dataframe
- right_on: column to join on from join dataframe
- how: type of join e.g. (left, right, inner, outer)

[Documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html)  

In [38]:
# Join the zoning attributes onto the building parcels.
# Both tables are keyed by "parcel_id_REMM"; how='left' keeps every building
# parcel even when it has no matching zoning record.
new_parcels = parcels_buildings_df.merge(
    parcels_zoning_df,
    left_on='parcel_id_REMM',
    right_on='parcel_id_REMM',
    how='left',
)

## Inspect the joined result

In [39]:
# check number of records to make sure nothing weird happened:
# a left join should keep the row count of the left table; extra rows would
# mean duplicate parcel_id_REMM values in the zoning table
new_parcels.shape

(3664, 23)

In [40]:
# preview the last 3 rows of the joined table to confirm the zoning columns came through
new_parcels.tail(3)

Unnamed: 0,parcel_id_REMM,TOTAL_MKT_VALUE,land_value,residential_units,building_sqft,FLOORS_CNT,year_built,building_type_id,parcel_acres,building_type,SHAPE,CO_NAME,max_dua,max_far,type1,type2,type3,type4,type5,type6,type7,type8,NoBuild
3661,461342,9067500.0,1491400.0,98.0,82330,,1979.0,2.0,1.249838,Multi Family Res,"{'rings': [[[425386.5906999996, 4512009.9351],...",Salt Lake,85.0,0.5,f,t,f,f,f,f,f,f,
3662,461546,1955600.0,627500.0,30.0,20450,,1989.0,2.0,0.688894,Multi Family Res,"{'rings': [[[426008.59970000014, 4511812.87099...",Salt Lake,30.0,0.5,t,t,f,f,f,f,f,f,
3663,461550,5087800.0,1291000.0,70.0,122280,,1983.0,2.0,1.316267,Multi Family Res,"{'rings': [[[425879.86110000033, 4511447.27580...",Salt Lake,0.0,0.5,f,f,f,f,f,f,f,f,


## Join the current dataframe to our dataframe with the taz ids

In [41]:
# Attach the TAZ / tract ids from the CSV table, again keyed by "parcel_id_REMM".
# how='left' keeps every parcel from the previous join.
new_parcels2 = new_parcels.merge(
    parcel_taz_lu_df,
    left_on='parcel_id_REMM',
    right_on='parcel_id_REMM',
    how='left',
)

In [42]:
# check columns: the fields from the TAZ table should now appear after the zoning fields
new_parcels2.columns

Index(['parcel_id_REMM', 'TOTAL_MKT_VALUE', 'land_value', 'residential_units',
       'building_sqft', 'FLOORS_CNT', 'year_built', 'building_type_id',
       'parcel_acres', 'building_type', 'SHAPE', 'CO_NAME', 'max_dua',
       'max_far', 'type1', 'type2', 'type3', 'type4', 'type5', 'type6',
       'type7', 'type8', 'NoBuild', 'TAZID_832', 'TAZID_900', 'Tract_GEOID'],
      dtype='object')

## Export as a feature class to the geodatabase

In [43]:
# name the output: a feature class called 'parcels_with_new_data' inside the output geodatabase
outfile = os.path.join(gdb, 'parcels_with_new_data')
# write the dataframe (it must still contain its "SHAPE" column) to the geodatabase
# NOTE(review): sanitize_columns=False presumably leaves the column names untouched - confirm in the arcgis docs
new_parcels2.spatial.to_featureclass(location=outfile, sanitize_columns=False)

In [44]:
# Name the output shapefile. The path is built from output_folder (the same way
# the geodatabase path is built) instead of repeating the folder name as a literal.
outfile = os.path.join(output_folder, 'parcels_with_new_data.shp')

# Shapefile field names are limited to 10 characters, so the column names are
# sanitized on export; expect long names (e.g. 'residential_units') to be
# altered in the .shp output.
new_parcels2.spatial.to_featureclass(location=outfile, sanitize_columns=True)

## Export to CSV (non-spatial)

In [45]:
# The SHAPE column isn't needed in the CSV, so leave it out of the export.
# drop() returns a copy: new_parcels2 keeps its geometry, which means this cell
# and the spatial export cells can all be re-run without errors.
csv_path = os.path.join(output_folder, 'new_parcels.csv')
new_parcels2.drop(columns=['SHAPE']).to_csv(csv_path, index=False)

## Final tips:
- Be careful when joining tables that share column names (other than the join key): pandas renames the duplicates on the fly by adding suffixes (`_x` and `_y` by default).
- To export a dataframe to a feature class or shapefile, it must have a "SHAPE" column.
- Check the number of rows after each join to make sure nothing weird happened.