In [1]:
import arcpy
import os
import pandas as pd
from arcgis import GIS
import numpy as np
from arcgis.features import GeoAccessor, GeoSeriesAccessor
arcpy.env.overwriteOutput = True

# show all columns
pd.options.display.max_columns = None

#pd.DataFrame.spatial.from_featureclass(???)

In [2]:
def add_leading_zeroes(parcel_id_str):
    if len(parcel_id_str) == 8:
        return "0{}".format(str(parcel_id_str))
    if len(parcel_id_str) == 7:
        return "00{}".format(str(parcel_id_str))
    else:
        return parcel_id_str

In [16]:
# Load Extended Descriptions
df = pd.read_csv(r".\Inputs\Davis_Extended_Descriptions_January.csv", dtype={'ACCOUNTNO':str, 'BLTASDESCRIPTION':str})

# format account numbers so that they are all 9 characters long
df['ACCOUNTNO'] = df['ACCOUNTNO'].astype(str)
df['ACCOUNTNO'] = df['ACCOUNTNO'].map(add_leading_zeroes)
df['ID_LEN'] = df['ACCOUNTNO'].map(len)

# add field to indicate source
df['source'] = 'xlsx' 

# collapse extended description into long list
df_unique = df.groupby('ACCOUNTNO')['BLTASDESCRIPTION'].apply(list).reset_index(name='des_all')

# add a count
temp = pd.DataFrame(df['ACCOUNTNO'].value_counts()).reset_index()
temp.columns = ['ACCOUNTNO', 'count']
new_df = df_unique.merge(temp,left_on='ACCOUNTNO',right_on='ACCOUNTNO', how='inner')

new_df['ACCOUNTNO'] = new_df['ACCOUNTNO'].apply('="{}"'.format)
new_df.head(10)

Unnamed: 0,ACCOUNTNO,des_all,count
0,"=""010010001""",[nan],1
1,"=""010020001""",[nan],1
2,"=""010030001""",[nan],1
3,"=""010040001""",[nan],1
4,"=""010040002""",[nan],1
5,"=""010040003""",[nan],1
6,"=""010040004""",[nan],1
7,"=""010040005""",[nan],1
8,"=""010040006""",[nan],1
9,"=""010040007""",[nan],1


In [17]:
new_df.to_csv('.\\Outputs\\davis_extended2.csv')

In [13]:
new_df[new_df['count']>1]

Unnamed: 0,ACCOUNTNO,des_all,count
336,010220018,"[Light Commercial Utility, Light Commercial Ut...",3
337,010220020,"[Bi Level, 2 Story]",2
339,010220022,"[Barn, Barn, Barn]",3
398,010220111,"[Ranch 1 Story, Ranch 1 Story]",2
635,010280010,"[Barn, Ranch 1 Story, Barn]",3
...,...,...,...
113242,145110001,"[2 Story, Bath Houses]",2
113275,145190001,"[Service Garage, Single Wide]",2
113513,145390102,"[Ranch 1 Story, Ranch 1 Story]",2
113599,145470001,"[Mini Warehouse, Mini Warehouse, Mini Warehous...",5


## Review, then proceed

In [29]:
parcels = r'E:\Projects\REMM-Input-Data-Prep-2019\Parcels\2020-Davis\Inputs\Davis_County_LIR_Parcels.gdb\Parcels_Davis_LIR'
parcels = pd.DataFrame.spatial.from_featureclass(parcels)
csv = pd.read_csv(r"E:\Projects\REMM-Input-Data-Prep-2019\Parcels\2020-Davis\Outputs\davis_extended_qaqc.csv")
csv['ACCOUNTNO'] = csv['ACCOUNTNO'].astype(str).map(add_leading_zeroes)
merged = parcels.merge(csv, left_on='PARCEL_ID', right_on='ACCOUNTNO', how='left')

In [30]:
merged = merged[['PARCEL_ID', 'ACCOUNTNO','PROP_CLASS', 'PRIMARY_RES', 'TAXEXEMPT_TYPE', 'des_all', 'count', 'class', 'notes']]
merged['ACCOUNTNO'] = merged['ACCOUNTNO'].astype(str)
merged['ACCOUNTNO'] = merged['ACCOUNTNO'].apply('="{}"'.format)
merged.to_csv('.\\Outputs\\davis_extended_qaqc2.csv')

## Review, then proceed

In [18]:
# read in csv
parcels = pd.read_csv(r"E:\Projects\REMM-Input-Data-Prep-2019\Parcels\2020-Davis\Outputs\davis_extended_qaqc_reviewed.csv",
                      dtype={'class': str, 'notes': str})

# unclasses parcels
parcels_unclassed = parcels[parcels['class'].isnull()].copy()

In [19]:
# apartment,industrial,single_family,retail,office,open_space,government,churches,group_quarters,healthcare,agriculture,
#mobile_home_park

#(parcels_unclassed['class'].isnull() == True) &

# agriculture
agriculture = ['Arch-Rib (Quonset) Farm Utility Bldg']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['PROP_CLASS'] == 'Commercial') &
                      (parcels_unclassed['des_all'].str.contains('|'.join(agriculture)) == True), 'class'] = "agriculture"

agriculture = ['Barn']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['PROP_CLASS'] == 'Vacant Land') &
                      (parcels_unclassed['des_all'].str.contains('|'.join(agriculture)) == True), 'class'] = "agriculture"

# churches
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                       (parcels_unclassed['TAXEXEMPT_TYPE'] == 'YES') & 
                      (parcels_unclassed['des_all'].str.contains('Church')== True), 'class'] = "churches"

# industrial
industrial = ['Service Garage','Mini Warehouse','Storage Warehouse','Distribution Warehouse',
              'Industrial Flex Mall Building','Storage Hanger','Indust Lght Manufacturing','Light Commercial Utility',
              'Loft - Industrial','Maintenance Hanger']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      parcels_unclassed['des_all'].str.contains('|'.join(industrial)) == True, 'class'] = "industrial"

# single family
single_family = ['Ranch 1 Story', '2 Story','2 Story Split', 'Bi Level', 'Split Level','1.5 Story Fin']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['PROP_CLASS'] == 'Residential') & 
                      (parcels_unclassed['des_all'].str.contains('|'.join(single_family)) == True), 
                      'class'] = "single_family"
# duplex
duplex = ['Duplex One Story','Duplex Two Story']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['PROP_CLASS'] == 'Residential') & 
                      (parcels_unclassed['des_all'].str.contains('|'.join(duplex)) == True), 'class'] = "duplex"

# townhouse
duplex = ['Townhouse One Story','Townhouse Two Story']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['PROP_CLASS'] == 'Residential') & 
                      (parcels_unclassed['des_all'].str.contains('|'.join(duplex)) == True), 'class'] = "townhouse"

# retail
retail = ['Neighborhood Shopping Center','Retail Store', 'Complete Auto Dealership','Car Wash - Drive thru',
          'Car Wash - Self Service','Mini-Mart Convenience Stores','Health Club', 'Bank', 'Restaurant','Bar/Tavern',
          'Automotive Center','Day Care Center','Car Wash - Full Service','Fast Food Restaurant','Barber/Beauty Shop']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) &
                      (parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['des_all'].str.contains('|'.join(retail)) == True), 'class'] = "retail"

# healthcare
healthcare = ['Hospital', 'Medical Offices']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) &
                      (parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['des_all'].str.contains('|'.join(healthcare)) == True), 'class'] = "healthcare"

# group home
group_home =  ['Group Care Homes','Convlsnt Hosp Nursing Home']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) &
                      (parcels_unclassed['class'].isnull() == True) &
                      (parcels_unclassed['des_all'].str.contains('|'.join(group_home)) == True), 'class'] = "group_home"

# apartment
apartment =  ['Multiple - Residential']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) &
                      (parcels_unclassed['PROP_CLASS'] == 'Commercial') &
                      (parcels_unclassed['des_all'].str.contains('|'.join(apartment)) == True), 'class'] = "apartment"  
# government
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) &
                      (parcels_unclassed['PROP_CLASS'] == 'Commercial') &
                      (parcels_unclassed['TAXEXEMPT_TYPE'] == 'YES') & 
                      (parcels_unclassed['des_all'].str.contains('Church') == False), 'class'] = "government"
# utility 
utility = ['Light Commercial Utility']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) &
                      (parcels_unclassed['TAXEXEMPT_TYPE'] == 'YES') & 
                      (parcels_unclassed['des_all'].str.contains('|'.join(utility)) == True), 'class'] = "utility"


# mobile_home_park
mobile_home_park = ['Mobile Home Parks  *CODE']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['PROP_CLASS'] == 'Commercial') &
                      (parcels_unclassed['des_all'].str.contains('|'.join(mobile_home_park)) == True), 
                      'class'] = "mobile_home_park"

# vacant
mobile_home_park = ['Mobile Home Parks  *CODE']
parcels_unclassed.loc[(parcels_unclassed['class'].isnull() == True) & 
                      (parcels_unclassed['PROP_CLASS'] == 'Vacant Land'), 
                      'class'] = "vacant"


  return func(self, *args, **kwargs)


In [16]:
parcels_unclassed.to_csv('.\\Outputs\\davis_extended_qaqc3.csv')

In [17]:
#check remaining
parcels_unclassed[parcels_unclassed['class'].isnull()]

Unnamed: 0,PARCEL_ID,ACCOUNTNO,PROP_CLASS,PRIMARY_RES,TAXEXEMPT_TYPE,des_all,count,class,notes
656,11310036.0,11310036.0,Commercial,N,NO,"['Indust Lght Manufacturing', 'Loft - Industri...",2.0,,
667,60830047.0,60830047.0,Commercial,N,NO,"['Storage Hanger', 'Storage Hanger']",2.0,,
669,60940012.0,60940012.0,Commercial,N,NO,"['Office Building', 'Storage Garage']",2.0,,
674,60860001.0,60860001.0,Commercial,N,NO,"['Light Commercial Utility', 'Light Commercial...",2.0,,
675,60880072.0,60880072.0,Commercial,N,NO,"['Storage Hanger', 'Office Building']",2.0,,
...,...,...,...,...,...,...,...,...,...
119352,,,,,,,,,
119353,,,,,,,,,
119354,,,,,,,,,
119355,,,,,,,,,
