[Google Slides](https://docs.google.com/presentation/d/1z1-FUk38IkFvajrvxrwLaWH1sBXmr-ON3qIRbmMULOA/edit#slide=id.p)

In [23]:
import arcpy
from arcpy import env
import os
import numpy as np
from arcgis import GIS
from arcgis.features import GeoAccessor
from arcgis.features import GeoSeriesAccessor
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

arcpy.env.overwriteOutput = True
arcpy.env.parallelProcessingFactor = "90%"

# show all columns
pd.options.display.max_columns = None

# read in parcel data

In [None]:
parcels = pd.DataFrame.spatial.from_featureclass(r'E:\Tasks\Housing_Unit_Inventory_Update\2024-Davis\Outputs\parcels_davis_2024.gdb\davis_new_housing_2022_2023_2024')
parcels.head()

# get list of parcel ids

In [None]:
parcels_no_year = parcels[(parcels['BUILT_YR'].isna() == True) & (parcels['PARCEL_ID'].isna() == False) & (parcels['KEEP'] == 1)]
ids = list(set(parcels_no_year['PARCEL_ID'].to_list()))
len(ids)

In [14]:
# base_url = r'https://webportal.daviscountyutah.gov/App/PropertySearch/dashboard/parcel/'
# parcel_id = '086770328'
# full_url = base_url + parcel_id
# full_url

'https://webportal.daviscountyutah.gov/App/PropertySearch/dashboard/parcel/086770328'

# main

In [50]:
# remove duplicates
ids = list(set(parcels_no_year['PARCEL_ID'].to_list()))

# store base url
base_url = r'https://webportal.daviscountyutah.gov/App/PropertySearch/dashboard/parcel/'

#empty list
results = []

for i in ids:

    try:
        full_url = base_url + i
        service = Service(executable_path='./chromedriver.exe')
        options = webdriver.ChromeOptions()
        driver = webdriver.Chrome(service=service, options=options)
        driver.get(full_url)
        
        # wait for dynamic elements to load (seconds)
        driver.implicitly_wait(10)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        
        # used copy full xpath in the console to get this
        builtas = driver.find_element(By.XPATH, '''/html/body/app/vertical-layout-1/div/div/div/div/content/app-parcel-dashboard/div/div/div/div/div/fuse-widget[1]/div[1]/div[2]/table/tbody/tr[2]/td[2]/span''')
        agsqft = driver.find_element(By.XPATH, '''/html/body/app/vertical-layout-1/div/div/div/div/content/app-parcel-dashboard/div/div/div/div/div/fuse-widget[1]/div[1]/div[2]/table/tbody/tr[4]/td[2]/span''')
        year = driver.find_element(By.XPATH, """/html/body/app/vertical-layout-1/div/div/div/div/content/app-parcel-dashboard/div/div/div/div/div/fuse-widget[1]/div[1]/div[2]/table/tbody/tr[12]/td[2]/span""")
        
        # add results to empty list
        results.append([i, year.text, builtas.text, agsqft.text])

        
    except:
        print(f'something went wrong with {i}')

# read into pandas and export
df = pd.DataFrame(results, columns=['parcel_id', 'year', 'description', 'sqft'])
df.to_csv('davis_data.csv', index=False)

In [51]:
results

[['063441047', '2013', '2 Story', '2802'],
 ['063461121', '2013', '2 Story', '2891'],
 ['062710429', '2007', 'Bi Level', '1495'],
 ['151310021', '2024', 'Permit Value', '1522'],
 ['063490069', '2014', '2 Story', '1380'],
 ['151340011', '1972', 'Bi Level', '1253'],
 ['118970103', '2022', '2 Story', '2463'],
 ['062810845', '2009', 'Ranch 1 Story', '1445'],
 ['119140232', '2023', '2 Story', '3074'],
 ['064010133', '2023', 'Ranch 1 Story', '2774'],
 ['145860315', '2022', 'Ranch 1 Story', '2016'],
 ['062810817', '2009', '2 Story', '2396'],
 ['063371029', '2013', '2 Story', '2539'],
 ['062790123', '2007', '2 Story', '1879'],
 ['063511049', '2013', '2 Story', '2842'],
 ['086620001', '2022', 'Ranch 1 Story', '2861'],
 ['064210022', '2021', '2 Story', '1955'],
 ['015060222', '2023', '2 Story', '3220'],
 ['062810807', '2009', 'Ranch 1 Story', '1522'],
 ['064420721', '2022', '2 Story', '2417'],
 ['062790144', '2009', '2 Story', '2258'],
 ['063601203', '2014', '2 Story', '3052'],
 ['119140210', '2

# merge results back to parcel

In [71]:
parcels_new = parcels.merge(df, on='PARCEL_ID', how='left')
parcels_new['YEAR_NEW'] = pd.to_numeric(parcels_new['YEAR_NEW'], errors='coerce').astype('Int32')
parcels_new['SQFT_NEW'] = pd.to_numeric(parcels_new['SQFT_NEW'], errors='coerce').astype('Int32')

parcels_new.loc[((parcels_new['BUILT_YR'].isna()==True) | (parcels_new['BUILT_YR'] == '')) &  ((parcels_new['YEAR_NEW'].isna()==False) | (parcels_new['YEAR_NEW'] != '')), 'BUILT_YR'] = parcels_new['YEAR_NEW']
parcels_new.loc[((parcels_new['BLDG_SQFT'].isna()==True) | (parcels_new['BLDG_SQFT'] == '')) &  (parcels_new['SQFT_NEW'].isna()==False) | ((parcels_new['SQFT_NEW'] != '')), 'BLDG_SQFT'] = parcels_new['SQFT_NEW']

parcels_new.spatial.to_featureclass(location=r'E:\Tasks\Housing_Unit_Inventory_Update\2024-Davis\Outputs\parcels_davis_2024.gdb\davis_new_housing_v2024',sanitize_columns=False)  

'E:\\Tasks\\Housing_Unit_Inventory_Update\\2024-Davis\\Outputs\\parcels_davis_2024.gdb\\davis_new_housing_v2024'