### Annual Sales Tax Revenue Analysis

#### [Data Source](https://tax.utah.gov/econstats/sales)

#### Notes:


In [2]:
import arcpy
from arcpy import env
import os
import numpy as np
from arcgis import GIS
from arcgis.features import GeoAccessor
from arcgis.features import GeoSeriesAccessor
import pandas as pd
import glob

arcpy.env.overwriteOutput = True
arcpy.env.parallelProcessingFactor = "90%"

# show all columns
pd.options.display.max_columns = None

# pd.DataFrame.spatial.from_featureclass(???)  
# df.spatial.to_featureclass(location=???,sanitize_columns=False)  

# gsa = arcgis.features.GeoSeriesAccessor(df['SHAPE'])  
# df['AREA'] = gsa.area  # KNOW YOUR UNITS

In [3]:
if not os.path.exists('Outputs'):
    os.makedirs('Outputs')
    
outputs = ['.\\Outputs', 'Utah.gdb']
gdb = os.path.join(outputs[0], outputs[1])

if not arcpy.Exists(gdb):
    arcpy.CreateFileGDB_management(outputs[0], outputs[1])


In [4]:
# read in cities as spatial dataframe
global cities_sdf
cities_sdf = pd.DataFrame.spatial.from_featureclass(r".\Inputs\Cities_v2.shp")
cities_sdf = cities_sdf[['NAME','POPLASTCEN','POPLASTEST', 'ACRES','SHAPE']].copy()
cities_sdf.rename({'NAME':'City'}, axis=1, inplace=True)

In [5]:
# dictionary for renaming tax revenue sectors
global sector_lu
sector_lu= {
              'ACCOMMODATION (721)':'L_ACCOMMODATION',
              'ADMIN. & SUPPORT & WASTE MANAG. & REMED. SERVICES (56)':'S_ADMIN_SUPPORT',
              'AGRICULTURE, FORESTRY, FISHING & HUNTING (11)':'A_AG_WILDLIFE',
              'ARTS, ENTERTAINMENT AND RECREATION (71)':'L_CULTURAL_REC', 
              'CONSTRUCTION (23)':'C_CONSTRUCTION',
              'EDUCATIONAL SERVICES (61)':'E_EDUCATION', 
              'FINANCE & INSURANCE (52)':'S_FINANCIAL',
              'FOOD SERVICES & DRINKING PLACES (722)':'L_RSTRNT_BAR',
              'HEALTH CARE & SOCIAL ASSISTANCE (62)':'H_HEALTH_CARE', 
              'INFORMATION (51)':'S_IT_DATA_20',
              'MANAGEMENT OF COMPANIES & ENTERPRISES (55)':'S_CORPORATE_MGMT', 
              'MANUFACTURING (31-33)':'M_MANUFACTURING',
              'MINING, QUARRYING, & OIL & GAS EXTRACTION (21)':'E_EXTRACTION',
              'OTHER SERVICES-EXCEPT PUBLIC ADMINISTRATION (81)':'O_OTHER',
              'PRIOR-PERIOD PAYMENTS & REFUNDS':'X_ADJUSTMENTS', 
              'PRIVATE MOTOR VEHICLE SALES':'R_AUTO_PRIVATE',
              'PROFESSIONAL, SCIENTIFIC & TECHNICAL SERVICES (54)':'S_PROF_TECH_SRV',
              'PUBLIC ADMINISTRATION (92)':'G_GOVERNMENT', 
              'REAL ESTATE, RENTAL & LEASING (53)':'S_RENTAL_LEASING',
              'RETAIL-BUILD. MATERIAL, GARDEN EQUIP. & SUPPLIES DEALERS (444)':'R_BUILDING_SUPPLY',
              'RETAIL-CLOTHING & CLOTHING ACCESSORIES STORES (448)':'R_CLOTHING',
              'RETAIL-CLOTHING & CLOTHING ACCESSORIES STORES (448,458)':'R_CLOTHING',
              'RETAIL-ELECTRONICS & APPLIANCE STORES (443)':'R_ELECTRONICS',
              'RETAIL-ELECTRONICS & APPLIANCE STORES (443,4492)':'R_ELECTRONICS',
              'RETAIL-FOOD & BEVERAGE STORES (445)':'R_GROCERY_BEV',
              'RETAIL-FURNITURE & HOME FURNISHINGS STORES (442)':'R_FURNITURE',
              'RETAIL-FURNITURE & HOME FURNISHINGS STORES (442,4491)':'R_FURNITURE',
              'RETAIL-GASOLINE STATIONS (447)':'R_GAS_STATIONS',
              'RETAIL-GASOLINE STATIONS (447,457)': 'R_GAS_STATIONS',
              'RETAIL-GENERAL MERCHANDISE STORES (452)':'R_GENERAL_RETAIL',
              'RETAIL-GENERAL MERCHANDISE STORES (452,455)':'R_GENERAL_RETAIL',
              'RETAIL-HEALTH & PERSONAL CARE STORES (446)':'R_HEALTH_RETAIL',
              'RETAIL-HEALTH & PERSONAL CARE STORES (446,456)':'R_HEALTH_RETAIL',
              'RETAIL-MISCELLANEOUS STORE RETAILERS (453)':'R_OTHER_RETAIL',
              'RETAIL-MISCELLANEOUS STORE RETAILERS (453,4593-4599)':'R_OTHER_RETAIL',
              'RETAIL-MOTOR VEHICLE & PARTS DEALERS (441)':'R_AUTO_RETAIL',
              'RETAIL-NONSTORE RETAILERS (454)':'R_NONSTORE_RETAIL',
              'RETAIL-SPORTING GOODS, HOBBY, MUSIC & BOOK STORES (451)':'R_SPORT_HOBBY',
              'RETAIL-SPORTING GOODS, HOBBY, MUSIC & BOOK STORES (451,4591,4592)':'R_SPORT_HOBBY',
              'SPECIAL EVENT SALES':'L_SPECIAL_EVENT', 
              'TRANSPORTATION & WAREHOUSING (48-49)':'W_DISTRIBUTION',
              'UNKNOWN/NONCLASSIFIABLE':'X_UNKNOWN', 
              'UTILITIES (22)':'U_UTILITIES',
              'WHOLESALE TRADE-DURABLE GOODS (423)':'W_WHLSALE_DURABLE',
              'WHOLESALE TRADE-ELECTRONIC MARKETS (425)':'S_WHLSLE_ETRADE',
              'WHOLESALE TRADE-AGENTS & BROKERS (425)':'S_WHLSLE_ETRADE',
              'WHOLESALE TRADE-NONDURABLE GOODS (424)':'W_WHLSLE_NDURABLE'            
}

In [6]:
# fill NA values in Spatially enabled dataframes (ignores SHAPE column)
def fill_na_sedf(df_with_shape_column, fill_value=0):
    if 'SHAPE' in list(df_with_shape_column.columns):
        df = df_with_shape_column.copy()
        shape_column = df['SHAPE'].copy()
        del df['SHAPE']
        return df.fillna(fill_value).merge(shape_column,left_index=True, right_index=True, how='inner')
    else:
        raise Exception("Dataframe does not include 'SHAPE' column")

In [7]:
## This version export each year individually
# def process_annual_sales_to_shape(_xlsx, _gdb):
    
#     year = os.path.split(_xlsx)[-1][:4]
#     tsr = pd.read_excel(_xlsx, sheet_name='Table 9', header=5)

#     # Drop last empty two rows
#     tsr.drop(tsr.tail(2).index,inplace=True)

#     # forward fill values from merged cells
#     tsr['County'] = tsr['County'].fillna(method='ffill')
#     tsr['Location Code'] = tsr['Location Code'].fillna(method='ffill')
#     tsr['City'] = tsr['City'].fillna(method='ffill')

#     # Figures with less than 10 taxpayers have been rounded up per Tax Commission disclosure rules.
#     # Rename column and convert values to boolean
#     tsr.rename(columns={'Unnamed: 5':'Rounded_Up'}, inplace=True)
#     crosswalk = {'*':True, np.nan:False}
#     tsr['Rounded_Up'] = tsr['Rounded_Up'].map(crosswalk)

#     # pivot the table to get sectors as columns with tax values by city
#     tsr_by_sector = pd.pivot_table(tsr,values = f'CY{year}', index ='City', columns = 'Economic Sector (NAICS Code)', 
#                                     aggfunc='first').reset_index()

#     # fill NAs with 0
#     tsr_by_sector = tsr_by_sector.fillna(0)

#     # rename sectors
#     _sector_lu = sector_lu
#     _sector_lu = {k:v + f"_{year[-2:]}" for (k,v) in _sector_lu.items()}
#     tsr_by_sector.rename(_sector_lu, axis=1, inplace=True)

#     # get the total sales tax revenue
#     tsr_by_sector['TOTAL'] = tsr_by_sector[_sector_lu.values()].sum(axis=1)

#     # export
#     tsr_by_sector_sdf = cities_sdf.merge(tsr_by_sector, on='City', how='inner')
#     return tsr_by_sector_sdf.spatial.to_featureclass(location=os.path.join(_gdb,f'Taxable_Sales_by_City_{year}'),sanitize_columns=False)


In [8]:
## this version is for horizontal concatenation of records 
# def process_annual_sales_to_df(_xlsx):
    
#     year = os.path.split(_xlsx)[-1][:4]
#     tsr = pd.read_excel(_xlsx, sheet_name='Table 9', header=5)

#     # Drop last empty two rows
#     tsr.drop(tsr.tail(2).index,inplace=True)

#     # forward fill values from merged cells
#     tsr['County'] = tsr['County'].fillna(method='ffill')
#     tsr['Location Code'] = tsr['Location Code'].fillna(method='ffill')
#     tsr['City'] = tsr['City'].fillna(method='ffill')

#     # Figures with less than 10 taxpayers have been rounded up per Tax Commission disclosure rules.
#     # Rename column and convert values to boolean
#     tsr.rename(columns={'Unnamed: 5':'Rounded_Up'}, inplace=True)
#     crosswalk = {'*':True, np.nan:False}
#     tsr['Rounded_Up'] = tsr['Rounded_Up'].map(crosswalk)

#     # pivot the table to get sectors as columns with tax values by city
#     tsr_by_sector = pd.pivot_table(tsr,values = f'CY{year}', index ='City', columns = 'Economic Sector (NAICS Code)', 
#                                     aggfunc='first').reset_index()

#     # fill NAs with 0
#     tsr_by_sector = tsr_by_sector.fillna(0)

#     # rename sectors
#     _sector_lu = sector_lu
#     _sector_lu = {k:v + f"_{year[-2:]}" for (k,v) in _sector_lu.items()}
#     tsr_by_sector.rename(_sector_lu, axis=1, inplace=True)

#     # get the total sales tax revenue
#     tsr_by_sector[f'TOTAL_{year[-2:]}'] = tsr_by_sector[_sector_lu.values()].sum(axis=1)

#     # export
#     return tsr_by_sector

In [18]:
## this version is for vertical concatenation of records 
def process_annual_sales_to_df2(_xlsx):
    
    year = os.path.split(_xlsx)[-1][:4]
    tsr = pd.read_excel(_xlsx, sheet_name='Table 9', header=5)

    # Drop last empty two rows
    tsr.drop(tsr.tail(2).index,inplace=True)

    # forward fill values from merged cells
    tsr['County'] = tsr['County'].fillna(method='ffill')
    tsr['Location Code'] = tsr['Location Code'].fillna(method='ffill')
    tsr['City'] = tsr['City'].fillna(method='ffill')

    # Figures with less than 10 taxpayers have been rounded up per Tax Commission disclosure rules.
    # Rename column and convert values to boolean
    tsr.rename(columns={'Unnamed: 5':'Rounded_Up'}, inplace=True)
    crosswalk = {'*':True, np.nan:False}
    tsr['Rounded_Up'] = tsr['Rounded_Up'].map(crosswalk)

    # pivot the table to get sectors as columns with tax values by city
    tsr_by_sector = pd.pivot_table(tsr,values = f'CY{year}', index ='City', columns = 'Economic Sector (NAICS Code)', 
                                    aggfunc='first').reset_index()

    # fill NAs with 0
    tsr_by_sector = tsr_by_sector.fillna(0)

    # rename sectors
    tsr_by_sector.rename(sector_lu, axis=1, inplace=True)

    # get the total sales tax revenue
    tsr_by_sector['TOTAL'] = tsr_by_sector[sector_lu.values()].sum(axis=1)

    # add the year
    tsr_by_sector['YEAR'] = year

    # export
    tsr_by_sector_sdf = cities_sdf.merge(tsr_by_sector, on='City', how='inner')
    return tsr_by_sector_sdf

In [21]:
# gather annual taxable sales reports
reports = glob.glob('.\\Inputs\\*-annual-sales.xlsx')
reports = [r for r in reports if '~' not in r] # in case the spreadsheet is open

base = cities_sdf

# export to shape
tsr_dataframes = [process_annual_sales_to_df2(r) for r in reports]
tsr_complete = pd.concat(tsr_dataframes)
tsr_complete.spatial.to_featureclass(location=os.path.join(gdb,'Taxable_Sales_by_City'),sanitize_columns=False)

'e:\\Projects\\UT_Sales_Tax_Analysis\\Outputs\\Utah.gdb\\Taxable_Sales_by_City'

In [20]:
tsr_complete

Unnamed: 0,City,POPLASTCEN,POPLASTEST,ACRES,SHAPE,L_ACCOMMODATION,S_ADMIN_SUPPORT,A_AG_WILDLIFE,L_CULTURAL_REC,C_CONSTRUCTION,E_EDUCATION,S_FINANCIAL,L_RSTRNT_BAR,H_HEALTH_CARE,S_IT_DATA_20,S_CORPORATE_MGMT,M_MANUFACTURING,E_EXTRACTION,O_OTHER,X_ADJUSTMENTS,R_AUTO_PRIVATE,S_PROF_TECH_SRV,G_GOVERNMENT,S_RENTAL_LEASING,R_BUILDING_SUPPLY,R_CLOTHING,R_ELECTRONICS,R_GROCERY_BEV,R_FURNITURE,R_GAS_STATIONS,R_GENERAL_RETAIL,R_HEALTH_RETAIL,R_OTHER_RETAIL,R_AUTO_RETAIL,R_NONSTORE_RETAIL,R_SPORT_HOBBY,L_SPECIAL_EVENT,W_DISTRIBUTION,X_UNKNOWN,U_UTILITIES,S_WHLSLE_ETRADE,W_WHLSALE_DURABLE,W_WHLSLE_NDURABLE,TOTAL,YEAR
0,Nephi,6443,6600,3511.590088,"{'rings': [[[-111.85610495042422, 39.710138593...",2750000.0,62128.0,0.0,250000.0,969216.0,4000.0,178241.0,10174507.0,200000.0,3465416.0,60000.0,4045836.0,80000.0,2508399.0,2065629.0,3000000.0,184695.0,4500000.0,1403138.0,2718280.0,525328.0,119370.0,10056645.0,396823.0,9500000.0,4645022.0,148502.0,1866350.0,8448341.0,2466674.0,677585.0,250000.0,15000.0,0.0,500000.0,20000.0,3023290.0,1341995.0,1.005194e+08,2017
1,South Ogden,17488,17541,2502.370117,"{'rings': [[[-111.95641781765417, 41.176085854...",25000.0,300521.0,200000.0,2500000.0,7568127.0,100000.0,656035.0,36982415.0,4244108.0,15222850.0,0.0,3714471.0,1000.0,6553704.0,1856372.0,5750000.0,1602551.0,5000.0,3313691.0,70000.0,1823488.0,911011.0,26283651.0,4486662.0,3250000.0,172820745.0,1737007.0,8167461.0,49553110.0,5538798.0,2719989.0,150000.0,15000.0,0.0,14500000.0,20000.0,4054262.0,700923.0,5.833343e+08,2017
2,Riverdale,9343,9409,2947.399902,"{'rings': [[[-112.01546541078616, 41.181683699...",1750000.0,327429.0,1000.0,1250000.0,1131932.0,6000.0,1687729.0,55271889.0,9629.0,13131730.0,1000.0,5053907.0,0.0,1912138.0,2454521.0,3500000.0,1007146.0,0.0,45415404.0,65505952.0,48070361.0,40125454.0,5071044.0,62318521.0,4250000.0,199003188.0,10620818.0,20674399.0,156976670.0,6225350.0,39189588.0,600000.0,40000.0,-18000.0,10250000.0,70000.0,6271222.0,1123893.0,1.234602e+09,2017
3,Midvale,36028,35938,3746.040039,"{'rings': [[[-111.90340997367319, 40.628726352...",18251958.0,2473465.0,7000.0,20159033.0,8354659.0,179552.0,5407958.0,101902851.0,1249804.0,31826022.0,6000.0,22750659.0,250000.0,29679678.0,12280599.0,8000000.0,5202265.0,25000.0,9056381.0,43196018.0,32795083.0,26414122.0,88625036.0,12738091.0,30750000.0,52019070.0,9756049.0,25069602.0,59776219.0,15491328.0,42575783.0,200000.0,600000.0,1000.0,29500000.0,297087.0,57072433.0,9288546.0,1.045643e+09,2017
4,Salt Lake City,199723,200478,71275.101562,"{'rings': [[[-111.89114449805619, 40.821831692...",347524422.0,46772157.0,455914.0,155675064.0,143331120.0,38220978.0,77715338.0,824218048.0,13406445.0,331810903.0,1546907.0,445673486.0,13838364.0,216038850.0,98142350.0,67250000.0,220041587.0,7250000.0,448832180.0,308258125.0,270865828.0,141201329.0,517907488.0,86439051.0,114692578.0,515030173.0,94911760.0,303144765.0,772896502.0,187721729.0,90774895.0,17089750.0,19837201.0,1188070.0,264250000.0,9662632.0,858531748.0,157235544.0,9.856106e+09,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,Morgan City,4071,4223,1895.109985,"{'rings': [[[-111.69708858883418, 41.045760146...",900000.0,166174.0,10000.0,200000.0,1521770.0,80737.0,251397.0,2558668.0,200000.0,3315058.0,15000.0,986938.0,300000.0,4552532.0,409457.0,4000000.0,1308374.0,1750000.0,2265823.0,7410515.0,1868983.0,530739.0,15371313.0,228674.0,1500000.0,1806442.0,465215.0,2486657.0,49711376.0,11076435.0,647481.0,40000.0,15000.0,0.0,1250000.0,95701.0,2420707.0,507810.0,1.318549e+08,2021
61,Provo,115162,114084,28262.099609,"{'rings': [[[-111.63320497106623, 40.208391175...",37141422.0,9666963.0,101638.0,15817929.0,20130702.0,16133460.0,7089440.0,186736985.0,8377550.0,87537630.0,200000.0,35376433.0,600000.0,64314398.0,30394986.0,40250000.0,93998369.0,8500000.0,32729527.0,141038649.0,52964523.0,44917238.0,110693889.0,15849180.0,31586723.0,155628525.0,22255015.0,78153497.0,190187917.0,209657960.0,28578700.0,2568483.0,6267871.0,0.0,70000000.0,3320336.0,112957943.0,42811126.0,2.447789e+09,2021
62,West Bountiful,5917,5957,2109.620117,"{'rings': [[[-111.92724476200317, 40.903454465...",2250000.0,81387.0,200000.0,1500000.0,401827.0,40135.0,343247.0,24215413.0,45000.0,6929122.0,15000.0,3136166.0,4000.0,2692505.0,2614079.0,4250000.0,1486867.0,3000.0,2140361.0,35622706.0,20798302.0,561888.0,4000.0,7146871.0,2500000.0,178907131.0,1992061.0,9185691.0,46804866.0,9467907.0,5819609.0,0.0,200000.0,0.0,5250000.0,326573.0,2555212.0,813590.0,6.075426e+08,2021
63,Kearns Township,36723,36747,2961.679932,"{'rings': [[[-111.98660579700216, 40.667567316...",0.0,499883.0,3000.0,1500000.0,878205.0,52601.0,757706.0,20215897.0,30000.0,6950504.0,20000.0,21323856.0,35000.0,4148036.0,1779533.0,12750000.0,3980229.0,0.0,3086827.0,4455546.0,1842419.0,3168332.0,49799524.0,397346.0,10500000.0,9445608.0,707646.0,2361519.0,5595955.0,37544735.0,765662.0,0.0,80000.0,0.0,17500000.0,211045.0,3092878.0,730165.0,2.556092e+08,2021


# Graveyard

In [11]:
# # read in taxable sales report (excel format)
# xlsx = '.\\Inputs\\2021-annual-sales.xlsx'
# year = os.path.split(xlsx)[-1][:4]
# tsr = pd.read_excel(xlsx, sheet_name='Table 9', header=5)

In [12]:
# # Drop last empty two rows
# tsr.drop(tsr.tail(2).index,inplace=True)

# # forward fill values from merged cells
# tsr['County'] = tsr['County'].fillna(method='ffill')
# tsr['Location Code'] = tsr['Location Code'].fillna(method='ffill')
# tsr['City'] = tsr['City'].fillna(method='ffill')

In [13]:
# # Figures with less than 10 taxpayers have been rounded up per Tax Commission disclosure rules.
# # Rename column and convert values to boolean
# tsr.rename(columns={'Unnamed: 5':'Rounded_Up'}, inplace=True)
# crosswalk = {'*':True, np.nan:False}
# tsr['Rounded_Up'] = tsr['Rounded_Up'].map(crosswalk)

In [14]:
# # pivot the table to get sectors as columns with tax values by city
# tsr_by_sector = pd.pivot_table(tsr,values = f'CY{year}', index ='City', columns = 'Economic Sector (NAICS Code)', 
#                                  aggfunc='first').reset_index()

# # fill NAs with 0
# tsr_by_sector = tsr_by_sector.fillna(0)

In [15]:
# # rename sectors
# tsr_by_sector.rename(sector_lu, axis=1, inplace=True)

# # get the total sales tax revenue
# tsr_by_sector['TOTAL'] = tsr_by_sector[sector_lu.values()].sum(axis=1)
# tsr_by_sector

In [16]:
# cities_sdf = pd.DataFrame.spatial.from_featureclass(r".\Inputs\Cities_v2.shp")
# cities_sdf = cities_sdf[['NAME','POPLASTCEN','POPLASTEST','SHAPE']].copy()
# cities_sdf.rename({'NAME':'City'}, axis=1, inplace=True)
# tsr_by_sector_sdf = cities_sdf.merge(tsr_by_sector, on='City', how='inner')
# tsr_by_sector_sdf.spatial.to_featureclass(location=os.path.join(gdb2,f'Taxable_Sales_by_City_{year}'),sanitize_columns=False)

### Calculate the differences 
*2020 - 2019 = Diff*