In [81]:
import pandas as pd
import os
import geopandas as gpd
#import pyproj


In [95]:
# GLOBAL VARIABLES

# excel spreadsheet copied to data/udot folder from https://drive.google.com/file/d/1rDXm0ObugGR1zXgWUuVbzWHNt-Xs1xru/view
fnExcelAADTHistory = 'data/udot/AADTHistory.xlsx'

# Replace 'path/to/your/shapefile.shp' with the actual path to your shapefile, copied from 'A:/1 - TDM/3 - Model Dev/1 - WF/1 - Official Release/v9x/v9.0/WF TDM v9.0 - official/1_Inputs/6_Segment/Segments_WF - 2023-08-01.shp'
fnSegmentsShapefile = 'data/segments/Segments_WF - 2023-08-01.shp'

# filter by PLANAREA in segments shapefile
filterPlanArea = ['WFRC','MAG']

# Get AADT from Segment Shapefile

In [114]:
# read in segment shapefile
gdfSegments = gpd.read_file(fnSegmentsShapefile)
display(gdfSegments.head(2))

# show columns
print(gdfSegments.columns.tolist())

Unnamed: 0,Id,SEGID,BMP,EMP,DISTANCE,FULLNAME,FNCLASS,FC_CODE,FC_GROUP,FC_NAME,...,ForecastSo,Shape_Leng,NAME,Shape_Le_1,OBJECTID,DIRECTION,LINE,ROUTE,Shape_Le_2,geometry
0,0,0006_000.0,0.0,0.665,0.666641,HWY 6,Arterial,3,Arterial,Other Principal Arterial,...,Utah_Statewide_Traffic_Volume_Historic_and_For...,1072.856212,,0.0,0,,,,0.0,"LINESTRING (236177.700 4327541.250, 237241.180..."
1,0,0006_000.7,0.665,16.022,15.369839,HWY 6,Arterial,3,Arterial,Other Principal Arterial,...,Utah_Statewide_Traffic_Volume_Historic_and_For...,24735.407823,,0.0,0,,,,0.0,"LINESTRING (237241.180 4327399.720, 238150.600..."


['Id', 'SEGID', 'BMP', 'EMP', 'DISTANCE', 'FULLNAME', 'FNCLASS', 'FC_CODE', 'FC_GROUP', 'FC_NAME', 'CO_FIPS', 'CO_NAME', 'PLANAREA', 'X', 'Y', 'ONLRS', 'T_MODEL', 'USE', 'RT_NUM', 'AADT2019', 'AADT2018', 'AADT2017', 'AADT2016', 'AADT2015', 'AADT2014', 'AADT2013', 'AADT2012', 'AADT2011', 'AADT2010', 'AADT2009', 'AADT2008', 'AADT2007', 'AADT2006', 'AADT2005', 'AADT2004', 'AADT2003', 'AADT2002', 'AADT2001', 'AADT2000', 'AADT1999', 'AADT1998', 'AADT1997', 'AADT1996', 'AADT1994', 'AADT1993', 'AADT1992', 'AADT1991', 'AADT1990', 'AADT1989', 'AADT1988', 'AADT1987', 'AADT1986', 'AADT1985', 'AADT1984', 'AADT1983', 'AADT1982', 'AADT1981', 'CCSGROUP', 'FAC_MON', 'FAC_TUE', 'FAC_WED', 'FAC_THU', 'FAC_FRI', 'FAC_SAT', 'FAC_SUN', 'FAC_WDAVG', 'FAC_WEAVG', 'FAC_WEMAX', 'FAC_JAN', 'FAC_FEB', 'FAC_MAR', 'FAC_APR', 'FAC_MAY', 'FAC_JUN', 'FAC_JUL', 'FAC_AUG', 'FAC_SEP', 'FAC_OCT', 'FAC_NOV', 'FAC_DEC', 'FAC_WIN', 'FAC_SPR', 'FAC_SUM', 'FAC_FAL', 'FAC_MAXMO', 'FAC_MAX', 'FACMANADJ', 'SUTRUCKS', 'CUTRUCKS',

In [116]:
# filter segments by PLANAREA
gdfPaSegments = gdfSegments[gdfSegments['PLANAREA'].isin(filterPlanArea)].copy()
display(gdfPaSegments.head(2))

Unnamed: 0,Id,SEGID,BMP,EMP,DISTANCE,FULLNAME,FNCLASS,FC_CODE,FC_GROUP,FC_NAME,...,ForecastSo,Shape_Leng,NAME,Shape_Le_1,OBJECTID,DIRECTION,LINE,ROUTE,Shape_Le_2,geometry
23,0,0006_146.9,146.868,149.902,3.039023,HWY 6,Arterial,4,Arterial,Minor Arterial,...,Utah_Statewide_Traffic_Volume_Historic_and_For...,4890.842458,,0.0,0,,,,0.0,"LINESTRING (413442.550 4422753.728, 413459.600..."
24,0,0006_149.9,149.902,150.58,0.677983,HWY 6,Arterial,4,Arterial,Minor Arterial,...,Utah_Statewide_Traffic_Volume_Historic_and_For...,1091.109505,,0.0,0,,,,0.0,"LINESTRING (418330.800 4422866.000, 418629.100..."


In [129]:
# get columns with AADT at beginning of name
aadt_columns = [col for col in gdfPaSegments.columns if col.startswith('AADT')]

# get dataframe for just segids and aadt columns
_df = gdfPaSegments[['SEGID'] + aadt_columns]

# melt to make long
_df = _df.melt(id_vars='SEGID', var_name='YEAR', value_name='AADT')

# filter out zero volumes
_df = _df[_df['AADT']>0]

# set AADT to int
_df['AADT'] = _df['AADT'].astype(int)

_df

Unnamed: 0,SEGID,YEAR,AADT
0,0006_146.9,AADT2019,1517
1,0006_149.9,AADT2019,2441
2,0006_150.6,AADT2019,2441
3,0006_152.6,AADT2019,2417
4,0006_152.9,AADT2019,3759
...,...,...,...
164120,2899_002.4,AADT1981,2635
164121,2899_003.5,AADT1981,2635
164122,2899_004.1,AADT1981,850
165368,0186_004.6,AADT1981,34200


# Get Historic Data from AADTHistory UDOT Spreadsheet

In [96]:
#import excel spreadsheet
xl_file1 = pd.ExcelFile(fnExcelAADTHistory)
dfs1 = {sheet_name: xl_file1.parse(sheet_name) for sheet_name in xl_file1.sheet_names}

  for idx, row in parser.parse():


In [97]:
#show sheet
display(xl_file1.sheet_names)
dfs1['AADT_2021']

['AADT_2021', 'AADT_Rounded', 'AADT_Unrounded']

Unnamed: 0,ROUTE_ID,FROM_MEASURE,TO_MEASURE,AADT_2021
0,0006PM,0.000,46.038,474
1,0006PM,46.038,77.556,424
2,0006PM,77.556,82.897,609
3,0006PM,82.897,83.911,2272
4,0006PM,83.911,87.694,3852
...,...,...,...,...
4568,0015PC29402,0.000,0.199,8886
4569,0015PC29501,0.000,0.166,1000
4570,0015PC30554,0.000,1.135,35025
4571,0092NC00101,0.000,3.294,12201


In [98]:
# PREPARE 2021 DATA

df2021 = dfs1['AADT_2021']

df2021['YEAR'] = 2021
df2021.rename(columns={'AADT_2021':'AADT'}, inplace=True)
df2021

Unnamed: 0,ROUTE_ID,FROM_MEASURE,TO_MEASURE,AADT,YEAR
0,0006PM,0.000,46.038,474,2021
1,0006PM,46.038,77.556,424,2021
2,0006PM,77.556,82.897,609,2021
3,0006PM,82.897,83.911,2272,2021
4,0006PM,83.911,87.694,3852,2021
...,...,...,...,...,...
4568,0015PC29402,0.000,0.199,8886,2021
4569,0015PC29501,0.000,0.166,1000,2021
4570,0015PC30554,0.000,1.135,35025,2021
4571,0092NC00101,0.000,3.294,12201,2021


In [99]:
# GET AADTS FOR YEARS PRIOR TO 2021

# melt AADT columns to rows
df = dfs1['AADT_Unrounded']

# get all columns that start with 'AADT'
aadt_cols = [col for col in df.columns if col.startswith('AADT')]

# define the id columns that you want to keep
id_vars = ['Route', 'Beg MP', 'End MP']

# melt the DataFrame
dfUnder2021 = df.melt(id_vars=id_vars, value_vars=aadt_cols, var_name='YEAR', value_name='AADT')

# replace 'AADT' in the 'YEAR' column and convert to integer
dfUnder2021['YEAR'] = dfUnder2021['YEAR'].str.replace('AADT', '').astype(int)

# rename columns to match pre-2021 data
dfUnder2021.rename(columns={'Route':'ROUTE_ID','Beg MP':'FROM_MEASURE','End MP':'TO_MEASURE'}, inplace=True)

display(dfUnder2021)

Unnamed: 0,ROUTE_ID,FROM_MEASURE,TO_MEASURE,YEAR,AADT
0,0006PM,0.000,46.038,2020,430
1,0006PM,46.038,77.556,2020,385
2,0006PM,77.556,82.897,2020,552
3,0006PM,82.897,83.911,2020,2061
4,0006PM,83.911,87.694,2020,3409
...,...,...,...,...,...
181475,3468PM,0.000,5.125,1981,0
181476,3469PM,0.000,6.930,1981,0
181477,3470PM,0.000,1.039,1981,0
181478,3478PM,0.000,2.040,1981,0


In [100]:
# combine 2021 and under 2021
dfAADT = pd.concat([dfUnder2021,df2021])

# melt for easier joining
dfAADT = pd.pivot_table(dfAADT,index=('ROUTE_ID','FROM_MEASURE','TO_MEASURE'),columns='YEAR',values='AADT')

dfAADT.reset_index(inplace=True)
dfAADT.fillna('0',inplace=True)

# Filter rows with 'ROUTE_ID' containing 'PM' - Positive Direction & Mainline?
dfAADT_filtered = dfAADT[dfAADT['ROUTE_ID'].str.contains('PM')].copy()

# Trim 'ROUTE_ID' down to the first four characters in order to match with SEGID
dfAADT_filtered['ROUTE_ID'] = dfAADT_filtered['ROUTE_ID'].str[:4]

dfAADT_filtered

YEAR,ROUTE_ID,FROM_MEASURE,TO_MEASURE,1981,1982,1983,1984,1985,1986,1987,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,0006,0.000,46.038,325.0,335.0,430.0,580.0,585.0,585.0,595.0,...,325.0,330.0,350.0,375.0,399.0,409.0,412.0,415.0,430.0,474.0
1,0006,46.038,77.556,325.0,335.0,430.0,580.0,585.0,585.0,595.0,...,340.0,340.0,360.0,390.0,412.0,366.0,369.0,372.0,385.0,424.0
2,0006,77.556,82.897,520.0,535.0,630.0,780.0,790.0,790.0,800.0,...,420.0,420.0,450.0,480.0,512.0,525.0,529.0,533.0,552.0,609.0
3,0006,82.897,83.911,0.0,1350.0,1450.0,1650.0,1675.0,1600.0,1525.0,...,1570.0,1575.0,1675.0,1800.0,1913.0,1961.0,1975.0,1991.0,2061.0,2272.0
4,0006,83.911,87.694,0.0,2650.0,2750.0,3000.0,3050.0,2920.0,2790.0,...,3270.0,3340.0,3495.0,3660.0,3846.0,3468.0,3496.0,3573.0,3409.0,3852.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4755,3468,0.000,5.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2015.0,2085.0,2120.0,2245.0,2354.0,2452.0,2550.0,2581.0,2578.0,2764.0
4756,3469,0.000,6.930,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,490.0,505.0,515.0,545.0,572.0,333.0,346.0,350.0,350.0,375.0
4757,3470,0.000,1.039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1653.0,1694.0,1724.0,1540.0,1669.0
4758,3478,0.000,2.040,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1055.0,1090.0,1110.0,1175.0,1232.0,865.0,900.0,911.0,910.0,976.0


In [101]:
# CHECK FOR DUPLICATES

# Define the columns you want to check for duplicates
columns_to_check = ['ROUTE_ID', 'FROM_MEASURE', 'TO_MEASURE']

# Use the duplicated method to find duplicates in those columns
duplicates = dfAADT_filtered.duplicated(subset=columns_to_check, keep=False)

# Filter the DataFrame to only include the duplicates
dfAADT_duplicates = dfAADT_filtered[duplicates]

display(dfAADT_duplicates)

YEAR,ROUTE_ID,FROM_MEASURE,TO_MEASURE,1981,1982,1983,1984,1985,1986,1987,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021


In [93]:
# join based on route and then filter with between

# calculate midpoint for 
gdfPaSegments['MIDPOINT_MP'] = (gdfPaSegments['BMP'] + gdfPaSegments['EMP']) / 2
gdfPaSegments['ROUTE_ID'] = gdfPaSegments['SEGID'].str.split('_').str[0]
display(gdfPaSegments.head(5))

# get only data needed to join
dfPaSegments = gdfPaSegments[['SEGID','ROUTE_ID','MIDPOINT_MP']]

# create temp dataframe to merge to segments data
_df = pd.DataFrame.merge(dfPaSegments, dfAADT_filtered, on='ROUTE_ID')

# filter for only segments with midpoints between AADT FROM_MEASURE and TO_MEASURE
_df = _df[_df['MIDPOINT_MP'].between(_df['FROM_MEASURE'],_df['TO_MEASURE'])]

# give me only columns with years
cols_to_keep = [col for col in df.columns if str(col).isdigit() and 1981 <= int(col) <= 2021]

# filter by only segid and columns with years
_df = _df[['SEGID']+cols_to_keep]

# melt to make table long
_df = _df.melt(id_vars='SEGID',var_name="YEAR",value_name='AADT')

# convert AADT to int
_df['AADT'] = _df['AADT'].astype(int)

# add source
_df['SOURCE'] = 'Manually Join to Historic Data'

# filter out an zero data
_df = _df[_df['AADT']>0]

_df

Unnamed: 0,SEGID,YEAR,AADT,SOURCE
