In [115]:
import pandas as pd
import os
import geopandas as gpd
import re # regular expression
import numpy as np

In [116]:
# GLOBAL VARIABLES

# Planning areas that are locked: segments in these areas keep the data from
# the previous export instead of being refreshed.
lstLockPlanArea = [
    'WFRC',
    'MAG',
    'Summit',
    'Iron',
    'Dixie',
    'Cache',
]

# Segment shapefiles with AADT and previous forecasts, read in the order listed.
# (copied from 'A:/1 - TDM/3 - Model Dev/1 - WF/1 - Official Release/v9x/v9.0/WF TDM v9.0 - official/1_Inputs/6_Segment/Segments_WF - 2023-08-01.shp')
fnSegmentsShapefiles = [
    'data/segments/Segments_State_20231221_Draft.shp',
    'data/segments/WFv901_Segments_20240226_Draft.shp',
    'data/segments/WF_Segments_20240326_Draft.shp',
]

# SOURCE value (shapefile base name) of the segment set that the AADT history
# workbook is joined to.
sourceExcelAADTSegData = 'Segments_State_20231221_Draft.shp'

# UDOT AADT history workbook, copied to the data/udot folder from
# https://drive.google.com/file/d/1rDXm0ObugGR1zXgWUuVbzWHNt-Xs1xru/view
fnExcelAADTHistory = 'data/udot/AADTHistory_2023.xlsx'
#fnAADT2022 = 'data/udot/Traffic on Utah Highways 2022.xlsx - AADT2022.csv' # added Feb 16 2024

# Optional PLANAREA filter for the segments shapefile (currently disabled).
#filterPlanArea = ['WFRC','MAG'] # must be a list, even for a single item

# Get AADT and Previous Forecasts from Segment Shapefiles

In [117]:
# Read every segment shapefile, tag each row with the base name of the file it
# came from (SOURCE), and stack the results into one table.
#
# The frames are collected first and concatenated once: growing a frame with
# pd.concat inside the loop re-copies all previously read rows on every pass.
_segment_frames = [
    gpd.read_file(path).assign(SOURCE=os.path.basename(path))
    for path in fnSegmentsShapefiles
]

# pd.concat raises on an empty list, so fall back to an empty table when no
# shapefiles are configured
gdfSegments = pd.concat(_segment_frames) if _segment_frames else pd.DataFrame()

display(gdfSegments)

# show columns
print(gdfSegments.columns.tolist())

# NOTE: to_csv writes CSV text even though the file name ends in .gdf
gdfSegments.to_csv('intermediate/segments.gdf')

Unnamed: 0,Id,SEGID,BMP,EMP,DISTANCE,PLANAREA,AADT2019,SUBAREAID,CO_FIPS,F_AREA,...,SUTRK2014,CUTRK2014,SUTRK2013,CUTRK2013,SUTRK2012,CUTRK2012,SUTRK2011,CUTRK2011,SUTRK2010,CUTRK2010
0,0,0006_000.0,0.000,0.665,0.666642,UDOT,415.0,0.0,27.0,UDOT,...,,,,,,,,,,
1,0,0006_000.7,0.665,16.022,15.369870,UDOT,415.0,0.0,27.0,UDOT,...,,,,,,,,,,
2,0,0006_016.0,16.022,46.017,30.002021,UDOT,415.0,0.0,27.0,UDOT,...,,,,,,,,,,
3,0,0006_046.0,46.017,60.218,14.194335,UDOT,372.0,0.0,27.0,UDOT,...,,,,,,,,,,
4,0,0006_060.2,60.218,77.545,17.323272,UDOT,372.0,0.0,27.0,UDOT,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5338,0,UTA_7593,0.000,0.000,0.894464,WFRC,0.0,0.0,35.0,,...,,,,,0.0,,,,,
5339,0,UTA_7594,0.000,0.000,0.665073,WFRC,0.0,0.0,35.0,,...,,,,,0.0,,,,,
5340,0,WFRC_8430,0.000,0.000,1.135216,WFRC,0.0,1.0,35.0,,...,0,0,0,0,0.0,0,0,0,0,0
5341,0,UTA_9556,0.000,0.000,3.869188,WFRC,0.0,1.0,35.0,,...,0,0,0,0,0.0,0,0,0,0,0


['Id', 'SEGID', 'BMP', 'EMP', 'DISTANCE', 'PLANAREA', 'AADT2019', 'SUBAREAID', 'CO_FIPS', 'F_AREA', 'CCSGROUP19', 'FACMANADJ', 'FAC_APR', 'FAC_AUG', 'FAC_DEC', 'FAC_FAL', 'FAC_FEB', 'FAC_FRI', 'FAC_JAN', 'FAC_JUL', 'FAC_JUN', 'FAC_MAR', 'FAC_MAX', 'FAC_MAXMO', 'FAC_MAY', 'FAC_MON', 'FAC_NOV', 'FAC_OCT', 'FAC_SAT', 'FAC_SEP', 'FAC_SPR', 'FAC_SUM', 'FAC_SUN', 'FAC_THU', 'FAC_TUE', 'FAC_WDAVG', 'FAC_WEAVG', 'FAC_WED', 'FAC_WEMAX', 'FAC_WIN', 'geometry', 'SOURCE', 'ROUTE', 'F2019', 'F2023', 'F2028', 'F2032', 'F2042', 'F2050', 'CH19TO50', 'CH19TO23', 'CH23TO28', 'CH28TO32', 'CH32TO42', 'CH42TO50', 'AADTSTN', 'AADT2022', 'AADT2021', 'AADT2020', 'AADT2018', 'AADT2017', 'AADT2016', 'AADT2015', 'AADT2014', 'AADT2013', 'AADT2012', 'AADT2011', 'AADT2010', 'AADT2009', 'AADT2008', 'AADT2007', 'AADT2006', 'AADT2005', 'AADT2004', 'AADT2003', 'AADT2002', 'AADT2001', 'AADT2000', 'AADT1999', 'AADT1998', 'AADT1997', 'AADT1996', 'AADT1995', 'AADT1994', 'AADT1993', 'AADT1992', 'AADT1991', 'AADT1990', 'AADT

In [118]:
# One row per SEGID, carrying the first non-null PLANAREA and F_AREA found for
# that SEGID across all shapefiles
_first_per_segid = {col: (col, 'first') for col in ('PLANAREA', 'F_AREA')}
segid_planarea_df = gdfSegments.groupby(['SEGID'], as_index=False).agg(**_first_per_segid)
segid_planarea_df

Unnamed: 0,SEGID,PLANAREA,F_AREA
0,0006_000.0,UDOT,UDOT
1,0006_000.7,UDOT,UDOT
2,0006_016.0,UDOT,UDOT
3,0006_046.0,UDOT,UDOT
4,0006_060.2,UDOT,UDOT
...,...,...,...
9717,WFRC_8472,WFRC,WFRC
9718,WFRC_8473,WFRC,WFRC
9719,WFRC_8474,WFRC,
9720,WFRC_8475,WFRC,


In [119]:
# SEGIDs of all segment rows whose PLANAREA is one of the locked planning areas
# (one entry per matching row, so a SEGID found in several shapefiles repeats)
is_locked_area = gdfSegments['PLANAREA'].isin(lstLockPlanArea)
lstLockedSegments = gdfSegments.loc[is_locked_area, 'SEGID'].to_list()
lstLockedSegments

['0006_146.9',
 '0006_149.9',
 '0006_150.6',
 '0006_152.6',
 '0006_152.9',
 '0006_155.8',
 '0006_155.9',
 '0006_157.3',
 '0006_157.6',
 '0006_158.5',
 '0006_159.6',
 '0006_159.8',
 '0006_160.3',
 '0006_173.4',
 '0006_173.7',
 '0006_174.0',
 '0006_174.4',
 '0006_174.9',
 '0006_175.6',
 '0006_176.1',
 '0006_177.0',
 '0006_177.2',
 '0006_177.5',
 '0006_177.9',
 '0007_000.0',
 '0007_000.1',
 '0007_000.3',
 '0007_001.5',
 '0007_002.4',
 '0007_003.4',
 '0007_005.1',
 '0007_006.1',
 '0007_007.3',
 '0007_008.3',
 '0007_009.3',
 '0007_010.5',
 '0007_012.5',
 '0007_015.0',
 '0008_000.0',
 '0008_000.4',
 '0008_001.2',
 '0009_000.0',
 '0009_000.5',
 '0009_001.1',
 '0009_002.8',
 '0009_004.9',
 '0009_005.4',
 '0009_006.6',
 '0009_007.0',
 '0009_007.5',
 '0009_007.9',
 '0009_008.3',
 '0009_008.6',
 '0009_009.1',
 '0009_009.6',
 '0009_009.8',
 '0009_010.5',
 '0009_010.9',
 '0009_011.4',
 '0009_012.5',
 '0009_012.6',
 '0009_015.3',
 '0013_000.0',
 '0013_000.6',
 '0013_001.3',
 '0013_001.6',
 '0013_001

In [120]:
# Preview of the segment identifiers next to their AADT year columns.
# No PLANAREA filter is applied; the disabled filter was:
#   gdfSegments[gdfSegments['PLANAREA'].isin(filterPlanArea)].copy()
#
# NOTE: the two steps below modify gdfSegments itself (not a copy), so every
# later cell sees the filled values and the corrected F_AREA.
gdfSegments.fillna(0, inplace=True)

# Correct F_AREA: where it is 0 (missing before the fill above), use PLANAREA
f_area_is_zero = gdfSegments['F_AREA'] == 0
gdfSegments['F_AREA'] = np.where(f_area_is_zero, gdfSegments['PLANAREA'], gdfSegments['F_AREA'])

# AADT year columns: names that start with 'AADT', are 8 characters long and
# end in four digits
aadt_columns = []
for name in gdfSegments.columns:
    if name.startswith('AADT') and len(name) == 8 and name[-4:].isdigit():
        aadt_columns.append(name)

# identifiers plus the AADT columns only
_df = gdfSegments[['SOURCE','SEGID','SUBAREAID','PLANAREA','F_AREA'] + aadt_columns]
dftemp = _df
dftemp

Unnamed: 0,SOURCE,SEGID,SUBAREAID,PLANAREA,F_AREA,AADT2019,AADT2022,AADT2021,AADT2020,AADT2018,...,AADT1990,AADT1989,AADT1988,AADT1987,AADT1986,AADT1985,AADT1984,AADT1983,AADT1982,AADT1981
0,Segments_State_20231221_Draft.shp,0006_000.0,0.0,UDOT,UDOT,415.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Segments_State_20231221_Draft.shp,0006_000.7,0.0,UDOT,UDOT,415.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Segments_State_20231221_Draft.shp,0006_016.0,0.0,UDOT,UDOT,415.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Segments_State_20231221_Draft.shp,0006_046.0,0.0,UDOT,UDOT,372.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Segments_State_20231221_Draft.shp,0006_060.2,0.0,UDOT,UDOT,372.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5338,WF_Segments_20240326_Draft.shp,UTA_7593,0.0,WFRC,WFRC,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5339,WF_Segments_20240326_Draft.shp,UTA_7594,0.0,WFRC,WFRC,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5340,WF_Segments_20240326_Draft.shp,WFRC_8430,1.0,WFRC,WFRC,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5341,WF_Segments_20240326_Draft.shp,UTA_9556,1.0,WFRC,WFRC,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [121]:
# Build a long table (one row per SOURCE, SEGID, YEAR) of the AADT values stored
# in the segment shapefiles. No PLANAREA filter is applied; the disabled filter was:
#   gdfSegments[gdfSegments['PLANAREA'].isin(filterPlanArea)].copy()

# Fill missing values with 0 so the "> 0" filter below drops them.
# NOTE: this fills gdfSegments itself, exactly as before (the old code did it
# through an alias); later cells read gdfSegments after this fill.
gdfSegments.fillna(0, inplace=True)

# get columns with AADT at beginning of name, followed by a four-digit year
aadt_columns = [
    col for col in gdfSegments.columns
    if col.startswith('AADT')
       and len(col) == 8
       and col[-4:].isdigit()
]

# The steps are chained with .loc/.assign instead of assigning columns onto a
# filtered slice, which is the pattern that raises SettingWithCopyWarning.
dfAadtFromSegments = (
    # just segids and aadt columns
    gdfSegments[['SOURCE','SEGID'] + aadt_columns]
    # melt to make long
    .melt(id_vars=['SOURCE','SEGID'], var_name='YEAR', value_name='AADT')
    # filter out zero volumes
    .loc[lambda long: long['AADT'] > 0]
    # 'AADT2019' -> 2019, and AADT as int
    .assign(
        YEAR=lambda long: long['YEAR'].str[-4:].astype(int),
        AADT=lambda long: long['AADT'].astype(int),
    )
)
dfAadtFromSegments

Unnamed: 0,SOURCE,SEGID,YEAR,AADT
0,Segments_State_20231221_Draft.shp,0006_000.0,2019,415
1,Segments_State_20231221_Draft.shp,0006_000.7,2019,415
2,Segments_State_20231221_Draft.shp,0006_016.0,2019,415
3,Segments_State_20231221_Draft.shp,0006_046.0,2019,372
4,Segments_State_20231221_Draft.shp,0006_060.2,2019,372
...,...,...,...,...
808065,WF_Segments_20240326_Draft.shp,2899_002.4,1981,2635
808066,WF_Segments_20240326_Draft.shp,2899_003.5,1981,2635
808067,WF_Segments_20240326_Draft.shp,2899_004.1,1981,850
810118,WF_Segments_20240326_Draft.shp,WFRC_8419,1981,15075


In [122]:
# spot check: shapefile-derived AADT rows for one example segment
dfAadtFromSegments.loc[dfAadtFromSegments['SEGID'].eq('0013_000.0')]

Unnamed: 0,SOURCE,SEGID,YEAR,AADT
185,Segments_State_20231221_Draft.shp,0013_000.0,2019,19514
9003,WFv901_Segments_20240226_Draft.shp,0013_000.0,2019,19514
13983,WF_Segments_20240326_Draft.shp,0013_000.0,2019,19514
28293,WFv901_Segments_20240226_Draft.shp,0013_000.0,2022,19243
33273,WF_Segments_20240326_Draft.shp,0013_000.0,2022,19243
...,...,...,...,...
727713,WF_Segments_20240326_Draft.shp,0013_000.0,1985,12035
742023,WFv901_Segments_20240226_Draft.shp,0013_000.0,1984,11910
747003,WF_Segments_20240326_Draft.shp,0013_000.0,1984,11910
761313,WFv901_Segments_20240226_Draft.shp,0013_000.0,1983,11780


# Get Historic Data from AADTHistory UDOT Spreadsheet

In [123]:
# Open the AADT history workbook and parse every sheet into a DataFrame keyed
# by sheet name. xl_file1 is kept because the next cell reads its sheet_names.
xl_file1 = pd.ExcelFile(fnExcelAADTHistory)
dfs1 = {}
for sheet_name in xl_file1.sheet_names:
    dfs1[sheet_name] = xl_file1.parse(sheet_name)

In [124]:
# list the sheets in the workbook, then show the unrounded AADT sheet
workbook_sheets = xl_file1.sheet_names
display(workbook_sheets)
dfs1['UnroundedAADT2023']

['RoundedAADT2023', 'UnroundedAADT2023']

Unnamed: 0,STATION,RouteID,BeginPoint,EndPoint,Section_Length,DESC,AADT2023,AADT2022,AADT2021,AADT2020,...,SUTRK2014,CUTRK2014,SUTRK2013,CUTRK2013,SUTRK2012,CUTRK2012,SUTRK2011,CUTRK2011,SUTRK2010,CUTRK2010
0,027-0005,0006PM,0.000,46.0380,46.0380,Nevada State Line via SR 6 - Antelope Springs,457,441.0,474.0,430.0,...,0.249610,0.232449,0.249610,0.232449,0.249610,0.232449,0.249610,0.232449,0.239372,0.103931
1,027-0007,0006PM,46.038,77.5560,31.5180,Antelope Springs via SR 6 - Gunnison Massacre ...,409,395.0,424.0,385.0,...,0.175063,0.333819,0.175063,0.333819,0.175063,0.333819,0.175063,0.333819,0.175063,0.333819
2,027-0010,0006PM,77.556,82.8970,5.3410,Gunnison Massacre Site via SR 6 - Main St Hin...,586,566.0,609.0,552.0,...,0.162481,0.264722,0.162481,0.264722,0.162481,0.264722,0.162481,0.264722,0.162481,0.264722
3,027-0015,0006PM,82.897,83.9110,1.0140,Main St via SR 6 (500 N) Hinckley - SR 257 (40...,2189,2113.0,2272.0,2061.0,...,0.149898,0.195620,0.149898,0.195620,0.149898,0.195620,0.149898,0.195620,0.149898,0.195620
4,027-0025,0006PM,83.911,87.6940,3.7830,SR 257 (4000 W) Hinckley via SR 6 - 1000 W to ...,4012,3910.0,3852.0,3409.0,...,0.137314,0.126513,0.137314,0.126513,0.137314,0.126513,0.137314,0.126513,0.137314,0.126513
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4562,057-1157,3468PM,0.000,5.1248,5.1248,SR 39 Huntsville via 7800 E - 2200 North Eden,2851,2733.0,2764.0,2578.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4563,057-1255,3469PM,0.000,6.9300,6.9300,Snow Basin via Old Snow Basin Rd - SR 39,387,371.0,375.0,350.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4564,057-1460,3470PM,0.000,1.0394,1.0394,2700 N (SR 134) via 1000 W - Pleasant View Dr ...,1710,1686.0,1669.0,1540.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4565,057-1195,3478PM,0.000,2.0404,2.0404,Entrance Bluff Swim Beach via 1st St - SR 39 (...,1006,965.0,976.0,910.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [125]:
# RESHAPE THE UNROUNDED AADT SHEET FROM WIDE (ONE COLUMN PER YEAR) TO LONG

df = dfs1['UnroundedAADT2023']

# location columns carried through the melt
id_vars = ['RouteID', 'BeginPoint', 'EndPoint']

# every column whose name starts with 'AADT' is a yearly value column
aadt_cols = list(filter(lambda col: col.startswith('AADT'), df.columns))

dfExcelAadt = (
    df.melt(id_vars=id_vars, value_vars=aadt_cols, var_name='YEAR', value_name='AADT')
      # 'AADT2023' -> 2023
      .assign(YEAR=lambda long: long['YEAR'].str.replace('AADT', '').astype(int))
      # column names used by the pivot and join cells that follow
      .rename(columns={'RouteID':'ROUTE_ID','BeginPoint':'FROM_MEASURE','EndPoint':'TO_MEASURE'})
)

display(dfExcelAadt)

Unnamed: 0,ROUTE_ID,FROM_MEASURE,TO_MEASURE,YEAR,AADT
0,0006PM,0.000,46.0380,2023,457.0
1,0006PM,46.038,77.5560,2023,409.0
2,0006PM,77.556,82.8970,2023,586.0
3,0006PM,82.897,83.9110,2023,2189.0
4,0006PM,83.911,87.6940,2023,4012.0
...,...,...,...,...,...
196376,3468PM,0.000,5.1248,1981,0.0
196377,3469PM,0.000,6.9300,1981,0.0
196378,3470PM,0.000,1.0394,1981,0.0
196379,3478PM,0.000,2.0404,1981,0.0


In [126]:
# Wide AADT history table: one row per route / measure range, one column per year.

# Work on a copy so the rounding below does not alter dfExcelAadt
# (the workbook is the only input left to combine).
dfAADT = dfExcelAadt.copy()

# round the measures to two decimals before they are used as pivot keys
dfAADT['FROM_MEASURE'] = dfAADT['FROM_MEASURE'].round(2)
dfAADT['TO_MEASURE'] = dfAADT['TO_MEASURE'].round(2)

# pivot to wide for easier joining; pivot_table aggregates with the mean by
# default, so rows that share a key after rounding are averaged
dfAADT = pd.pivot_table(dfAADT,index=('ROUTE_ID','FROM_MEASURE','TO_MEASURE'),columns='YEAR',values='AADT')

dfAADT = dfAADT.reset_index()

# Missing year values become numeric 0. (Filling with the string '0' would turn
# any year column that had a gap into object dtype with mixed numbers and text.)
dfAADT = dfAADT.fillna(0)

# Filter rows with 'ROUTE_ID' containing 'PM' - Positive Direction & Mainline?
dfAADT_filtered = dfAADT[dfAADT['ROUTE_ID'].str.contains('PM')].copy()

# Trim 'ROUTE_ID' down to the first four characters in order to match with SEGID
dfAADT_filtered['ROUTE_ID'] = dfAADT_filtered['ROUTE_ID'].str[:4]

dfAADT_filtered

YEAR,ROUTE_ID,FROM_MEASURE,TO_MEASURE,1981,1982,1983,1984,1985,1986,1987,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,0006,0.00,46.04,325.0,335.0,430.0,580.0,585.0,585.0,595.0,...,350.0,375.0,399.0,409.0,412.0,415.0,430.0,474.0,441.0,457.0
1,0006,46.04,77.56,325.0,335.0,430.0,580.0,585.0,585.0,595.0,...,360.0,390.0,412.0,366.0,369.0,372.0,385.0,424.0,395.0,409.0
2,0006,77.56,82.90,520.0,535.0,630.0,780.0,790.0,790.0,800.0,...,450.0,480.0,512.0,525.0,529.0,533.0,552.0,609.0,566.0,586.0
3,0006,82.90,83.91,0.0,1350.0,1450.0,1650.0,1675.0,1600.0,1525.0,...,1675.0,1800.0,1913.0,1961.0,1975.0,1991.0,2061.0,2272.0,2113.0,2189.0
4,0006,83.91,87.69,0.0,2650.0,2750.0,3000.0,3050.0,2920.0,2790.0,...,3495.0,3660.0,3846.0,3468.0,3496.0,3573.0,3409.0,3852.0,3910.0,4012.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4562,3468,0.00,5.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2120.0,2245.0,2354.0,2452.0,2550.0,2581.0,2578.0,2764.0,2733.0,2851.0
4563,3469,0.00,6.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,515.0,545.0,572.0,333.0,346.0,350.0,350.0,375.0,371.0,387.0
4564,3470,0.00,1.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1653.0,1694.0,1724.0,1540.0,1669.0,1686.0,1710.0
4565,3478,0.00,2.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1110.0,1175.0,1232.0,865.0,900.0,911.0,910.0,976.0,965.0,1006.0


In [127]:
# CHECK FOR DUPLICATES

# a history record is identified by its route and measure range
columns_to_check = ['ROUTE_ID', 'FROM_MEASURE', 'TO_MEASURE']

# keep=False marks every member of a duplicated group, not only the repeats
duplicates = dfAADT_filtered.duplicated(subset=columns_to_check, keep=False)

# rows that share their key with at least one other row (empty when all keys are unique)
dfAADT_duplicates = dfAADT_filtered.loc[duplicates]

display(dfAADT_duplicates)

YEAR,ROUTE_ID,FROM_MEASURE,TO_MEASURE,1981,1982,1983,1984,1985,1986,1987,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023


In [128]:
# JOIN THE AADT HISTORY TO THE SEGMENTS: A SEGMENT TAKES THE VALUES OF THE
# HISTORY RECORD WHOSE MEASURE RANGE CONTAINS THE SEGMENT MIDPOINT

# segments from the shapefile that the history workbook is matched against
seg_keys = gdfSegments[gdfSegments['SOURCE'] == sourceExcelAADTSegData].copy()

# midpoint between begin and end milepost, and the route (SEGID text before '_')
seg_keys['MIDPOINT_MP'] = (seg_keys['BMP'] + seg_keys['EMP']) / 2
seg_keys['ROUTE_ID'] = seg_keys['SEGID'].str.split('_').str[0]

# only the columns needed for the join
seg_keys = seg_keys[['SEGID','ROUTE_ID','MIDPOINT_MP']]

# pair each segment with every history record on the same route ...
candidates = pd.DataFrame.merge(seg_keys, dfAADT_filtered, on='ROUTE_ID')

# ... and keep the pairs whose measure range contains the midpoint
# (between() includes both end points)
midpoint_in_range = candidates['MIDPOINT_MP'].between(candidates['FROM_MEASURE'], candidates['TO_MEASURE'])
matched = candidates[midpoint_in_range].copy()

# year columns: labels that read as whole numbers from 1981 to 3000
cols_to_keep = [col for col in matched.columns if str(col).isdigit() and 1981 <= int(col) <= 3000]

# long format: one row per SEGID and YEAR
dfAadtFromHistoric = matched[['SEGID'] + cols_to_keep].melt(id_vars='SEGID', var_name="YEAR", value_name='AADT')

# AADT as int, tagged with the workbook file name as SOURCE
dfAadtFromHistoric['AADT'] = dfAadtFromHistoric['AADT'].astype(int)
dfAadtFromHistoric['SOURCE'] = fnExcelAADTHistory.split('/')[-1]

# drop rows with zero volume
dfAadtFromHistoric = dfAadtFromHistoric[dfAadtFromHistoric['AADT'] > 0]

dfAadtFromHistoric

Unnamed: 0,SEGID,YEAR,AADT,SOURCE
0,0006_000.0,1981,325,AADTHistory_2023.xlsx
1,0006_000.7,1981,325,AADTHistory_2023.xlsx
2,0006_016.0,1981,325,AADTHistory_2023.xlsx
3,0006_046.0,1981,325,AADTHistory_2023.xlsx
4,0006_060.2,1981,325,AADTHistory_2023.xlsx
...,...,...,...,...
306843,3468_000.0,2023,2851,AADTHistory_2023.xlsx
306844,3469_000.0,2023,387,AADTHistory_2023.xlsx
306845,3470_000.0,2023,1710,AADTHistory_2023.xlsx
306846,3478_000.0,2023,1006,AADTHistory_2023.xlsx


In [129]:
# spot check: workbook-derived AADT rows for one example segment
dfAadtFromHistoric.loc[dfAadtFromHistoric['SEGID'].eq('0013_000.0')]

Unnamed: 0,SEGID,YEAR,AADT,SOURCE
14457,0013_000.0,1983,11780,AADTHistory_2023.xlsx
21593,0013_000.0,1984,11910,AADTHistory_2023.xlsx
28729,0013_000.0,1985,12035,AADTHistory_2023.xlsx
35865,0013_000.0,1986,12075,AADTHistory_2023.xlsx
43001,0013_000.0,1987,12115,AADTHistory_2023.xlsx
50137,0013_000.0,1988,12170,AADTHistory_2023.xlsx
57273,0013_000.0,1989,13070,AADTHistory_2023.xlsx
64409,0013_000.0,1990,13325,AADTHistory_2023.xlsx
71545,0013_000.0,1991,13485,AADTHistory_2023.xlsx
78681,0013_000.0,1992,16335,AADTHistory_2023.xlsx


# Wrap up and export

In [130]:
# Stack the shapefile-derived and workbook-derived AADT rows, order them by
# segment, source and year, and drop rows that are identical in every column
dfAadt = (
    pd.concat([dfAadtFromSegments, dfAadtFromHistoric])
    .sort_values(by=['SEGID','SOURCE','YEAR'])
    .drop_duplicates()
)
dfAadt

Unnamed: 0,SOURCE,SEGID,YEAR,AADT
0,AADTHistory_2023.xlsx,0006_000.0,1981,325
7136,AADTHistory_2023.xlsx,0006_000.0,1982,335
14272,AADTHistory_2023.xlsx,0006_000.0,1983,430
21408,AADTHistory_2023.xlsx,0006_000.0,1984,580
28544,AADTHistory_2023.xlsx,0006_000.0,1985,585
...,...,...,...,...
91100,WFv901_Segments_20240226_Draft.shp,WFRC_8467,2018,26161
13940,WFv901_Segments_20240226_Draft.shp,WFRC_8467,2019,26632
71810,WFv901_Segments_20240226_Draft.shp,WFRC_8467,2020,23782
52520,WFv901_Segments_20240226_Draft.shp,WFRC_8467,2021,25780


In [131]:
# spot check: combined AADT rows for one example segment
dfAadt.loc[dfAadt['SEGID'].eq('0013_000.0')]

Unnamed: 0,SOURCE,SEGID,YEAR,AADT
14457,AADTHistory_2023.xlsx,0013_000.0,1983,11780
21593,AADTHistory_2023.xlsx,0013_000.0,1984,11910
28729,AADTHistory_2023.xlsx,0013_000.0,1985,12035
35865,AADTHistory_2023.xlsx,0013_000.0,1986,12075
43001,AADTHistory_2023.xlsx,0013_000.0,1987,12115
...,...,...,...,...
86163,WFv901_Segments_20240226_Draft.shp,0013_000.0,2018,19321
9003,WFv901_Segments_20240226_Draft.shp,0013_000.0,2019,19514
66873,WFv901_Segments_20240226_Draft.shp,0013_000.0,2020,16977
47583,WFv901_Segments_20240226_Draft.shp,0013_000.0,2021,18811


In [132]:
# CHECK NON-NUMERIC SEGIDs!!
# str.match tests from the start of the string, so this lists the distinct
# SEGIDs whose first character is not a digit
starts_with_non_digit = dfAadt['SEGID'].str.match(r'[^\d]')
dfAadt.loc[starts_with_non_digit, 'SEGID'].drop_duplicates()


8107       MAG_6017
384501     MAG_6021
56607      MAG_6067
56609      MAG_6069
56676      MAG_6139
109525     MAG_6140
8252       MAG_6164
8341       MAG_6254
114987     MAG_6604
8517      WFRC_8004
153884    WFRC_8010
385544    WFRC_8220
8758      WFRC_8262
8759      WFRC_8263
810118    WFRC_8419
810163    WFRC_8467
Name: SEGID, dtype: object

In [133]:
# merge locked with unlocked
#
# Segments in locked planning areas keep the AADT rows from the previous export
# (intermediate/aadt.csv); every other segment takes the rows built in this run.

fnAadtPrev = 'intermediate/aadt.csv'

# The previous export only exists after this notebook has run at least once, so
# check for it instead of failing with FileNotFoundError on a first run.
if len(lstLockedSegments)>=1 and os.path.exists(fnAadtPrev):

    # previous export: source of the locked rows
    dfAadt_Prev = pd.read_csv(fnAadtPrev)
    dfAadt_Locked = dfAadt_Prev[dfAadt_Prev['SEGID'].isin(lstLockedSegments)].copy()

    # current run: source of everything that is not locked
    dfAadt_Unlocked = dfAadt[~dfAadt['SEGID'].isin(lstLockedSegments)].copy()

    # Concatenate the locked and unlocked DataFrames
    dfAadtMerged = pd.concat([dfAadt_Locked, dfAadt_Unlocked])

    # Reset the index of the final DataFrame
    dfAadtMerged.reset_index(drop=True, inplace=True)

else:
    if len(lstLockedSegments)>=1:
        # nothing to preserve yet: there is no earlier export to take locked rows from
        print('No previous AADT export at ' + fnAadtPrev + ' - using current data for all segments')
    dfAadtMerged = dfAadt


# the previous export also carries PLANAREA / F_AREA; keep only the AADT columns
dfAadtMerged = dfAadtMerged[['SOURCE','SEGID','YEAR','AADT']]

dfAadtMerged

Unnamed: 0,SOURCE,SEGID,YEAR,AADT
0,Segments_State_20231221_Draft.shp,0013_000.0,2019,19514
1,WF_Segments_20240326_Draft.shp,0013_000.0,1983,11780
2,WF_Segments_20240326_Draft.shp,0013_000.0,1984,11910
3,WF_Segments_20240326_Draft.shp,0013_000.0,1985,12035
4,WF_Segments_20240326_Draft.shp,0013_000.0,1986,12075
...,...,...,...,...
317889,AADTHistory_2023.xlsx,3483_000.0,2020,1041
317890,AADTHistory_2023.xlsx,3483_000.0,2021,1116
317891,AADTHistory_2023.xlsx,3483_000.0,2022,1104
317892,AADTHistory_2023.xlsx,3483_000.0,2023,1151


In [134]:
# spot check: merged (locked + unlocked) AADT rows for one example segment
dfAadtMerged.loc[dfAadtMerged['SEGID'].eq('0013_000.0')]

Unnamed: 0,SOURCE,SEGID,YEAR,AADT
0,Segments_State_20231221_Draft.shp,0013_000.0,2019,19514
1,WF_Segments_20240326_Draft.shp,0013_000.0,1983,11780
2,WF_Segments_20240326_Draft.shp,0013_000.0,1984,11910
3,WF_Segments_20240326_Draft.shp,0013_000.0,1985,12035
4,WF_Segments_20240326_Draft.shp,0013_000.0,1986,12075
...,...,...,...,...
76,WFv901_Segments_20240226_Draft.shp,0013_000.0,2018,19321
77,WFv901_Segments_20240226_Draft.shp,0013_000.0,2019,19514
78,WFv901_Segments_20240226_Draft.shp,0013_000.0,2020,16977
79,WFv901_Segments_20240226_Draft.shp,0013_000.0,2021,18811


In [135]:
# Attach PLANAREA / F_AREA to every AADT row. The merge starts from the four
# base columns so that re-running this cell does not merge onto its own earlier
# result (which would produce PLANAREA_x / PLANAREA_y style columns).
dfAadtMerged = dfAadtMerged[['SOURCE','SEGID','YEAR','AADT']].merge(segid_planarea_df, on='SEGID')

# NOTE: the volume column is exported as 'AADT' (not 'obsAADT'); the cell that
# merges locked with unlocked segments reads an 'AADT' column back from this file.

# export AADT
dfAadtMerged.to_csv('intermediate/aadt.csv',index=False)

# export AADT sources (distinct SOURCE values, computed once for file and display)
dfAadtSources = dfAadtMerged[['SOURCE']].drop_duplicates()
dfAadtSources.to_csv('intermediate/aadt-sources.csv',index=False)
display(dfAadtSources)

## export Forecasts
#dfForecastsFromSegments.to_csv('intermediate/previous-forecasts.csv',index=False)

## export AADT sources
#dfForecastsFromSegments[['SOURCE']].drop_duplicates().to_csv('intermediate/previous-forecasts-sources.csv',index=False)

Unnamed: 0,SOURCE
0,Segments_State_20231221_Draft.shp
1,WF_Segments_20240326_Draft.shp
41,WFv901_Segments_20240226_Draft.shp
11579,AADTHistory.xlsx
231628,AADTHistory_2023.xlsx


In [136]:
# spot check: exported rows (with PLANAREA / F_AREA) for segment 0006_141.0
dfAadtMerged.loc[dfAadtMerged['SEGID'].eq('0006_141.0')]

Unnamed: 0,SOURCE,SEGID,YEAR,AADT,PLANAREA,F_AREA
133530,AADTHistory.xlsx,0006_141.0,1981,975,UDOT,MAG
133531,AADTHistory.xlsx,0006_141.0,1982,1000,UDOT,MAG
133532,AADTHistory.xlsx,0006_141.0,1983,965,UDOT,MAG
133533,AADTHistory.xlsx,0006_141.0,1984,1240,UDOT,MAG
133534,AADTHistory.xlsx,0006_141.0,1985,1340,UDOT,MAG
...,...,...,...,...,...,...
133652,WFv901_Segments_20240226_Draft.shp,0006_141.0,2018,1443,UDOT,MAG
133653,WFv901_Segments_20240226_Draft.shp,0006_141.0,2019,1517,UDOT,MAG
133654,WFv901_Segments_20240226_Draft.shp,0006_141.0,2020,1647,UDOT,MAG
133655,WFv901_Segments_20240226_Draft.shp,0006_141.0,2021,1760,UDOT,MAG


In [137]:
# spot check: exported rows (with PLANAREA / F_AREA) for segment 0006_222.1
dfAadtMerged.loc[dfAadtMerged['SEGID'].eq('0006_222.1')]

Unnamed: 0,SOURCE,SEGID,YEAR,AADT,PLANAREA,F_AREA
232583,AADTHistory_2023.xlsx,0006_222.1,1981,4325,UDOT,UDOT
232584,AADTHistory_2023.xlsx,0006_222.1,1982,4750,UDOT,UDOT
232585,AADTHistory_2023.xlsx,0006_222.1,1983,700,UDOT,UDOT
232586,AADTHistory_2023.xlsx,0006_222.1,1984,4170,UDOT,UDOT
232587,AADTHistory_2023.xlsx,0006_222.1,1985,3565,UDOT,UDOT
232588,AADTHistory_2023.xlsx,0006_222.1,1986,3880,UDOT,UDOT
232589,AADTHistory_2023.xlsx,0006_222.1,1987,4010,UDOT,UDOT
232590,AADTHistory_2023.xlsx,0006_222.1,1988,4230,UDOT,UDOT
232591,AADTHistory_2023.xlsx,0006_222.1,1989,4380,UDOT,UDOT
232592,AADTHistory_2023.xlsx,0006_222.1,1990,4690,UDOT,UDOT


# Create Shapefile (currently disabled)

In [138]:
#_df = dfAadt
#_df['YEARCOL'] = "AADT" + _df['YEAR'].astype(str)
#_df['AADT'] = _df['AADT'].astype(int)
#_df2 = _df.groupby(['SEGID','YEARCOL'],as_index=False).agg(cntRows=('YEAR','count'))
#_df2 = _df.pivot(index="SEGID",columns=('YEARCOL'), values='AADT')
#
#_df2.fillna(0,inplace=True)
#
#_df3 = pd.DataFrame.merge(gdfSegments[['SEGID','geometry']],_df2, on="SEGID", how="left")
#_df3.fillna(0,inplace=True)
#
#
## Identify numeric columns
#numeric_cols = _df3.select_dtypes(include=['float64']).columns
#
## Convert numeric columns to integer type
#for col in numeric_cols:
#    _df3[col] = _df3[col].astype(int)
#
#from datetime import datetime
#
## Generate a filename with the current timestamp in the specified folder
#timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
#
#_df3.to_file('results/Segments_State_' + timestamp + '_Draft_AADTOnly.shp')

# Get Factors

In [139]:
# Factor columns for every segment row. No PLANAREA filter is applied; the
# disabled filter was:
#   gdfSegments[gdfSegments['PLANAREA'].isin(filterPlanArea)].copy()

# get columns with 'FAC' anywhere in the name (FACMANADJ, FAC_APR, ...)
fac_columns = list(filter(lambda col: 'FAC' in col, gdfSegments.columns))

# copy so dfFactors is independent of gdfSegments
dfFactors = gdfSegments[['SOURCE','SEGID'] + fac_columns].copy()
dfFactors

Unnamed: 0,SOURCE,SEGID,FACMANADJ,FAC_APR,FAC_AUG,FAC_DEC,FAC_FAL,FAC_FEB,FAC_FRI,FAC_JAN,...,FAC_SPR,FAC_SUM,FAC_SUN,FAC_THU,FAC_TUE,FAC_WDAVG,FAC_WEAVG,FAC_WED,FAC_WEMAX,FAC_WIN
0,Segments_State_20231221_Draft.shp,0006_000.0,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,1.0276,1.0914,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495
1,Segments_State_20231221_Draft.shp,0006_000.7,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,1.0276,1.0914,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495
2,Segments_State_20231221_Draft.shp,0006_016.0,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,1.0276,1.0914,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495
3,Segments_State_20231221_Draft.shp,0006_046.0,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,1.0276,1.0914,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495
4,Segments_State_20231221_Draft.shp,0006_060.2,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,1.0276,1.0914,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5338,WF_Segments_20240326_Draft.shp,UTA_7593,0.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
5339,WF_Segments_20240326_Draft.shp,UTA_7594,0.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
5340,WF_Segments_20240326_Draft.shp,WFRC_8430,0.0,1.0146,1.0396,0.9616,1.0243,0.9471,1.1418,0.9024,...,1.0104,1.0283,0.6061,1.1071,1.0813,1.0924,0.7653,1.0887,0.9245,0.9370
5341,WF_Segments_20240326_Draft.shp,UTA_9556,0.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000


In [140]:
# merge locked with unlocked
#
# Segments in locked planning areas keep the factor rows found in the previously
# exported intermediate/factors.csv; every other segment uses the factors built
# in this run.

fnFactorsPrev = 'intermediate/factors.csv'

# Test the locked segment list itself (as the AADT merge cell does) and require
# that a previous export exists, so a first run does not fail on the missing file.
if len(lstLockedSegments)>=1 and os.path.exists(fnFactorsPrev):

    print ('Merging Locked with Unlocked')

    # previous export: source of the locked rows
    dfFactors_Prev = pd.read_csv(fnFactorsPrev)
    dfFactors_Locked = dfFactors_Prev[dfFactors_Prev['SEGID'].isin(lstLockedSegments)].copy()

    # current run: source of everything that is not locked
    dfFactors_Unlocked = dfFactors[~dfFactors['SEGID'].isin(lstLockedSegments)].copy()

    # Concatenate the locked and unlocked DataFrames
    dfFactorsMerged = pd.concat([dfFactors_Locked, dfFactors_Unlocked])

    # Reset the index of the final DataFrame
    dfFactorsMerged.reset_index(drop=True, inplace=True)

else:
    if len(lstLockedSegments)>=1:
        # nothing to preserve yet: there is no earlier export to take locked rows from
        print ('No previous factors export at ' + fnFactorsPrev)
    print ('No Locked')
    dfFactorsMerged = dfFactors

#dfFactorsMerged.drop(columns=['SAID_FAC','PLANAREA','F_AREA'], inplace=True)

# attach PLANAREA / F_AREA to every factor row
dfFactorsMerged = dfFactorsMerged.merge(segid_planarea_df,on='SEGID')
dfFactorsMerged

Merging Locked with Unlocked


Unnamed: 0,SOURCE,SEGID,FACMANADJ,FAC_APR,FAC_AUG,FAC_DEC,FAC_FAL,FAC_FEB,FAC_FRI,FAC_JAN,...,FAC_SUN,FAC_THU,FAC_TUE,FAC_WDAVG,FAC_WEAVG,FAC_WED,FAC_WEMAX,FAC_WIN,PLANAREA,F_AREA
0,Segments_State_20231221_Draft.shp,0006_141.0,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,UDOT,MAG
1,WFv901_Segments_20240226_Draft.shp,0006_141.0,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,UDOT,MAG
2,WF_Segments_20240326_Draft.shp,0006_141.0,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,UDOT,MAG
3,Segments_State_20231221_Draft.shp,0006_146.9,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,MAG,MAG
4,WFv901_Segments_20240226_Draft.shp,0006_146.9,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,MAG,MAG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19285,Segments_State_20231221_Draft.shp,UDOT_7090,0.0,1.0119,1.0724,0.9399,1.0324,0.8982,1.1558,0.8600,...,0.6429,1.1132,1.0744,1.0918,0.7585,1.0878,0.8742,0.8994,UDOT,UDOT
19286,Segments_State_20231221_Draft.shp,UDOT_7091,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,UDOT,UDOT
19287,Segments_State_20231221_Draft.shp,UDOT_7092,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,UDOT,UDOT
19288,Segments_State_20231221_Draft.shp,UDOT_7093,0.0,1.0366,1.0897,0.8822,1.0316,0.8504,1.1505,0.8159,...,0.8819,1.0120,0.9628,0.9840,0.9734,0.9773,1.0649,0.8495,UDOT,UDOT


In [141]:
# Write the factor table; the locked/unlocked merge cell reads this file back
# as the previous factors.
# NOTE(review): this exports dfFactors, not dfFactorsMerged, so the rows kept
# for locked segments in the merge cell are not what gets saved - confirm that
# this is intended.
dfFactors.to_csv(
    'intermediate/factors.csv',
    index=False,
)