In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
# GLOBAL VARIABLES

# seg summary fields to be useds
use_fields = ['SEGID','DY_VOL','FT','ATYPENAME','LANES']

# segment shapefile with AADT and previous forecasts - copied from 'A:/1 - TDM/3 - Model Dev/1 - WF/1 - Official Release/v9x/v9.0/WF TDM v9.0 - official/1_Inputs/6_Segment/Segments_WF - 2023-08-01.shp'
fnSegmentsShapefile = 'data/segments/WFv901_Segments_20240226_GeoKeys_AADTs_Factors.shp'

# field for weekday factor
colFacWdAvg = 'FAC_WDAVG'
colFacSpr   = 'FAC_SPR'
colFacFal   = 'FAC_FAL'

baseYear = 2019
baseAadtField = 'AADT2019'

dfModelSegSummaries = pd.DataFrame( [
    [1, 2019, 'data/model-output/1_WF/v9_SE19_Net19_Summary_SEGID_withnewSegIds.csv'           ],
    [1, 2023, 'data/model-output/1_WF/v9_SE23_Net23_Summary_SEGID_withnewSegIds.csv'           ],
    [1, 2028, 'data/model-output/1_WF/v9_TIP_SE28_Net28_Summary_SEGID_withnewSegIds.csv'       ],
    [1, 2032, 'data/model-output/1_WF/v9_RTP_SE32_Net32_Summary_SEGID_withnewSegIds.csv'       ],
    [1, 2042, 'data/model-output/1_WF/v9_RTP_SE42_Net42_Summary_SEGID_withnewSegIds.csv'       ],
    [1, 2050, 'data/model-output/1_WF/v9_RTP_SE50_Net50_Summary_SEGID_withnewSegIds.csv'       ]
], columns=('modSubareaId','modYear','modSegSummaryFile'))

#display(dfModelSegSummaries)


In [3]:
# Required libraries
from dbfread import DBF
import pandas as pd

# Create a list to store DataFrames read from each file
frames = []

# Iterate through the rows and read each file
for index, row in dfModelSegSummaries.iterrows():
    # Prepend path with 'dbfs:/'
    file_path = row['modSegSummaryFile']
    print(file_path)
    
    # Check the file extension
    if file_path.endswith('.dbf'):
        # Read the .dbf file with dbfread
        table = DBF(file_path)
        df = pd.DataFrame(iter(table))  # Convert the DBF table to a DataFrame
    elif file_path.endswith('.csv'):
        df = pd.read_csv(file_path)
    else:
        # If the file is neither .csv nor .dbf, skip this iteration
        print(f"Unsupported file format for file: {file_path}")
        continue

    df = df[use_fields]
    df['YEAR'     ] = row['modYear'     ] # Add modYear column
    df['SUBAREAID'] = row['modSubareaId'] # Add modSubareaId column
    frames.append(df)

# Concatenate all the frames into a single DataFrame
dfMdlVol = pd.concat(frames, ignore_index=True)

# If you're using Databricks, use 'display' to show the DataFrame
display(dfMdlVol)

data/model-output/1_WF/v9_SE19_Net19_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v9_SE23_Net23_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v9_TIP_SE28_Net28_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v9_RTP_SE32_Net32_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v9_RTP_SE42_Net42_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v9_RTP_SE50_Net50_Summary_SEGID_withnewSegIds.csv


Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SUBAREAID
0,0006_141.0,1524.0,2.0,Rural,2.0,2019,1
1,0006_146.9,1528.0,2.0,Rural,2.0,2019,1
2,0006_149.9,1558.4,2.0,Rural,2.0,2019,1
3,0006_150.6,1748.2,2.0,Rural,2.0,2019,1
4,0006_152.6,2535.8,3.0,Rural,2.0,2019,1
...,...,...,...,...,...,...,...
25811,WFRC_8465,46751.7,2.0,Suburban,4.0,2050,1
25812,WFRC_8466,15608.7,3.0,Urban,4.0,2050,1
25813,WFRC_8467,26571.9,2.0,Suburban,4.0,2050,1
25814,WFRC_8471,20343.9,4.0,Suburban,4.0,2050,1


In [4]:
dfMdlVol[dfMdlVol['YEAR']==0]

Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SUBAREAID


In [5]:
# GET WEEKDAY FACTORS

# read in segment shapefile
gdfSegments = gpd.read_file(fnSegmentsShapefile)
dfFac = gdfSegments[['SEGID', 'SUBAREAID', 'CO_FIPS', colFacWdAvg, colFacSpr, colFacFal]].copy()

dfFac['FAC_SPRFAL'] = (dfFac[colFacSpr] + dfFac[colFacFal]) / 2

display(dfFac)

Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL
0,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960
1,0006_146.9,1.0,49.0,0.9840,1.0276,1.0316,1.02960
2,0006_149.9,1.0,49.0,0.9840,1.0276,1.0316,1.02960
3,0006_150.6,1.0,49.0,0.9840,1.0276,1.0316,1.02960
4,0006_152.6,1.0,49.0,0.9840,1.0276,1.0316,1.02960
...,...,...,...,...,...,...,...
4975,WFRC_8469,1.0,35.0,1.0924,1.0104,1.0243,1.01735
4976,WFRC_8470,1.0,35.0,1.0918,1.0107,1.0324,1.02155
4977,WFRC_8471,1.0,35.0,1.0924,1.0104,1.0243,1.01735
4978,WFRC_8472,1.0,35.0,1.0946,1.0117,1.0095,1.01060


In [6]:
dfFac.loc[(dfFac['FAC_WDAVG']==0), 'checkZero'] = 1
dfFac.groupby(['SUBAREAID'],as_index = False).agg(numSegs=('SEGID','count'),numSegsWithFacZero=('checkZero','sum'))

Unnamed: 0,SUBAREAID,numSegs,numSegsWithFacZero
0,1.0,4980,81.0


In [7]:
import numpy as np

# Your existing code for merging and filling NA
_df = pd.DataFrame.merge(dfFac, dfMdlVol, on=('SEGID','SUBAREAID'), how='inner')
_df.fillna(0, inplace=True)
display()

# calculate AADT by subareaid
# divide by colWeekdayFactor
_df.loc[(~(_df['SUBAREAID'].isin([4])) & (_df[colFacWdAvg]> 0)), 'modAadt'] = (_df['DY_VOL'] / _df[colFacWdAvg])
_df.loc[(~(_df['SUBAREAID'].isin([4])) & (_df[colFacWdAvg]==0)), 'modAadt'] = 0

# divide by average of Spring and Fall only for Summit/Wasatch Model
_df.loc[(_df['SUBAREAID'].isin([4]) & (_df['FAC_SPRFAL']>0 )), 'modAadt'] = (_df['DY_VOL'] / _df[colFacWdAvg] / _df['FAC_SPRFAL'])
_df.loc[(_df['SUBAREAID'].isin([4]) & (_df['FAC_SPRFAL']==0)), 'modAadt'] = 0

# Convert to integer as you had before
_df['modAadt'] = _df['modAadt'].round(0).astype(int)

dfModAadt = _df

# Display the DataFrame
display(dfModAadt)

Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,checkZero,DY_VOL,FT,ATYPENAME,LANES,YEAR,modAadt
0,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1524.0,2.0,Rural,2.0,2019,1549
1,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1655.0,2.0,Rural,2.0,2023,1682
2,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1737.0,2.0,Rural,2.0,2028,1765
3,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1851.0,2.0,Rural,2.0,2032,1881
4,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,2094.0,2.0,Rural,2.0,2042,2128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25811,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,6981.7,5.0,Suburban,2.0,2023,6391
25812,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,7021.1,5.0,Urban,2.0,2028,6427
25813,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,7386.4,5.0,Urban,2.0,2032,6762
25814,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,8158.2,5.0,Urban,2.0,2042,7468


In [8]:
dfModAadt[dfModAadt['SUBAREAID'].isin([4,5]) & (dfModAadt['FAC_SPRFAL']>0)]

Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,checkZero,DY_VOL,FT,ATYPENAME,LANES,YEAR,modAadt


In [9]:
# GET BASE YEAR OBSERVED FROM SEGMENT SHAPEFILE
_df = gdfSegments[['SEGID',baseAadtField]].copy()

_df.rename(columns={baseAadtField:'obsAadt'}, inplace=True)

dfObsAadt_BaseYear = _df

display(dfObsAadt_BaseYear)

Unnamed: 0,SEGID,obsAadt
0,0006_141.0,1517.0
1,0006_146.9,1517.0
2,0006_149.9,2441.0
3,0006_150.6,2441.0
4,0006_152.6,2417.0
...,...,...
4975,WFRC_8469,0.0
4976,WFRC_8470,0.0
4977,WFRC_8471,0.0
4978,WFRC_8472,0.0


In [10]:
# CALCULATE ADJUSTMENT FACTOR FROM BASE YEAR MODEL VS OBSERVED
dfModAadt_BaseYear = dfModAadt[dfModAadt['YEAR']==2019]

_df = pd.DataFrame.merge(dfModAadt_BaseYear[use_fields + ['modAadt']], dfObsAadt_BaseYear[['SEGID','obsAadt']], on='SEGID', how='left')
_df.fillna(0,inplace=True)

_df['aadtAdjFactor'] = _df['obsAadt'] - _df['modAadt']

_df.loc[(_df['obsAadt']==0), 'aadtAdjFactor'] = 0

display(_df[_df['modAadt']==0])

dfModAdjFactor = _df[['SEGID','aadtAdjFactor']]

display(dfModAdjFactor)


Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,modAadt,obsAadt,aadtAdjFactor
873,0135_000.0,0.0,4.0,Suburban,2.0,0,1000.0,1000.0
1192,0202_001.5,0.0,4.0,Rural,2.0,0,0.0,0.0
1259,0210_011.6,0.0,2.0,Rural,2.0,0,6688.0,6688.0
1485,1416_000.0,0.0,5.0,Transition,2.0,0,1549.0,1549.0
1486,1416_009.6,0.0,5.0,Transition,2.0,0,525.0,525.0
1487,1416_024.8,0.0,5.0,Urban,2.0,0,898.0,898.0
1955,2082_005.0,0.0,2.0,Suburban,2.0,0,9697.0,9697.0
2488,2251_000.0,0.0,4.0,Rural,2.0,0,0.0,0.0
2489,2251_000.5,0.0,4.0,Rural,2.0,0,0.0,0.0
2772,2372_000.0,0.0,4.0,Suburban,4.0,0,4123.0,4123.0


Unnamed: 0,SEGID,aadtAdjFactor
0,0006_141.0,-32.0
1,0006_146.9,-36.0
2,0006_149.9,857.0
3,0006_150.6,664.0
4,0006_152.6,-160.0
...,...,...
4098,WFRC_8456,0.0
4099,WFRC_8457,0.0
4100,WFRC_8461,0.0
4101,WFRC_8466,0.0


In [11]:
# Function to round based on the given ranges
def custom_rounding(value):
    if 0 <= value < 100:
        return round(value / 10) * 10
    elif 100 <= value < 1000:
        return round(value / 50) * 50
    elif 1000 <= value < 10000:
        return round(value / 100) * 100
    elif 10000 <= value < 100000:
        return round(value / 500) * 500
    elif value >= 100000:
        return round(value / 1000) * 1000
    else:
        return value

In [12]:
# ADJUST ALL MODEL VOLUMES BASED ON BASE YEAR ADJ FACTOR

dfModWithAdj = pd.DataFrame.merge(dfModAadt, dfModAdjFactor, on='SEGID', how='left')
dfModWithAdj['aadtAdjFactor'].fillna(0,inplace=True)
dfModWithAdj['modForecast'] = (dfModWithAdj['modAadt'] + dfModWithAdj['aadtAdjFactor']).apply(custom_rounding)
dfModWithAdj

# remove 2019 since it is base
#dfModWithAdjNo2019 = dfModWithAdj[dfModWithAdj['YEAR']>2019]

# sort
#dfModWithAdjNo2019 = dfModWithAdjNo2019.sort_values(by=['SEGID', 'YEAR'], ascending=[True, True])

#dfModWithAdjNo2019

Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,checkZero,DY_VOL,FT,ATYPENAME,LANES,YEAR,modAadt,aadtAdjFactor,modForecast
0,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1524.0,2.0,Rural,2.0,2019,1549,-32.0,1500.0
1,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1655.0,2.0,Rural,2.0,2023,1682,-32.0,1600.0
2,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1737.0,2.0,Rural,2.0,2028,1765,-32.0,1700.0
3,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1851.0,2.0,Rural,2.0,2032,1881,-32.0,1800.0
4,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,2094.0,2.0,Rural,2.0,2042,2128,-32.0,2100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25811,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,6981.7,5.0,Suburban,2.0,2023,6391,0.0,6400.0
25812,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,7021.1,5.0,Urban,2.0,2028,6427,0.0,6400.0
25813,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,7386.4,5.0,Urban,2.0,2032,6762,0.0,6800.0
25814,WFRC_8473,1.0,11.0,1.0924,1.0104,1.0243,1.01735,0.0,8158.2,5.0,Urban,2.0,2042,7468,0.0,7500.0


In [13]:
# check if base year adjusted equals base year observed

_df1 = dfModWithAdj[dfModWithAdj['YEAR']==baseYear]
_df2 = dfObsAadt_BaseYear

_df3 = pd.DataFrame.merge(_df1, _df2, on='SEGID')

_df3['ModVsObs'] = _df3['modForecast'] - _df3['obsAadt']

display(_df3[_df3['ModVsObs']!=0])


Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,checkZero,DY_VOL,FT,ATYPENAME,LANES,YEAR,modAadt,aadtAdjFactor,modForecast,obsAadt,ModVsObs
0,0006_141.0,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1524.0,2.0,Rural,2.0,2019,1549,-32.0,1500.0,1517.0,-17.0
1,0006_146.9,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1528.0,2.0,Rural,2.0,2019,1553,-36.0,1500.0,1517.0,-17.0
2,0006_149.9,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1558.4,2.0,Rural,2.0,2019,1584,857.0,2400.0,2441.0,-41.0
3,0006_150.6,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,1748.2,2.0,Rural,2.0,2019,1777,664.0,2400.0,2441.0,-41.0
4,0006_152.6,1.0,49.0,0.9840,1.0276,1.0316,1.02960,0.0,2535.8,3.0,Rural,2.0,2019,2577,-160.0,2400.0,2417.0,-17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4098,WFRC_8456,1.0,57.0,0.9840,1.0276,1.0316,1.02960,0.0,3049.9,4.0,Rural,2.0,2019,3099,0.0,3100.0,0.0,3100.0
4099,WFRC_8457,1.0,57.0,0.9840,1.0276,1.0316,1.02960,0.0,3050.0,4.0,Rural,2.0,2019,3100,0.0,3100.0,0.0,3100.0
4100,WFRC_8461,1.0,35.0,1.0918,1.0107,1.0324,1.02155,0.0,2211.3,5.0,Transition,2.0,2019,2025,0.0,2000.0,0.0,2000.0
4101,WFRC_8466,1.0,35.0,1.0946,1.0117,1.0095,1.01060,0.0,9082.2,3.0,Urban,4.0,2019,8297,0.0,8300.0,0.0,8300.0


In [14]:
# export final file
dfModWithAdj.to_csv('intermediate/model-forecasts.csv', index=False)

In [15]:
dfModWithAdj[dfModWithAdj['SEGID']=='0085_011.9']

Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,checkZero,DY_VOL,FT,ATYPENAME,LANES,YEAR,modAadt,aadtAdjFactor,modForecast
2670,0085_011.9,1.0,35.0,1.1568,1.0112,1.0105,1.01085,0.0,44752.2,13.0,Transition,4.0,2019,38686,-5538.0,33000.0
2671,0085_011.9,1.0,35.0,1.1568,1.0112,1.0105,1.01085,0.0,53052.9,13.0,Suburban,4.0,2023,45862,-5538.0,40500.0
2672,0085_011.9,1.0,35.0,1.1568,1.0112,1.0105,1.01085,0.0,62913.6,13.0,Suburban,4.0,2028,54386,-5538.0,49000.0
2673,0085_011.9,1.0,35.0,1.1568,1.0112,1.0105,1.01085,0.0,98869.1,34.0,Suburban,8.0,2032,85468,-5538.0,80000.0
2674,0085_011.9,1.0,35.0,1.1568,1.0112,1.0105,1.01085,0.0,125201.8,34.0,Urban,8.0,2042,108231,-5538.0,103000.0
2675,0085_011.9,1.0,35.0,1.1568,1.0112,1.0105,1.01085,0.0,155509.0,34.0,Urban,12.0,2050,134430,-5538.0,129000.0


In [16]:
dfModWithAdj[dfModWithAdj['SEGID']=='WFRC_8430']

Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,checkZero,DY_VOL,FT,ATYPENAME,LANES,YEAR,modAadt,aadtAdjFactor,modForecast
25701,WFRC_8430,1.0,35.0,1.0924,1.0104,1.0243,1.01735,0.0,4839.5,3.0,Suburban,2.0,2019,4430,0.0,4400.0
25702,WFRC_8430,1.0,35.0,1.0924,1.0104,1.0243,1.01735,0.0,6383.6,3.0,Suburban,2.0,2023,5844,0.0,5800.0
25703,WFRC_8430,1.0,35.0,1.0924,1.0104,1.0243,1.01735,0.0,7335.2,3.0,Suburban,2.0,2028,6715,0.0,6700.0
25704,WFRC_8430,1.0,35.0,1.0924,1.0104,1.0243,1.01735,0.0,6959.5,3.0,Suburban,2.0,2032,6371,0.0,6400.0
25705,WFRC_8430,1.0,35.0,1.0924,1.0104,1.0243,1.01735,0.0,8526.8,3.0,Suburban,2.0,2042,7806,0.0,7800.0
25706,WFRC_8430,1.0,35.0,1.0924,1.0104,1.0243,1.01735,0.0,17282.9,3.0,Suburban,2.0,2050,15821,0.0,16000.0


In [17]:
dfModWithAdj[dfModWithAdj['SEGID']=='WFRC_8400']

Unnamed: 0,SEGID,SUBAREAID,CO_FIPS,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,checkZero,DY_VOL,FT,ATYPENAME,LANES,YEAR,modAadt,aadtAdjFactor,modForecast
25562,WFRC_8400,1.0,35.0,1.0918,1.0107,1.0324,1.02155,0.0,4174.9,4.0,Suburban,2.0,2032,3824,0.0,3800.0
25563,WFRC_8400,1.0,35.0,1.0918,1.0107,1.0324,1.02155,0.0,5437.0,4.0,Suburban,2.0,2042,4980,0.0,5000.0
25564,WFRC_8400,1.0,35.0,1.0918,1.0107,1.0324,1.02155,0.0,4445.8,4.0,Urban,2.0,2050,4072,0.0,4100.0
