In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
# GLOBAL VARIABLES

dfModelSegSummaries = pd.DataFrame( [
    [2019, 'data/model-output/v9_SE19_Net19_Summary_SEGID.csv'    ],
    [2028, 'data/model-output/v9_TIP_SE28_Net28_Summary_SEGID.csv'],
    [2023, 'data/model-output/v9_SE23_Net23_Summary_SEGID.csv'    ],
    [2032, 'data/model-output/v9_RTP_SE32_Net32_Summary_SEGID.csv'],
    [2042, 'data/model-output/v9_RTP_SE42_Net42_Summary_SEGID.csv'],
    [2050, 'data/model-output/v9_RTP_SE50_Net50_Summary_SEGID.csv'],
], columns=('modYear','modSegSummaryFile'))

#display(dfModelSegSummaries)

# seg summary fields to be useds
use_fields = ['SEGID','DY_VOL','FT','ATYPENAME','LANES']

# segment shapefile with AADT and previous forecasts - copied from 'A:/1 - TDM/3 - Model Dev/1 - WF/1 - Official Release/v9x/v9.0/WF TDM v9.0 - official/1_Inputs/6_Segment/Segments_WF - 2023-08-01.shp'
fnSegmentsShapefile = 'data/segments/Segments_WF - 2023-08-01.shp'

# field for weekday factor
colWeekdayFactor = 'FAC_WDAVG'

# SHOULD WE ADJUST WEEKDAY FACTOR INTO FUTURE?? 

# base year adjustment
# segment shapefile with AADT and previous forecasts - copied from 'A:/1 - TDM/3 - Model Dev/1 - WF/1 - Official Release/v9x/v9.0/WF TDM v9.0 - official/1_Inputs/6_Segment/Segments_WF - 2023-08-01.shp'
sourceName = 'AADTHistory.xlsx'
baseYear = 2019



In [3]:
# READ IN CSVs and STORE
# ChatGPT: https://chat.openai.com/share/8d8aa536-ad1f-4893-a6dc-3e112201beee

# Create a list to store DataFrames read from each CSV
frames = []

# Iterate through the rows and read each CSV
for index, row in dfModelSegSummaries.iterrows():
    df = pd.read_csv(row['modSegSummaryFile'])
    df = df[use_fields]
    df['YEAR'] = row['modYear'] # Add modYear column
    frames.append(df)

# Concatenate all the frames into a single DataFrame
dfMdlVol = pd.concat(frames, ignore_index=True)

display(dfMdlVol)

Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,YEAR
0,0006_146.9,1510.7,2.0,Rural,2.0,2019
1,0006_149.9,1559.7,2.0,Rural,2.0,2019
2,0006_150.6,1755.8,2.0,Rural,2.0,2019
3,0006_152.6,2495.6,3.0,Rural,2.0,2019
4,0006_152.9,3725.4,2.0,Rural,2.0,2019
...,...,...,...,...,...,...
23002,WFRC_8261,18.9,4.0,Urban,2.0,2050
23003,WFRC_8263,7783.9,4.0,Suburban,2.0,2050
23004,WFRC_8264,12940.8,3.0,Urban,4.0,2050
23005,WFRC_8265,4036.0,4.0,Urban,2.0,2050


In [4]:
# GET WEEKDAY FACTORS

# read in segment shapefile
gdfSegments = gpd.read_file(fnSegmentsShapefile)
dfWkDyFac = gdfSegments[['SEGID', colWeekdayFactor]]

display(dfWkDyFac)

Unnamed: 0,SEGID,FAC_WDAVG
0,0006_000.0,0.984
1,0006_000.7,0.984
2,0006_016.0,0.984
3,0006_046.0,0.984
4,0006_060.2,0.984
...,...,...
8722,UTA_7132,0.000
8723,UTA_7316,0.000
8724,UTA_7320,0.000
8725,UTA_7328,0.000


In [5]:
import numpy as np

# Your existing code for merging and filling NA
_df = pd.DataFrame.merge(dfMdlVol, dfWkDyFac, on='SEGID', how='left')
_df.fillna(0, inplace=True)

# Create a mask where the denominator is zero
mask_zero_denominator = _df[colWeekdayFactor] == 0

# Apply the division only where the denominator is not zero, and set the value to 0 where it is zero
_df['modAadt'] = np.where(mask_zero_denominator, 0, _df['DY_VOL'] / _df[colWeekdayFactor])

# Convert to integer as you had before
_df['modAadt'] = _df['modAadt'].astype(int)

dfModAadt = _df

# Display the DataFrame
display(dfModAadt)

Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,YEAR,FAC_WDAVG,modAadt
0,0006_146.9,1510.7,2.0,Rural,2.0,2019,0.9840,1535
1,0006_149.9,1559.7,2.0,Rural,2.0,2019,0.9840,1585
2,0006_150.6,1755.8,2.0,Rural,2.0,2019,0.9840,1784
3,0006_152.6,2495.6,3.0,Rural,2.0,2019,0.9840,2536
4,0006_152.9,3725.4,2.0,Rural,2.0,2019,0.9593,3883
...,...,...,...,...,...,...,...,...
23026,WFRC_8261,18.9,4.0,Urban,2.0,2050,0.0000,0
23027,WFRC_8263,7783.9,4.0,Suburban,2.0,2050,1.0924,7125
23028,WFRC_8264,12940.8,3.0,Urban,4.0,2050,1.0924,11846
23029,WFRC_8265,4036.0,4.0,Urban,2.0,2050,1.0924,3694


In [6]:
# SHOW WHERE FAC_WDAVG is 0... KEEP ZERO UNTIL FIGURE OUT WHAT TO DO!
dfModAadt[dfModAadt[colWeekdayFactor]==0]


Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,YEAR,FAC_WDAVG,modAadt
38,0015_250.3,22163.0,35.0,Suburban,3.0,2019,0.0,0
798,0134_007.8,3478.9,3.0,Transition,2.0,2019,0.0,0
1057,0194_002.1,18556.6,14.0,Suburban,2.0,2019,0.0,0
1560,1508_000.4,8032.2,4.0,Transition,2.0,2019,0.0,0
1612,2036_003.1,13985.2,3.0,Suburban,2.0,2019,0.0,0
...,...,...,...,...,...,...,...,...
23023,WFRC_8258,3698.5,4.0,Urban,2.0,2050,0.0,0
23024,WFRC_8259,7257.0,4.0,Urban,2.0,2050,0.0,0
23025,WFRC_8260,7295.2,4.0,Urban,2.0,2050,0.0,0
23026,WFRC_8261,18.9,4.0,Urban,2.0,2050,0.0,0


In [7]:
_df = pd.read_csv('intermediate/aadt.csv')
_df

Unnamed: 0,SEGID,YEAR,AADT,SOURCE
0,0006_146.9,1981,975,AADTHistory.xlsx
1,0006_146.9,1982,1000,AADTHistory.xlsx
2,0006_146.9,1983,965,AADTHistory.xlsx
3,0006_146.9,1984,1240,AADTHistory.xlsx
4,0006_146.9,1985,1340,AADTHistory.xlsx
...,...,...,...,...
159921,WFRC_8263,2015,13770,Segments_WF - 2023-08-01.shp
159922,WFRC_8263,2016,14498,Segments_WF - 2023-08-01.shp
159923,WFRC_8263,2017,6489,Segments_WF - 2023-08-01.shp
159924,WFRC_8263,2018,6554,Segments_WF - 2023-08-01.shp


In [8]:
# GET BASE YEAR OBSERVED

# read intermediate CSV with base year observed AADT
_df = pd.read_csv('intermediate/aadt.csv')

# filter by source and year
_df = _df[(_df['SOURCE']==sourceName) & (_df['YEAR']<=baseYear)]

# get max row of max year for each segid... incase there is no observed 2019 data, but maybe a earlier year
# Then, group by 'SEGID' and find the index of the maximum 'YEAR' for each group
idx = _df.groupby('SEGID')['YEAR'].idxmax()

# Use the index to filter the rows
_df = _df.loc[idx]

_df = _df[['SEGID','AADT']]
_df.rename(columns={'AADT':'obsAadt'}, inplace=True)

dfObsAadt_BaseYear = _df

display(dfObsAadt_BaseYear)

Unnamed: 0,SEGID,obsAadt
38,0006_146.9,1517
117,0006_149.9,2441
196,0006_150.6,2441
274,0006_152.6,2417
351,0006_152.9,3759
...,...,...
159749,3465_000.0,509
159771,3466_000.0,7480
159807,3466_000.1,7480
159843,3466_000.3,7480


In [9]:
# CALCULATE ADJUSTMENT FACTOR FROM BASE YEAR MODEL VS OBSERVED
dfModAadt_BaseYear = dfModAadt[dfModAadt['YEAR']==2019]

_df = pd.DataFrame.merge(dfModAadt_BaseYear[use_fields + ['modAadt']], dfObsAadt_BaseYear[['SEGID','obsAadt']], on='SEGID', how='left')
_df.fillna(0,inplace=True)

_df['aadtAdjFactor'] = _df['obsAadt'] - _df['modAadt']

_df.loc[(_df['obsAadt']==0), 'aadtAdjFactor'] = 0

display(_df[_df['modAadt']==0])

dfModAdjFactor = _df[['SEGID','aadtAdjFactor']]

display(dfModAdjFactor)


Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,modAadt,obsAadt,aadtAdjFactor
38,0015_250.3,22163.0,35.0,Suburban,3.0,0,0.0,0.0
798,0134_007.8,3478.9,3.0,Transition,2.0,0,0.0,0.0
807,0135_000.1,0.0,4.0,Suburban,2.0,0,3168.0,3168.0
1057,0194_002.1,18556.6,14.0,Suburban,2.0,0,0.0,0.0
1106,0202_001.5,0.0,4.0,Rural,2.0,0,884.0,884.0
...,...,...,...,...,...,...,...,...
3720,WFRC_8258,7452.5,4.0,Urban,2.0,0,0.0,0.0
3721,WFRC_8259,6214.0,4.0,Urban,2.0,0,0.0,0.0
3722,WFRC_8260,6914.5,4.0,Urban,2.0,0,0.0,0.0
3723,WFRC_8261,2588.0,4.0,Urban,2.0,0,0.0,0.0


Unnamed: 0,SEGID,aadtAdjFactor
0,0006_146.9,-18.0
1,0006_149.9,856.0
2,0006_150.6,657.0
3,0006_152.6,-119.0
4,0006_152.9,-124.0
...,...,...
3721,WFRC_8259,0.0
3722,WFRC_8260,0.0
3723,WFRC_8261,0.0
3724,WFRC_8263,0.0


In [10]:
# Function to round based on the given ranges
def custom_rounding(value):
    if 0 <= value < 100:
        return round(value / 10) * 10
    elif 100 <= value < 1000:
        return round(value / 50) * 50
    elif 1000 <= value < 10000:
        return round(value / 100) * 100
    elif 10000 <= value < 100000:
        return round(value / 500) * 500
    elif value >= 100000:
        return round(value / 1000) * 1000
    else:
        return value

In [11]:
# ADJUST ALL MODEL VOLUMES BASED ON BASE YEAR ADJ FACTOR

dfModWithAdj = pd.DataFrame.merge(dfModAadt, dfModAdjFactor, on='SEGID', how='left')
dfModWithAdj['modForecast'] = (dfModWithAdj['modAadt'] + dfModWithAdj['aadtAdjFactor']).apply(custom_rounding)
dfModWithAdj

# remove 2019 since it is base
#dfModWithAdjNo2019 = dfModWithAdj[dfModWithAdj['YEAR']>2019]

# sort
#dfModWithAdjNo2019 = dfModWithAdjNo2019.sort_values(by=['SEGID', 'YEAR'], ascending=[True, True])

#dfModWithAdjNo2019

Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,YEAR,FAC_WDAVG,modAadt,aadtAdjFactor,modForecast
0,0006_146.9,1510.7,2.0,Rural,2.0,2019,0.9840,1535,-18.0,1500.0
1,0006_149.9,1559.7,2.0,Rural,2.0,2019,0.9840,1585,856.0,2400.0
2,0006_150.6,1755.8,2.0,Rural,2.0,2019,0.9840,1784,657.0,2400.0
3,0006_152.6,2495.6,3.0,Rural,2.0,2019,0.9840,2536,-119.0,2400.0
4,0006_152.9,3725.4,2.0,Rural,2.0,2019,0.9593,3883,-124.0,3800.0
...,...,...,...,...,...,...,...,...,...,...
23074,WFRC_8261,18.9,4.0,Urban,2.0,2050,0.0000,0,0.0,0.0
23075,WFRC_8263,7783.9,4.0,Suburban,2.0,2050,1.0924,7125,0.0,7100.0
23076,WFRC_8264,12940.8,3.0,Urban,4.0,2050,1.0924,11846,,
23077,WFRC_8265,4036.0,4.0,Urban,2.0,2050,1.0924,3694,,


In [45]:
# check if base year adjusted equals base year observed

_df1 = dfModWithAdj[dfModWithAdj['YEAR']==baseYear]
_df2 = dfObsAadt_BaseYear

_df3 = pd.DataFrame.merge(_df1, _df2, on='SEGID')

_df3['ModVsObs'] = _df3['modForecast'] - _df3['obsAadt']

display(_df3[_df3['ModVsObs']!=0])


Unnamed: 0,SEGID,DY_VOL,FT,LANES,YEAR,FAC_WDAVG,modAadt,aadtAdjFactor,modForecast,obsAadt,ModVsObs
0,0006_146.9,1510.7,2.0,2.0,2019,0.9840,1535,-18.0,1500.0,1517,-17.0
1,0006_149.9,1559.7,2.0,2.0,2019,0.9840,1585,856.0,2400.0,2441,-41.0
2,0006_150.6,1755.8,2.0,2.0,2019,0.9840,1784,657.0,2400.0,2441,-41.0
3,0006_152.6,2495.6,3.0,2.0,2019,0.9840,2536,-119.0,2400.0,2417,-17.0
4,0006_152.9,3725.4,2.0,2.0,2019,0.9593,3883,-124.0,3800.0,3759,41.0
...,...,...,...,...,...,...,...,...,...,...,...
3511,3465_000.0,4171.9,4.0,2.0,2019,1.0918,3821,-3312.0,500.0,509,-9.0
3512,3466_000.0,4558.8,4.0,2.0,2019,1.0918,4175,3305.0,7500.0,7480,20.0
3513,3466_000.1,8171.6,4.0,2.0,2019,1.0924,7480,0.0,7500.0,7480,20.0
3514,3466_000.3,2138.2,4.0,2.0,2019,1.0924,1957,5523.0,7500.0,7480,20.0


In [12]:
# export final file
dfModWithAdj.to_csv('intermediate/model-forecasts.csv', index=False)

In [13]:
dfModWithAdj[dfModWithAdj['SEGID']=='0089_433.3']

Unnamed: 0,SEGID,DY_VOL,FT,ATYPENAME,LANES,YEAR,FAC_WDAVG,modAadt,aadtAdjFactor,modForecast
594,0089_433.3,17487.9,2.0,Suburban,4.0,2019,1.0918,16017,0.0,16000.0
4328,0089_433.3,20155.9,2.0,Suburban,4.0,2028,1.0918,18461,0.0,18500.0
8153,0089_433.3,18555.3,2.0,Suburban,4.0,2023,1.0918,16995,0.0,17000.0
11945,0089_433.3,21350.9,2.0,Suburban,4.0,2032,1.0918,19555,0.0,19500.0
15815,0089_433.3,19802.1,2.0,Suburban,4.0,2042,1.0918,18137,0.0,18000.0
19720,0089_433.3,21116.1,2.0,Suburban,4.0,2050,1.0918,19340,0.0,19500.0
