In [108]:
import pandas as pd
import geopandas as gpd

In [109]:
# GLOBAL VARIABLES

dfObsVolSource = pd.DataFrame([
    ['AADTHistory.xlsx'                  , [0,2,3,4,5]],
    ['WFv901_Segments_20240226_Draft.shp', [1]        ]
],columns=('SOURCE','SUBAREAID'))

dfObsVolSource = dfObsVolSource.explode('SUBAREAID')

# base year adjustment
# segment shapefile with AADT and previous forecasts - copied from 'A:/1 - TDM/3 - Model Dev/1 - WF/1 - Official Release/v9x/v9.0/WF TDM v9.0 - official/1_Inputs/6_Segment/Segments_WF - 2023-08-01.shp'
baseYear = 2019

dfFactorSource = pd.DataFrame([
    ['Segments_State_20231221_Draft.shp' , [0,2,3,4,5]],
    ['WFv901_Segments_20240226_Draft.shp', [1]        ]
],columns=('SOURCE','SUBAREAID'))

dfFactorSource = dfFactorSource.explode('SUBAREAID')

# factors file from last notebook
fnFactors = 'intermediate/factors.csv'

# field for weekday factor
colFacWdAvg = 'FAC_WDAVG'
colFacSpr   = 'FAC_SPR'
colFacFal   = 'FAC_FAL'
# SHOULD WE ADJUST WEEKDAY FACTOR INTO FUTURE?? 

# some models are AWDT for whole year and some are AADT for a season
# this list of SUBAREAIDs is for models that are AADT for season and need to be adjusted by a Spring/Fall factor
subareaidsThatUseSprFalFac = [4]

# where the same segid in multiple subareas and model, drop order for duplicate segids... pick order to drop from models
duplicateSegsInTdms = pd.DataFrame([
    ['0015_042.2', 3],
    ['0065_002.4', 0],
    ['0150_029.4', 0],
    ['0150_030.0', 0],
    ['0150_033.3', 0],
    ['1822_000.0', 0],
    ['2863_000.0', 4],
    ['2865_019.4', 4]
], columns=('SEGID','removeSUBAREAID'))

dfModelSegSummaries = pd.DataFrame( [
    [0, 2019, 'data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_BY2019.csv'     ],
    [0, 2023, 'data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_OY2023.csv'     ],
    [0, 2028, 'data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_STIP2028.csv'   ],
    [0, 2032, 'data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_LRP2032.csv'    ],
    [0, 2042, 'data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_LRP2042.csv'    ],
    [0, 2050, 'data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_LRP2050.csv'    ],
    [1, 2019, 'data/model-output/1_WF/v901_SE19_Net19_Summary_SEGID_withnewSegIds.csv'         ],
    [1, 2023, 'data/model-output/1_WF/v901_SE23_Net23_Summary_SEGID_withnewSegIds.csv'         ],
    [1, 2028, 'data/model-output/1_WF/v901_TIP_SE28_Net28_Summary_SEGID_withnewSegIds.csv'     ],
    [1, 2032, 'data/model-output/1_WF/v901_RTP_SE32_Net32_Summary_SEGID_withnewSegIds.csv'     ],
    [1, 2042, 'data/model-output/1_WF/v901_RTP_SE42_Net42_Summary_SEGID_withnewSegIds.csv'     ],
    [1, 2050, 'data/model-output/1_WF/v901_RTP_SE50_Net50_Summary_SEGID_withnewSegIds.csv'     ],
    [2, 2019, 'data/model-output/2_Cache/BY2019_Summary_SEGID.dbf'                             ],
    [2, 2023, 'data/model-output/2_Cache/BY2023_Summary_SEGID.dbf'                             ],
    [2, 2028, 'data/model-output/2_Cache/Summary_SEGID_STIP28.dbf'                             ],
    [2, 2032, 'data/model-output/2_Cache/Summary_SEGID_LRP23_32.dbf'                           ],
    [2, 2042, 'data/model-output/2_Cache/Summary_SEGID_LRP23_42.dbf'                           ],
    [2, 2050, 'data/model-output/2_Cache/Summary_SEGID_LRP23_50.dbf'                           ],
    [3, 2019, 'data/model-output/3_Dixie/Summary_SEGID_19.dbf'                                 ],
    [3, 2023, 'data/model-output/3_Dixie/Summary_SEGID_23.dbf'                                 ],
    [3, 2028, 'data/model-output/3_Dixie/Summary_SEGID_28.dbf'                                 ],
    [3, 2032, 'data/model-output/3_Dixie/Summary_SEGID_32.dbf'                                 ],
    [3, 2042, 'data/model-output/3_Dixie/Summary_SEGID_42.dbf'                                 ],
    [3, 2050, 'data/model-output/3_Dixie/Summary_SEGID_50.dbf'                                 ],
    [4, 2019, 'data/model-output/4_SuWsv2_2024-02-08_DRAFT/Summary_SEGID_BY2019.dbf'           ],
    [4, 2023, 'data/model-output/4_SuWsv2_2024-02-08_DRAFT/Summary_SEGID_OY2023.dbf'           ],
    [4, 2028, 'data/model-output/4_SuWsv2_2024-02-08_DRAFT/Summary_SEGID_STIP2028.dbf'         ],
    [4, 2032, 'data/model-output/4_SuWsv2_2024-02-08_DRAFT/Summary_SEGID_LRP2032.dbf'          ],
    [4, 2042, 'data/model-output/4_SuWsv2_2024-02-08_DRAFT/Summary_SEGID_LRP2042.dbf'          ],
    [4, 2050, 'data/model-output/4_SuWsv2_2024-02-08_DRAFT/Summary_SEGID_LRP2050.dbf'          ],
    [5, 2019, 'data/model-output/5_IronCo - v1.0 - 2023-09-13_DRAFT/Summary_SEGID_BY2019.dbf'  ],
    [5, 2023, 'data/model-output/5_IronCo - v1.0 - 2023-09-13_DRAFT/Summary_SEGID_OY2023.dbf'  ],
    [5, 2028, 'data/model-output/5_IronCo - v1.0 - 2023-09-13_DRAFT/Summary_SEGID_STIP2028.dbf'],
    [5, 2032, 'data/model-output/5_IronCo - v1.0 - 2023-09-13_DRAFT/Summary_SEGID_FY2032.dbf'  ],
    [5, 2042, 'data/model-output/5_IronCo - v1.0 - 2023-09-13_DRAFT/Summary_SEGID_FY2042.dbf'  ],
    [5, 2050, 'data/model-output/5_IronCo - v1.0 - 2023-09-13_DRAFT/Summary_SEGID_FY2050.dbf'  ]
], columns=('modSubareaId','modYear','modSegSummaryFile'))

# seg summary fields to be useds
use_fields = ['SEGID','SUBAREAID','DY_VOL','FT','ATYPENAME','LANES']

# drop the following records that have following SEGIDs
dropSegId = ['NO','N0']

#display(dfModelSegSummaries)


In [110]:
# Required libraries
from dbfread import DBF
import pandas as pd

# Create a list to store DataFrames read from each file
frames = []

# Iterate through the rows and read each file
for index, row in dfModelSegSummaries.iterrows():
    # Prepend path with 'dbfs:/'
    file_path = row['modSegSummaryFile']
    print(file_path)
    
    # Check the file extension
    if file_path.endswith('.dbf'):
        # Read the .dbf file with dbfread
        table = DBF(file_path)
        df = pd.DataFrame(iter(table))  # Convert the DBF table to a DataFrame
    elif file_path.endswith('.csv'):
        df = pd.read_csv(file_path, low_memory=False)
    else:
        # If the file is neither .csv nor .dbf, skip this iteration
        print(f"Unsupported file format for file: {file_path}")
        continue

    df = df[use_fields]

    # filter out where link SUBAREAID match the model SUBAREAID... Mostly for USTM that spans multiple model spaces
    df = df[df['SUBAREAID']==row['modSubareaId']]

    # drop SEGIDs defined in dropSegId list
    df = df[~df['SEGID'].isin(dropSegId)]

    df['YEAR'] = row['modYear'] # Add modYear column

    frames.append(df)

# Concatenate all the frames into a single DataFrame
dfMdlVol = pd.concat(frames, ignore_index=True)

# If you're using Databricks, use 'display' to show the DataFrame
display(dfMdlVol)

data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_BY2019.csv
data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_OY2023.csv
data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_STIP2028.csv
data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_LRP2032.csv
data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_LRP2042.csv
data/model-output/0_USTM_v3.0 - 2024-02-20_DRAFT/_Summary_SEGID_LRP2050.csv
data/model-output/1_WF/v901_SE19_Net19_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v901_SE23_Net23_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v901_TIP_SE28_Net28_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v901_RTP_SE32_Net32_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v901_RTP_SE42_Net42_Summary_SEGID_withnewSegIds.csv
data/model-output/1_WF/v901_RTP_SE50_Net50_Summary_SEGID_withnewSegIds.csv
data/model-output/2_Cache/BY2019_Summary_SEGID.dbf
data/model-output/2_Cache/BY2023_Summary_SEGID.dbf
d

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR
0,0006_000.0,0.0,126.5,2.0,Rural,1.0,2019
1,0006_000.7,0.0,213.6,2.0,Rural,1.0,2019
2,0006_016.0,0.0,76.9,2.0,Rural,1.0,2019
3,0006_046.0,0.0,76.9,2.0,Rural,1.0,2019
4,0006_060.2,0.0,80.3,2.0,Rural,1.0,2019
...,...,...,...,...,...,...,...
48282,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050
48283,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050
48284,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050
48285,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050


# Add Factors

In [111]:
dfMdlVolWithFactorSource = pd.merge(dfMdlVol, dfFactorSource, on='SUBAREAID', how='left')
dfMdlVolWithFactorSource

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE
0,0006_000.0,0.0,126.5,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp
1,0006_000.7,0.0,213.6,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp
2,0006_016.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp
3,0006_046.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp
4,0006_060.2,0.0,80.3,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp
...,...,...,...,...,...,...,...,...
48282,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp
48283,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050,Segments_State_20231221_Draft.shp
48284,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050,Segments_State_20231221_Draft.shp
48285,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp


In [112]:
# GET WEEKDAY FACTORS

# read in factors
_dfFac = pd.read_csv(fnFactors, usecols=('SOURCE', 'SEGID', 'SAID_FAC', colFacWdAvg, colFacSpr, colFacFal))

_dfFac['FAC_SPRFAL'] = (_dfFac[colFacSpr] + _dfFac[colFacFal]) / 2

dfMdlVolWithFac = pd.merge(dfMdlVolWithFactorSource, _dfFac, on=('SEGID','SOURCE'), how="left")

dfMdlVolWithFac.fillna(0, inplace=True)

display(dfMdlVolWithFac)

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
0,0006_000.0,0.0,126.5,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
1,0006_000.7,0.0,213.6,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
2,0006_016.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
3,0006_046.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
4,0006_060.2,0.0,80.3,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48282,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960
48283,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960
48284,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960
48285,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960


## get and use avg where factors zero... no match

In [113]:
dfFindZeroFac = dfMdlVolWithFac.copy()
dfFindZeroFac.loc[(dfFindZeroFac['FAC_WDAVG']==0), 'checkZero'] = 1
dfFindZeroFac.groupby(['SUBAREAID','YEAR']).agg(numSegs=('SEGID','count'),numSegsWithFacZero=('checkZero','sum'))

Unnamed: 0_level_0,Unnamed: 1_level_0,numSegs,numSegsWithFacZero
SUBAREAID,YEAR,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,2019,2208,1.0
0.0,2023,2211,1.0
0.0,2028,2209,1.0
0.0,2032,2209,1.0
0.0,2042,2209,1.0
0.0,2050,2210,1.0
1.0,2019,4103,0.0
1.0,2023,4183,0.0
1.0,2028,4228,0.0
1.0,2032,4337,0.0


In [114]:
dfNonZeroFac = dfFindZeroFac[~(dfFindZeroFac['checkZero']== 1)].drop(columns=['checkZero'])
display(dfNonZeroFac)
dfZeroFac    = dfFindZeroFac[ (dfFindZeroFac['checkZero']== 1)].drop(columns=['checkZero'])
display(dfZeroFac)

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
0,0006_000.0,0.0,126.5,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
1,0006_000.7,0.0,213.6,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
2,0006_016.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
3,0006_046.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
4,0006_060.2,0.0,80.3,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48282,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960
48283,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960
48284,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960
48285,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960


Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
1603,1831_000.0,0.0,1952.2,4.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
3811,1831_000.0,0.0,2081.8,4.0,Suburban,1.0,2023,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
6020,1831_000.0,0.0,2305.6,4.0,Suburban,1.0,2028,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
8229,1831_000.0,0.0,2451.9,4.0,Suburban,1.0,2032,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
10438,1831_000.0,0.0,2824.8,4.0,Suburban,1.0,2042,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
12647,1831_000.0,0.0,3067.1,4.0,Suburban,1.0,2050,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
39242,1180_002.5,2.0,326.7,5.0,Transition,2.0,2019,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
39695,1180_002.5,2.0,406.7,5.0,Transition,2.0,2023,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
40147,1180_002.5,2.0,670.4,5.0,Transition,2.0,2028,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0
40601,1180_002.5,2.0,938.5,5.0,Transition,2.0,2032,Segments_State_20231221_Draft.shp,0.0,0.0,0.0,0.0,0.0


In [115]:
dfZeroFacSaAtCombos = dfZeroFac[['SUBAREAID','ATYPENAME']].drop_duplicates()
display(dfZeroFacSaAtCombos)

Unnamed: 0,SUBAREAID,ATYPENAME
1603,0.0,Suburban
39242,2.0,Transition
41515,2.0,Suburban


In [116]:
dfMdlVolWithFac_Filtered = pd.merge(dfMdlVolWithFac, dfZeroFacSaAtCombos, on=['SUBAREAID', 'ATYPENAME'], how='inner')
dfMdlVolWithFac_Filtered


Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
0,0006_088.7,0.0,8020.1,2.0,Suburban,2.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0243,1.0104,1.0924,1.01735
1,0006_239.9,0.0,9671.9,14.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0324,1.0107,1.0918,1.02155
2,0010_068.3,0.0,9314.6,2.0,Suburban,1.3,2019,Segments_State_20231221_Draft.shp,0.0,1.0243,1.0104,1.0924,1.01735
3,0015_380.0,0.0,11386.1,35.0,Suburban,2.0,2019,Segments_State_20231221_Draft.shp,0.0,0.9841,0.9766,0.9059,0.98035
4,0021_107.1,0.0,1984.5,3.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0243,1.0104,1.0924,1.01735
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2937,Cache_4046,2.0,2639.5,4.0,Suburban,2.0,2050,Segments_State_20231221_Draft.shp,2.0,1.0243,1.0104,1.0924,1.01735
2938,Cache_4047,2.0,6343.1,4.0,Suburban,2.0,2050,Segments_State_20231221_Draft.shp,2.0,1.0324,1.0107,1.0918,1.02155
2939,Cache_4048,2.0,6490.5,4.0,Suburban,2.0,2050,Segments_State_20231221_Draft.shp,2.0,1.0243,1.0104,1.0924,1.01735
2940,Cache_4056,2.0,1401.5,6.0,Suburban,2.0,2050,Segments_State_20231221_Draft.shp,2.0,1.0243,1.0104,1.0924,1.01735


In [117]:
dfMdlVolWithFac_Filtered_avg = dfMdlVolWithFac_Filtered.groupby(['SUBAREAID','ATYPENAME'],as_index=False).agg(FAC_FAL = ('FAC_FAL','mean'),FAC_SPR = ('FAC_SPR','mean'),FAC_WDAVG = ('FAC_WDAVG','mean'),FAC_SPRFAL = ('FAC_SPRFAL','mean'))
dfMdlVolWithFac_Filtered_avg

Unnamed: 0,SUBAREAID,ATYPENAME,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
0,0.0,Suburban,1.023206,1.017086,1.073789,1.020146
1,2.0,Suburban,1.023946,1.00903,1.088912,1.016488
2,2.0,Transition,1.021054,0.999425,1.068011,1.010239


In [118]:
dfZeroFacWithAvg = pd.merge(dfZeroFac.drop(columns=['FAC_FAL','FAC_SPR','FAC_WDAVG','FAC_SPRFAL']),dfMdlVolWithFac_Filtered_avg, on=('SUBAREAID','ATYPENAME'),how='left')
dfZeroFacWithAvg

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
0,1831_000.0,0.0,1952.2,4.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
1,1831_000.0,0.0,2081.8,4.0,Suburban,1.0,2023,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
2,1831_000.0,0.0,2305.6,4.0,Suburban,1.0,2028,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
3,1831_000.0,0.0,2451.9,4.0,Suburban,1.0,2032,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
4,1831_000.0,0.0,2824.8,4.0,Suburban,1.0,2042,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
5,1831_000.0,0.0,3067.1,4.0,Suburban,1.0,2050,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
6,1180_002.5,2.0,326.7,5.0,Transition,2.0,2019,Segments_State_20231221_Draft.shp,0.0,1.021054,0.999425,1.068011,1.010239
7,1180_002.5,2.0,406.7,5.0,Transition,2.0,2023,Segments_State_20231221_Draft.shp,0.0,1.021054,0.999425,1.068011,1.010239
8,1180_002.5,2.0,670.4,5.0,Transition,2.0,2028,Segments_State_20231221_Draft.shp,0.0,1.021054,0.999425,1.068011,1.010239
9,1180_002.5,2.0,938.5,5.0,Transition,2.0,2032,Segments_State_20231221_Draft.shp,0.0,1.021054,0.999425,1.068011,1.010239


In [119]:
dfMdlVolWithFac = pd.concat([dfZeroFacWithAvg,dfNonZeroFac], ignore_index=True)
dfMdlVolWithFac

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
0,1831_000.0,0.0,1952.2,4.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
1,1831_000.0,0.0,2081.8,4.0,Suburban,1.0,2023,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
2,1831_000.0,0.0,2305.6,4.0,Suburban,1.0,2028,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
3,1831_000.0,0.0,2451.9,4.0,Suburban,1.0,2032,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
4,1831_000.0,0.0,2824.8,4.0,Suburban,1.0,2042,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48282,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600
48283,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600
48284,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600
48285,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600


# drop where overlapping forecasts (same segids in different subareas)

In [120]:
# get segids only
dfSegIds = dfMdlVolWithFac[['SEGID']].drop_duplicates()
display(dfSegIds)

# filter by 
display(dfMdlVolWithFac[['SEGID','SUBAREAID']].drop_duplicates())

Unnamed: 0,SEGID
0,1831_000.0
6,1180_002.5
12,0006_000.0
13,0006_000.7
14,0006_016.0
...,...
47124,3218_006.4
47125,UDOT_7008
47126,UDOT_7043
47817,UDOT_7006


Unnamed: 0,SEGID,SUBAREAID
0,1831_000.0,0.0
6,1180_002.5,2.0
12,0006_000.0,0.0
13,0006_000.7,0.0
14,0006_016.0,0.0
...,...,...
47124,3218_006.4,5.0
47125,UDOT_7008,5.0
47126,UDOT_7043,5.0
47817,UDOT_7006,5.0


In [121]:
# Assuming dfMdlVolWithFac is your initial DataFrame
df_filtered = dfMdlVolWithFac[['SEGID','SUBAREAID','SAID_FAC']].drop_duplicates()

df_filtered.rename(columns={'SUBAREAID':'TDM_SUBAREAID','SAID_FAC':'SEG_SUBAREAID'}, inplace=True)

# Find duplicates based on 'SEGID'
duplicates = df_filtered.duplicated(subset=['SEGID'], keep=False)

# Filter the DataFrame to include only the duplicates
df_duplicates = df_filtered[duplicates]

df_duplicates = df_duplicates.sort_values(by=['SEGID','TDM_SUBAREAID'])

# This will give you a DataFrame with only the rows where 'SEGID' is duplicated
df_duplicates

Unnamed: 0,SEGID,TDM_SUBAREAID,SEG_SUBAREAID
41866,0015_042.2,3.0,5.0
46903,0015_042.2,5.0,5.0
527,0065_002.4,0.0,1.0
13486,0065_002.4,1.0,1.0
1003,0150_029.4,0.0,0.0
44831,0150_029.4,4.0,0.0
1004,0150_030.0,0.0,0.0
44832,0150_030.0,4.0,0.0
1005,0150_033.3,0.0,0.0
44833,0150_033.3,4.0,0.0


In [122]:
# Assuming dfMdlVolWithFac is your initial DataFrame
df_filtered = dfMdlVolWithFac[['SEGID','SUBAREAID','SAID_FAC']].drop_duplicates()

df_filtered.rename(columns={'SUBAREAID':'TDM_SUBAREAID','SAID_FAC':'SEG_SUBAREAID'}, inplace=True)

# Find duplicates based on 'SEGID'
duplicates = df_filtered.duplicated(subset=['SEGID'], keep=False)

# Filter the DataFrame to include only the duplicates
df_duplicates = df_filtered[duplicates]

df_duplicates = df_duplicates.sort_values(by=['SEGID','TDM_SUBAREAID'])

df_duplicates = df_duplicates.set_index(['SEGID'])

df_duplicates = df_duplicates.astype(int)

# This will give you a DataFrame with only the rows where 'SEGID' is duplicated
df_duplicates[df_duplicates['TDM_SUBAREAID']!=df_duplicates['SEG_SUBAREAID']]

Unnamed: 0_level_0,TDM_SUBAREAID,SEG_SUBAREAID
SEGID,Unnamed: 1_level_1,Unnamed: 2_level_1
0015_042.2,3,5
0065_002.4,0,1
0150_029.4,4,0
0150_030.0,4,0
0150_033.3,4,0
1822_000.0,0,1
2865_019.4,4,1


  - 0015_042.2: Remove SEGID from links in Dixie TDM (3)
 - 0065_002.4: Change SUBAREAID on links in USTM from 0 to 1
 - 0150_029.4, 015_030.0, 015_030.3 both segments where route leaves Summit County briefly into the corner of Duchesne County, so segments are not geographically located in SuWs assume should be in SuWs Model... so, seg summary should be updated... probably have to custom write exclusion into code, since SUBAREAID was calculated with spatial join
 - 1822_000.0: Majority of segment is in Utah County... Should split segment at county line, or change SUBAREAID on links in USTM from 0 to 1
 - 2865_019.4: Majority of segment is in Utah County... Should split segment at county line, or change SUBAREAID on links in USTM from 0 to 1


In [123]:
duplicateSegsInTdms

Unnamed: 0,SEGID,removeSUBAREAID
0,0015_042.2,3
1,0065_002.4,0
2,0150_029.4,0
3,0150_030.0,0
4,0150_033.3,0
5,1822_000.0,0
6,2863_000.0,4
7,2865_019.4,4


In [124]:
# Example dataframes
# df1 = pd.DataFrame({'SEGID': [1, 2], 'removeSUBAREAID': [101, 102]})
# dfMdlVolWithFac = pd.DataFrame({'SEGID': [1, 2, 3], 'SUBAREAID': [101, 102, 103], 'OtherColumn': ['A', 'B', 'C']})

# Assuming 'removeSUBAREAID' in df1 matches 'SUBAREAID' in dfMdlVolWithFac, and you want to remove records from dfMdlVolWithFac that have matching 'SEGID' and 'SUBAREAID'
# If 'removeSUBAREAID' is not meant to directly match 'SUBAREAID', adjust the merge accordingly.

# Merge the dataframes on 'SEGID' and 'SUBAREAID'/'removeSUBAREAID', indicating records to remove
merged_df = pd.merge(dfMdlVolWithFac, duplicateSegsInTdms, left_on=['SEGID', 'SUBAREAID'], right_on=['SEGID', 'removeSUBAREAID'], how='left', indicator=True)

# Filter out the records that have a match
filtered_df = merged_df[merged_df['_merge'] == 'left_only']

# Drop the columns added from df1 and the merge indicator to get back to the original dfMdlVolWithFac format
final_df = filtered_df.drop(columns=['removeSUBAREAID', '_merge'])

# final_df will be your dfMdlVolWithFac dataframe with the matched records removed
dfMdlVolWithFacDupRem = final_df
dfMdlVolWithFacDupRem

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL
0,1831_000.0,0.0,1952.2,4.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
1,1831_000.0,0.0,2081.8,4.0,Suburban,1.0,2023,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
2,1831_000.0,0.0,2305.6,4.0,Suburban,1.0,2028,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
3,1831_000.0,0.0,2451.9,4.0,Suburban,1.0,2032,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
4,1831_000.0,0.0,2824.8,4.0,Suburban,1.0,2042,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48282,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600
48283,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600
48284,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600
48285,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600


In [125]:
# CHECK TO SEE IF DUPLICATES STILL REMAIN

# Assuming dfMdlVol is your initial DataFrame
df_filtered2 = dfMdlVolWithFacDupRem[['SEGID','SUBAREAID']].drop_duplicates()

# Find duplicates based on 'SEGID'
duplicates2 = df_filtered2.duplicated(subset=['SEGID'], keep=False)

# Filter the DataFrame to include only the duplicates2
df_duplicates2 = df_filtered2[duplicates2]

# This will give you a DataFrame with only the rows where 'SEGID' is duplicated
df_duplicates2

Unnamed: 0,SEGID,SUBAREAID


# Calculate Model AADT

In [126]:
import numpy as np

# Your existing code for merging and filling NA
_df = dfMdlVolWithFacDupRem.copy()

# calculate AADT by subareaid
# divide by colWeekdayFactor
_df.loc[(~(_df['SUBAREAID'].isin(subareaidsThatUseSprFalFac)) & (_df[colFacWdAvg]> 0)), 'modAadt'] = (_df['DY_VOL'] / _df[colFacWdAvg])
_df.loc[(~(_df['SUBAREAID'].isin(subareaidsThatUseSprFalFac)) & (_df[colFacWdAvg]==0)), 'modAadt'] = 0

# divide by average of Spring and Fall only for Summit/Wasatch Model
_df.loc[(_df['SUBAREAID'].isin(subareaidsThatUseSprFalFac) & (_df['FAC_SPRFAL']>0 )), 'modAadt'] = (_df['DY_VOL'] / _df[colFacWdAvg] / _df['FAC_SPRFAL'])
_df.loc[(_df['SUBAREAID'].isin(subareaidsThatUseSprFalFac) & (_df['FAC_SPRFAL']==0)), 'modAadt'] = 0

# Convert to integer as you had before
_df['modAadt'] = _df['modAadt'].round(0).astype(int)

dfModAadt = _df

# Display the DataFrame
display(dfModAadt)

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL,modAadt
0,1831_000.0,0.0,1952.2,4.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,1818
1,1831_000.0,0.0,2081.8,4.0,Suburban,1.0,2023,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,1939
2,1831_000.0,0.0,2305.6,4.0,Suburban,1.0,2028,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,2147
3,1831_000.0,0.0,2451.9,4.0,Suburban,1.0,2032,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,2283
4,1831_000.0,0.0,2824.8,4.0,Suburban,1.0,2042,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,2631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48282,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,50
48283,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,4249
48284,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,7377
48285,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,461


# Get observed base year volumes

In [127]:
# GET BASE YEAR OBSERVED

# read intermediate CSV with base year observed AADT
_dfAadt = pd.read_csv('intermediate/aadt.csv')

# get SEGID and SUBAREAIDs from AADT dataframe  and merge to the desired source as indicated in dfObsVolSource
_dfSource = pd.merge(dfModAadt[['SEGID','SUBAREAID']].drop_duplicates(),dfObsVolSource,on='SUBAREAID',how='left')

_df = pd.merge(_dfSource, _dfAadt, on=('SEGID','SOURCE'))

# filter by source and year
_df = _df[(_df['YEAR']<=baseYear)]

# get max row of max year for each segid... incase there is no observed 2019 data, but maybe a earlier year
# Then, group by 'SEGID' and find the index of the maximum 'YEAR' for each group
idx = _df.groupby('SEGID')['YEAR'].idxmax()

# Use the index to filter the rows
_df = _df.loc[idx]

_df = _df[['SEGID','AADT']]
_df.rename(columns={'AADT':'obsAadt'}, inplace=True)

dfObsAadt_BaseYear = _df

display(dfObsAadt_BaseYear)

Unnamed: 0,SEGID,obsAadt
37,0006_000.0,415
78,0006_000.7,415
119,0006_016.0,415
160,0006_046.0,372
201,0006_060.2,372
...,...,...
151282,WFRC_8004,6672
151290,WFRC_8010,1112
151310,WFRC_8220,17096
151352,WFRC_8419,18324


In [128]:
# CALCULATE ADJUSTMENT FACTOR FROM BASE YEAR MODEL VS OBSERVED
dfModAadt_BaseYear = dfModAadt[dfModAadt['YEAR']==2019]

_df = pd.DataFrame.merge(dfModAadt_BaseYear[use_fields + ['modAadt']], dfObsAadt_BaseYear[['SEGID','obsAadt']], on='SEGID', how='left')
_df.fillna(0,inplace=True)

_df['aadtAdjFactor'] = _df['obsAadt'] - _df['modAadt']

_df.loc[(_df['obsAadt']==0), 'aadtAdjFactor'] = 0

display(_df[_df['modAadt']==0])

dfModAdjFactor = _df[['SEGID','aadtAdjFactor']]

display(dfModAdjFactor)


Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,modAadt,obsAadt,aadtAdjFactor
188,0021_077.2,0.0,0.0,3.0,Transition,1.0,0,2605.0,2605.0
245,0025_000.0,0.0,0.0,4.0,Rural,1.0,0,179.0,179.0
246,0025_002.9,0.0,0.0,4.0,Rural,1.0,0,179.0,179.0
900,0122_000.0,0.0,0.0,4.0,Rural,1.0,0,77.0,77.0
914,0128_000.0,0.0,0.0,4.0,Rural,1.0,0,1055.0,1055.0
...,...,...,...,...,...,...,...,...,...
7655,1738_002.0,5.0,0.0,7.0,Rural,2.0,0,0.0,0.0
7667,1744_000.0,5.0,0.0,7.0,Rural,2.0,0,0.0,0.0
7671,1746_000.0,5.0,0.0,7.0,Rural,2.0,0,0.0,0.0
7707,1767_000.2,5.0,0.0,4.0,Urban,2.0,0,2732.0,2732.0


Unnamed: 0,SEGID,aadtAdjFactor
0,1831_000.0,0.0
1,1180_002.5,0.0
2,0006_000.0,286.0
3,0006_000.7,198.0
4,0006_016.0,337.0
...,...,...
7773,3217_001.9,-373.0
7774,3217_008.3,-692.0
7775,3218_006.4,0.0
7776,UDOT_7008,0.0


In [129]:
# Function to round based on the given ranges
def custom_rounding(value):
    if 0 <= value < 100:
        return round(value / 10) * 10
    elif 100 <= value < 1000:
        return round(value / 50) * 50
    elif 1000 <= value < 10000:
        return round(value / 100) * 100
    elif 10000 <= value < 100000:
        return round(value / 500) * 500
    elif value >= 100000:
        return round(value / 1000) * 1000
    else:
        return value

In [130]:
# ADJUST ALL MODEL VOLUMES BASED ON BASE YEAR ADJ FACTOR

dfModWithAdj = pd.DataFrame.merge(dfModAadt, dfModAdjFactor, on='SEGID', how='left')
dfModWithAdj['aadtAdjFactor'].fillna(0,inplace=True)
dfModWithAdj['aadtAdjFactor'] = dfModWithAdj['aadtAdjFactor'].astype(int)
dfModWithAdj['modForecast'] = (dfModWithAdj['modAadt'] + dfModWithAdj['aadtAdjFactor']).apply(custom_rounding).astype(int)
dfModWithAdj

# remove 2019 since it is base
#dfModWithAdjNo2019 = dfModWithAdj[dfModWithAdj['YEAR']>2019]

# sort
#dfModWithAdjNo2019 = dfModWithAdjNo2019.sort_values(by=['SEGID', 'YEAR'], ascending=[True, True])

#dfModWithAdjNo2019

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL,modAadt,aadtAdjFactor,modForecast
0,1831_000.0,0.0,1952.2,4.0,Suburban,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,1818,0,1800
1,1831_000.0,0.0,2081.8,4.0,Suburban,1.0,2023,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,1939,0,1900
2,1831_000.0,0.0,2305.6,4.0,Suburban,1.0,2028,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,2147,0,2100
3,1831_000.0,0.0,2451.9,4.0,Suburban,1.0,2032,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,2283,0,2300
4,1831_000.0,0.0,2824.8,4.0,Suburban,1.0,2042,Segments_State_20231221_Draft.shp,0.0,1.023206,1.017086,1.073789,1.020146,2631,0,2600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48234,3218_006.4,5.0,49.5,7.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,50,0,50
48235,UDOT_7006,5.0,4181.0,4.0,Transition,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,4249,0,4200
48236,UDOT_7007,5.0,7259.0,4.0,Rural,4.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,7377,0,7400
48237,UDOT_7008,5.0,453.9,4.0,Rural,2.0,2050,Segments_State_20231221_Draft.shp,5.0,1.031600,1.027600,0.984000,1.029600,461,0,450


In [131]:
# check if base year adjusted equals base year observed

_df1 = dfModWithAdj[dfModWithAdj['YEAR']==baseYear]
_df2 = dfObsAadt_BaseYear

_df3 = pd.DataFrame.merge(_df1, _df2, on='SEGID')

_df3['ModVsObs'] = _df3['modForecast'] - _df3['obsAadt']

display(_df3[_df3['ModVsObs']!=0])


Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL,modAadt,aadtAdjFactor,modForecast,obsAadt,ModVsObs
0,0006_000.0,0.0,126.5,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960,129,286,400,415,-15
1,0006_000.7,0.0,213.6,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960,217,198,400,415,-15
2,0006_016.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960,78,337,400,415,-15
3,0006_046.0,0.0,76.9,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960,78,294,350,372,-22
4,0006_060.2,0.0,80.3,2.0,Rural,1.0,2019,Segments_State_20231221_Draft.shp,0.0,1.0316,1.0276,0.9840,1.02960,82,290,350,372,-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6858,1788_004.7,5.0,1755.0,4.0,Rural,2.0,2019,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960,1784,207,2000,1991,9
6859,1788_007.8,5.0,1264.5,4.0,Rural,2.0,2019,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960,1285,706,2000,1991,9
6860,1789_000.0,5.0,2377.9,5.0,Rural,2.0,2019,Segments_State_20231221_Draft.shp,5.0,1.0316,1.0276,0.9840,1.02960,2417,-578,1800,1839,-39
6861,1800_000.0,5.0,1139.0,4.0,Rural,2.0,2019,Segments_State_20231221_Draft.shp,5.0,1.1599,0.7083,0.8535,0.93410,1335,-763,550,572,-22


In [132]:
# export final file
dfModWithAdj.to_csv('intermediate/model-forecasts.csv', index=False)

In [133]:
dfModWithAdj[dfModWithAdj['SEGID']=='0085_011.9']

Unnamed: 0,SEGID,SUBAREAID,DY_VOL,FT,ATYPENAME,LANES,YEAR,SOURCE,SAID_FAC,FAC_FAL,FAC_SPR,FAC_WDAVG,FAC_SPRFAL,modAadt,aadtAdjFactor,modForecast
13677,0085_011.9,1.0,44752.2,13.0,Transition,4.0,2019,WFv901_Segments_20240226_Draft.shp,1.0,1.0105,1.0112,1.1568,1.01085,38686,-5538,33000
17780,0085_011.9,1.0,53052.9,13.0,Suburban,4.0,2023,WFv901_Segments_20240226_Draft.shp,1.0,1.0105,1.0112,1.1568,1.01085,45862,-5538,40500
21963,0085_011.9,1.0,62913.6,13.0,Suburban,4.0,2028,WFv901_Segments_20240226_Draft.shp,1.0,1.0105,1.0112,1.1568,1.01085,54386,-5538,49000
26191,0085_011.9,1.0,98869.1,34.0,Suburban,8.0,2032,WFv901_Segments_20240226_Draft.shp,1.0,1.0105,1.0112,1.1568,1.01085,85468,-5538,80000
30528,0085_011.9,1.0,125201.8,34.0,Urban,8.0,2042,WFv901_Segments_20240226_Draft.shp,1.0,1.0105,1.0112,1.1568,1.01085,108231,-5538,103000
34938,0085_011.9,1.0,155509.0,34.0,Urban,12.0,2050,WFv901_Segments_20240226_Draft.shp,1.0,1.0105,1.0112,1.1568,1.01085,134430,-5538,129000
