In [6]:
import pandas as pd
import os
import geopandas as gpd
import re # regular expression
import numpy as np
from sklearn.linear_model import LinearRegression

In [7]:
# Plan areas whose forecasts are locked and must not be updated
lstLockPlanArea = [
    'WFRC',
    'MAG',
    'Summit',
    'Iron',
    'Dixie',
    'Cache',
]

# GLOBAL VARIABLES
# Years at which the fitted trend lines are evaluated
future_years = [
    2023,
    2028,
    2032,
    2042,
    2050,
]

In [None]:
# Projection groups for the linear forecasts.
#   pgName         - label of the group
#   pgYearFrom/To  - first and last year of the window (both ends included)
#   pgYearsExclude - years left out of the window
#   remaining pg*  - color, hidden flag, border dash and border width, stored as-is
#                    (presumably chart styling - confirm with the consumer of the CSV)
projection_group_columns = (
    'pgName',
    'pgYearFrom',
    'pgYearTo',
    'pgYearsExclude',
    'pgColor',
    'pgHidden',
    'pgBorderDash',
    'pgBorderWidth',
)

# NOTE: `{}` below is an empty dict literal (not a set); it is written to the CSV as `{}`.
projection_group_rows = [
    # Disabled groups, kept for reference:
    #   ["Since 2012"         , 2012, 2022,     {}, "#FF0000", True ,  [5,5], 0.25],
    #   ["Since 2017"         , 2017, 2022,     {}, "#00FF00", True ,  [5,5], 0.25],
    #   ["Since 2016 w/o 2020", 2016, 2022, {2020}, "#00FF00", True , [10,5], 0.50],
    ["Since 1981", 1981, 2023, {}, "#6948F4", False, [5, 5], 1.50],
    ["Since 2011 w/o 2020", 2011, 2023, {2020}, "#FF0000", False, [5, 5], 0.50],
    ["Since 2001 w/o 2020", 2001, 2023, {2020}, "#F28705", False, [10, 5], 0.50],
]

dfProjGroupsLinear = pd.DataFrame(projection_group_rows, columns=projection_group_columns)

# Save the group definitions next to the other intermediate files
dfProjGroupsLinear.to_csv('intermediate/projection-groups.csv', index=False)

display(dfProjGroupsLinear)

Unnamed: 0,pgName,pgYearFrom,pgYearTo,pgYearsExclude,pgColor,pgHidden,pgBorderDash,pgBorderWidth
0,Since 1981,1981,2023,{},#6948F4,False,"[5, 5]",1.5
1,Since 2011 w/o 2020,2011,2023,{2020},#FF0000,False,"[5, 5]",0.5
2,Since 2001 w/o 2020,2001,2023,{2020},#F28705,False,"[10, 5]",0.5


In [9]:
# Load the AADT records that the linear forecasts are fitted on
aadt_csv = 'intermediate/aadt.csv'
dfAadt = pd.read_csv(aadt_csv)
dfAadt

Unnamed: 0,SOURCE,SEGID,YEAR,AADT,PLANAREA,F_AREA
0,Segments_State_20231221_Draft.shp,0013_000.0,2019,19514,WFRC,WFRC
1,WF_Segments_20240326_Draft.shp,0013_000.0,1983,11780,WFRC,WFRC
2,WF_Segments_20240326_Draft.shp,0013_000.0,1984,11910,WFRC,WFRC
3,WF_Segments_20240326_Draft.shp,0013_000.0,1985,12035,WFRC,WFRC
4,WF_Segments_20240326_Draft.shp,0013_000.0,1986,12075,WFRC,WFRC
...,...,...,...,...,...,...
317889,AADTHistory_2023.xlsx,3483_000.0,2020,1041,UDOT,UDOT
317890,AADTHistory_2023.xlsx,3483_000.0,2021,1116,UDOT,UDOT
317891,AADTHistory_2023.xlsx,3483_000.0,2022,1104,UDOT,UDOT
317892,AADTHistory_2023.xlsx,3483_000.0,2023,1151,UDOT,UDOT


In [10]:
# Linear forecasts with assist from ChatGPT
# https://chat.openai.com/share/d127492a-ad78-4f45-afd0-50e29069db1a
#
# For every projection group, fit a least-squares line of AADT on YEAR for each
# (SEGID, SOURCE, PLANAREA, F_AREA) series, using only the years inside the
# group's window and not in its exclusion set. The line is then evaluated at the
# group's start year and at every year in `future_years`, rounded to integers.
#
# Result: `forecast_results`, one row per series / projection group / year, with
# columns SEGID, PLANAREA, F_AREA, SOURCE, PROJGRP, YEAR, linForecast.
# Series with no rows left after filtering are skipped and logged to
# 'intermediate/linear-forecasts-errors.txt'.

forecast_columns = ['SEGID', 'PLANAREA', 'F_AREA', 'SOURCE', 'PROJGRP', 'YEAR', 'linForecast']

# One plain dict per output row; a single DataFrame is built from them at the end
# (much cheaper than creating and melting a one-row DataFrame per series).
forecast_results_list = []

# The grouping is the same for every projection group, so set it up once
aadt_by_series = dfAadt.groupby(['SEGID', 'SOURCE', 'PLANAREA', 'F_AREA'])

# Open the error file
with open('intermediate/linear-forecasts-errors.txt', 'w') as err_file:
    # Loop through the projection groups
    for index, row in dfProjGroupsLinear.iterrows():
        pgName = row['pgName']
        pgYearFrom = int(row['pgYearFrom'])
        pgYearTo = int(row['pgYearTo'])
        # set() also turns the empty-dict placeholder `{}` into an empty set
        pgYearsExclude = set(row['pgYearsExclude'])

        display(pgName)

        # Years to predict: the window start plus the future years. dict.fromkeys
        # drops a repeated year (keeping first position) so each year yields one row.
        predict_years = list(dict.fromkeys([pgYearFrom] + future_years))
        predict_X = np.array(predict_years).reshape(-1, 1)

        for (segid, source, planarea, f_area), group in aadt_by_series:

            # Keep only the years this projection group is fitted on
            years = group['YEAR']
            in_window = (years >= pgYearFrom) & (years <= pgYearTo) & ~years.isin(pgYearsExclude)
            filtered_group = group[in_window]

            # Nothing to fit: log it and move on to the next series
            if filtered_group.empty:
                error_msg = f"Skipping empty group for SEGID: {segid}, SOURCE: {source}, Projection Group: {pgName}"
                print (error_msg)
                err_file.write(error_msg + "\n")
                continue

            X = filtered_group['YEAR'].values.reshape(-1, 1)
            y = filtered_group['AADT'].values

            model = LinearRegression()
            model.fit(X, y)

            # Predict, then round the forecasted values to the nearest integers
            predicted = np.rint(model.predict(predict_X)).astype(int).tolist()

            for year, forecast in zip(predict_years, predicted):
                forecast_results_list.append({
                    'SEGID': segid,
                    'PLANAREA': planarea,
                    'F_AREA': f_area,
                    'SOURCE': source,
                    'PROJGRP': pgName,
                    'YEAR': year,
                    'linForecast': forecast,
                })

# Build the result once; passing the columns explicitly keeps the frame well-formed
# (empty, but with all columns) even when every series was skipped.
forecast_results = pd.DataFrame(forecast_results_list, columns=forecast_columns)

# Now forecast_results contains the forecasts for the specified future years, along with SEGID, SOURCE, and Projection Group
display(forecast_results)


'Since 1981'

'Since 2011 w/o 2020'

'Since 2001 w/o 2020'

Unnamed: 0,SEGID,PLANAREA,F_AREA,SOURCE,PROJGRP,YEAR,linForecast
0,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,1981,457
1,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2023,373
2,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2028,363
3,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2032,355
4,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2042,335
...,...,...,...,...,...,...,...
344461,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2023,27464
344462,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2028,29782
344463,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2032,31636
344464,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2042,36272


In [11]:
# merge locked with unlocked
#
# Locked plan areas keep the rows from the previously saved forecasts file;
# every other plan area takes the forecasts computed in this run.

if lstLockPlanArea:
    # Previously saved forecasts - only rows of locked plan areas are kept from here
    forecast_results_Prev = pd.read_csv('intermediate/linear-forecasts.csv')
    prev_is_locked = forecast_results_Prev['PLANAREA'].isin(lstLockPlanArea)
    forecast_results_Locked = forecast_results_Prev[prev_is_locked].copy()

    # Fresh forecasts - drop the locked plan areas
    new_is_locked = forecast_results['PLANAREA'].isin(lstLockPlanArea)
    forecast_results_Unlocked = forecast_results[~new_is_locked].copy()

    # Stack locked rows on top of unlocked rows with a clean 0..n-1 index
    forecast_results_Merged = pd.concat(
        [forecast_results_Locked, forecast_results_Unlocked],
        ignore_index=True,
    )
else:
    # Nothing is locked: the fresh forecasts are the final result
    forecast_results_Merged = forecast_results

forecast_results_Merged

Unnamed: 0,SEGID,PLANAREA,F_AREA,SOURCE,PROJGRP,YEAR,linForecast
0,0006_141.0,MAG,MAG,WF_Segments_20240326_Draft.shp,Since 1981,1981,913
1,0006_141.0,MAG,MAG,WF_Segments_20240326_Draft.shp,Since 1981,2023,1587
2,0006_141.0,MAG,MAG,WF_Segments_20240326_Draft.shp,Since 1981,2028,1668
3,0006_141.0,MAG,MAG,WF_Segments_20240326_Draft.shp,Since 1981,2032,1732
4,0006_141.0,MAG,MAG,WF_Segments_20240326_Draft.shp,Since 1981,2042,1892
...,...,...,...,...,...,...,...
350923,3483_000.0,UDOT,UDOT,Segments_State_20231221_Draft.shp,Since 2001 w/o 2020,2023,1042
350924,3483_000.0,UDOT,UDOT,Segments_State_20231221_Draft.shp,Since 2001 w/o 2020,2028,1042
350925,3483_000.0,UDOT,UDOT,Segments_State_20231221_Draft.shp,Since 2001 w/o 2020,2032,1042
350926,3483_000.0,UDOT,UDOT,Segments_State_20231221_Draft.shp,Since 2001 w/o 2020,2042,1042


In [12]:
# export csv
# Write the merged result, not the raw `forecast_results`: the merged frame carries
# the locked plan areas over from the previous file, whereas exporting the fresh
# forecasts would overwrite them and make the lock list ineffective.
forecast_results_Merged.to_csv('intermediate/linear-forecasts.csv', index=False)

In [13]:
# check: show only the rows of the 'Since 1981' projection group
forecast_results.loc[forecast_results['PROJGRP'].eq('Since 1981')]

Unnamed: 0,SEGID,PLANAREA,F_AREA,SOURCE,PROJGRP,YEAR,linForecast
0,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,1981,457
1,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2023,373
2,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2028,363
3,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2032,355
4,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2042,335
...,...,...,...,...,...,...,...
114817,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2023,29139
114818,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2028,32546
114819,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2032,35271
114820,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2042,42085
