In [7]:
import pandas as pd
import os
import geopandas as gpd
import re # regular expression
import numpy as np
from sklearn.linear_model import LinearRegression

In [8]:
# don't update these that have been locked
lstLockPlanArea = ['WFRC']

# GLOBAL VARIABLES
future_years = [2023, 2028, 2032, 2042, 2050]

In [9]:
dfProjGroupsLinear = pd.DataFrame([
#    ["Since 2012"         , 2012, 2022,     {}, "#FF0000", True ,  [5,5], 0.25],
#    ["Since 2017"         , 2017, 2022,     {}, "#00FF00", True ,  [5,5], 0.25],
    ["Since 1981"         , 1981, 2022,     {}, "#6948F4", False,  [5,5], 1.50],
    ["Since 2011 w/o 2020", 2011, 2022, {2020}, "#FF0000", False,  [5,5], 0.50],
#    ["Since 2016 w/o 2020", 2016, 2022, {2020}, "#00FF00", True , [10,5], 0.50],
    ["Since 2001 w/o 2020", 2001, 2022, {2020}, "#F28705", False , [10,5], 0.50]
], columns=('pgName','pgYearFrom','pgYearTo','pgYearsExclude','pgColor','pgHidden','pgBorderDash','pgBorderWidth'))

dfProjGroupsLinear.to_csv('intermediate/projection-groups.csv', index=False)

display(dfProjGroupsLinear)

Unnamed: 0,pgName,pgYearFrom,pgYearTo,pgYearsExclude,pgColor,pgHidden,pgBorderDash,pgBorderWidth
0,Since 1981,1981,2022,{},#6948F4,False,"[5, 5]",1.5
1,Since 2011 w/o 2020,2011,2022,{2020},#FF0000,False,"[5, 5]",0.5
2,Since 2001 w/o 2020,2001,2022,{2020},#F28705,False,"[10, 5]",0.5


In [10]:
# export AADT sources
dfAadt = pd.read_csv('intermediate/aadt.csv')
dfAadt

Unnamed: 0,SOURCE,SEGID,PLANAREA,F_AREA,YEAR,AADT
0,Segments_State_20231221_Draft.shp,0013_000.0,WFRC,WFRC,2019,19514
1,WF_Segments_20240326_Draft.shp,0013_000.0,WFRC,WFRC,1983,11780
2,WF_Segments_20240326_Draft.shp,0013_000.0,WFRC,WFRC,1984,11910
3,WF_Segments_20240326_Draft.shp,0013_000.0,WFRC,WFRC,1985,12035
4,WF_Segments_20240326_Draft.shp,0013_000.0,WFRC,WFRC,1986,12075
...,...,...,...,...,...,...
315373,WF_Segments_20240326_Draft.shp,MAG_6604,MAG,MAG,2018,500
315374,WF_Segments_20240326_Draft.shp,MAG_6604,MAG,MAG,2019,500
315375,WF_Segments_20240326_Draft.shp,MAG_6604,MAG,MAG,2020,500
315376,WF_Segments_20240326_Draft.shp,MAG_6604,MAG,MAG,2021,500


In [11]:
# Linear forecasts with assist from ChatGPT
# https://chat.openai.com/share/d127492a-ad78-4f45-afd0-50e29069db1a

# Initialize a list to store the individual result DataFrames
forecast_results_list = []

# Open the error file
with open('intermediate/linear-forecasts-errors.txt', 'w') as err_file:
    # Loop through the projection groups
    for index, row in dfProjGroupsLinear.iterrows():
        pgName = row['pgName']
        pgYearFrom = row['pgYearFrom']
        pgYearTo = row['pgYearTo']
        pgYearsExclude = set(row['pgYearsExclude'])
        
        display(pgName)

        # Group by SEGID and SOURCE and iterate through the groups
        for (segid, source, planarea, f_area), group in dfAadt.groupby(['SEGID', 'SOURCE', 'PLANAREA', 'F_AREA']):
            
            # Filter the data according to the projection group criteria
            filtered_group = group[(group['YEAR'] >= pgYearFrom) & (group['YEAR'] <= pgYearTo)]
            filtered_group = filtered_group[~filtered_group['YEAR'].isin(pgYearsExclude)]
            
            # Check if the filtered group is empty
            if filtered_group.shape[0] == 0:
                error_msg = f"Skipping empty group for SEGID: {segid}, SOURCE: {source}, Projection Group: {pgName}"
                print (error_msg)
                err_file.write(error_msg + f"\n")
                continue

            X = filtered_group['YEAR'].values.reshape(-1, 1)
            y = filtered_group['AADT'].values

            model = LinearRegression()
            model.fit(X, y)

            
            # Predict for the specified future years
            aadt = model.predict(np.array([pgYearFrom] + future_years).reshape(-1, 1))

            # Round the forecasted values to the nearest integers
            aadt = np.rint(aadt).astype(int)

            # Create a dictionary to store results for this group
            result_dict = {'SEGID': segid, 'SOURCE': source,  'PLANAREA': planarea,  'F_AREA': f_area, 'PROJGRP': pgName}
            result_dict.update({year: aadt for year, aadt in zip([pgYearFrom] + future_years, aadt)})

            # Convert the dictionary to a DataFrame and add to the list
            result_df = pd.DataFrame([result_dict])

            result_df_melt = result_df.melt(id_vars=['SEGID','PLANAREA','F_AREA','SOURCE','PROJGRP'],var_name="YEAR", value_name="linForecast")

            forecast_results_list.append(result_df_melt)

# Concatenate all the individual result DataFrames
forecast_results = pd.concat(forecast_results_list, ignore_index=True)

# Now forecast_results contains the forecasts for the specified future years, along with SEGID, SOURCE, and Projection Group
display(forecast_results)


'Since 1981'

'Since 2011 w/o 2020'

'Since 2001 w/o 2020'

Unnamed: 0,SEGID,PLANAREA,F_AREA,SOURCE,PROJGRP,YEAR,linForecast
0,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,1981,461
1,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2023,365
2,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2028,353
3,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2032,344
4,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2042,321
...,...,...,...,...,...,...,...
349339,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2023,27464
349340,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2028,29782
349341,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2032,31636
349342,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2042,36272


In [12]:
# merge locked with unlocked

if len(lstLockPlanArea):
    # export final file
    forecast_results_Prev = pd.read_csv('intermediate/factors.csv')
    forecast_results_Locked = forecast_results_Prev[forecast_results_Prev['PLANAREA'].isin(lstLockPlanArea)].copy()

    forecast_results_Unlocked = forecast_results[~forecast_results['PLANAREA'].isin(lstLockPlanArea)].copy()

    # Concatenate the locked and unlocked DataFrames
    forecast_results_Merged = pd.concat([forecast_results_Locked, forecast_results_Unlocked])

    # Reset the index of the final DataFrame, if needed
    forecast_results_Merged.reset_index(drop=True, inplace=True)

else:
    forecast_results_Merged = forecast_results

forecast_results_Merged

Unnamed: 0,SOURCE,SEGID,SAID_FAC,PLANAREA,F_AREA,FACMANADJ,FAC_APR,FAC_AUG,FAC_DEC,FAC_FAL,...,FAC_THU,FAC_TUE,FAC_WDAVG,FAC_WEAVG,FAC_WED,FAC_WEMAX,FAC_WIN,PROJGRP,YEAR,linForecast
0,Segments_State_20231221_Draft.shp,0013_000.0,1.0,WFRC,WFRC,0.0,1.0146,1.0396,0.9616,1.0243,...,1.1071,1.0813,1.0924,0.7653,1.0887,0.9245,0.9370,,,
1,Segments_State_20231221_Draft.shp,0013_000.6,1.0,WFRC,WFRC,0.0,1.0146,1.0396,0.9616,1.0243,...,1.1071,1.0813,1.0924,0.7653,1.0887,0.9245,0.9370,,,
2,Segments_State_20231221_Draft.shp,0013_001.3,1.0,WFRC,WFRC,0.0,1.0142,1.0223,0.9907,1.0095,...,1.1063,1.0851,1.0946,0.7583,1.0925,0.8873,0.9653,,,
3,Segments_State_20231221_Draft.shp,0013_001.6,1.0,WFRC,WFRC,0.0,1.0142,1.0223,0.9907,1.0095,...,1.1063,1.0851,1.0946,0.7583,1.0925,0.8873,0.9653,,,
4,Segments_State_20231221_Draft.shp,0013_001.8,1.0,WFRC,WFRC,0.0,1.0146,1.0396,0.9616,1.0243,...,1.1071,1.0813,1.0924,0.7653,1.0887,0.9245,0.9370,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215001,WF_Segments_20240326_Draft.shp,MAG_6604,,MAG,MAG,,,,,,...,,,,,,,,Since 2001 w/o 2020,2023,504.0
215002,WF_Segments_20240326_Draft.shp,MAG_6604,,MAG,MAG,,,,,,...,,,,,,,,Since 2001 w/o 2020,2028,508.0
215003,WF_Segments_20240326_Draft.shp,MAG_6604,,MAG,MAG,,,,,,...,,,,,,,,Since 2001 w/o 2020,2032,511.0
215004,WF_Segments_20240326_Draft.shp,MAG_6604,,MAG,MAG,,,,,,...,,,,,,,,Since 2001 w/o 2020,2042,518.0


In [13]:
# export csv
forecast_results.to_csv('intermediate/linear-forecasts.csv', index=False)

In [14]:
# check
forecast_results[forecast_results['PROJGRP']=='Since 1981']

Unnamed: 0,SEGID,PLANAREA,F_AREA,SOURCE,PROJGRP,YEAR,linForecast
0,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,1981,461
1,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2023,365
2,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2028,353
3,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2032,344
4,0006_000.0,UDOT,UDOT,AADTHistory.xlsx,Since 1981,2042,321
...,...,...,...,...,...,...,...
116443,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2023,29139
116444,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2028,32546
116445,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2032,35271
116446,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 1981,2042,42085
