In [1]:
import pandas as pd
import os
import geopandas as gpd
import re # regular expression
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# GLOBAL VARIABLES
# Target years at which the forecasting cell evaluates each fitted trend line
# (it also evaluates the line at the first year of each projection group's window).
future_years = [2023, 2028, 2032, 2042, 2050]

In [3]:
# Projection groups: each row names a fitting window (pgYearFrom..pgYearTo,
# inclusive) and the years to leave out of the fit (pgYearsExclude).
# The remaining columns (pgColor, pgHidden, pgBorderDash, pgBorderWidth) are not
# used in this notebook -- presumably styling for a downstream chart; confirm
# against whatever reads projection-groups.csv.
projection_group_columns = (
    'pgName', 'pgYearFrom', 'pgYearTo', 'pgYearsExclude',
    'pgColor', 'pgHidden', 'pgBorderDash', 'pgBorderWidth',
)

# NOTE: {} is an empty dict (not a set); the forecasting cell passes it through
# set(), which turns it into "no excluded years".
projection_group_rows = [
    # Disabled alternatives, kept for reference:
    # ["Since 2012",          2012, 2021, {},     "#FF0000", True,  [5, 5],  0.25],
    # ["Since 2017",          2017, 2021, {},     "#00FF00", True,  [5, 5],  0.25],
    # ["Since 2016 w/o 2020", 2016, 2021, {2020}, "#00FF00", True,  [10, 5], 0.50],
    ["Since 1981",          1981, 2021, {},     "#6948F4", False, [5, 5],  1.50],
    ["Since 2011 w/o 2020", 2011, 2021, {2020}, "#FF0000", False, [5, 5],  0.50],
    ["Since 2001 w/o 2020", 2001, 2021, {2020}, "#F28705", False, [10, 5], 0.50],
]

dfProjGroupsLinear = pd.DataFrame(projection_group_rows, columns=projection_group_columns)

# Persist the table (without the index column) for later steps.
dfProjGroupsLinear.to_csv('intermediate/projection-groups.csv', index=False)

display(dfProjGroupsLinear)

Unnamed: 0,pgName,pgYearFrom,pgYearTo,pgYearsExclude,pgColor,pgHidden,pgBorderDash,pgBorderWidth
0,Since 1981,1981,2021,{},#6948F4,False,"[5, 5]",1.5
1,Since 2011 w/o 2020,2011,2021,{2020},#FF0000,False,"[5, 5]",0.5
2,Since 2001 w/o 2020,2001,2021,{2020},#F28705,False,"[10, 5]",0.5


In [4]:
# Load the AADT observations from the intermediate CSV (this cell only reads; nothing is exported here).
# The forecasting cell uses its SEGID, YEAR, AADT and SOURCE columns.
dfAadt = pd.read_csv('intermediate/aadt.csv')
dfAadt

Unnamed: 0,SEGID,YEAR,AADT,SOURCE
0,0006_000.0,1981,325,AADTHistory.xlsx
1,0006_000.0,1982,335,AADTHistory.xlsx
2,0006_000.0,1983,430,AADTHistory.xlsx
3,0006_000.0,1984,580,AADTHistory.xlsx
4,0006_000.0,1985,585,AADTHistory.xlsx
...,...,...,...,...
178372,3483_000.0,2017,990,AADTHistory.xlsx
178373,3483_000.0,2018,1030,AADTHistory.xlsx
178374,3483_000.0,2019,1042,AADTHistory.xlsx
178375,3483_000.0,2020,1041,AADTHistory.xlsx


In [5]:
# Linear forecasts with assist from ChatGPT
# https://chat.openai.com/share/d127492a-ad78-4f45-afd0-50e29069db1a
#
# For each projection group and each (SEGID, SOURCE) series in dfAadt, fit a
# straight line of AADT against YEAR over the group's year window (minus any
# excluded years) and evaluate it at the window's first year and at every year
# in future_years. Series with no rows left after filtering are skipped and
# logged to intermediate/linear-forecasts-errors.txt.
#
# Result: forecast_results, one row per SEGID / SOURCE / PROJGRP / YEAR with the
# rounded forecast in linForecast.

forecast_columns = ['SEGID', 'SOURCE', 'PROJGRP', 'YEAR', 'linForecast']

# Plain tuples are collected here and turned into a DataFrame once at the end;
# building and melting a one-row DataFrame per series and concatenating tens of
# thousands of them is far slower and fails outright when nothing was forecast.
forecast_records = []

# The grouping is identical for every projection group, so create it only once.
aadt_by_series = dfAadt.groupby(['SEGID', 'SOURCE'])

# Open the error file
with open('intermediate/linear-forecasts-errors.txt', 'w') as err_file:
    # Loop through the projection groups
    for _, row in dfProjGroupsLinear.iterrows():
        pgName = row['pgName']
        pgYearFrom = row['pgYearFrom']
        pgYearTo = row['pgYearTo']
        # An empty {} in the projection-group table becomes an empty set here.
        pgYearsExclude = set(row['pgYearsExclude'])

        display(pgName)

        # Years to predict: start of the fitting window, then the target years.
        # Same for every series in this projection group, so built once.
        predict_years = [pgYearFrom] + future_years
        X_predict = np.array(predict_years).reshape(-1, 1)

        for (segid, source), group in aadt_by_series:

            # Keep only the years inside the window that are not excluded
            in_window = (group['YEAR'] >= pgYearFrom) & (group['YEAR'] <= pgYearTo)
            excluded = group['YEAR'].isin(pgYearsExclude)
            filtered_group = group[in_window & ~excluded]

            # Nothing to fit: log the series and move on
            if filtered_group.empty:
                error_msg = f"Skipping empty group for SEGID: {segid}, SOURCE: {source}, Projection Group: {pgName}"
                print(error_msg)
                err_file.write(error_msg + "\n")
                continue

            X = filtered_group['YEAR'].values.reshape(-1, 1)
            y = filtered_group['AADT'].values

            model = LinearRegression()
            model.fit(X, y)

            # Predict, then round the forecasts to the nearest integers
            predicted = np.rint(model.predict(X_predict)).astype(int)

            forecast_records.extend(
                (segid, source, pgName, year, int(value))
                for year, value in zip(predict_years, predicted)
            )

# Build the long-format result in one step; an empty record list still yields a
# frame with the expected columns.
forecast_results = pd.DataFrame(forecast_records, columns=forecast_columns)

# Now forecast_results contains the forecasts for the specified future years, along with SEGID, SOURCE, and Projection Group
display(forecast_results)


'Since 1981'

'Since 2011 w/o 2020'

'Since 2001 w/o 2020'

Unnamed: 0,SEGID,SOURCE,PROJGRP,YEAR,linForecast
0,0006_000.0,AADTHistory.xlsx,Since 1981,1981,465
1,0006_000.0,AADTHistory.xlsx,Since 1981,2023,357
2,0006_000.0,AADTHistory.xlsx,Since 1981,2028,344
3,0006_000.0,AADTHistory.xlsx,Since 1981,2032,334
4,0006_000.0,AADTHistory.xlsx,Since 1981,2042,308
...,...,...,...,...,...
127957,3483_000.0,AADTHistory.xlsx,Since 2001 w/o 2020,2023,1280
127958,3483_000.0,AADTHistory.xlsx,Since 2001 w/o 2020,2028,1527
127959,3483_000.0,AADTHistory.xlsx,Since 2001 w/o 2020,2032,1725
127960,3483_000.0,AADTHistory.xlsx,Since 2001 w/o 2020,2042,2219


In [6]:
# Export the long-format forecasts (SEGID, SOURCE, PROJGRP, YEAR, linForecast) to CSV, without the index column
forecast_results.to_csv('intermediate/linear-forecasts.csv', index=False)

In [7]:
# Check: show only the forecasts produced by the 'Since 1981' projection group
forecast_results[forecast_results['PROJGRP']=='Since 1981']

Unnamed: 0,SEGID,SOURCE,PROJGRP,YEAR,linForecast
0,0006_000.0,AADTHistory.xlsx,Since 1981,1981,465
1,0006_000.0,AADTHistory.xlsx,Since 1981,2023,357
2,0006_000.0,AADTHistory.xlsx,Since 1981,2028,344
3,0006_000.0,AADTHistory.xlsx,Since 1981,2032,334
4,0006_000.0,AADTHistory.xlsx,Since 1981,2042,308
...,...,...,...,...,...
42649,3483_000.0,AADTHistory.xlsx,Since 1981,2023,1257
42650,3483_000.0,AADTHistory.xlsx,Since 1981,2028,1494
42651,3483_000.0,AADTHistory.xlsx,Since 1981,2032,1684
42652,3483_000.0,AADTHistory.xlsx,Since 1981,2042,2158
