In [28]:
import pandas as pd
import os
import geopandas as gpd
import re # regular expression
import numpy as np
from sklearn.linear_model import LinearRegression

In [29]:
# GLOBAL VARIABLES
# Years at which the fitted trend lines are evaluated: the two lists are
# concatenated (historic first, then future) to form the forecast output columns.
historic_years = [
    2000,
    2021,
]
future_years = [
    2023,
    2028,
    2032,
    2042,
    2050,
]

In [35]:
# Projection groups for the linear forecasts.  Each row is:
#   pgName         - label of the projection group
#   pgYearFrom     - first year (inclusive) of the fitting window
#   pgYearTo       - last year (inclusive) of the fitting window
#   pgYearsExclude - years dropped from the window before fitting
# The "without 2020" windows start one year earlier than their counterparts,
# so they still span 10 / 5 years once 2020 is removed.
# NOTE(review): `{}` is an empty dict literal, not an empty set; it is kept
# as-is because it is serialised to the CSV as `{}`.
projection_group_columns = ('pgName','pgYearFrom','pgYearTo','pgYearsExclude')

projection_group_rows = [
    ["Last 10 Years", 2012, 2021, {}],
    ["Last 5 Years", 2017, 2021, {}],
    ["Since 1981", 1981, 2021, {}],
    ["Last 10 Years without 2020", 2011, 2021, {2020}],
    ["Last 5 Years without 2020", 2016, 2021, {2020}],
    ["Since 2000 without 2020", 2000, 2021, {2020}],
]

dfProjGroupsLinear = pd.DataFrame(projection_group_rows, columns=projection_group_columns)

# Save the group definitions (without the row index), then show them
dfProjGroupsLinear.to_csv('intermediate/projection-groups.csv', index=False)

display(dfProjGroupsLinear)

Unnamed: 0,pgName,pgYearFrom,pgYearTo,pgYearsExclude
0,Last 10 Years,2012,2021,{}
1,Last 5 Years,2017,2021,{}
2,Since 1981,1981,2021,{}
3,Last 10 Years without 2020,2011,2021,{2020}
4,Last 5 Years without 2020,2016,2021,{2020}
5,Since 2000 without 2020,2000,2021,{2020}


In [31]:
# Load the AADT observations from the intermediate CSV.
# The displayed frame has one row per SEGID / YEAR / SOURCE with an AADT value;
# presumably the file is written by an earlier notebook -- TODO confirm.
dfAadt = pd.read_csv('intermediate/aadt.csv')
dfAadt

Unnamed: 0,SEGID,YEAR,AADT,SOURCE
0,0006_146.9,2019,1517,Segments_WF - 2023-08-01.shp
1,0006_149.9,2019,2441,Segments_WF - 2023-08-01.shp
2,0006_150.6,2019,2441,Segments_WF - 2023-08-01.shp
3,0006_152.6,2019,2417,Segments_WF - 2023-08-01.shp
4,0006_152.9,2019,3759,Segments_WF - 2023-08-01.shp
...,...,...,...,...
159921,3465_000.0,2021,493,AADTHistory.xlsx
159922,3466_000.0,2021,7681,AADTHistory.xlsx
159923,3466_000.1,2021,7681,AADTHistory.xlsx
159924,3466_000.3,2021,7681,AADTHistory.xlsx


In [37]:
# Linear forecasts with assist from ChatGPT
# https://chat.openai.com/share/d127492a-ad78-4f45-afd0-50e29069db1a
#
# For every projection group and every (SEGID, SOURCE) series in dfAadt, fit a
# least-squares line AADT ~ YEAR over the group's year window (minus excluded
# years) and evaluate it at historic_years + future_years.  Series with no
# observations left in the window are skipped; each skip is printed and also
# written to intermediate/linear-forecasts.txt.

# Years at which every fitted line is evaluated (loop-invariant, so built once)
forecast_years = historic_years + future_years
forecast_X = np.array(forecast_years).reshape(-1, 1)

# Split the AADT table once instead of re-grouping it for each projection group
aadt_groups = list(dfAadt.groupby(['SEGID', 'SOURCE']))

# One plain dict per forecast row.  Collecting records and building a single
# DataFrame at the end avoids creating and concatenating thousands of
# one-row DataFrames.
forecast_results_list = []

# Open the log file that records skipped (empty) series
with open('intermediate/linear-forecasts.txt', 'w') as err_file:
    # Loop through the projection groups
    for row in dfProjGroupsLinear.itertuples(index=False):
        pgName = row.pgName
        pgYearFrom = row.pgYearFrom
        pgYearTo = row.pgYearTo
        pgYearsExclude = set(row.pgYearsExclude)

        # Iterate through the (SEGID, SOURCE) series
        for (segid, source), group in aadt_groups:

            # Keep only the observations inside the window and not excluded
            years = group['YEAR']
            in_window = (years >= pgYearFrom) & (years <= pgYearTo)
            filtered_group = group[in_window & ~years.isin(pgYearsExclude)]

            # Nothing to fit: log and move on to the next series
            if filtered_group.shape[0] == 0:
                error_msg = f"Skipping empty group for SEGID: {segid}, SOURCE: {source}, Projection Group: {pgName}"
                print (error_msg)
                err_file.write(error_msg + "\n")
                continue

            X = filtered_group['YEAR'].values.reshape(-1, 1)
            y = filtered_group['AADT'].values

            # NOTE(review): a series with a single observation in the window
            # still gets a forecast (the displayed output contains constant
            # rows such as 3,3,3,...) -- confirm that this is intended.
            model = LinearRegression()
            model.fit(X, y)

            # Predict for the historic and future years, rounded to the nearest integer
            aadt = np.rint(model.predict(forecast_X)).astype(int)

            # Identifying columns first, then one column per forecast year
            result_dict = {'SEGID': segid, 'SOURCE': source, 'Projection Group': pgName}
            result_dict.update(zip(forecast_years, aadt))
            forecast_results_list.append(result_dict)

# Build the result table in one step.  Passing the columns explicitly keeps the
# schema stable even when every series was skipped (pd.concat would raise on an
# empty list); dict.fromkeys drops any year listed twice while keeping order.
forecast_columns = ['SEGID', 'SOURCE', 'Projection Group'] + list(dict.fromkeys(forecast_years))
forecast_results = pd.DataFrame(forecast_results_list, columns=forecast_columns)

# Now forecast_results contains the forecasts for the specified years, along with SEGID, SOURCE, and Projection Group
display(forecast_results)


Skipping empty group for SEGID: WFRC_8221, SOURCE: Segments_WF - 2023-08-01.shp, Projection Group: Last 5 Years
Skipping empty group for SEGID: WFRC_8222, SOURCE: Segments_WF - 2023-08-01.shp, Projection Group: Last 5 Years
Skipping empty group for SEGID: WFRC_8223, SOURCE: Segments_WF - 2023-08-01.shp, Projection Group: Last 5 Years
Skipping empty group for SEGID: WFRC_8221, SOURCE: Segments_WF - 2023-08-01.shp, Projection Group: Last 5 Years without 2020
Skipping empty group for SEGID: WFRC_8222, SOURCE: Segments_WF - 2023-08-01.shp, Projection Group: Last 5 Years without 2020
Skipping empty group for SEGID: WFRC_8223, SOURCE: Segments_WF - 2023-08-01.shp, Projection Group: Last 5 Years without 2020


Unnamed: 0,SEGID,SOURCE,Projection Group,2000,2021,2023,2028,2032,2042,2050
0,0006_146.9,AADTHistory.xlsx,Last 10 Years,686,1667,1760,1994,2181,2648,3022
1,0006_146.9,Segments_WF - 2023-08-01.shp,Last 10 Years,904,1569,1633,1791,1918,2235,2488
2,0006_149.9,AADTHistory.xlsx,Last 10 Years,1817,2676,2758,2963,3126,3536,3863
3,0006_149.9,Segments_WF - 2023-08-01.shp,Last 10 Years,2141,2532,2569,2662,2737,2923,3072
4,0006_150.6,AADTHistory.xlsx,Last 10 Years,1817,2676,2758,2963,3126,3536,3863
...,...,...,...,...,...,...,...,...,...,...
44341,WFRC_8221,Segments_WF - 2023-08-01.shp,Since 2000 without 2020,3,3,3,3,3,3,3
44342,WFRC_8222,Segments_WF - 2023-08-01.shp,Since 2000 without 2020,3,3,3,3,3,3,3
44343,WFRC_8223,Segments_WF - 2023-08-01.shp,Since 2000 without 2020,3,3,3,3,3,3,3
44344,WFRC_8262,Segments_WF - 2023-08-01.shp,Since 2000 without 2020,13610,8859,8406,7275,6370,4107,2297


In [38]:
# Write the forecast table to CSV.  The row index is written as the first
# (unnamed) column, which is the pandas default made explicit here.
# NOTE(review): the projection-groups export uses index=False -- confirm the
# index column is wanted in this file.
forecast_results.to_csv('intermediate/linear-forecasts.csv', index=True)