In [1]:
import os
from datetime import datetime

import pandas as pd
from arcgis import GIS
import requests
import arcpy

gis = GIS()

# Run switches read by later cells of this notebook.
bRedownloadLatest = True # True: download the forecast feature class from AGOL and save a new timestamped backup; False: reuse the most recent backup on disk
bExportFeatureClass = True # export feature class
bClearFlags = True # True: drop every FL_* (flag) column from the loaded backup
bClearOverrides = False # True: drop every OV_* (override) column from the loaded backup and re-create each one as 0

In [3]:
# forecast areas that have been locked and must not be updated
lstLockForecastArea = ['WFRC']

# Segment shapefile used for each subarea: one row per shapefile with its list
# of subareas, exploded into one row per (SOURCE, SUBAREAID) pair
dfSegmentSource = pd.DataFrame({
    'SOURCE'   : ['Segments_State_20231221_Draft.shp', 'WF_Segments_20240326_Draft.shp'],
    'SUBAREAID': [[0, 2, 3, 4, 5], [1]],
}).explode('SUBAREAID')

# columns kept from every segment shapefile
seg_cols = ['SEGID', 'CO_FIPS', 'PLANAREA', 'SUBAREAID', 'F_AREA', 'SHAPE']

# observed volume source per subarea (SHOULD ONLY HAVE ONE SOURCE PER SUBAREA)
dfObsVolSource = pd.read_csv('intermediate/obsvolsource.csv')
dfObsVolSource

Unnamed: 0,SOURCE,SUBAREAID
0,AADTHistory_2023.xlsx,0
1,AADTHistory.xlsx,2
2,AADTHistory.xlsx,3
3,AADTHistory.xlsx,4
4,AADTHistory.xlsx,5
5,WF_Segments_20240326_Draft.shp,1


In [4]:
# Prep flag categories
#
# One row per review flag:
#   flagName        - name of the boolean column added to the segment table
#   flagDescription - human-readable label
#   flagCriteria    - boolean expression in which [NAME] stands for the column NAME;
#                     a later cell rewrites every [NAME] to _sdfWithLatest['NAME']
#                     and executes the result, so these strings must remain valid
#                     pandas expressions after that substitution.
# The table is also written to flags.json for the site, so the strings are
# consumed outside this notebook as well.

# flag name must be short enough to be in dbf column name
dfFlags = pd.DataFrame([
    ['FL_REV'   , 'Not reviewed'                    , "([NOTES]=='') & ([NOTES_FURR]=='')"],
    ['FL_LTPRV' , 'Less than previous forecast year', "(([MF2023]+[ADJ2023])<([M2019]+[aadtAdjFac])) | (([MF2028]+[ADJ2028])<([MF2023]+[ADJ2023])) | (([MF2032]+[ADJ2032])<([MF2028]+[ADJ2028])) | (([MF2042]+[ADJ2042])<([MF2032]+[ADJ2032])) | (([MF2050]+[ADJ2050])<([MF2042]+[ADJ2042]))"],
    ['FL_ZERO'  , 'Zero volume'                     , "(([MF2023]+[ADJ2023])<=0) | (([MF2028]+[ADJ2028])<=0) | (([MF2032]+[ADJ2032])<=0) | (([MF2042]+[ADJ2042])<=0) | (([MF2050]+[ADJ2050])<=0)"],
    ['FL_HIADJ' , 'Large model adjustment factor '  , "((([aadtAdjFac]/([M2019]+[aadtAdjFac]))>0.95) | (([aadtAdjFac]/([M2019]+[aadtAdjFac]))<-0.95))"],
    ['FL_50LT19', 'HPMS: 2050 < 2019'               , "(([M2019]+[aadtAdjFac])>0) & (([MF2050]+[ADJ2050])<([M2019]+[aadtAdjFac]))"],
    ['FL_503X19', 'HPMS: 2050 > 3 x 2019'           , "(([M2019]+[aadtAdjFac])>0) & (([MF2050]+[ADJ2050])>(3*([M2019]+[aadtAdjFac])))"],
    ['FL_FURREV', 'Further Review'                  , "([NOTES_FURR]!='')"],
    ['FL_SEG'   , 'Segment Note'                    , "([NOTES_SEG]!='')"]
], columns=('flagName','flagDescription','flagCriteria'))

dfFlags.to_json('_site-art-of-forecasting/data/flags.json', orient='records')

In [5]:
# CO_FIPS -> county attributes, keyed by the integer county FIPS code
county_mapping = {
    fips: {'CO_NAME': name}
    for fips, name in [
        ( 1, 'Beaver'    ),
        ( 3, 'Box Elder' ),
        ( 5, 'Cache'     ),
        ( 7, 'Carbon'    ),
        ( 9, 'Daggett'   ),
        (11, 'Davis'     ),
        (13, 'Duchesne'  ),
        (15, 'Emery'     ),
        (17, 'Garfield'  ),
        (19, 'Grand'     ),
        (21, 'Iron'      ),
        (23, 'Juab'      ),
        (25, 'Kane'      ),
        (27, 'Millard'   ),
        (29, 'Morgan'    ),
        (31, 'Piute'     ),
        (33, 'Rich'      ),
        (35, 'Salt Lake' ),
        (37, 'San Juan'  ),
        (39, 'Sanpete'   ),
        (41, 'Sevier'    ),
        (43, 'Summit'    ),
        (45, 'Tooele'    ),
        (47, 'Uintah'    ),
        (49, 'Utah'      ),
        (51, 'Wasatch'   ),
        (53, 'Washington'),
        (55, 'Wayne'     ),
        (57, 'Weber'     ),
    ]
}


# Create JSONs

In [6]:
# CSV -> JSON: each intermediate table listed below is re-written as a
# records-oriented JSON file in the site's data folder
lstSegidFiles = ['model-forecasts']
lstNonSegidFiles = ['aadt-sources', 'projection-groups']

for file in lstSegidFiles + lstNonSegidFiles:
    _csv_table = pd.read_csv(f'intermediate/{file}.csv')
    _csv_table.to_json(f'_site-art-of-forecasting/data/{file}.json', orient='records')

In [7]:
# distinct SEGID / SUBAREAID pairs present in the model forecasts
dfSegSub = pd.read_csv('intermediate/model-forecasts.csv', usecols=['SEGID', 'SUBAREAID'])
dfSegSub = dfSegSub.drop_duplicates()
display(dfSegSub)

Unnamed: 0,SEGID,SUBAREAID
0,0013_000.0,
6,0013_000.6,
12,0013_001.3,
18,0013_001.6,
24,0013_001.8,
...,...,...
55767,MAG_6851,1.0
55773,MAG_6852,1.0
55779,MAG_6853,1.0
55785,WFRC_8119,1.0


In [8]:
# AADT history with each segment's SUBAREAID attached (inner join on SEGID)
dfAADT = pd.read_csv('intermediate/aadt.csv')
dfAADTWithSub = dfAADT.merge(dfSegSub, on='SEGID')
display(dfAADTWithSub)

# Linear forecasts with each segment's SUBAREAID attached (inner join on SEGID)
dfLF = pd.read_csv('intermediate/linear-forecasts.csv')
dfLFWithSub = dfLF.merge(dfSegSub, on='SEGID')
display(dfLFWithSub)


Unnamed: 0,SOURCE,SEGID,YEAR,AADT,PLANAREA,F_AREA,SUBAREAID
0,Segments_State_20231221_Draft.shp,0013_000.0,2019,19514,WFRC,WFRC,
1,WF_Segments_20240326_Draft.shp,0013_000.0,1983,11780,WFRC,WFRC,
2,WF_Segments_20240326_Draft.shp,0013_000.0,1984,11910,WFRC,WFRC,
3,WF_Segments_20240326_Draft.shp,0013_000.0,1985,12035,WFRC,WFRC,
4,WF_Segments_20240326_Draft.shp,0013_000.0,1986,12075,WFRC,WFRC,
...,...,...,...,...,...,...,...
317337,AADTHistory_2023.xlsx,3483_000.0,2020,1041,UDOT,UDOT,0.0
317338,AADTHistory_2023.xlsx,3483_000.0,2021,1116,UDOT,UDOT,0.0
317339,AADTHistory_2023.xlsx,3483_000.0,2022,1104,UDOT,UDOT,0.0
317340,AADTHistory_2023.xlsx,3483_000.0,2023,1151,UDOT,UDOT,0.0


Unnamed: 0,SEGID,PLANAREA,F_AREA,SOURCE,PROJGRP,YEAR,linForecast,SUBAREAID
0,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,1981,457,0.0
1,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2023,373,0.0
2,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2028,363,0.0
3,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2032,355,0.0
4,0006_000.0,UDOT,UDOT,AADTHistory_2023.xlsx,Since 1981,2042,335,0.0
...,...,...,...,...,...,...,...,...
343579,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2023,27464,
343580,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2028,29782,
343581,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2032,31636,
343582,WFRC_8467,WFRC,WFRC,WFv901_Segments_20240226_Draft.shp,Since 2001 w/o 2020,2042,36272,


In [9]:
# Show the SEGID / SUBAREAID lookup again for reference
dfSegSub

Unnamed: 0,SEGID,SUBAREAID
0,0013_000.0,
6,0013_000.6,
12,0013_001.3,
18,0013_001.6,
24,0013_001.8,
...,...,...
55767,MAG_6851,1.0
55773,MAG_6852,1.0
55779,MAG_6853,1.0
55785,WFRC_8119,1.0


In [10]:
# Spot check: SUBAREAID recorded for segment 0006_222.1
dfSegSub[dfSegSub['SEGID']=='0006_222.1']

Unnamed: 0,SEGID,SUBAREAID
36021,0006_222.1,0.0


In [11]:
# Keep only the AADT rows whose (SOURCE, SUBAREAID) pair is listed in
# dfObsVolSource, drop the two join columns, then write the site JSON
dfAADTWithSubObsVolSource = (
    dfAADTWithSub
    .merge(dfObsVolSource, on=['SOURCE', 'SUBAREAID'])
    .drop(columns=['SOURCE', 'SUBAREAID'])
)
dfAADTWithSubObsVolSource.to_json('_site-art-of-forecasting/data/aadt.json', orient='records')
display(dfAADTWithSubObsVolSource)

# Same filter for the linear forecasts
dfLFWithSubObsVolSource = (
    dfLFWithSub
    .merge(dfObsVolSource, on=['SOURCE', 'SUBAREAID'])
    .drop(columns=['SOURCE', 'SUBAREAID'])
)
dfLFWithSubObsVolSource.to_json('_site-art-of-forecasting/data/linear-forecasts.json', orient='records')
display(dfLFWithSubObsVolSource)

Unnamed: 0,SEGID,YEAR,AADT,PLANAREA,F_AREA
0,0131_000.0,2016,1000,WFRC,
1,0131_000.0,2017,1000,WFRC,
2,0131_000.0,2018,1013,WFRC,
3,0131_000.0,2019,1023,WFRC,
4,0131_000.0,2020,890,WFRC,
...,...,...,...,...,...
107266,3483_000.0,2019,1042,UDOT,UDOT
107267,3483_000.0,2020,1041,UDOT,UDOT
107268,3483_000.0,2021,1116,UDOT,UDOT
107269,3483_000.0,2022,1104,UDOT,UDOT


Unnamed: 0,SEGID,PLANAREA,F_AREA,PROJGRP,YEAR,linForecast
0,0006_000.0,UDOT,UDOT,Since 1981,1981,457
1,0006_000.0,UDOT,UDOT,Since 1981,2023,373
2,0006_000.0,UDOT,UDOT,Since 1981,2028,363
3,0006_000.0,UDOT,UDOT,Since 1981,2032,355
4,0006_000.0,UDOT,UDOT,Since 1981,2042,335
...,...,...,...,...,...,...
68215,1271_000.5,Cache,Cache,Since 2001 w/o 2020,2023,2063
68216,1271_000.5,Cache,Cache,Since 2001 w/o 2020,2028,2129
68217,1271_000.5,Cache,Cache,Since 2001 w/o 2020,2032,2182
68218,1271_000.5,Cache,Cache,Since 2001 w/o 2020,2042,2315


In [12]:
# Spot check: AADT rows kept for segment 1695_000.0 after the observed-source filter
dfAADTWithSubObsVolSource[dfAADTWithSubObsVolSource['SEGID']=='1695_000.0']

Unnamed: 0,SEGID,YEAR,AADT,PLANAREA,F_AREA
98419,1695_000.0,2014,200,UDOT,UDOT
98420,1695_000.0,2015,210,UDOT,UDOT
98421,1695_000.0,2016,220,UDOT,UDOT
98422,1695_000.0,2017,4009,UDOT,UDOT
98423,1695_000.0,2018,4049,UDOT,UDOT
98424,1695_000.0,2019,4122,UDOT,UDOT
98425,1695_000.0,2020,3681,UDOT,UDOT
98426,1695_000.0,2021,3990,UDOT,UDOT
98427,1695_000.0,2022,4030,UDOT,UDOT
98428,1695_000.0,2023,4086,UDOT,UDOT


# Create Segment Feature Class

In [13]:
# Show which segment shapefile (SOURCE) is used for each SUBAREAID
dfSegmentSource

Unnamed: 0,SOURCE,SUBAREAID
0,Segments_State_20231221_Draft.shp,0
0,Segments_State_20231221_Draft.shp,2
0,Segments_State_20231221_Draft.shp,3
0,Segments_State_20231221_Draft.shp,4
0,Segments_State_20231221_Draft.shp,5
1,WF_Segments_20240326_Draft.shp,1


In [14]:
# Read each segment shapefile once, keep only the rows whose SUBAREAID is
# assigned to that shapefile in dfSegmentSource, and stack the pieces into a
# single spatially enabled dataframe restricted to seg_cols.
_segment_frames = []

for source in dfSegmentSource['SOURCE'].drop_duplicates():
    print(source)
    _df = pd.DataFrame.spatial.from_featureclass('data/segments/' + source)

    _subareasFilter = dfSegmentSource.loc[dfSegmentSource['SOURCE'] == source, 'SUBAREAID'].drop_duplicates()

    # filter out only segments with SUBAREAID that matches source;
    # .copy() makes the result an independent frame so the F_AREA assignment
    # below does not write to a slice of _df (SettingWithCopyWarning)
    _df_filtered = _df[_df['SUBAREAID'].isin(_subareasFilter)].copy()

    if 'F_AREA' not in _df_filtered.columns:
        # If 'F_AREA' doesn't exist, create it and set its value equal to 'PLANAREA'
        _df_filtered['F_AREA'] = _df_filtered['PLANAREA']

    _segment_frames.append(_df_filtered[seg_cols])

# Single concat instead of growing the frame inside the loop. The list is
# reversed so later sources come first, matching the row order produced by the
# previous "prepend on every iteration" approach.
if _segment_frames:
    sdfSegmentsCombined = pd.concat(_segment_frames[::-1])
else:
    sdfSegmentsCombined = pd.DataFrame()

sdfSegmentsCombined


Segments_State_20231221_Draft.shp
WF_Segments_20240326_Draft.shp


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _df_filtered['F_AREA'] = _df_filtered['PLANAREA']


Unnamed: 0,SEGID,CO_FIPS,PLANAREA,SUBAREAID,F_AREA,SHAPE
0,0006_141.0,49.0,MAG,1.0,MAG,"{""paths"": [[[405824.11000000034, 4423860.33], ..."
1,0006_146.9,49.0,MAG,1.0,MAG,"{""paths"": [[[413442.55030000024, 4422753.7282]..."
2,0006_149.9,49.0,MAG,1.0,MAG,"{""paths"": [[[418330.7999999998, 4422866], [418..."
3,0006_150.6,49.0,MAG,1.0,MAG,"{""paths"": [[[419421.8803000003, 4422872.2963],..."
4,0006_152.6,49.0,MAG,1.0,MAG,"{""paths"": [[[422596.89969999995, 4422889.2949]..."
...,...,...,...,...,...,...
8427,UDOT_7090,45.0,UDOT,0.0,UDOT,"{""paths"": [[[388880.12200000044, 4497794.5009]..."
8428,UDOT_7091,45.0,UDOT,0.0,UDOT,"{""paths"": [[[388884.62009999994, 4491344.6401]..."
8429,UDOT_7092,45.0,UDOT,0.0,UDOT,"{""paths"": [[[392102.12590000033, 4494203.1196]..."
8430,UDOT_7093,45.0,UDOT,0.0,UDOT,"{""paths"": [[[394365.53309999965, 4504536.84039..."


In [15]:
# (SEGID, removeSUBAREAID) pairs; the next cell removes the segment rows whose
# SEGID and SUBAREAID match one of these pairs
duplicateSegsInTdms = pd.read_csv('intermediate/duplicateSegsInTdms.csv')
duplicateSegsInTdms

Unnamed: 0,SEGID,removeSUBAREAID
0,0015_042.2,3
1,0065_002.4,0
2,0150_029.4,0
3,0150_030.0,0
4,0150_033.3,0
5,1822_000.0,0
6,2863_000.0,4
7,2865_019.4,4


In [16]:
# Anti-join against duplicateSegsInTdms:
#   1. left-merge on SEGID + SUBAREAID/removeSUBAREAID with a merge indicator
#   2. keep only the rows that found no match ('left_only')
#   3. drop the helper columns so the original segment columns remain
sdfSegments = (
    sdfSegmentsCombined
    .merge(
        duplicateSegsInTdms,
        how='left',
        left_on=['SEGID', 'SUBAREAID'],
        right_on=['SEGID', 'removeSUBAREAID'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['removeSUBAREAID', '_merge'])
)
sdfSegments

Unnamed: 0,SEGID,CO_FIPS,PLANAREA,SUBAREAID,F_AREA,SHAPE
0,0006_141.0,49.0,MAG,1.0,MAG,"{'paths': [[[405824.11000000034, 4423860.33], ..."
1,0006_146.9,49.0,MAG,1.0,MAG,"{'paths': [[[413442.55030000024, 4422753.7282]..."
2,0006_149.9,49.0,MAG,1.0,MAG,"{'paths': [[[418330.7999999998, 4422866], [418..."
3,0006_150.6,49.0,MAG,1.0,MAG,"{'paths': [[[419421.8803000003, 4422872.2963],..."
4,0006_152.6,49.0,MAG,1.0,MAG,"{'paths': [[[422596.89969999995, 4422889.2949]..."
...,...,...,...,...,...,...
9640,UDOT_7090,45.0,UDOT,0.0,UDOT,"{'paths': [[[388880.12200000044, 4497794.5009]..."
9641,UDOT_7091,45.0,UDOT,0.0,UDOT,"{'paths': [[[388884.62009999994, 4491344.6401]..."
9642,UDOT_7092,45.0,UDOT,0.0,UDOT,"{'paths': [[[392102.12590000033, 4494203.1196]..."
9643,UDOT_7093,45.0,UDOT,0.0,UDOT,"{'paths': [[[394365.53309999965, 4504536.84039..."


In [17]:
# Check: list every row whose SEGID still occurs more than once
# (keep=False marks all copies, so an empty result means SEGID is unique)
dfDuplicates = sdfSegments[sdfSegments.duplicated(subset='SEGID', keep=False)]
dfDuplicates

Unnamed: 0,SEGID,CO_FIPS,PLANAREA,SUBAREAID,F_AREA,SHAPE


# Get Latest Download with Adjustments to be applied to Segments

In [18]:
# Download the attribute table of the forecast segments layer page by page and
# save it as a timestamped CSV backup. Any failed request raises, so a partial
# download is never written (a later cell loads the newest backup as "latest").
if bRedownloadLatest:

    # Define the URL
    url = "https://services1.arcgis.com/taguadKoI1XFwivx/arcgis/rest/services/Forecasts_gdb/FeatureServer/0/query"

    # Base parameters for the request
    params = {
        "f": "json",
        "where": "1=1",
        "outFields": "*",
        "returnGeometry": "false",  # Assuming you only want attributes
    }

    all_features = []

    # Pagination parameters
    batch_size = 1000
    offset = 0

    while True:
        # Adjust parameters for pagination
        params['resultOffset'] = offset
        params['resultRecordCount'] = batch_size

        # Send the request (timeout in seconds so a stalled connection cannot hang the notebook)
        response = requests.get(url, params=params, timeout=120)

        # Stop with an exception instead of saving whatever was fetched so far
        if response.status_code != 200:
            raise RuntimeError(f"Error fetching data: {response.status_code}")

        data = response.json()

        # ArcGIS REST can report a failure as HTTP 200 with an "error" object in the body;
        # without this check it would look like "no more features"
        if 'error' in data:
            raise RuntimeError(f"Error fetching data: {data['error']}")

        features = data.get('features', [])

        # Check if no features were returned, which means we're done
        if not features:
            break

        # Append features to the master list
        all_features.extend(features)

        # Advance by the number of rows actually returned: the server may return
        # fewer than batch_size, and stepping by batch_size would then skip rows
        offset += len(features)

    if all_features:
        # Extract attribute table
        attribute_table = [feature['attributes'] for feature in all_features]

        # Convert to DataFrame
        df = pd.DataFrame(attribute_table)

        # Generate a filename with the current timestamp in the backup folder
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        os.makedirs('backup', exist_ok=True)
        filename = os.path.join('backup', f"forecastsegments_backup_{timestamp}.csv")

        # Save the DataFrame to a CSV file
        df.to_csv(filename, index=False)
        print(f"Data saved to {filename}")
    else:
        # An empty CSV could not be read back as the latest backup, so write nothing
        print("No features returned... no backup written, will use latest")
else:
    print("didn't download... will use latest")

Data saved to backup\forecastsegments_backup_20250219_160106.csv


In [19]:
# Download layer 0 of the forecasts_logfile service page by page and save it as
# a timestamped CSV backup. Any failed request raises, so a partial download is
# never written.

# Define the URL
url = "https://services1.arcgis.com/taguadKoI1XFwivx/arcgis/rest/services/forecasts_logfile/FeatureServer/0/query"

# Base parameters for the request
params = {
    "f": "json",
    "where": "1=1",
    "outFields": "*",
    "returnGeometry": "false",  # If it's a stand-alone table, geometry isn't required.
    "outSR": "4326",  # Specify output spatial reference if needed
    "returnExceededLimitFeatures": "true"
}

all_features = []

# Pagination parameters
batch_size = 2000
offset = 0

while True:
    # Adjust parameters for pagination
    params['resultOffset'] = offset
    params['resultRecordCount'] = batch_size

    # Send the request as a form-encoded POST body (data=) so the parameters are
    # not subject to URL length limits; params= would put them back in the URL.
    # Timeout in seconds so a stalled connection cannot hang the notebook.
    response = requests.post(url, data=params, timeout=120)

    # Stop with an exception instead of saving whatever was fetched so far
    if response.status_code != 200:
        raise RuntimeError(f"Error fetching data: {response.status_code}")

    data = response.json()

    # ArcGIS REST can report a failure as HTTP 200 with an "error" object in the body;
    # without this check it would look like "no more features"
    if 'error' in data:
        raise RuntimeError(f"Error fetching data: {data['error']}")

    features = data.get('features', [])

    # Check if no features were returned, which means we're done
    if not features:
        break

    # Append features to the master list
    all_features.extend(features)

    # Advance by the number of rows actually returned: the server may return
    # fewer than batch_size, and stepping by batch_size would then skip rows
    offset += len(features)

# Extract attribute table
attribute_table = [feature['attributes'] for feature in all_features]

# Convert to DataFrame
df = pd.DataFrame(attribute_table)

# Generate a filename with the current timestamp in the backup folder
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
os.makedirs('backup', exist_ok=True)
filename = os.path.join('backup', f"logfile_backup_{timestamp}.csv")

# Save the DataFrame to a CSV file
df.to_csv(filename, index=False)
print(f"Data saved to {filename}")

Data saved to backup\logfile_backup_20250219_160117.csv


In [20]:
#import datetime
#
## empty log file for AGOL
#dfLogFile = pd.DataFrame([
#    ['dummy','dummy',0,0,0,0,0,'dummy',datetime.datetime.now()]
#],columns=('SEGID','EDITKEY','ADJ2023','ADJ2028','ADJ2032','ADJ2042','ADJ2050','NOTES','TIMESTAMP'))
#
#dfLogFile.to_csv('results/forecasts-logfile.csv')
#
#dfLogFile

In [21]:
# Get data from last backup
#
# Loads the newest forecastsegments_backup_*.csv from the backup folder into
# dfLatest (an empty DataFrame when there is none) and optionally removes the
# FL_* / OV_* columns according to bClearFlags / bClearOverrides.

backup_dir = "backup"

# List all backup files in the directory; a missing folder is handled like an
# empty one so the "no backup" branch below is reached instead of an exception
if os.path.isdir(backup_dir):
    all_files = [f for f in os.listdir(backup_dir) if f.startswith('forecastsegments_backup_') and f.endswith('.csv')]
else:
    all_files = []

# The YYYYMMDD_HHMMSS stamp in the file name sorts chronologically as text
sorted_files = sorted(all_files, reverse=True)  # Latest timestamp will be first

# Check if there are any backup files
if sorted_files:
    latest_file = os.path.join(backup_dir, sorted_files[0])
    # Load the most recent backup into a dataframe
    dfLatest = pd.read_csv(latest_file,low_memory=False)

    # when duplicate errors we need to get rid of extra data
    # (GroupBy.first keeps, per SEGID, the first non-null value of every column)
    dfLatest = dfLatest.groupby('SEGID').first().reset_index()

    print(f"Loaded {latest_file} into a DataFrame")

    if bClearFlags:
        # Remove every flag column from the backup
        columns_to_delete = [col for col in dfLatest.columns if col.startswith('FL_')]
        dfLatest = dfLatest.drop(columns=columns_to_delete)

        print("Dropped FL_ columns")

    if bClearOverrides:
        # Remove every override column from the backup
        columns_to_delete = [col for col in dfLatest.columns if col.startswith('OV_')]
        dfLatest = dfLatest.drop(columns=columns_to_delete)

        print("Dropped OV_ columns")
else:
    dfLatest = pd.DataFrame()
    print("No backup files found in the directory.")

# You can then use dfLatest as your DataFrame


Loaded backup\forecastsegments_backup_20250219_160106.csv into a DataFrame
Dropped FL_ columns


In [22]:
# Show the de-duplicated segment table before joining forecast data to it
display(sdfSegments)

Unnamed: 0,SEGID,CO_FIPS,PLANAREA,SUBAREAID,F_AREA,SHAPE
0,0006_141.0,49.0,MAG,1.0,MAG,"{'paths': [[[405824.11000000034, 4423860.33], ..."
1,0006_146.9,49.0,MAG,1.0,MAG,"{'paths': [[[413442.55030000024, 4422753.7282]..."
2,0006_149.9,49.0,MAG,1.0,MAG,"{'paths': [[[418330.7999999998, 4422866], [418..."
3,0006_150.6,49.0,MAG,1.0,MAG,"{'paths': [[[419421.8803000003, 4422872.2963],..."
4,0006_152.6,49.0,MAG,1.0,MAG,"{'paths': [[[422596.89969999995, 4422889.2949]..."
...,...,...,...,...,...,...
9640,UDOT_7090,45.0,UDOT,0.0,UDOT,"{'paths': [[[388880.12200000044, 4497794.5009]..."
9641,UDOT_7091,45.0,UDOT,0.0,UDOT,"{'paths': [[[388884.62009999994, 4491344.6401]..."
9642,UDOT_7092,45.0,UDOT,0.0,UDOT,"{'paths': [[[392102.12590000033, 4494203.1196]..."
9643,UDOT_7093,45.0,UDOT,0.0,UDOT,"{'paths': [[[394365.53309999965, 4504536.84039..."


In [23]:
# Daily model volumes (DY_VOL) reshaped wide: one row per segment key
# combination and one DYVOL<year> column per YEAR value
dfModVolAdj = pd.read_csv('intermediate/model-forecasts.csv')

# YEAR -> column label: as text, with any ".0" removed, prefixed with DYVOL
_year_text = dfModVolAdj['YEAR'].astype(str).str.replace(".0","", regex=False)
dfModVolAdj['YEAR'] = 'DYVOL' + _year_text

_segment_keys = ['SEGID','SUBAREAID','FAC_WDAVG','FAC_SPR','FAC_FAL','FAC_SPRFAL','aadtAdjFac']
dfModVolAdj_pvDYVOL = dfModVolAdj.pivot_table(index=_segment_keys, columns='YEAR', values='DY_VOL').reset_index()

display(dfModVolAdj_pvDYVOL)


YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,DYVOL2032,DYVOL2042,DYVOL2050
0,0006_000.0,0.0,0.984000,1.027600,1.03160,1.02960,286.0,126.5,99.7,101.7,103.8,108.1,111.9
1,0006_000.7,0.0,0.984000,1.027600,1.03160,1.02960,195.0,216.6,220.5,218.6,218.1,219.3,219.2
2,0006_016.0,0.0,0.984000,1.027600,1.03160,1.02960,334.0,79.9,77.7,72.7,70.5,69.5,65.1
3,0006_046.0,0.0,0.984000,1.027600,1.03160,1.02960,291.0,79.9,77.7,72.7,70.5,69.5,65.1
4,0006_060.2,0.0,0.984000,1.027600,1.03160,1.02960,287.0,83.6,81.1,76.1,73.9,73.1,68.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6412,UDOT_7094,0.0,0.984000,1.027600,1.03160,1.02960,0.0,1.0,1.0,1.0,1.0,1.0,1.0
6413,WFRC_8119,1.0,1.091800,1.010700,1.03240,1.02155,0.0,1.0,4391.0,6917.4,7390.1,11818.8,12044.4
6414,WFRC_8474,1.0,1.062112,1.001589,1.01381,1.00770,0.0,1.0,1.0,1.0,1.0,1.0,1.0
6415,WFRC_8475,1.0,1.062112,1.001589,1.01381,1.00770,0.0,1.0,1.0,1.0,1.0,1.0,1.0


In [24]:
# read in intermediate data (once; every pivot below works on its own relabelled copy)
_dfModelForecasts = pd.read_csv('intermediate/model-forecasts.csv')

# columns that identify a row in every pivot and act as the merge keys
_pivot_keys = ['SEGID','SUBAREAID','FAC_WDAVG','FAC_SPR','FAC_FAL','FAC_SPRFAL','aadtAdjFac']


def _pivot_by_year(dfLong, prefix, value_col):
    """Reshape long model forecasts to one column per year.

    Parameters
    ----------
    dfLong : pandas.DataFrame
        Long table containing the ``_pivot_keys`` columns, ``YEAR`` and ``value_col``.
        It is not modified.
    prefix : str
        Text placed in front of the year to form the new column names
        (e.g. ``'M'`` turns 2019 into ``M2019``).
    value_col : str
        Column whose values fill the year columns.

    Returns
    -------
    pandas.DataFrame
        One row per ``_pivot_keys`` combination with one ``<prefix><year>``
        column per distinct year. ``pivot_table`` is called with its default
        aggregation, exactly as the per-pivot code it replaces.
    """
    # year as text with any ".0" removed, e.g. 2019.0 -> "2019"
    year_labels = prefix + dfLong['YEAR'].astype(str).str.replace(".0","", regex=False)
    return (
        dfLong
        .assign(YEAR=year_labels)
        .pivot_table(index=_pivot_keys, columns='YEAR', values=value_col)
        .reset_index()
    )


# model AADT -> M<year> columns, model forecast -> MF<year> columns
dfModVolAdj_pvModAadt = _pivot_by_year(_dfModelForecasts, 'M', 'modAadt')
dfModVolAdj_pvModAadtAdj = _pivot_by_year(_dfModelForecasts, 'MF', 'modForecast')

dfModVolAdj_pv1 = dfModVolAdj_pvModAadt.merge(dfModVolAdj_pvModAadtAdj, on=_pivot_keys)

# add the DYVOL<year> columns built in the previous cell
dfModVolAdj = dfModVolAdj_pvDYVOL.merge(dfModVolAdj_pv1, on=_pivot_keys)
display(dfModVolAdj)


YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,M2028,M2032,M2042,M2050,MF2019,MF2023,MF2028,MF2032,MF2042,MF2050
0,0006_000.0,0.0,0.984000,1.027600,1.03160,1.02960,286.0,126.5,99.7,101.7,...,103.0,105.0,110.0,114.0,400.0,400.0,400.0,400.0,400.0,400.0
1,0006_000.7,0.0,0.984000,1.027600,1.03160,1.02960,195.0,216.6,220.5,218.6,...,222.0,222.0,223.0,223.0,400.0,400.0,400.0,400.0,400.0,400.0
2,0006_016.0,0.0,0.984000,1.027600,1.03160,1.02960,334.0,79.9,77.7,72.7,...,74.0,72.0,71.0,66.0,400.0,400.0,400.0,400.0,400.0,400.0
3,0006_046.0,0.0,0.984000,1.027600,1.03160,1.02960,291.0,79.9,77.7,72.7,...,74.0,72.0,71.0,66.0,350.0,350.0,350.0,350.0,350.0,350.0
4,0006_060.2,0.0,0.984000,1.027600,1.03160,1.02960,287.0,83.6,81.1,76.1,...,77.0,75.0,74.0,70.0,350.0,350.0,350.0,350.0,350.0,350.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6412,UDOT_7094,0.0,0.984000,1.027600,1.03160,1.02960,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
6413,WFRC_8119,1.0,1.091800,1.010700,1.03240,1.02155,0.0,1.0,4391.0,6917.4,...,6336.0,6769.0,10825.0,11032.0,0.0,4000.0,6300.0,6800.0,11000.0,11000.0
6414,WFRC_8474,1.0,1.062112,1.001589,1.01381,1.00770,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
6415,WFRC_8475,1.0,1.062112,1.001589,1.01381,1.00770,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Spot check: wide model-forecast rows for SUBAREAID 1
dfModVolAdj[dfModVolAdj['SUBAREAID']==1]

YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,M2028,M2032,M2042,M2050,MF2019,MF2023,MF2028,MF2032,MF2042,MF2050
22,0006_141.0,1.0,0.984000,1.027600,1.03160,1.02960,-48.0,1540.0,1660.0,1717.0,...,1745.0,1893.0,2140.0,2366.0,1500.0,1600.0,1700.0,1800.0,2100.0,2300.0
23,0006_146.9,1.0,0.984000,1.027600,1.03160,1.02960,-54.0,1546.0,1668.0,1727.0,...,1755.0,1899.0,2141.0,2368.0,1500.0,1600.0,1700.0,1800.0,2100.0,2300.0
24,0006_149.9,1.0,0.984000,1.027600,1.03160,1.02960,843.0,1572.9,1694.9,2078.6,...,2112.0,2131.0,2670.0,5242.0,2400.0,2600.0,3000.0,3000.0,3500.0,6100.0
25,0006_150.6,1.0,0.984000,1.027600,1.03160,1.02960,653.0,1759.8,1889.0,2280.5,...,2318.0,2339.0,2916.0,5485.0,2400.0,2600.0,3000.0,3000.0,3600.0,6100.0
26,0006_152.6,1.0,0.984000,1.027600,1.03160,1.02960,-133.0,2509.3,2621.6,3018.6,...,3068.0,3137.0,3761.0,6110.0,2400.0,2500.0,2900.0,3000.0,3600.0,6000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6324,MAG_6853,1.0,1.156800,1.011200,1.01050,1.01085,0.0,1.0,1.0,71973.8,...,62218.0,89460.0,97478.0,157093.0,0.0,0.0,62000.0,89500.0,97500.0,157000.0
6413,WFRC_8119,1.0,1.091800,1.010700,1.03240,1.02155,0.0,1.0,4391.0,6917.4,...,6336.0,6769.0,10825.0,11032.0,0.0,4000.0,6300.0,6800.0,11000.0,11000.0
6414,WFRC_8474,1.0,1.062112,1.001589,1.01381,1.00770,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
6415,WFRC_8475,1.0,1.062112,1.001589,1.01381,1.00770,0.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
# Spot check: wide model-forecast row for segment MAG_6501
dfModVolAdj[dfModVolAdj['SEGID']=='MAG_6501']

YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,M2028,M2032,M2042,M2050,MF2019,MF2023,MF2028,MF2032,MF2042,MF2050
6008,MAG_6501,1.0,0.984,1.0276,1.0316,1.0296,0.0,4.0,4.0,226.0,...,230.0,273.0,294.0,288.0,0.0,0.0,250.0,250.0,300.0,300.0


In [27]:
# Build the segment table with forecasts, adjustments, notes, flags and overrides.
#   1. right-join the wide model forecasts onto the segments (every segment is kept)
#   2. add default adjustment / note columns
#   3. replace those defaults with the values from the latest download, when there is one
#   4. evaluate every flag criterion from dfFlags into a boolean FL_* column
#   5. make sure every flag has an OV_* override column

# join segment data and forecast data
_sdf = dfModVolAdj.merge(sdfSegments, on=['SEGID','SUBAREAID'], how='right')

# segments without model data get a model forecast of 0
# (assigned back rather than filled in place on a column selection, which is
# not reliable under pandas copy-on-write)
_mf_cols = ['MF2019','MF2023','MF2028','MF2032','MF2042','MF2050']
_sdf[_mf_cols] = _sdf[_mf_cols].fillna(0)

# add columns for Adjustments and notes (defaults)
for _col in ['ADJ2019','ADJ2023','ADJ2028','ADJ2032','ADJ2042','ADJ2050','ADJHIST']:
    _sdf[_col] = 0
_note_cols = ['NOTES','NOTES_FURR','NOTES_SEG']
for _col in _note_cols:
    _sdf[_col] = ""


# replace with latest file
if dfLatest.empty:
    print("The latest dataframe is empty.")
    # Nothing to carry over: continue with the defaults so the steps below
    # always have _sdfWithLatest to work on
    _sdfWithLatest = _sdf.copy()
else:
    print("The latest dataframe is not empty.")

    # Specify columns to include from latest download (FLAGS WILL NOT BE INCLUDED WHEN FILTERED PRIOR TO THIS STEP)
    columns_to_include = [
        col for col in dfLatest.columns
        if col.startswith(('ADJ', 'FL_', 'OV_')) or col in ('SEGID', 'NOTES', 'NOTES_FURR', 'NOTES_SEG')
    ]
    dfLatest_filtered = dfLatest[columns_to_include]

    # Merge on SEGID; where both sides have a column, the left (default) copy
    # gets the '_delete' suffix and is dropped, so the downloaded value wins
    _sdfWithLatest = _sdf.merge(dfLatest_filtered, on='SEGID', how='left', suffixes=('_delete', ''))
    _sdfWithLatest = _sdfWithLatest.drop(columns=[col for col in _sdfWithLatest.columns if col.endswith('_delete')])

# Normalise the note columns to stripped text; a missing value or the literal
# text '0' becomes an empty string
for _col in _note_cols:
    _notes = _sdfWithLatest[_col].astype(object).fillna('0').astype(str)
    _sdfWithLatest[_col] = _notes.where(_notes != '0', '').str.strip()

# Segments that are not in the latest download have no adjustment values after
# the left merge. Use 0 for them before the flags are evaluated: a comparison
# with a missing value is always False, which would leave such segments
# unflagged even though their adjustments are written out as 0 below.
_adj_cols = [col for col in _sdfWithLatest.columns if col.startswith('ADJ')]
_sdfWithLatest[_adj_cols] = _sdfWithLatest[_adj_cols].fillna(0)

# add flag columns
# Loop through the rows of 'dfFlags'
for index, row in dfFlags.iterrows():
    flag_name = row['flagName']
    flag_criteria = row['flagCriteria']

    # Turn every "[COL]" placeholder of the criteria into _sdfWithLatest['COL']
    criteria_expression = flag_criteria.replace("[", "_sdfWithLatest['").replace("]", "']")
    expression = "_sdfWithLatest['" + flag_name + "'] = " + criteria_expression
    print(expression)

    # Evaluate only the criteria, with the dataframe as the single visible name.
    # NOTE: this still runs text as code; dfFlags must only ever hold criteria
    # written in this notebook, never externally supplied text.
    _sdfWithLatest[flag_name] = eval(criteria_expression, {}, {'_sdfWithLatest': _sdfWithLatest})

    override_name = flag_name.replace('FL_','OV_')
    if bClearOverrides:
        display('flags overwritten')
        # add override column
        _sdfWithLatest[override_name] = 0
    elif override_name not in _sdfWithLatest.columns:
        # no override came from the latest download (e.g. no backup yet): start at 0
        _sdfWithLatest[override_name] = 0

_sdfWithLatest = _sdfWithLatest.fillna(0)

sdfSegmentsWData = _sdfWithLatest
sdfSegmentsWData

The latest dataframe is not empty.
_sdfWithLatest['FL_REV'] = (_sdfWithLatest['NOTES']=='') & (_sdfWithLatest['NOTES_FURR']=='')
_sdfWithLatest['FL_LTPRV'] = ((_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])<(_sdfWithLatest['M2019']+_sdfWithLatest['aadtAdjFac'])) | ((_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])<(_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])) | ((_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])<(_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])) | ((_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042'])<(_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])) | ((_sdfWithLatest['MF2050']+_sdfWithLatest['ADJ2050'])<(_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042']))
_sdfWithLatest['FL_ZERO'] = ((_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])<=0) | ((_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])<=0) | ((_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])<=0) | ((_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042'])<=0) | ((_sdfWithLatest['MF2050

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,OV_SEG,NOTES_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG
0,0006_141.0,1.0,0.9840,1.0276,1.0316,1.02960,-48.0,1540.0,1660.0,1717.0,...,0.0,,False,False,False,False,False,False,False,False
1,0006_146.9,1.0,0.9840,1.0276,1.0316,1.02960,-54.0,1546.0,1668.0,1727.0,...,0.0,,False,False,False,False,False,False,False,False
2,0006_149.9,1.0,0.9840,1.0276,1.0316,1.02960,843.0,1572.9,1694.9,2078.6,...,0.0,,False,False,False,False,False,False,False,False
3,0006_150.6,1.0,0.9840,1.0276,1.0316,1.02960,653.0,1759.8,1889.0,2280.5,...,0.0,,False,False,False,False,False,False,False,False
4,0006_152.6,1.0,0.9840,1.0276,1.0316,1.02960,-133.0,2509.3,2621.6,3018.6,...,0.0,,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9636,UDOT_7090,0.0,1.0918,1.0107,1.0324,1.02155,0.0,1.0,1.0,1.0,...,0.0,,True,True,True,False,True,False,False,False
9637,UDOT_7091,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,0.0,0.0,...,0.0,,True,True,True,False,False,True,False,False
9638,UDOT_7092,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,1.0,1.0,...,0.0,,True,True,True,False,True,False,False,False
9639,UDOT_7093,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,1.0,1.0,...,0.0,,False,True,True,False,True,False,False,False


In [28]:
# List the columns of the assembled segment table
_sdfWithLatest.columns

Index(['SEGID', 'SUBAREAID', 'FAC_WDAVG', 'FAC_SPR', 'FAC_FAL', 'FAC_SPRFAL',
       'aadtAdjFac', 'DYVOL2019', 'DYVOL2023', 'DYVOL2028', 'DYVOL2032',
       'DYVOL2042', 'DYVOL2050', 'M2019', 'M2023', 'M2028', 'M2032', 'M2042',
       'M2050', 'MF2019', 'MF2023', 'MF2028', 'MF2032', 'MF2042', 'MF2050',
       'CO_FIPS', 'PLANAREA', 'F_AREA', 'SHAPE', 'ADJHIST', 'ADJ2019',
       'ADJ2023', 'ADJ2028', 'ADJ2032', 'ADJ2042', 'ADJ2050', 'OV_REV',
       'OV_LTPRV', 'OV_ZERO', 'OV_HIADJ', 'OV_50LT19', 'OV_503X19',
       'OV_FURREV', 'NOTES', 'NOTES_FURR', 'OV_SEG', 'NOTES_SEG', 'FL_REV',
       'FL_LTPRV', 'FL_ZERO', 'FL_HIADJ', 'FL_50LT19', 'FL_503X19',
       'FL_FURREV', 'FL_SEG'],
      dtype='object')

In [29]:
# Review the note columns next to the further-review flag and its override
sdfSegmentsWData[['SEGID','SUBAREAID','NOTES','NOTES_FURR','NOTES_SEG','FL_FURREV','OV_FURREV']]

Unnamed: 0,SEGID,SUBAREAID,NOTES,NOTES_FURR,NOTES_SEG,FL_FURREV,OV_FURREV
0,0006_141.0,1.0,none,,,False,0.0
1,0006_146.9,1.0,smoothing,,,False,0.0
2,0006_149.9,1.0,large increase in 2050 due to I-15 congestion,,,False,0.0
3,0006_150.6,1.0,large increase in 2050 due to I-15 congestion,,,False,0.0
4,0006_152.6,1.0,large increase in 2050 due to I-15 congestion,,,False,0.0
...,...,...,...,...,...,...,...
9636,UDOT_7090,0.0,,,,False,0.0
9637,UDOT_7091,0.0,,,,False,0.0
9638,UDOT_7092,0.0,,,,False,0.0
9639,UDOT_7093,0.0,not in plan.,,,False,0.0


In [30]:
# Show only the non-empty NOTES_FURR entries
sdfSegmentsWData[sdfSegmentsWData['NOTES_FURR']!=""]['NOTES_FURR']

25                                     Removed adjustment
34                             Updated to match UDOT area
35                             Updated to match UDOT area
370                 Fixed segment dip in corridor traffic
408                         Corrected counts, matched tdm
                              ...                        
9608    It looks like the model may be overestimating ...
9616                          DO NOT INCLUDE IN FINAL MAP
9617                          DO NOT INCLUDE IN FINAL MAP
9618                          DO NOT INCLUDE IN FINAL MAP
9619                          DO NOT INCLUDE IN FINAL MAP
Name: NOTES_FURR, Length: 187, dtype: object

In [31]:
# Add CO_NAME by looking up each segment's CO_FIPS code in county_mapping.
# Codes without an entry in county_mapping get None.
# Only the CO_FIPS column is mapped, instead of building a Series for every row
# with a row-wise apply.
sdfSegmentsWData['CO_NAME'] = sdfSegmentsWData['CO_FIPS'].map(
    lambda fips: county_mapping.get(fips, {}).get('CO_NAME')
)
sdfSegmentsWData

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,NOTES_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME
0,0006_141.0,1.0,0.9840,1.0276,1.0316,1.02960,-48.0,1540.0,1660.0,1717.0,...,,False,False,False,False,False,False,False,False,Utah
1,0006_146.9,1.0,0.9840,1.0276,1.0316,1.02960,-54.0,1546.0,1668.0,1727.0,...,,False,False,False,False,False,False,False,False,Utah
2,0006_149.9,1.0,0.9840,1.0276,1.0316,1.02960,843.0,1572.9,1694.9,2078.6,...,,False,False,False,False,False,False,False,False,Utah
3,0006_150.6,1.0,0.9840,1.0276,1.0316,1.02960,653.0,1759.8,1889.0,2280.5,...,,False,False,False,False,False,False,False,False,Utah
4,0006_152.6,1.0,0.9840,1.0276,1.0316,1.02960,-133.0,2509.3,2621.6,3018.6,...,,False,False,False,False,False,False,False,False,Utah
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9636,UDOT_7090,0.0,1.0918,1.0107,1.0324,1.02155,0.0,1.0,1.0,1.0,...,,True,True,True,False,True,False,False,False,Tooele
9637,UDOT_7091,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,0.0,0.0,...,,True,True,True,False,False,True,False,False,Tooele
9638,UDOT_7092,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,1.0,1.0,...,,True,True,True,False,True,False,False,False,Tooele
9639,UDOT_7093,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,1.0,1.0,...,,False,True,True,False,True,False,False,False,Tooele


In [32]:
# Spot check: assembled row for segment 0056_060.6
sdfSegmentsWData[sdfSegmentsWData['SEGID']=='0056_060.6']

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,NOTES_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME
6070,0056_060.6,5.0,1.0924,1.0104,1.0243,1.01735,-1922.0,20577.0,31000.5,31388.9,...,,False,False,False,False,False,False,False,False,Iron


In [33]:
from arcgis.features import FeatureLayer
from arcgis.geometry import filters
import pandas as pd

# URL to the ArcGIS Feature Service layer
url = "https://services1.arcgis.com/taguadKoI1XFwivx/arcgis/rest/services/Forecasts_gdb/FeatureServer/0"

# Create a FeatureLayer object
feature_layer = FeatureLayer(url)

# Query for all features in the layer, with every attribute and the geometry
# Note: You might need to paginate requests for very large datasets
query_result = feature_layer.query(where="1=1", out_fields="*", return_geometry=True)

# Convert the query result to a Spatially Enabled DataFrame
sdf = query_result.sdf

# Keep only the rows whose 'F_AREA' is listed in 'lstLockForecastArea'.
# The explicit .copy() matters: spatial.project() below rewrites the geometry
# column in place, and doing that on a bare slice of sdf triggers pandas'
# "value is trying to be set on a copy of a slice" warning.
filtered_sdf = sdf[sdf['F_AREA'].isin(lstLockForecastArea)].copy()

# Project the filtered frame in place to UTM Zone 12N, NAD83 (WKID 26912)
result = filtered_sdf.spatial.project(26912)

# Stop here if the projection was not acknowledged. Continuing would either
# fail on an undefined projected_sdf or silently show a stale one left over
# from an earlier run of this cell.
if not result:
    raise RuntimeError("Projection operation did not succeed as expected.")

# The projection was applied in place, so the projected frame is filtered_sdf itself
projected_sdf = filtered_sdf

# Display the filtered, projected Spatially Enabled DataFrame
display(projected_sdf)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._data[self.name] = vals


Unnamed: 0,OBJECTID,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,...,FL_FURREV,FL_SEG,CO_NAME,Shape__Length,Shape__Length_2,SEGTYPE,Shape__Length_3,Shape__Length_4,DONOTUSE,SHAPE
0,1,0013_000.0,1.0,1.0924,1.0104,1.0243,1.01735,6929.0,13747.6,14506.3,...,0,0,Box Elder,1403.620879,1403.621145,SR,1403.621408,1403.621672,0,"{""paths"": [[[415153.6708325151, 4593273.258784..."
1,2,0013_000.6,1.0,1.0946,1.0117,1.0095,1.01060,6300.0,14463.6,15048.1,...,0,0,Box Elder,1498.511493,1498.511764,SR,1498.512034,1498.512308,0,"{""paths"": [[[415188.8610227527, 4594320.899147..."
2,3,0013_001.3,1.0,1.0946,1.0117,1.0095,1.01060,3284.0,13413.7,14074.5,...,0,0,Box Elder,643.117860,643.117919,SR,643.117982,643.118040,0,"{""paths"": [[[415219.87107087666, 4595439.81957..."
3,4,0013_001.6,1.0,1.0946,1.0117,1.0095,1.01060,3228.0,14630.6,15022.6,...,0,0,Box Elder,365.812729,365.812896,SR,365.813064,365.813231,0,"{""paths"": [[[415233.3711693141, 4595919.959646..."
4,5,0013_001.8,1.0,1.0924,1.0104,1.0243,1.01735,5061.0,12599.0,12882.0,...,0,0,Box Elder,1483.506029,1483.506299,SR,1483.506568,1483.506837,0,"{""paths"": [[[415241.2712074834, 4596193.059958..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3238,3239,WFRC_8469,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,...,0,0,Salt Lake,1206.458573,1206.458783,PL,1206.459183,1206.459472,0,"{""paths"": [[[407209.4300318573, 4504033.708336..."
3239,3240,WFRC_8470,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,...,0,0,Salt Lake,1047.665090,1047.665367,PL,1047.665559,1047.665650,0,"{""paths"": [[[407719.25599099946, 4503425.47192..."
3240,3241,WFRC_8471,1.0,1.0924,1.0104,1.0243,1.01735,0.0,0.0,0.0,...,0,0,Salt Lake,858.554874,858.554952,PL,858.555081,858.555182,0,"{""paths"": [[[415730.6518810922, 4481611.312996..."
3241,3242,WFRC_8472,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,...,0,0,Salt Lake,536.957257,536.957297,PL,536.957348,536.957395,0,"{""paths"": [[[424304.5617075424, 4489343.910294..."


In [34]:
# merge locked with unlocked
#
# Rows for locked forecast areas are taken from the feature-service download
# (filtered_sdf); rows for every other area are taken from the freshly built
# sdfSegmentsWData. With no locked areas, the fresh frame is used unchanged.

if len(lstLockForecastArea):

    print('Combine locked and unlocked')

    # Locked areas: keep what was downloaded from the feature service
    sdfSegmentsWData_Locked = filtered_sdf[filtered_sdf['F_AREA'].isin(lstLockForecastArea)].copy()

    # All remaining areas: use the newly computed rows
    sdfSegmentsWData_Unlocked = sdfSegmentsWData[~sdfSegmentsWData['F_AREA'].isin(lstLockForecastArea)].copy()

    # Stack the two frames with a fresh 0..n-1 index, then drop the service's
    # OBJECTID column (errors='ignore' keeps the cell re-runnable if the
    # column is absent). Columns present in only one frame are filled with NaN.
    sdfSegmentsWDataMerged = (
        pd.concat([sdfSegmentsWData_Locked, sdfSegmentsWData_Unlocked], ignore_index=True)
          .drop(columns='OBJECTID', errors='ignore')
    )

else:
    sdfSegmentsWDataMerged = sdfSegmentsWData

sdfSegmentsWDataMerged

Combine locked and unlocked


Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,FL_FURREV,FL_SEG,CO_NAME,Shape__Length,Shape__Length_2,SEGTYPE,Shape__Length_3,Shape__Length_4,DONOTUSE,SHAPE
0,0013_000.0,1.0,1.0924,1.0104,1.0243,1.01735,6929.0,13747.6,14506.3,15215.0,...,0,0,Box Elder,1403.620879,1403.621145,SR,1403.621408,1403.621672,0.0,"{'paths': [[[415153.6708325151, 4593273.258784..."
1,0013_000.6,1.0,1.0946,1.0117,1.0095,1.01060,6300.0,14463.6,15048.1,16066.0,...,0,0,Box Elder,1498.511493,1498.511764,SR,1498.512034,1498.512308,0.0,"{'paths': [[[415188.8610227527, 4594320.899147..."
2,0013_001.3,1.0,1.0946,1.0117,1.0095,1.01060,3284.0,13413.7,14074.5,15212.8,...,0,0,Box Elder,643.117860,643.117919,SR,643.117982,643.118040,0.0,"{'paths': [[[415219.87107087666, 4595439.81957..."
3,0013_001.6,1.0,1.0946,1.0117,1.0095,1.01060,3228.0,14630.6,15022.6,15944.8,...,0,0,Box Elder,365.812729,365.812896,SR,365.813064,365.813231,0.0,"{'paths': [[[415233.3711693141, 4595919.959646..."
4,0013_001.8,1.0,1.0924,1.0104,1.0243,1.01735,5061.0,12599.0,12882.0,13578.7,...,0,0,Box Elder,1483.506029,1483.506299,SR,1483.506568,1483.506837,0.0,"{'paths': [[[415241.2712074834, 4596193.059958..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9631,UDOT_7090,0.0,1.0918,1.0107,1.0324,1.02155,0.0,1.0,1.0,1.0,...,False,False,Tooele,,,,,,,"{'paths': [[[388880.12200000044, 4497794.5009]..."
9632,UDOT_7091,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,0.0,0.0,...,False,False,Tooele,,,,,,,"{'paths': [[[388884.62009999994, 4491344.6401]..."
9633,UDOT_7092,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,1.0,1.0,...,False,False,Tooele,,,,,,,"{'paths': [[[392102.12590000033, 4494203.1196]..."
9634,UDOT_7093,0.0,0.9840,1.0276,1.0316,1.02960,0.0,1.0,1.0,1.0,...,False,False,Tooele,,,,,,,"{'paths': [[[394365.53309999965, 4504536.84039..."


In [35]:
# export
#
# Writes sdfSegmentsWDataMerged to two file geodatabases when
# bExportFeatureClass is set: a timestamped copy in the backup geodatabase and
# a fixed-name feature class in the results geodatabase.
import os
from datetime import datetime  # imported here so the cell does not rely on a later cell's import

from arcgis.features import GeoAccessor

if bExportFeatureClass:
    # Define the geodatabase paths
    gdb_path        = r"results\Forecasts.gdb"
    gdb_path_backup = r"backup\Forecasts_backup.gdb"

    # Check if the results geodatabase exists, if not, create it
    # (the parent folder is created first so the create call has a valid target)
    if not arcpy.Exists(gdb_path):
        os.makedirs(r"results", exist_ok=True)
        arcpy.CreateFileGDB_management(r"results", "Forecasts.gdb")

    # Check if the backup geodatabase exists, if not, create it
    if not arcpy.Exists(gdb_path_backup):
        os.makedirs(r"backup", exist_ok=True)
        arcpy.CreateFileGDB_management(r"backup", "Forecasts_backup.gdb")

    # Fixed name of the feature class in the results geodatabase
    feature_class_name = "ForecastSegments"

    # Timestamped name for the backup copy
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    feature_class_name_with_timestamp = f"ForecastSegments_{timestamp}"

    # backup copy: a new timestamped feature class in the backup geodatabase
    sdfSegmentsWDataMerged.spatial.to_featureclass(location=f"{gdb_path_backup}\\{feature_class_name_with_timestamp}", sanitize_columns=False)
    print(f"Feature class {feature_class_name_with_timestamp} created in {gdb_path_backup}")

    # current copy: always written under the same name in the results geodatabase
    sdfSegmentsWDataMerged.spatial.to_featureclass(location=f"{gdb_path}\\{feature_class_name}", sanitize_columns=False)

    print(f"Feature class {feature_class_name} created in {gdb_path}")

else:
    print ('not exported')


Feature class ForecastSegments_20250219_160132 created in backup\Forecasts_backup.gdb
Feature class ForecastSegments created in results\Forecasts.gdb


In [36]:
# Spot-check: show the merged row(s) for one specific segment id
sdfSegmentsWDataMerged.loc[sdfSegmentsWDataMerged['SEGID'].eq('1438_000.9')]

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFac,DYVOL2019,DYVOL2023,DYVOL2028,...,FL_FURREV,FL_SEG,CO_NAME,Shape__Length,Shape__Length_2,SEGTYPE,Shape__Length_3,Shape__Length_4,DONOTUSE,SHAPE
1111,1438_000.9,1.0,1.0924,1.0104,1.0243,1.01735,3431.0,5546.1,5767.5,6566.3,...,0,0,Davis,2041.957131,2041.957431,FA,2041.957856,2041.958276,0.0,"{""paths"": [[[422184.7872143681, 4539112.133347..."


In [37]:
# Collect every SEGID value that occurs more than once (an empty array means all ids are unique)
is_repeated_segid = sdfSegmentsWData['SEGID'].duplicated(keep=False)
duplicated_segid = sdfSegmentsWData.loc[is_repeated_segid, 'SEGID'].unique()
duplicated_segid

array([], dtype=object)

In [38]:
# List the identifying columns of any segment that has no forecast area assigned
sdfSegmentsWData.loc[sdfSegmentsWData['F_AREA'].isna(), ['SEGID','CO_FIPS','SUBAREAID','F_AREA']]


Unnamed: 0,SEGID,CO_FIPS,SUBAREAID,F_AREA


In [39]:
# export segment JSON
jsonSegment = sdfSegmentsWData[['SEGID','PLANAREA','SUBAREAID','CO_NAME','F_AREA']]

jsonSegment.to_json('_site-art-of-forecasting/data/segments.json', orient='records')

In [40]:
# Count the non-null SEGIDs in each forecast area / county / subarea combination
segments_by_area = jsonSegment.groupby(['F_AREA','CO_NAME','SUBAREAID'], as_index=False)
segments_by_area.agg(numSegs=pd.NamedAgg(column='SEGID', aggfunc='count'))

Unnamed: 0,F_AREA,CO_NAME,SUBAREAID,numSegs
0,Cache,Cache,2.0,489
1,Dixie,Washington,3.0,615
2,Iron,Iron,5.0,249
3,MAG,Juab,1.0,1
4,MAG,Utah,1.0,2089
5,MAG,Wasatch,4.0,173
6,Summit,Summit,4.0,205
7,UDOT,Beaver,0.0,96
8,UDOT,Box Elder,0.0,196
9,UDOT,Cache,0.0,2


In [41]:
# export shapefile as well to share with UDOT
# NOTE(review): this writes sdfSegmentsWData, not sdfSegmentsWDataMerged, so the
# rows come from the unmerged frame — confirm that is the intended source.

from datetime import datetime

# Timestamped shapefile name inside the results folder
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
filename = "results/ForecastSegments_{}.shp".format(timestamp)

# Last expression of the cell, so the value returned by the export is displayed
sdfSegmentsWData.spatial.to_featureclass(location=filename, sanitize_columns=False)


'e:\\GitHub\\Traffic-Volume-Forecasts\\results\\ForecastSegments_20250219_160144.shp'

In [42]:
# Show every row that carries a segment-level note
dfLatest_filtered.loc[dfLatest_filtered['NOTES_SEG'].notna()]

Unnamed: 0,SEGID,ADJHIST,ADJ2019,ADJ2023,ADJ2028,ADJ2032,ADJ2042,ADJ2050,OV_REV,OV_LTPRV,OV_ZERO,OV_HIADJ,OV_50LT19,OV_503X19,OV_FURREV,NOTES,NOTES_FURR,OV_SEG,NOTES_SEG
76,0006_246.94,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0,"Need to fix this in the model, SEGID not broug..."
441,0021_075.9,0.0,0,-50,-250,-350,-600,-800,0,0,0,0,0,0,0,kept it to 300 in the future because the centr...,,0,centroid connector for the TAZ shouldn't be lo...
444,0021_077.2,0.0,0,200,400,600,1000,1400,0,0,0,0,0,0,0,followed trendlines,,0,This segment is not coded in the model link.
655,0036_050.8,0.0,0,-200,-600,-500,0,4800,0,0,0,0,0,0,0,Increased 2050; it looks like the segment summ...,,0,Check that the segment is summarizing the corr...
661,0036_053.7,-4500.0,0,0,-500,-1000,0,0,0,0,0,0,0,0,0,AADT tagged incorrectly. 2019 AADT should be 1...,,0,AADT tagged incorrectly. 2019 AADT should be 1...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9052,UDOT_7019,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,none,,0,in tdm?
9070,UDOT_7041,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,none,,0,why?
9071,UDOT_7042,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,Looks good. JL,,0,not in tdm?
9085,UDOT_7056,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0,split segment at 1000 N
