In [1]:
import pandas as pd
from arcgis import GIS
gis = GIS()
import requests
import arcpy

# Notebook switches, read by the download, backup-loading, flagging and export cells below
bRedownloadLatest = True # download a fresh attribute table from AGOL and save it as a backup CSV; otherwise the most recent existing backup is used
bExportFeatureClass = True # write the joined segments to the results and backup file geodatabases
bClearFlags = True # drop every FL_ column from the loaded backup (flags are recomputed from the criteria)
bClearOverrides = False # drop every OV_ column from the loaded backup and reset each override column to 0

In [2]:
# Prep flag categories
#
# One record per flag:
#   flagName        - column name; must be short enough to be a dbf column name
#   flagDescription - label for the flag
#   flagCriteria    - expression in which [COLUMN] stands for a column of the segment table
dfFlags = pd.DataFrame.from_records(
    [
        {
            'flagName': 'FL_REV',
            'flagDescription': 'Not reviewed',
            'flagCriteria': "([NOTES]=='') & ([NOTES_FURREV]=='')",
        },
        {
            'flagName': 'FL_LTPRV',
            'flagDescription': 'Less than previous forecast year',
            'flagCriteria': (
                "(([MF2023]+[ADJ2023])<([M2019]+[aadtAdjFactor])) | "
                "(([MF2028]+[ADJ2028])<([MF2023]+[ADJ2023])) | "
                "(([MF2032]+[ADJ2032])<([MF2028]+[ADJ2028])) | "
                "(([MF2042]+[ADJ2042])<([MF2032]+[ADJ2032])) | "
                "(([MF2050]+[ADJ2050])<([MF2042]+[ADJ2042]))"
            ),
        },
        {
            'flagName': 'FL_ZERO',
            'flagDescription': 'Zero volume',
            'flagCriteria': (
                "(([MF2023]+[ADJ2023])<=0) | "
                "(([MF2028]+[ADJ2028])<=0) | "
                "(([MF2032]+[ADJ2032])<=0) | "
                "(([MF2042]+[ADJ2042])<=0) | "
                "(([MF2050]+[ADJ2050])<=0)"
            ),
        },
        {
            'flagName': 'FL_HIADJ',
            'flagDescription': 'Large model adjustment factor ',
            'flagCriteria': (
                "((([aadtAdjFactor]/([M2019]+[aadtAdjFactor]))>0.95) | "
                "(([aadtAdjFactor]/([M2019]+[aadtAdjFactor]))<-0.95))"
            ),
        },
        {
            'flagName': 'FL_50LT19',
            'flagDescription': 'HPMS: 2050 < 2019',
            'flagCriteria': "(([MF2050]+[ADJ2050])<([M2019]+[aadtAdjFactor]))",
        },
        {
            'flagName': 'FL_503X19',
            'flagDescription': 'HPMS: 2050 > 3 x 2019',
            'flagCriteria': "(([MF2050]+[ADJ2050])>(3*([M2019]+[aadtAdjFactor])))",
        },
        {
            'flagName': 'FL_FURREV',
            'flagDescription': 'Further Review',
            'flagCriteria': "([NOTES_FURREV]!='')",
        },
        {
            'flagName': 'FL_SEG',
            'flagDescription': 'Segment Note',
            'flagCriteria': "([NOTES_SEG]!='')",
        },
    ],
    columns=['flagName', 'flagDescription', 'flagCriteria'],
)

# Publish the flag definitions alongside the other site data
dfFlags.to_json('_site/data/flags.json', orient='records')

In [3]:
# CO_FIPS -> county attributes (currently only the county name)
county_mapping = {
    fips: {'CO_NAME': name}
    for fips, name in [
        ( 1, 'Beaver'),
        ( 3, 'Box Elder'),
        ( 5, 'Cache'),
        ( 7, 'Carbon'),
        ( 9, 'Daggett'),
        (11, 'Davis'),
        (13, 'Duchesne'),
        (15, 'Emery'),
        (17, 'Garfield'),
        (19, 'Grand'),
        (21, 'Iron'),
        (23, 'Juab'),
        (25, 'Kane'),
        (27, 'Millard'),
        (29, 'Morgan'),
        (31, 'Piute'),
        (33, 'Rich'),
        (35, 'Salt Lake'),
        (37, 'San Juan'),
        (39, 'Sanpete'),
        (41, 'Sevier'),
        (43, 'Summit'),
        (45, 'Tooele'),
        (47, 'Uintah'),
        (49, 'Utah'),
        (51, 'Wasatch'),
        (53, 'Washington'),
        (55, 'Wayne'),
        (57, 'Weber'),
    ]
}


# Create JSONs

In [4]:
# convert csvs to jsons
lstSegidFiles = ['aadt', 'linear-forecasts', 'model-forecasts', 'previous-forecasts']
lstNonSegidFiles = ['aadt-sources', 'previous-forecasts-sources', 'projection-groups']

# Each intermediate CSV is re-written as a records-oriented JSON file with the same base name
for file in [*lstSegidFiles, *lstNonSegidFiles]:
    csv_path = f'intermediate/{file}.csv'
    json_path = f'_site/data/{file}.json'
    pd.read_csv(csv_path).to_json(json_path, orient='records')

# Create Segment Feature Class

In [11]:
# Path of the segments feature class loaded by the segment-reading cell
fnSegments = 'data/segments/Segments_State_20231221_Draft'

# Columns kept from the segments feature class (SHAPE holds the geometry)
seg_cols = ['SEGID','CO_FIPS','PLANAREA','SUBAREAID','F_AREA','SHAPE']

# All plan areas are used, since this app covers the entire state.
# To restrict by PLANAREA instead, re-enable the list below together with the
# commented-out filter line in the segment-reading cell.
#filterPlanArea = ['WFRC','MAG'] # must be a list; keep the [] even for a single item

In [6]:
if bRedownloadLatest:
    import os
    from datetime import datetime

    # Query endpoint of the published forecast-segments feature layer
    url = "https://services1.arcgis.com/taguadKoI1XFwivx/arcgis/rest/services/Forecasts_gdb/FeatureServer/0/query"

    # Base parameters for the request
    params = {
        "f": "json",
        "where": "1=1",
        "outFields": "*",
        "returnGeometry": "false",  # attributes only
    }

    all_features = []

    # Pagination parameters
    batch_size = 2000
    offset = 0

    while True:
        # Adjust parameters for pagination
        params['resultOffset'] = offset
        params['resultRecordCount'] = batch_size

        # Send the request; the timeout keeps a stalled connection from hanging the notebook
        response = requests.get(url, params=params, timeout=60)

        # Stop the run on any failure instead of saving a partial table: the backup
        # written below becomes the "latest" backup that later cells merge from.
        response.raise_for_status()
        data = response.json()
        if 'error' in data:
            # the service can report a failure in the JSON body of an HTTP 200 response
            raise RuntimeError(f"Error fetching data: {data['error']}")

        features = data.get('features', [])

        # Check if no features were returned, which means we're done
        if not features:
            break

        # Append features to the master list
        all_features.extend(features)

        # Advance by the number of rows actually received, so nothing is skipped
        # when the service returns fewer rows than batch_size per page
        offset += len(features)

    if not all_features:
        raise RuntimeError("No features returned; backup not written")

    # Extract attribute table
    attribute_table = [feature['attributes'] for feature in all_features]

    # Convert to DataFrame
    df = pd.DataFrame(attribute_table)

    # Generate a filename with the current timestamp in the backup folder
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    os.makedirs("backup", exist_ok=True)
    filename = os.path.join("backup", f"forecastsegments_backup_{timestamp}.csv")

    # Save the DataFrame to a CSV file
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")
else:
    print("didn't download... will use latest")

Data saved to backup\forecastsegments_backup_20231222_143434.csv


In [7]:
import os
from datetime import datetime

# Query endpoint of the forecasts log-file table
url = "https://services1.arcgis.com/taguadKoI1XFwivx/arcgis/rest/services/forecasts_logfile/FeatureServer/0/query"

# Base parameters for the request
params = {
    "f": "json",
    "where": "1=1",
    "outFields": "*",
    "returnGeometry": "false",  # If it's a stand-alone table, geometry isn't required.
    "outSR": "4326",  # Specify output spatial reference if needed
    "returnExceededLimitFeatures": "true"
}

all_features = []

# Pagination parameters
batch_size = 2000
offset = 0

while True:
    # Adjust parameters for pagination
    params['resultOffset'] = offset
    params['resultRecordCount'] = batch_size

    # POST with the parameters in the request body (data=), so the request is not
    # subject to query-string length limits; params= would still build a long URL
    response = requests.post(url, data=params, timeout=60)

    # Stop the run on any failure instead of saving a partial log
    response.raise_for_status()
    data = response.json()
    if 'error' in data:
        # the service can report a failure in the JSON body of an HTTP 200 response
        raise RuntimeError(f"Error fetching data: {data['error']}")

    features = data.get('features', [])

    # Check if no features were returned, which means we're done
    if not features:
        break

    # Append features to the master list
    all_features.extend(features)

    # Advance by the number of rows actually received, so nothing is skipped
    # when the service returns fewer rows than batch_size per page
    offset += len(features)

# Extract attribute table
attribute_table = [feature['attributes'] for feature in all_features]

# Convert to DataFrame
df = pd.DataFrame(attribute_table)

# Generate a filename with the current timestamp in the backup folder
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
os.makedirs("backup", exist_ok=True)
filename = os.path.join("backup", f"logfile_backup_{timestamp}.csv")

# Save the DataFrame to a CSV file
df.to_csv(filename, index=False)
print(f"Data saved to {filename}")

Data saved to backup\logfile_backup_20231222_143438.csv


In [8]:
#import datetime
#
## empty log file for AGOL
#dfLogFile = pd.DataFrame([
#    ['dummy','dummy',0,0,0,0,0,'dummy',datetime.datetime.now()]
#],columns=('SEGID','EDITKEY','ADJ2023','ADJ2028','ADJ2032','ADJ2042','ADJ2050','NOTES','TIMESTAMP'))
#
#dfLogFile.to_csv('results/forecasts-logfile.csv')
#
#dfLogFile

In [9]:
# Get data from last backup

import os

# Collect the forecast-segment backups; their names embed a YYYYMMDD_HHMMSS
# timestamp, so a reverse name sort puts the newest file first
backup_dir = "backup\\"
all_files = [
    f
    for f in os.listdir(backup_dir)
    if f.startswith('forecastsegments_backup_') and f.endswith('.csv')
]
sorted_files = sorted(all_files, reverse=True)

if not sorted_files:
    # Nothing to restore from: downstream cells test dfLatest.empty
    dfLatest = pd.DataFrame()
    print("No backup files found in the directory.")
else:
    latest_file = os.path.join(backup_dir, sorted_files[0])
    dfLatest = pd.read_csv(latest_file, low_memory=False)

    # Duplicate SEGID rows are collapsed to one row per SEGID
    # (groupby().first() keeps the first non-null value of each column)
    dfLatest = dfLatest.groupby('SEGID').first().reset_index()

    print(f"Loaded {latest_file} into a DataFrame")

    if bClearFlags:
        # Flag columns are discarded from the backup
        columns_to_delete = [col for col in dfLatest.columns if col.startswith('FL_')]
        dfLatest = dfLatest.drop(columns=columns_to_delete)

        print("Dropped FL_ columns")

    if bClearOverrides:
        # Override columns are discarded from the backup
        columns_to_delete = [col for col in dfLatest.columns if col.startswith('OV_')]
        dfLatest = dfLatest.drop(columns=columns_to_delete)

        print("Dropped OV_ columns")

# dfLatest now holds the most recent backup (or is empty)

# You can then use dfLatest as your DataFrame


Loaded backup\forecastsegments_backup_20231222_143434.csv into a DataFrame
Dropped FL_ columns


In [12]:
# Load the segments feature class and keep only the key columns.
# The PLANAREA filter stays disabled (all plan areas are used):
#sdfSegments = sdfSegments[sdfSegments['PLANAREA'].isin(filterPlanArea)]
sdfSegments = pd.DataFrame.spatial.from_featureclass(fnSegments)[seg_cols]

display(sdfSegments)

Unnamed: 0,SEGID,CO_FIPS,PLANAREA,SUBAREAID,F_AREA,SHAPE
0,0006_000.0,27,UDOT,0,UDOT,"{""paths"": [[[236177.7000000002, 4327541.25], [..."
1,0006_000.7,27,UDOT,0,UDOT,"{""paths"": [[[237241.1799999997, 4327399.720000..."
2,0006_016.0,27,UDOT,0,UDOT,"{""paths"": [[[261403.90000000037, 4327045.4], [..."
3,0006_046.0,27,UDOT,0,UDOT,"{""paths"": [[[305380.16000000015, 4325741.85], ..."
4,0006_060.2,27,UDOT,0,UDOT,"{""paths"": [[[324618, 4337936.9], [324647.40000..."
...,...,...,...,...,...,...
8962,WFRC_8469,35,WFRC,1,WFRC,"{""paths"": [[[407213.3742000004, 4504030.9891],..."
8963,WFRC_8470,35,WFRC,1,WFRC,"{""paths"": [[[407723.2000000002, 4503422.753000..."
8964,WFRC_8471,35,WFRC,1,WFRC,"{""paths"": [[[415734.59509999957, 4481608.6051]..."
8965,WFRC_8472,35,WFRC,1,WFRC,"{""paths"": [[[424308.5, 4489341.199999999], [42..."


In [13]:
# Wide table of DY_VOL values: one row per segment, one DYVOL<year> column per year
dfModVolAdj = pd.read_csv('intermediate/model-forecasts.csv')

# Year labels become DYVOL<year>; a ".0" left over from a float-typed year is removed
year_labels = dfModVolAdj['YEAR'].astype(str).str.replace(".0", "", regex=False)
dfModVolAdj['YEAR'] = 'DYVOL' + year_labels

dfModVolAdj_pvDYVOL = dfModVolAdj.pivot_table(
    index=['SEGID', 'SUBAREAID', 'FAC_WDAVG', 'FAC_SPR', 'FAC_FAL', 'FAC_SPRFAL', 'aadtAdjFactor'],
    columns='YEAR',
    values='DY_VOL',
).reset_index()

display(dfModVolAdj_pvDYVOL)


YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,DYVOL2032,DYVOL2042,DYVOL2050
0,0006_000.0,0,0.9840,1.0276,1.0316,1.02960,286.0,126.7,99.7,101.7,103.6,107.9,111.9
1,0006_000.7,0,0.9840,1.0276,1.0316,1.02960,198.0,213.3,216.0,215.8,214.8,214.8,216.2
2,0006_016.0,0,0.9840,1.0276,1.0316,1.02960,337.0,76.6,73.1,70.1,67.1,65.2,62.3
3,0006_046.0,0,0.9840,1.0276,1.0316,1.02960,294.0,76.6,73.1,70.1,67.1,65.2,62.3
4,0006_060.2,0,0.9840,1.0276,1.0316,1.02960,290.0,80.4,76.4,73.7,70.7,68.6,65.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8019,WFRC_8465,1,1.0924,1.0104,1.0243,1.01735,0.0,,,,43904.8,46904.0,46714.9
8020,WFRC_8466,1,1.0946,1.0117,1.0095,1.01060,0.0,9085.9,9069.4,10057.7,11136.7,13513.3,15633.9
8021,WFRC_8467,1,1.0924,1.0104,1.0243,1.01735,0.0,,17525.6,20873.4,22385.3,24758.8,26566.7
8022,WFRC_8471,1,1.0924,1.0104,1.0243,1.01735,0.0,,,,14074.6,17213.9,19117.2


In [14]:
# read in intermediate data once; both wide tables below are derived from it
model_forecasts_long = pd.read_csv('intermediate/model-forecasts.csv')

# Segment-level attributes that identify a row in every pivot and every merge
pivot_keys = ['SEGID', 'SUBAREAID', 'FAC_WDAVG', 'FAC_SPR', 'FAC_FAL', 'FAC_SPRFAL', 'aadtAdjFactor']


def _pivot_by_year(long_df, prefix, value_col, keys):
    """Pivot the long forecast table to one ``<prefix><year>`` column per year.

    Parameters
    ----------
    long_df : DataFrame with a YEAR column, the ``keys`` columns and ``value_col``.
    prefix : text placed in front of the year to form each column name.
    value_col : column whose values fill the year columns.
    keys : columns kept as row identifiers.

    Returns
    -------
    DataFrame with ``keys`` as ordinary columns plus one column per year.
    """
    # The ".0" is removed for every prefix so a float-typed YEAR cannot yield
    # labels such as "M2019.0" that the flag criteria would not find
    year_labels = prefix + long_df['YEAR'].astype(str).str.replace(".0", "", regex=False)
    return (
        long_df.assign(YEAR=year_labels)
        .pivot_table(index=keys, columns='YEAR', values=value_col)
        .reset_index()
    )


# M<year>: values of the modAadt column
dfModVolAdj_pvModAadt = _pivot_by_year(model_forecasts_long, 'M', 'modAadt', pivot_keys)

# MF<year>: values of the modForecast column
dfModVolAdj_pvModAadtAdj = _pivot_by_year(model_forecasts_long, 'MF', 'modForecast', pivot_keys)

dfModVolAdj_pv1 = dfModVolAdj_pvModAadt.merge(dfModVolAdj_pvModAadtAdj, on=pivot_keys)

# dfModVolAdj_pvDYVOL (DYVOL<year> columns) is built by the preceding cell
dfModVolAdj = dfModVolAdj_pvDYVOL.merge(dfModVolAdj_pv1, on=pivot_keys)
display(dfModVolAdj)

#dfModVolAdj.columns = [str(col).split('.')[0] if '.' in str(col) else str(col) for col in dfModVolAdj.columns]


YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,M2028,M2032,M2042,M2050,MF2019,MF2023,MF2028,MF2032,MF2042,MF2050
0,0006_000.0,0,0.9840,1.0276,1.0316,1.02960,286.0,126.7,99.7,101.7,...,103.0,105.0,110.0,114.0,400.0,400.0,400.0,400.0,400.0,400.0
1,0006_000.7,0,0.9840,1.0276,1.0316,1.02960,198.0,213.3,216.0,215.8,...,219.0,218.0,218.0,220.0,400.0,400.0,400.0,400.0,400.0,400.0
2,0006_016.0,0,0.9840,1.0276,1.0316,1.02960,337.0,76.6,73.1,70.1,...,71.0,68.0,66.0,63.0,400.0,400.0,400.0,400.0,400.0,400.0
3,0006_046.0,0,0.9840,1.0276,1.0316,1.02960,294.0,76.6,73.1,70.1,...,71.0,68.0,66.0,63.0,350.0,350.0,350.0,350.0,350.0,350.0
4,0006_060.2,0,0.9840,1.0276,1.0316,1.02960,290.0,80.4,76.4,73.7,...,75.0,72.0,70.0,67.0,350.0,350.0,350.0,350.0,350.0,350.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8019,WFRC_8465,1,1.0924,1.0104,1.0243,1.01735,0.0,,,,...,,40191.0,42937.0,42764.0,,,,40000.0,43000.0,43000.0
8020,WFRC_8466,1,1.0946,1.0117,1.0095,1.01060,0.0,9085.9,9069.4,10057.7,...,9188.0,10174.0,12345.0,14283.0,8300.0,8300.0,9200.0,10000.0,12500.0,14500.0
8021,WFRC_8467,1,1.0924,1.0104,1.0243,1.01735,0.0,,17525.6,20873.4,...,19108.0,20492.0,22665.0,24320.0,,16000.0,19000.0,20500.0,22500.0,24500.0
8022,WFRC_8471,1,1.0924,1.0104,1.0243,1.01735,0.0,,,,...,,12884.0,15758.0,17500.0,,,,13000.0,16000.0,17500.0


In [15]:
# Spot check: rows of the combined table whose SUBAREAID is 4
dfModVolAdj.loc[dfModVolAdj['SUBAREAID'].eq(4)]

YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,M2028,M2032,M2042,M2050,MF2019,MF2023,MF2028,MF2032,MF2042,MF2050
595,0032_000.0,4,0.9593,0.9859,1.0030,0.99445,-11.0,2932.9,3178.5,2985.3,...,3129.0,3339.0,4414.0,5483.0,3100.0,3300.0,3100.0,3300.0,4400.0,5500.0
596,0032_009.0,4,0.9593,0.9859,1.0030,0.99445,-125.0,3041.0,3221.0,3024.0,...,3170.0,3495.0,4233.0,5070.0,3100.0,3300.0,3000.0,3400.0,4100.0,4900.0
597,0032_009.8,4,0.9840,1.0276,1.0316,1.02960,-342.0,2740.1,2941.9,3017.9,...,2979.0,3270.0,4033.0,4802.0,2400.0,2600.0,2600.0,2900.0,3700.0,4500.0
598,0032_010.4,4,0.9593,0.9859,1.0030,0.99445,1134.0,4124.5,4103.0,4813.5,...,5046.0,5415.0,7024.0,8562.0,5500.0,5400.0,6200.0,6500.0,8200.0,9700.0
599,0032_011.4,4,1.0918,1.0107,1.0324,1.02155,1180.0,4769.8,4996.3,6135.6,...,5501.0,5848.0,7164.0,8580.0,5500.0,5700.0,6700.0,7000.0,8300.0,9800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6645,3136_001.0,4,1.0918,1.0107,1.0324,1.02155,94.0,453.3,289.8,320.7,...,288.0,308.0,618.0,772.0,500.0,350.0,400.0,400.0,700.0,850.0
7671,UDOT_7017,4,0.9840,1.0276,1.0316,1.02960,0.0,359.0,413.0,390.0,...,385.0,398.0,420.0,432.0,350.0,400.0,400.0,400.0,400.0,450.0
7672,UDOT_7018,4,1.0918,1.0107,1.0324,1.02155,0.0,212.0,273.0,293.0,...,263.0,273.0,325.0,360.0,200.0,250.0,250.0,250.0,300.0,350.0
7673,UDOT_7020,4,1.0924,1.0104,1.0243,1.01735,0.0,461.0,450.0,451.0,...,406.0,402.0,422.0,388.0,400.0,400.0,400.0,400.0,400.0,400.0


In [16]:
# join segment data and forecast data; the right join keeps every segment,
# including those that have no forecast rows
_sdf = dfModVolAdj.merge(sdfSegments, on=['SEGID', 'SUBAREAID'], how='right')

# Missing model forecasts become 0. Assigning the filled columns back (rather than
# calling fillna(inplace=True) on a column selection) also works under copy-on-write.
mf_cols = ['MF2019', 'MF2023', 'MF2028', 'MF2032', 'MF2042', 'MF2050']
_sdf[mf_cols] = _sdf[mf_cols].fillna(0)

# add columns for Adjustments and notes; these defaults are replaced by the
# values of the latest backup when one is available
for adj_col in ['ADJ2019', 'ADJ2023', 'ADJ2028', 'ADJ2032', 'ADJ2042', 'ADJ2050', 'ADJHIST']:
    _sdf[adj_col] = 0
notes_cols = ['NOTES', 'NOTES_FURREV', 'NOTES_SEG']
for notes_col in notes_cols:
    _sdf[notes_col] = ""

# replace with latest file
if dfLatest.empty:
    print("The latest dataframe is empty.")
    # No backup to merge: continue with the defaults so the flag step below still runs
    _sdfWithLatest = _sdf.copy()
else:
    print("The latest dataframe is not empty.")

    # Columns taken from the latest download (FL_ columns are absent here when
    # they were already dropped while loading the backup)
    columns_to_include = [
        col for col in dfLatest.columns
        if col.startswith(('ADJ', 'FL_', 'OV_')) or col == 'SEGID' or col in notes_cols
    ]
    dfLatest_filtered = dfLatest[columns_to_include]

    # Where both tables have a column, the backup's version wins: the default
    # column receives the '_delete' suffix and is dropped right after the merge
    _sdfWithLatest = _sdf.merge(dfLatest_filtered, on='SEGID', how='left', suffixes=('_delete', ''))
    _sdfWithLatest = _sdfWithLatest.drop(
        columns=[col for col in _sdfWithLatest.columns if col.endswith('_delete')]
    )

# Normalise the notes columns to stripped text with '' meaning "no note".
# Missing values and zero placeholders are treated as empty; '0.0' is included
# because a numeric zero turns into that text when the column was read as float.
for notes_col in notes_cols:
    notes_text = _sdfWithLatest[notes_col].fillna('0').astype(str)
    notes_text = notes_text.mask(notes_text.isin(['0', '0.0']), '')
    _sdfWithLatest[notes_col] = notes_text.str.strip()

# add flag columns
for index, row in dfFlags.iterrows():
    flag_name = row['flagName']
    flag_criteria = row['flagCriteria']

    # Turn every [COLUMN] of the criteria into _sdfWithLatest['COLUMN']
    criteria_code = flag_criteria.replace("[", "_sdfWithLatest['").replace("]", "']")
    expression = "_sdfWithLatest['" + flag_name + "'] = " + criteria_code
    print(expression)

    # The criteria are constants defined in this notebook (dfFlags); they are
    # evaluated with only the data frame in scope and assigned explicitly
    _sdfWithLatest[flag_name] = eval(criteria_code, {"_sdfWithLatest": _sdfWithLatest})

    override_name = flag_name.replace('FL_', 'OV_')
    if bClearOverrides:
        display('flags overwritten')
        # add override column
        _sdfWithLatest[override_name] = 0
    elif override_name not in _sdfWithLatest.columns:
        # no override was carried over from a backup: start it at 0
        _sdfWithLatest[override_name] = 0

# Remaining missing values (e.g. forecast columns of segments without forecasts) become 0
_sdfWithLatest.fillna(0, inplace=True)

sdfSegmentsWData = _sdfWithLatest
sdfSegmentsWData

The latest dataframe is not empty.
_sdfWithLatest['FL_REV'] = (_sdfWithLatest['NOTES']=='') & (_sdfWithLatest['NOTES_FURREV']=='')
_sdfWithLatest['FL_LTPRV'] = ((_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])<(_sdfWithLatest['M2019']+_sdfWithLatest['aadtAdjFactor'])) | ((_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])<(_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])) | ((_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])<(_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])) | ((_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042'])<(_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])) | ((_sdfWithLatest['MF2050']+_sdfWithLatest['ADJ2050'])<(_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042']))
_sdfWithLatest['FL_ZERO'] = ((_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])<=0) | ((_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])<=0) | ((_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])<=0) | ((_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042'])<=0) | ((_sdfWithLatest['M

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,NOTES_FURREV,OV_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG
0,0006_000.0,0,0.9840,1.0276,1.0316,1.02960,286.0,126.7,99.7,101.7,...,,0.0,True,True,False,False,True,False,False,False
1,0006_000.7,0,0.9840,1.0276,1.0316,1.02960,198.0,213.3,216.0,215.8,...,,0.0,True,True,False,False,True,False,False,False
2,0006_016.0,0,0.9840,1.0276,1.0316,1.02960,337.0,76.6,73.1,70.1,...,,0.0,True,True,False,False,True,False,False,False
3,0006_046.0,0,0.9840,1.0276,1.0316,1.02960,294.0,76.6,73.1,70.1,...,,0.0,True,True,False,False,True,False,False,False
4,0006_060.2,0,0.9840,1.0276,1.0316,1.02960,290.0,80.4,76.4,73.7,...,,0.0,True,True,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8962,WFRC_8469,1,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,,0.0,True,False,False,False,False,False,False,False
8963,WFRC_8470,1,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,,0.0,True,False,False,False,False,False,False,False
8964,WFRC_8471,1,1.0924,1.0104,1.0243,1.01735,0.0,0.0,0.0,0.0,...,,0.0,True,False,False,False,False,False,False,False
8965,WFRC_8472,1,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,,0.0,True,False,False,False,False,False,False,False


In [17]:
# Spot check: notes next to the further-review flag and its override
sdfSegmentsWData.loc[:, ['SEGID', 'NOTES_FURREV', 'NOTES_SEG', 'FL_FURREV', 'OV_FURREV']]

Unnamed: 0,SEGID,NOTES_FURREV,NOTES_SEG,FL_FURREV,OV_FURREV
0,0006_000.0,,,False,0.0
1,0006_000.7,,,False,0.0
2,0006_016.0,,,False,0.0
3,0006_046.0,,,False,0.0
4,0006_060.2,,,False,0.0
...,...,...,...,...,...
8962,WFRC_8469,,,False,0.0
8963,WFRC_8470,,,False,0.0
8964,WFRC_8471,,,False,0.0
8965,WFRC_8472,,,False,0.0


In [18]:
# Add CO_NAME by looking up each CO_FIPS in county_mapping;
# a code that is not in the mapping yields None
sdfSegmentsWData['CO_NAME'] = sdfSegmentsWData['CO_FIPS'].map(
    lambda fips: county_mapping.get(fips, {}).get('CO_NAME')
)
sdfSegmentsWData

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,OV_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME
0,0006_000.0,0,0.9840,1.0276,1.0316,1.02960,286.0,126.7,99.7,101.7,...,0.0,True,True,False,False,True,False,False,False,Millard
1,0006_000.7,0,0.9840,1.0276,1.0316,1.02960,198.0,213.3,216.0,215.8,...,0.0,True,True,False,False,True,False,False,False,Millard
2,0006_016.0,0,0.9840,1.0276,1.0316,1.02960,337.0,76.6,73.1,70.1,...,0.0,True,True,False,False,True,False,False,False,Millard
3,0006_046.0,0,0.9840,1.0276,1.0316,1.02960,294.0,76.6,73.1,70.1,...,0.0,True,True,False,False,True,False,False,False,Millard
4,0006_060.2,0,0.9840,1.0276,1.0316,1.02960,290.0,80.4,76.4,73.7,...,0.0,True,True,False,False,True,False,False,False,Millard
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8962,WFRC_8469,1,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,0.0,True,False,False,False,False,False,False,False,Salt Lake
8963,WFRC_8470,1,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,0.0,True,False,False,False,False,False,False,False,Salt Lake
8964,WFRC_8471,1,1.0924,1.0104,1.0243,1.01735,0.0,0.0,0.0,0.0,...,0.0,True,False,False,False,False,False,False,False,Salt Lake
8965,WFRC_8472,1,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,0.0,True,False,False,False,False,False,False,False,Salt Lake


In [19]:
# Spot check: a single segment
sdfSegmentsWData.loc[sdfSegmentsWData['SEGID'].eq('0056_060.6')]

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,OV_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME
947,0056_060.6,5,1.0924,1.0104,1.0243,1.01735,-1922.0,20577.0,31000.5,31388.9,...,0.0,True,False,False,False,False,False,False,False,Iron


In [20]:
# export
from arcgis.features import GeoAccessor

if not bExportFeatureClass:
    print ('not exported')
else:
    # File geodatabases: the current results and the timestamped backups
    gdb_path        = r"results\Forecasts.gdb"
    gdb_path_backup = r"backup\Forecasts_backup.gdb"

    # Create whichever geodatabase does not exist yet
    for gdb_folder, gdb_name, gdb_location in (
        (r"results", "Forecasts.gdb", gdb_path),
        (r"backup", "Forecasts_backup.gdb", gdb_path_backup),
    ):
        if not arcpy.Exists(gdb_location):
            arcpy.CreateFileGDB_management(gdb_folder, gdb_name)

    # Feature class names: a fixed name for the current copy and a
    # timestamped name for the backup copy
    feature_class_name = "ForecastSegments"
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    feature_class_name_with_timestamp = f"ForecastSegments_{timestamp}"

    # backup copy (timestamped name, backup geodatabase)
    sdfSegmentsWData.spatial.to_featureclass(
        location=f"{gdb_path_backup}\\{feature_class_name_with_timestamp}",
        sanitize_columns=False,
    )
    print(f"Feature class {feature_class_name_with_timestamp} created in {gdb_path_backup}")

    # current copy (fixed name, results geodatabase)
    sdfSegmentsWData.spatial.to_featureclass(
        location=f"{gdb_path}\\{feature_class_name}",
        sanitize_columns=False,
    )

    print(f"Feature class {feature_class_name} created in {gdb_path}")


Feature class ForecastSegments_20231222_143716 created in backup\Forecasts_backup.gdb
Feature class ForecastSegments created in results\Forecasts.gdb


In [21]:
# Spot check: a single segment
sdfSegmentsWData.loc[sdfSegmentsWData['SEGID'].eq('1438_000.9')]

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,OV_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME
3793,1438_000.9,1,1.0924,1.0104,1.0243,1.01735,3084.0,5925.7,6246.8,7554.2,...,0.0,False,True,False,False,False,False,True,False,Davis


In [22]:
# SEGIDs that appear on more than one row; an empty array means every SEGID is unique
segid_is_repeated = sdfSegmentsWData['SEGID'].duplicated(keep=False)
duplicated_segid = sdfSegmentsWData.loc[segid_is_repeated, 'SEGID'].unique()
duplicated_segid

array([], dtype=object)

In [23]:
# Segments without a forecast area. The join cell replaces every missing value
# with 0 before this point, so the 0 placeholder is matched as well as NaN;
# isna() alone could never find a row here.
f_area_missing = sdfSegmentsWData['F_AREA'].isna() | sdfSegmentsWData['F_AREA'].eq(0)
sdfSegmentsWData.loc[f_area_missing, ['SEGID', 'CO_FIPS', 'SUBAREAID', 'F_AREA']]


Unnamed: 0,SEGID,CO_FIPS,SUBAREAID,F_AREA


In [24]:
# export segment JSON: attribute columns only, one record per segment
segment_json_cols = ['SEGID', 'PLANAREA', 'SUBAREAID', 'CO_NAME', 'F_AREA']
jsonSegment = sdfSegmentsWData.loc[:, segment_json_cols]

jsonSegment.to_json('_site/data/segments.json', orient='records')

In [25]:
# Number of segments for each F_AREA / CO_NAME / SUBAREAID combination
segment_groups = jsonSegment.groupby(['F_AREA', 'CO_NAME', 'SUBAREAID'], as_index=False)
segment_groups.agg(numSegs=pd.NamedAgg(column='SEGID', aggfunc='count'))

Unnamed: 0,F_AREA,CO_NAME,SUBAREAID,numSegs
0,Cache,Cache,2,489
1,Dixie,Washington,3,615
2,Iron,Iron,5,249
3,MAG,Utah,1,1430
4,MAG,Wasatch,4,174
5,Summit,Summit,4,205
6,UDOT,Beaver,0,96
7,UDOT,Box Elder,0,196
8,UDOT,Cache,0,2
9,UDOT,Carbon,0,166


In [26]:
# export shapefile as well to share with UDOT

from datetime import datetime

# Timestamped shapefile path in the results folder
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
filename = "results/ForecastSegments_" + timestamp + ".shp"

# Last expression of the cell, so the value returned by the export is displayed
sdfSegmentsWData.spatial.to_featureclass(location=filename, sanitize_columns=False)


'e:\\GitHub\\Traffic-Volume-Forecasts\\results\\ForecastSegments_20231222_143727.shp'

In [27]:
# Rows of the latest backup that carry a segment note
dfLatest_filtered.loc[dfLatest_filtered['NOTES_SEG'].notna()]

Unnamed: 0,SEGID,NOTES_SEG,ADJHIST,ADJ2019,ADJ2023,ADJ2028,ADJ2032,ADJ2042,ADJ2050,OV_REV,OV_LTPRV,OV_ZERO,OV_HIADJ,OV_50LT19,OV_503X19,OV_FURREV,NOTES,NOTES_FURREV,OV_SEG
1329,0085_011.9,fix 2023 and rerun,0.0,0,0,0,0,0,0,0,0,0,0,0,1,0,,,0
3770,1427_000.8,CHECK OBSERVED DATA,0.0,0,0,0,1000,0,0,0,0,0,0,0,0,0,Smoothing,,0
4575,2036_001.3,Fix seg id in TDM,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0
4582,2038_000.5,fix segid in tdm,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0
4941,2120_001.2,Fix segid in tdm,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0
5110,2161_003.4,Fix in TDM,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0
5249,2193.000.0,Fix segid in seg shapefile,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,,,0
5510,2292_008.4,remove segment from shapefile,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0
5514,2292_017.0,Delete in seg shapefile,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0
5518,2300_000.0,add seg id to TDM,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0
