In [1]:
import pandas as pd
from arcgis import GIS
gis = GIS()
import requests
import arcpy

wfonly = True # when True, the segment-loading cell copies PLANAREA into F_AREA for every segment

bRedownloadLatest = True # when True, the current AGOL feature-layer attributes are downloaded and saved as a timestamped CSV in backup\
bExportFeatureClass = True # when True, the final segment table is written to the results and backup geodatabases
bClearFlags = True # when True, all FL_ columns are dropped from the loaded backup (flags are recomputed later anyway)
bClearOverrides = False # when True, all OV_ columns are dropped from the loaded backup and re-created as 0 for every flag

In [2]:
# Prep flag categories
#
# One record per review flag:
#   flagName        - name of the flag column (must be short enough to be a dbf column name)
#   flagDescription - human-readable description of the flag
#   flagCriteria    - boolean expression; [COLUMN] stands for a column of the segment table
dfFlags = pd.DataFrame(
    [
        {
            'flagName'       : 'FL_REV',
            'flagDescription': 'Not reviewed',
            'flagCriteria'   : "([NOTES]=='') & ([NOTES_FURREV]=='')",
        },
        {
            'flagName'       : 'FL_LTPRV',
            'flagDescription': 'Less than previous forecast year',
            'flagCriteria'   : (
                "(([MF2023]+[ADJ2023])<([M2019]+[aadtAdjFactor])) | "
                "(([MF2028]+[ADJ2028])<([MF2023]+[ADJ2023])) | "
                "(([MF2032]+[ADJ2032])<([MF2028]+[ADJ2028])) | "
                "(([MF2042]+[ADJ2042])<([MF2032]+[ADJ2032])) | "
                "(([MF2050]+[ADJ2050])<([MF2042]+[ADJ2042]))"
            ),
        },
        {
            'flagName'       : 'FL_ZERO',
            'flagDescription': 'Zero volume',
            'flagCriteria'   : (
                "(([MF2023]+[ADJ2023])<=0) | "
                "(([MF2028]+[ADJ2028])<=0) | "
                "(([MF2032]+[ADJ2032])<=0) | "
                "(([MF2042]+[ADJ2042])<=0) | "
                "(([MF2050]+[ADJ2050])<=0)"
            ),
        },
        {
            'flagName'       : 'FL_HIADJ',
            'flagDescription': 'Large model adjustment factor ',
            'flagCriteria'   : (
                "((([aadtAdjFactor]/([M2019]+[aadtAdjFactor]))>0.95) | "
                "(([aadtAdjFactor]/([M2019]+[aadtAdjFactor]))<-0.95))"
            ),
        },
        {
            'flagName'       : 'FL_50LT19',
            'flagDescription': 'HPMS: 2050 < 2019',
            'flagCriteria'   : "(([MF2050]+[ADJ2050])<([M2019]+[aadtAdjFactor]))",
        },
        {
            'flagName'       : 'FL_503X19',
            'flagDescription': 'HPMS: 2050 > 3 x 2019',
            'flagCriteria'   : "(([MF2050]+[ADJ2050])>(3*([M2019]+[aadtAdjFactor])))",
        },
        {
            'flagName'       : 'FL_FURREV',
            'flagDescription': 'Further Review',
            'flagCriteria'   : "([NOTES_FURREV]!='')",
        },
        {
            'flagName'       : 'FL_SEG',
            'flagDescription': 'Segment Note',
            'flagCriteria'   : "([NOTES_SEG]!='')",
        },
    ],
    columns=['flagName', 'flagDescription', 'flagCriteria'],
)

# publish the flag definitions alongside the other site data
dfFlags.to_json('_site/data/flags.json', orient='records')

In [3]:
# CO_FIPS -> county attributes.
# Keyed by the integer county FIPS code; each value is a dict holding the county name under 'CO_NAME'.
county_mapping = {
    fips: {'CO_NAME': name}
    for fips, name in [
        ( 1, 'Beaver'    ), ( 3, 'Box Elder' ), ( 5, 'Cache'     ),
        ( 7, 'Carbon'    ), ( 9, 'Daggett'   ), (11, 'Davis'     ),
        (13, 'Duchesne'  ), (15, 'Emery'     ), (17, 'Garfield'  ),
        (19, 'Grand'     ), (21, 'Iron'      ), (23, 'Juab'      ),
        (25, 'Kane'      ), (27, 'Millard'   ), (29, 'Morgan'    ),
        (31, 'Piute'     ), (33, 'Rich'      ), (35, 'Salt Lake' ),
        (37, 'San Juan'  ), (39, 'Sanpete'   ), (41, 'Sevier'    ),
        (43, 'Summit'    ), (45, 'Tooele'    ), (47, 'Uintah'    ),
        (49, 'Utah'      ), (51, 'Wasatch'   ), (53, 'Washington'),
        (55, 'Wayne'     ), (57, 'Weber'     ),
    ]
}


# Create JSONs

In [4]:
# convert csvs to jsons
# Every intermediate CSV listed here is re-published as a records-oriented JSON with the same base name.
lstSegidFiles = ['aadt', 'linear-forecasts', 'model-forecasts', 'previous-forecasts']
lstNonSegidFiles = ['aadt-sources', 'previous-forecasts-sources', 'projection-groups']

for table_name in lstSegidFiles + lstNonSegidFiles:
    csv_path = f'intermediate/{table_name}.csv'
    json_path = f'_site/data/{table_name}.json'

    table = pd.read_csv(csv_path)
    table.to_json(json_path, orient='records')

# Create Segment Feature Class

In [5]:
# Segment shapefile read by the segment-loading cell
fnSegments = 'data/segments/WFv901_Segments_20240226_GeoKeys_AADTs_Factors.shp'

# Columns kept from the segment shapefile (SHAPE holds the geometry)
seg_cols = ['SEGID','CO_FIPS','PLANAREA','SUBAREAID','F_AREA','SHAPE']

# USE ALL PLANAREAS since this app will be for the entire state
# (disabled) filter by PLANAREA in segments shapefile
#filterPlanArea = ['WFRC','MAG'] # must be a list... even for a single item, still include the []

In [6]:
# Download the attribute table of the published forecast-segments layer and save it as a timestamped CSV backup.
# Fails loudly (instead of saving a partial backup) when the service reports an error.
if bRedownloadLatest:

    # Query endpoint of the feature layer
    url = "https://services1.arcgis.com/taguadKoI1XFwivx/arcgis/rest/services/ForecastsWFRCFinal_gdb/FeatureServer/0/query"

    # Base parameters for the request
    params = {
        "f": "json",
        "where": "1=1",
        "outFields": "*",
        "returnGeometry": "false",  # attributes only
    }

    all_features = []

    # Pagination parameters
    batch_size = 2000        # rows requested per page
    offset = 0               # index of the first row of the next page
    request_timeout = 120    # seconds; without a timeout a stalled connection hangs the notebook forever

    while True:
        # Adjust parameters for pagination
        params['resultOffset'] = offset
        params['resultRecordCount'] = batch_size

        # Send the request; an HTTP error status raises instead of silently truncating the backup
        response = requests.get(url, params=params, timeout=request_timeout)
        response.raise_for_status()
        data = response.json()

        # ArcGIS REST reports many failures as HTTP 200 with an 'error' object in the body
        if 'error' in data:
            raise RuntimeError(f"Error fetching data: {data['error']}")

        features = data.get('features', [])

        # An empty page means every row has been read
        if not features:
            break

        all_features.extend(features)

        # Advance by the number of rows actually received: the server may cap a page
        # below batch_size, and advancing by batch_size would then skip rows
        offset += len(features)

    # Extract attribute table
    attribute_table = [feature['attributes'] for feature in all_features]

    # Convert to DataFrame
    df = pd.DataFrame(attribute_table)

    from datetime import datetime

    # Generate a filename with the current timestamp in the backup folder
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"backup\\forecastsegments_wfrcfinal_backup_{timestamp}.csv"

    # Save the DataFrame to a CSV file
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")
else:
    print("didn't download... will use latest")

Data saved to backup\forecastsegments_wfrcfinal_backup_20240227_175716.csv


In [7]:
# Download the forecasts log table and save it as a timestamped CSV backup.
# Fails loudly (instead of saving a partial backup) when the service reports an error.

# Query endpoint of the log table
url = "https://services1.arcgis.com/taguadKoI1XFwivx/arcgis/rest/services/forecasts_logfile/FeatureServer/0/query"

# Base parameters for the request
params = {
    "f": "json",
    "where": "1=1",
    "outFields": "*",
    "returnGeometry": "false",  # If it's a stand-alone table, geometry isn't required.
    "outSR": "4326",  # Specify output spatial reference if needed
    "returnExceededLimitFeatures": "true"
}

all_features = []

# Pagination parameters
batch_size = 2000        # rows requested per page
offset = 0               # index of the first row of the next page
request_timeout = 120    # seconds; without a timeout a stalled connection hangs the notebook forever

while True:
    # Adjust parameters for pagination
    params['resultOffset'] = offset
    params['resultRecordCount'] = batch_size

    # Use POST since some servers may have GET length limits. The parameters go in the
    # form body (data=); passing them as params= would put them back into the URL.
    response = requests.post(url, data=params, timeout=request_timeout)
    response.raise_for_status()
    data = response.json()

    # ArcGIS REST reports many failures as HTTP 200 with an 'error' object in the body
    if 'error' in data:
        raise RuntimeError(f"Error fetching data: {data['error']}")

    features = data.get('features', [])

    # An empty page means every row has been read
    if not features:
        break

    all_features.extend(features)

    # Advance by the number of rows actually received: the server may cap a page
    # below batch_size, and advancing by batch_size would then skip rows
    offset += len(features)

# Extract attribute table
attribute_table = [feature['attributes'] for feature in all_features]

# Convert to DataFrame
df = pd.DataFrame(attribute_table)

# later cells also rely on this import being executed here
from datetime import datetime

# Generate a filename with the current timestamp in the backup folder
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f"backup\\logfile_backup_{timestamp}.csv"

# Save the DataFrame to a CSV file
df.to_csv(filename, index=False)
print(f"Data saved to {filename}")

Data saved to backup\logfile_backup_20240227_175723.csv


In [8]:
#import datetime
#
## empty log file for AGOL
#dfLogFile = pd.DataFrame([
#    ['dummy','dummy',0,0,0,0,0,'dummy',datetime.datetime.now()]
#],columns=('SEGID','EDITKEY','ADJ2023','ADJ2028','ADJ2032','ADJ2042','ADJ2050','NOTES','TIMESTAMP'))
#
#dfLogFile.to_csv('results/forecasts-logfile.csv')
#
#dfLogFile

In [9]:
# Get data from last backup

import os

backup_dir = "backup\\"

# NOTE(review): only files starting with 'forecastsegments_backup_' are considered, while the download
# cell of this notebook writes 'forecastsegments_wfrcfinal_backup_<timestamp>.csv'. As written, a fresh
# download is therefore never the file loaded here - confirm whether that is intended.
backup_prefix = 'forecastsegments_backup_'

# Candidate backups; their names end in a YYYYMMDD_HHMMSS timestamp, so a descending name sort puts the newest first
all_files = [
    name for name in os.listdir(backup_dir)
    if name.startswith(backup_prefix) and name.endswith('.csv')
]
sorted_files = sorted(all_files, reverse=True)

if not sorted_files:
    # nothing to carry over; downstream code checks dfLatest.empty
    dfLatest = pd.DataFrame()
    print("No backup files found in the directory.")
else:
    latest_file = os.path.join(backup_dir, sorted_files[0])
    dfLatest = pd.read_csv(latest_file, low_memory=False)

    # collapse duplicated SEGIDs to one row each
    # (groupby().first() takes, per column, the first non-null value within the SEGID group)
    dfLatest = dfLatest.groupby('SEGID').first().reset_index()

    print(f"Loaded {latest_file} into a DataFrame")

    # optionally discard the stored flag (FL_) and override (OV_) columns
    for clear_requested, column_prefix in ((bClearFlags, 'FL_'), (bClearOverrides, 'OV_')):
        if clear_requested:
            stale_columns = [col for col in dfLatest.columns if col.startswith(column_prefix)]
            dfLatest = dfLatest.drop(columns=stale_columns)
            print(f"Dropped {column_prefix} columns")

# You can then use dfLatest as your DataFrame


Loaded backup\forecastsegments_backup_20240227_102245.csv into a DataFrame
Dropped FL_ columns


In [10]:
# Load the segment shapefile as a spatially enabled DataFrame.
# No PLANAREA filter is applied: all plan areas are kept.
sdfSegments = pd.DataFrame.spatial.from_featureclass(fnSegments)

# Add forecast area: in a WF-only run it is simply the plan area
if wfonly:
    sdfSegments = sdfSegments.assign(F_AREA=sdfSegments['PLANAREA'])

# select only key columns
sdfSegments = sdfSegments.loc[:, seg_cols]

display(sdfSegments)

  sdfSegments['F_AREA'] = sdfSegments['PLANAREA']


Unnamed: 0,SEGID,CO_FIPS,PLANAREA,SUBAREAID,F_AREA,SHAPE
0,0006_141.0,49.0,MAG,1.0,MAG,"{""paths"": [[[405824.11000000034, 4423860.33], ..."
1,0006_146.9,49.0,MAG,1.0,MAG,"{""paths"": [[[413442.55030000024, 4422753.7282]..."
2,0006_149.9,49.0,MAG,1.0,MAG,"{""paths"": [[[418330.7999999998, 4422866], [418..."
3,0006_150.6,49.0,MAG,1.0,MAG,"{""paths"": [[[419421.8803000003, 4422872.2963],..."
4,0006_152.6,49.0,MAG,1.0,MAG,"{""paths"": [[[422596.89969999995, 4422889.2949]..."
...,...,...,...,...,...,...
4975,WFRC_8469,35.0,WFRC,1.0,WFRC,"{""paths"": [[[407213.3742000004, 4504030.9891],..."
4976,WFRC_8470,35.0,WFRC,1.0,WFRC,"{""paths"": [[[407723.2000000002, 4503422.753000..."
4977,WFRC_8471,35.0,WFRC,1.0,WFRC,"{""paths"": [[[415734.59509999957, 4481608.6051]..."
4978,WFRC_8472,35.0,WFRC,1.0,WFRC,"{""paths"": [[[424308.5, 4489341.199999999], [42..."


In [11]:
# Pivot the daily model volumes (DY_VOL) from one row per segment and year
# to one row per segment with a DYVOL<year> column for each year.
dfModVolAdj = pd.read_csv('intermediate/model-forecasts.csv')

# year labels: text form of YEAR with any ".0" removed, prefixed with 'DYVOL'
year_text = dfModVolAdj['YEAR'].astype(str)
dfModVolAdj['YEAR'] = 'DYVOL' + year_text.str.replace(".0", "", regex=False)

dfModVolAdj_pvDYVOL = dfModVolAdj.pivot_table(
    index=['SEGID','SUBAREAID','FAC_WDAVG','FAC_SPR','FAC_FAL','FAC_SPRFAL','aadtAdjFactor'],
    columns='YEAR',
    values='DY_VOL',
).reset_index()

display(dfModVolAdj_pvDYVOL)


YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,DYVOL2032,DYVOL2042,DYVOL2050
0,0006_141.0,1.0,0.9840,1.0276,1.0316,1.02960,-32.0,1524.0,1655.0,1737.0,1851.0,2094.0,2361.0
1,0006_146.9,1.0,0.9840,1.0276,1.0316,1.02960,-36.0,1528.0,1661.0,1747.0,1858.0,2098.0,2365.0
2,0006_149.9,1.0,0.9840,1.0276,1.0316,1.02960,857.0,1558.4,1680.1,2049.6,2056.3,2609.6,5174.6
3,0006_150.6,1.0,0.9840,1.0276,1.0316,1.02960,664.0,1748.2,1867.2,2241.2,2254.0,2847.4,5398.4
4,0006_152.6,1.0,0.9840,1.0276,1.0316,1.02960,-160.0,2535.8,2634.8,3002.5,3043.1,3649.3,6000.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4576,WFRC_8465,1.0,1.0924,1.0104,1.0243,1.01735,0.0,,,,44066.0,46952.8,46751.7
4577,WFRC_8466,1.0,1.0946,1.0117,1.0095,1.01060,0.0,9082.2,9055.3,10036.0,11066.0,13533.5,15608.7
4578,WFRC_8467,1.0,1.0924,1.0104,1.0243,1.01735,0.0,,17522.8,21032.6,22212.8,25093.4,26571.9
4579,WFRC_8471,1.0,1.0924,1.0104,1.0243,1.01735,0.0,,,,15006.8,18175.7,20343.9


In [12]:
# Build the wide model-forecast table: one row per segment with the DYVOL<year> columns
# (from the previous cell) plus M<year> (modAadt) and MF<year> (modForecast) columns.

# columns that identify a segment row in every pivot and merge below
pivot_keys = ['SEGID','SUBAREAID','FAC_WDAVG','FAC_SPR','FAC_FAL','FAC_SPRFAL','aadtAdjFactor']


def _pivot_by_year(dfLong, prefix, value_col):
    """Pivot the long forecast table to one '<prefix><year>' column per year.

    Parameters:
        dfLong: long-format forecast table with a YEAR column, the pivot_keys columns and value_col.
        prefix: text placed in front of the year to form each output column name.
        value_col: name of the column whose values fill the pivoted year columns.

    Returns:
        DataFrame with the pivot_keys columns followed by one column per year label.
        dfLong itself is not modified.
    """
    # ".0" is stripped for every prefix so a YEAR parsed as float (2019.0) still
    # yields 'M2019' / 'MF2019', the names the flag criteria refer to
    year_labels = prefix + dfLong['YEAR'].astype(str).str.replace(".0", "", regex=False)
    dfLabelled = dfLong.assign(YEAR=year_labels)
    return dfLabelled.pivot_table(index=pivot_keys, columns='YEAR', values=value_col).reset_index()


# read in intermediate data once; each pivot works on its own relabelled copy
dfModelForecastsLong = pd.read_csv('intermediate/model-forecasts.csv')

dfModVolAdj_pvModAadt    = _pivot_by_year(dfModelForecastsLong, 'M' , 'modAadt'    )
dfModVolAdj_pvModAadtAdj = _pivot_by_year(dfModelForecastsLong, 'MF', 'modForecast')

# M<year> + MF<year>
dfModVolAdj_pv1 = dfModVolAdj_pvModAadt.merge(dfModVolAdj_pvModAadtAdj, on=pivot_keys)

# DYVOL<year> + M<year> + MF<year>
dfModVolAdj = dfModVolAdj_pvDYVOL.merge(dfModVolAdj_pv1, on=pivot_keys)
display(dfModVolAdj)


YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,M2028,M2032,M2042,M2050,MF2019,MF2023,MF2028,MF2032,MF2042,MF2050
0,0006_141.0,1.0,0.9840,1.0276,1.0316,1.02960,-32.0,1524.0,1655.0,1737.0,...,1765.0,1881.0,2128.0,2399.0,1500.0,1600.0,1700.0,1800.0,2100.0,2400.0
1,0006_146.9,1.0,0.9840,1.0276,1.0316,1.02960,-36.0,1528.0,1661.0,1747.0,...,1775.0,1888.0,2132.0,2403.0,1500.0,1700.0,1700.0,1900.0,2100.0,2400.0
2,0006_149.9,1.0,0.9840,1.0276,1.0316,1.02960,857.0,1558.4,1680.1,2049.6,...,2083.0,2090.0,2652.0,5259.0,2400.0,2600.0,2900.0,2900.0,3500.0,6100.0
3,0006_150.6,1.0,0.9840,1.0276,1.0316,1.02960,664.0,1748.2,1867.2,2241.2,...,2278.0,2291.0,2894.0,5486.0,2400.0,2600.0,2900.0,3000.0,3600.0,6200.0
4,0006_152.6,1.0,0.9840,1.0276,1.0316,1.02960,-160.0,2535.8,2634.8,3002.5,...,3051.0,3093.0,3709.0,6098.0,2400.0,2500.0,2900.0,2900.0,3500.0,5900.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4576,WFRC_8465,1.0,1.0924,1.0104,1.0243,1.01735,0.0,,,,...,,40339.0,42981.0,42797.0,,,,40500.0,43000.0,43000.0
4577,WFRC_8466,1.0,1.0946,1.0117,1.0095,1.01060,0.0,9082.2,9055.3,10036.0,...,9169.0,10110.0,12364.0,14260.0,8300.0,8300.0,9200.0,10000.0,12500.0,14500.0
4578,WFRC_8467,1.0,1.0924,1.0104,1.0243,1.01735,0.0,,17522.8,21032.6,...,19254.0,20334.0,22971.0,24324.0,,16000.0,19500.0,20500.0,23000.0,24500.0
4579,WFRC_8471,1.0,1.0924,1.0104,1.0243,1.01735,0.0,,,,...,,13737.0,16638.0,18623.0,,,,13500.0,16500.0,18500.0


In [13]:
# spot check: forecast rows belonging to SUBAREAID 4
dfModVolAdj.loc[dfModVolAdj['SUBAREAID'].eq(4)]

YEAR,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,M2028,M2032,M2042,M2050,MF2019,MF2023,MF2028,MF2032,MF2042,MF2050


In [14]:
# Join forecasts to segments, carry over reviewer edits from the latest backup, and evaluate the flags.
# Result: sdfSegmentsWData, one row per segment.

# join segment data and forecast data (how='right' keeps every segment, even one without model forecasts)
_sdf = dfModVolAdj.merge(sdfSegments, on=['SEGID','SUBAREAID'], how='right')

# segments without a model forecast get a forecast volume of 0
# (assigned back instead of filled in place on a column selection, which pandas Copy-on-Write does not propagate)
forecast_cols = ['MF2019','MF2023','MF2028','MF2032','MF2042','MF2050']
_sdf[forecast_cols] = _sdf[forecast_cols].fillna(0)

# add columns for Adjustments and notes with their defaults; backup values replace them below where available
adjustment_cols = ['ADJ2019','ADJ2023','ADJ2028','ADJ2032','ADJ2042','ADJ2050','ADJHIST']
note_cols = ['NOTES','NOTES_FURREV','NOTES_SEG']
for col in adjustment_cols:
    _sdf[col] = 0
for col in note_cols:
    _sdf[col] = ""

# replace with latest file
if dfLatest.empty:
    print("The latest dataframe is empty.")

    # nothing to carry over: continue with the defaults (previously _sdfWithLatest was left undefined here)
    _sdfWithLatest = _sdf.copy()
else:
    print("The latest dataframe is not empty.")

    # Specify columns to include from latest download (FLAGS WILL NOT BE INCLUDED WHEN FILTERED PRIOR TO THIS STEP)
    columns_to_include = [
        col for col in dfLatest.columns
        if col.startswith(('ADJ', 'FL_', 'OV_')) or col in ('SEGID', 'NOTES', 'NOTES_FURREV', 'NOTES_SEG')
    ]
    dfLatest_filtered = dfLatest[columns_to_include]

    # Where a column exists on both sides the backup's version wins: the current one is suffixed and dropped
    _sdfWithLatest = _sdf.merge(dfLatest_filtered, on='SEGID', how='left', suffixes=('_delete', ''))
    _sdfWithLatest = _sdfWithLatest.drop(columns=[col for col in _sdfWithLatest.columns if col.endswith('_delete')])

# Segments missing from the backup come out of the left merge with NaN adjustments; restore the
# default of 0 so their flag criteria are evaluated on real numbers rather than silently yielding False
merged_adjustment_cols = [col for col in _sdfWithLatest.columns if col.startswith('ADJ')]
_sdfWithLatest[merged_adjustment_cols] = _sdfWithLatest[merged_adjustment_cols].fillna(0)

# Normalize the note columns to stripped text; a missing note or a stored '0' both mean "no note"
for col in note_cols:
    notes = _sdfWithLatest[col].fillna('0').astype(str)
    notes = notes.mask(notes == '0', '')
    _sdfWithLatest[col] = notes.str.strip()

# add flag columns
for flag_name, flag_criteria in zip(dfFlags['flagName'], dfFlags['flagCriteria']):
    # turn each [COLUMN] placeholder of the criteria into a column lookup on _sdfWithLatest
    criteria_expression = flag_criteria.replace("[", "_sdfWithLatest['").replace("]", "']")
    print("_sdfWithLatest['" + flag_name + "'] = " + criteria_expression)

    # evaluate the criteria with only the segment table in scope
    # (the criteria come from dfFlags defined in this notebook; do not feed external text through eval)
    _sdfWithLatest[flag_name] = eval(criteria_expression, {}, {'_sdfWithLatest': _sdfWithLatest})

    override_name = flag_name.replace('FL_', 'OV_')
    if bClearOverrides:
        display('flags overwritten')
        # add override column
        _sdfWithLatest[override_name] = 0
    elif override_name not in _sdfWithLatest.columns:
        # no stored override for this flag (e.g. no backup was loaded): start it at 0 so the column exists
        _sdfWithLatest[override_name] = 0

# remaining gaps (e.g. segments without model volumes) become 0; done after the flags are evaluated
_sdfWithLatest.fillna(0,inplace=True)

sdfSegmentsWData = _sdfWithLatest
sdfSegmentsWData

The latest dataframe is not empty.
_sdfWithLatest['FL_REV'] = (_sdfWithLatest['NOTES']=='') & (_sdfWithLatest['NOTES_FURREV']=='')
_sdfWithLatest['FL_LTPRV'] = ((_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])<(_sdfWithLatest['M2019']+_sdfWithLatest['aadtAdjFactor'])) | ((_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])<(_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])) | ((_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])<(_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])) | ((_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042'])<(_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])) | ((_sdfWithLatest['MF2050']+_sdfWithLatest['ADJ2050'])<(_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042']))
_sdfWithLatest['FL_ZERO'] = ((_sdfWithLatest['MF2023']+_sdfWithLatest['ADJ2023'])<=0) | ((_sdfWithLatest['MF2028']+_sdfWithLatest['ADJ2028'])<=0) | ((_sdfWithLatest['MF2032']+_sdfWithLatest['ADJ2032'])<=0) | ((_sdfWithLatest['MF2042']+_sdfWithLatest['ADJ2042'])<=0) | ((_sdfWithLatest['M

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,OV_SEG,NOTES_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG
0,0006_141.0,1.0,0.9840,1.0276,1.0316,1.02960,-32.0,1524.0,1655.0,1737.0,...,0.0,,False,False,False,False,False,False,False,False
1,0006_146.9,1.0,0.9840,1.0276,1.0316,1.02960,-36.0,1528.0,1661.0,1747.0,...,0.0,,False,False,False,False,False,False,False,False
2,0006_149.9,1.0,0.9840,1.0276,1.0316,1.02960,857.0,1558.4,1680.1,2049.6,...,0.0,,False,False,False,False,False,False,False,False
3,0006_150.6,1.0,0.9840,1.0276,1.0316,1.02960,664.0,1748.2,1867.2,2241.2,...,0.0,,False,False,False,False,False,False,False,False
4,0006_152.6,1.0,0.9840,1.0276,1.0316,1.02960,-160.0,2535.8,2634.8,3002.5,...,0.0,,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4975,WFRC_8469,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,0.0,,False,False,True,False,False,False,False,False
4976,WFRC_8470,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,0.0,,False,False,True,False,False,False,False,False
4977,WFRC_8471,1.0,1.0924,1.0104,1.0243,1.01735,0.0,0.0,0.0,0.0,...,0.0,,False,False,True,False,False,False,False,False
4978,WFRC_8472,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,0.0,,False,False,True,False,False,False,False,False


In [15]:
# spot check: note columns next to the further-review flag and its override
sdfSegmentsWData.loc[:, ['SEGID','NOTES_FURREV','NOTES_SEG','FL_FURREV','OV_FURREV']]

Unnamed: 0,SEGID,NOTES_FURREV,NOTES_SEG,FL_FURREV,OV_FURREV
0,0006_141.0,,,False,0.0
1,0006_146.9,,,False,0.0
2,0006_149.9,,,False,0.0
3,0006_150.6,,,False,0.0
4,0006_152.6,,,False,0.0
...,...,...,...,...,...
4975,WFRC_8469,,,False,0.0
4976,WFRC_8470,,,False,0.0
4977,WFRC_8471,,,False,0.0
4978,WFRC_8472,,,False,0.0


In [16]:
# Add CO_NAME by looking up each segment's CO_FIPS code in county_mapping.
# Codes missing from the mapping get None. Mapping the single CO_FIPS column
# avoids building a full row object per segment as a row-wise apply does.
sdfSegmentsWData['CO_NAME'] = sdfSegmentsWData['CO_FIPS'].map(
    lambda fips: county_mapping.get(fips, {}).get('CO_NAME')
)
sdfSegmentsWData

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,NOTES_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME
0,0006_141.0,1.0,0.9840,1.0276,1.0316,1.02960,-32.0,1524.0,1655.0,1737.0,...,,False,False,False,False,False,False,False,False,Utah
1,0006_146.9,1.0,0.9840,1.0276,1.0316,1.02960,-36.0,1528.0,1661.0,1747.0,...,,False,False,False,False,False,False,False,False,Utah
2,0006_149.9,1.0,0.9840,1.0276,1.0316,1.02960,857.0,1558.4,1680.1,2049.6,...,,False,False,False,False,False,False,False,False,Utah
3,0006_150.6,1.0,0.9840,1.0276,1.0316,1.02960,664.0,1748.2,1867.2,2241.2,...,,False,False,False,False,False,False,False,False,Utah
4,0006_152.6,1.0,0.9840,1.0276,1.0316,1.02960,-160.0,2535.8,2634.8,3002.5,...,,False,False,False,False,False,False,False,False,Utah
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4975,WFRC_8469,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,,False,False,True,False,False,False,False,False,Salt Lake
4976,WFRC_8470,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,,False,False,True,False,False,False,False,False,Salt Lake
4977,WFRC_8471,1.0,1.0924,1.0104,1.0243,1.01735,0.0,0.0,0.0,0.0,...,,False,False,True,False,False,False,False,False,Salt Lake
4978,WFRC_8472,1.0,0.0000,0.0000,0.0000,0.00000,0.0,0.0,0.0,0.0,...,,False,False,True,False,False,False,False,False,Salt Lake


In [17]:
# spot check: look up segment 0056_060.6
sdfSegmentsWData.loc[sdfSegmentsWData['SEGID'].eq('0056_060.6')]

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,NOTES_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME


In [18]:
# export: write the segment table to the results geodatabase and a timestamped copy to the backup geodatabase
# (datetime must already have been imported by an earlier cell)
from arcgis.features import GeoAccessor

if not bExportFeatureClass:
    print ('not exported')
else:
    # Define the geodatabase paths
    gdb_path        = r"results\ForecastsWFRCFinal.gdb"
    gdb_path_backup = r"backup\ForecastsWFRCFinal_backup.gdb"

    # Create either geodatabase if it does not exist yet
    for gdb, gdb_folder, gdb_name in (
        (gdb_path       , r"results", "ForecastsWFRCFinal.gdb"       ),
        (gdb_path_backup, r"backup" , "ForecastsWFRCFinal_backup.gdb"),
    ):
        if not arcpy.Exists(gdb):
            arcpy.CreateFileGDB_management(gdb_folder, gdb_name)

    # Feature class names: fixed name for the results copy, timestamped name for the backup copy
    feature_class_name = "ForecastSegments"
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    feature_class_name_with_timestamp = f"ForecastSegments_{timestamp}"

    # backup copy (timestamped)
    backup_location = f"{gdb_path_backup}\\{feature_class_name_with_timestamp}"
    sdfSegmentsWData.spatial.to_featureclass(location=backup_location, sanitize_columns=False)
    print(f"Feature class {feature_class_name_with_timestamp} created in {gdb_path_backup}")

    # results copy (fixed name)
    results_location = f"{gdb_path}\\{feature_class_name}"
    sdfSegmentsWData.spatial.to_featureclass(location=results_location, sanitize_columns=False)
    print(f"Feature class {feature_class_name} created in {gdb_path}")


Feature class ForecastSegments_20240227_175725 created in backup\ForecastsWFRCFinal_backup.gdb
Feature class ForecastSegments created in results\ForecastsWFRCFinal.gdb


In [19]:
# spot check: look up segment 1438_000.9
sdfSegmentsWData.loc[sdfSegmentsWData['SEGID'].eq('1438_000.9')]

Unnamed: 0,SEGID,SUBAREAID,FAC_WDAVG,FAC_SPR,FAC_FAL,FAC_SPRFAL,aadtAdjFactor,DYVOL2019,DYVOL2023,DYVOL2028,...,NOTES_SEG,FL_REV,FL_LTPRV,FL_ZERO,FL_HIADJ,FL_50LT19,FL_503X19,FL_FURREV,FL_SEG,CO_NAME
1550,1438_000.9,1.0,1.0924,1.0104,1.0243,1.01735,3431.0,5546.1,5767.5,6566.3,...,,False,True,False,False,False,False,False,False,Davis


In [20]:
# SEGIDs that occur on more than one row (expected: none)
is_repeated = sdfSegmentsWData['SEGID'].duplicated(keep=False)
duplicated_segid = sdfSegmentsWData.loc[is_repeated, 'SEGID'].unique()
duplicated_segid

array([], dtype=object)

In [21]:
# segments without a forecast area (expected: none)
sdfSegmentsWData.loc[sdfSegmentsWData['F_AREA'].isna(), ['SEGID','CO_FIPS','SUBAREAID','F_AREA']]


Unnamed: 0,SEGID,CO_FIPS,SUBAREAID,F_AREA


In [22]:
# export segment JSON: identifying attributes only, one record per segment
segment_json_cols = ['SEGID','PLANAREA','SUBAREAID','CO_NAME','F_AREA']
jsonSegment = sdfSegmentsWData[segment_json_cols]

jsonSegment.to_json('_site/data/segments.json', orient='records')

In [23]:
# number of segments per forecast area, county and subarea
(
    jsonSegment
    .groupby(['F_AREA','CO_NAME','SUBAREAID'])
    .agg(numSegs=('SEGID','count'))
    .reset_index()
)

Unnamed: 0,F_AREA,CO_NAME,SUBAREAID,numSegs
0,MAG,Juab,1.0,1
1,MAG,Utah,1.0,1736
2,WFRC,Box Elder,1.0,121
3,WFRC,Davis,1.0,524
4,WFRC,Salt Lake,1.0,2023
5,WFRC,Weber,1.0,575


In [24]:
# export shapefile as well to share with UDOT
from datetime import datetime

# the export time goes into the file name
export_time = datetime.now()
timestamp = export_time.strftime('%Y%m%d_%H%M%S')
filename = f"results/ForecastSegments_{timestamp}.shp"

sdfSegmentsWData.spatial.to_featureclass(
    location=filename,
    sanitize_columns=False,
)


'e:\\GitHub\\Traffic-Volume-Forecasts\\results\\ForecastSegments_20240227_175732.shp'

In [25]:
# backup rows that carry a segment note
dfLatest_filtered.loc[dfLatest_filtered['NOTES_SEG'].notna()]

Unnamed: 0,SEGID,ADJHIST,ADJ2019,ADJ2023,ADJ2028,ADJ2032,ADJ2042,ADJ2050,OV_REV,OV_LTPRV,OV_ZERO,OV_HIADJ,OV_50LT19,OV_503X19,OV_FURREV,NOTES,NOTES_FURREV,OV_SEG,NOTES_SEG
55,0006_221.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0,Add link to TDM network
437,0021_075.9,0,0,-50,-250,-350,-600,-800,0,0,0,0,0,0,0,,kept it to 300 in the future because the centr...,0,centroid connector for the TAZ shouldn't be lo...
440,0021_077.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0,This segment is not coded in the model link.
454,0023_000.0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,Reviewed\nIt Appears correct,2042 and 2050 may be too conservative,0,0023_000.0
455,0023_001.6,0,0,0,100,0,200,200,0,1,0,0,0,0,0,Reviewed\nTraffic count less than previous for...,,0,0023_001.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8307,MAG_6219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,new road in 2050,,0,Segments in this area need looking at
8362,UDOT_7019,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0,in tdm?
8380,UDOT_7041,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0,why?
8381,UDOT_7042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,0,not in tdm?
