# Input Data, Variables, and Parameters

In [27]:
import numpy as np
import os
import pandas as pd
import arcpy

# Import the required ArcGIS API for Python modules
import arcgis
from arcgis.gis import GIS
# Create a GIS object with no arguments (no URL or credentials are passed here).
# NOTE(review): presumably an anonymous ArcGIS Online session — confirm.
gis = arcgis.GIS()
from arcgis.geoanalytics import manage_data

from IPython.display import display, Markdown

# Relative locations of the main input files.
# The two shapefile paths are later joined onto dirWork; the WFRC master
# network path is later joined onto dirInput.
strSegments          = r"input\master\Segments-WF-v901-2024-02-20.shp"
strGeographies       = r"arcgis\SegmentsGeographyUpdate\Geographies\FactorGeographies_20210824.shp"
strMasterNetworkWFRC = r"_wfrc\MasterNet_v901_2024-02-20.csv"

In [28]:
# Everything is resolved relative to the current working directory of the kernel
dirWork = os.getcwd()

# Project sub-folders that sit directly under the working directory
dirInput, dirIntermediate, dirResults, dirParams = (
    os.path.join(dirWork, _subFolder)
    for _subFolder in (r'input', r'intermediate', r'results', r'params')
)

# Web-app data is written inside the results folder
dirWebApp = os.path.join(dirResults, r'webapp_data')

#for the geography shapefile, what are all the column names for the various factor geographies
#full list: ['WDFACGEO','SSNFACGEO','COFACGEO']
lGeoFieldNames = ['COFACGEO']  # for update only get combined factors

In [29]:
# File geodatabase (inside dirIntermediate) that holds the intermediate feature classes
ProcessGDB = "process.gdb"

#name of new data features
# Each path component is handed to os.path.join separately. The previous form,
# ProcessGDB + "\Name", relied on "\T", "\A", "\S" and "\G" inside ordinary
# (non-raw) string literals; those are invalid escape sequences that Python
# already warns about. On Windows the resulting paths are unchanged.
fcTAZAreaType      = os.path.join(dirIntermediate, ProcessGDB, "TAZAreaType")
fcAreaType         = os.path.join(dirIntermediate, ProcessGDB, "AreaType")
fcAreaTypeSegment  = os.path.join(dirIntermediate, ProcessGDB, "AreaTypeSegmentIntersect")
fcSegmentsWithAADT = os.path.join(dirIntermediate, ProcessGDB, "SegmentsWithAADT")
fcGeographies      = os.path.join(dirIntermediate, ProcessGDB, "Geographies")
#changed to being programmatically defined
#fcSegmentsGeographies = os.path.join(dirIntermediate, ProcessGDB, "SegmentsGeographiesIntersect")

def deleteIfExists(obj):
    """Delete `obj` through arcpy when arcpy reports that it exists.

    Nothing happens (and nothing is raised by this function itself) when
    `arcpy.Exists(obj)` is falsy. Always returns None.
    """
    if not arcpy.Exists(obj):
        return
    arcpy.Delete_management(obj)

# Ensure the processing geodatabase is available: report it when it is already
# there, otherwise create it inside dirIntermediate.
print("Checking if " + ProcessGDB + " exists...")
if arcpy.Exists(os.path.join(dirIntermediate, ProcessGDB)):
    print(ProcessGDB + " exists...")
else:
    print("Creating " + ProcessGDB + "...")
    arcpy.management.CreateFileGDB(dirIntermediate, ProcessGDB)

Checking if process.gdb exists...
process.gdb exists...


In [30]:
# Load the segment shapefile through the DataFrame `spatial` accessor
sdfSegmentsAll = pd.DataFrame.spatial.from_featureclass(
    os.path.join(dirWork, strSegments)
)

# One-column frame with the SEGID of every segment row
dfAllSegIDs = sdfSegmentsAll[['SEGID']]

# Load the factor geography shapefile the same way
sdfGeographies = pd.DataFrame.spatial.from_featureclass(
    os.path.join(dirWork, strGeographies)
)

In [31]:
#CHECK FOR DUPLICATE SEGIDs
# Count rows per SEGID, then show only the SEGIDs that occur more than once
_df = (
    sdfSegmentsAll
    .groupby(['SEGID'], as_index=False)
    .agg(COUNT=('SEGID', 'count'))
)
_df.loc[_df['COUNT'] > 1]

Unnamed: 0,SEGID,COUNT
62,0015_266.5,4
1542,1446_000.0,4
2898,2831_000.0,4
3968,MAG_6044,4
4133,MAG_6215,4
4180,MAG_6263,4
4232,MAG_6315,4
4236,MAG_6319,4


In [32]:
def _paramPath(fileName):
    """Return the full path of `fileName` inside the params directory."""
    return os.path.join(dirParams, fileName)

# Override tables: only the file names are resolved here, so that reading them
# can be an iterative process later in the notebook.
fnATOverride      = _paramPath(r'area_type_overrides_segments.csv')     # area type overrides
fnFTGroupOverride = _paramPath(r'functional_type_group_overrides.csv')  # functional type group overrides

#read parameter tables
#csv: , parse_dates=['DATEFROM','DATETO']
prmVolumeGroups = pd.read_csv(_paramPath(r'volume_groups.csv'))
prmFacGrpVolClassToVolGrp = pd.read_csv(_paramPath(r'facgroup_volume_class_to_volume_group.csv'))
prmStationGrpToFacGrpFields = pd.read_csv(_paramPath(r'station_group_to_facgroup.csv'))
prmStationGrpToFacGrpFieldsFacGeoOverride = pd.read_csv(
    _paramPath(r'station_group_to_facgroup_facgeo_overrides.csv'),
    dtype={'ROUTE': str},  # read ROUTE as text rather than letting pandas infer a type
)
prmFacATGrpToAreaType = pd.read_csv(_paramPath(r'area_type_group_to_area_type.csv'))

# filter by groupings we're only going to use in this update: lGeoFieldNames
# .copy() turns each filtered result into an independent DataFrame. Columns are
# added to prmStationGrpToFacGrpFields further down; without the copy those
# assignments target a slice of the table read from CSV and pandas emits
# SettingWithCopyWarning.
prmStationGrpToFacGrpFields = prmStationGrpToFacGrpFields[
    prmStationGrpToFacGrpFields['FACGEOGROUP'].isin(lGeoFieldNames)
].copy()
prmStationGrpToFacGrpFieldsFacGeoOverride = prmStationGrpToFacGrpFieldsFacGeoOverride[
    prmStationGrpToFacGrpFieldsFacGeoOverride['FACGEOGROUP'].isin(lGeoFieldNames)
].copy()

In [33]:
# read USTM taz / urbanization / network
# TAZ polygons come from a shapefile; urbanization and the master network are CSV tables
sdfTAZ = pd.DataFrame.spatial.from_featureclass(
    os.path.join(dirInput, r"USTM_v3.0 - 2023-08-17_DRAFT_Data\BY_2019\TAZ.shp")
)
dfUrbanization = pd.read_csv(
    os.path.join(dirInput, r"USTM_v3.0 - 2023-08-17_DRAFT_Data\BY_2019\Urbanization_BY2019.csv"),
    low_memory=False,
)
dfUSTMMasterNetwork = pd.read_csv(
    os.path.join(dirInput, r"USTM_v3.0 - 2023-08-17_DRAFT_Data\USTMv3_MasterNet_Snapped_20230817_Link.csv"),
    low_memory=False,
)

# read WFRC network
dfWFRCMasterNetwork = pd.read_csv(
    os.path.join(dirInput, strMasterNetworkWFRC),
    low_memory=False,
)

In [34]:
#read processed data
# CCS factors for all groupings, read from the intermediate folder
dfCCSFactors = pd.read_csv(
    os.path.join(dirIntermediate, r'CCS_Factors_AllGroupings.csv')
)

# Prepare Factor Group Lookup Table

In [35]:
# Show the parameter table that maps each (FACGRP, FACVOLCLASS) pair to a VOLUMEGROUP
prmFacGrpVolClassToVolGrp

Unnamed: 0,FACGRP,FACVOLCLASS,VOLUMEGROUP
0,Southeast,1-LowVolume,SERuralLow
1,Southeast,2-MidVolume,SERuralMid
2,StatewideArterialExpressway,1-LowVolume,UTRuralLow
3,StatewideArterialExpressway,2-MidVolume,UTRuralMid
4,StatewideArterialExpressway,3-HighVolume,UTRuralHigh


In [36]:
# Show the volume groups with their VOLUMEFROM / VOLUMETO bounds
prmVolumeGroups

Unnamed: 0,VOLUMEGROUP,VOLUMEFROM,VOLUMETO
0,All,0,1000000
1,SERuralLow,0,1500
2,SERuralMid,1500,1000000
3,UTRuralLow,0,2500
4,UTRuralMid,2500,12500
5,UTRuralHigh,12500,1000000


In [37]:
# Show the station-group table after it was filtered to the FACGEOGROUP values in lGeoFieldNames
prmStationGrpToFacGrpFields

Unnamed: 0,FACGEOGROUP,STATIONGROUP,FACGRP,FACATGROUP,FACVOLCLASS
35,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes
36,COFACGEO,COB,I80SR36SR154Freeway,0-AllAreaTypes,0-AllVolumes
37,COFACGEO,COC,I80SR154SR186Freeway,0-AllAreaTypes,0-AllVolumes
38,COFACGEO,COD,I80SR186US40Freeway,0-AllAreaTypes,0-AllVolumes
39,COFACGEO,COY,I80US40WyoFreeway,0-AllAreaTypes,0-AllVolumes
...,...,...,...,...,...
67,COFACGEO,CO5,I15Freeway,2-Transition,0-AllVolumes
68,COFACGEO,CO6,I15Freeway,345-Suburban/Urban/CBD,0-AllVolumes
69,COFACGEO,CO8,I215Freeway,0-AllAreaTypes,0-AllVolumes
70,COFACGEO,CO9,I70Freeway,0-AllAreaTypes,0-AllVolumes


In [38]:
# Work on an independent copy so the column assignments below never write into
# a filtered slice of another DataFrame (avoids SettingWithCopyWarning).
prmStationGrpToFacGrpFields = prmStationGrpToFacGrpFields.copy()

#default
prmStationGrpToFacGrpFields['FTCOMBOGROUP'] = 'ArterialExpressway'
prmStationGrpToFacGrpFields['FACGEO'] = ''

#split up FACGRP value into two fields:
#  FTCOMBOGROUP = the functional-type name found inside FACGRP
#  FACGEO       = FACGRP with that functional-type name removed
_facGrp        = prmStationGrpToFacGrpFields['FACGRP']
_hasArterial   = _facGrp.str.contains('Arterial')
_hasExpressway = _facGrp.str.contains('Expressway')

# Rules are applied top to bottom, so a later match overrides an earlier one.
# Boolean masks are inverted with `~` (element-wise NOT) instead of unary minus.
# 'ExpresswayFreeway' is listed last: it is one of the combined groups that the
# FTCOMBOGROUP -> FTGROUP lookup expects, but previously no rule produced it
# (such a FACGRP ended up as 'Expressway' with 'Freeway' left in FACGEO).
_ftComboRules = [
    ('ArterialExpressway', _facGrp.str.contains('ArterialExpressway')),
    ('Freeway',            _facGrp.str.contains('Freeway')),
    ('Arterial',           _hasArterial & ~_hasExpressway),
    ('Expressway',         _hasExpressway & ~_hasArterial),
    ('ExpresswayFreeway',  _facGrp.str.contains('ExpresswayFreeway')),
]
for _ftCombo, _mask in _ftComboRules:
    prmStationGrpToFacGrpFields.loc[_mask, 'FTCOMBOGROUP'] = _ftCombo
    # the right-hand side is a full-length Series; .loc aligns it on the index
    prmStationGrpToFacGrpFields.loc[_mask, 'FACGEO'] = _facGrp.str.replace(_ftCombo, '', regex=False)

# Rows where nothing was stripped out (or nothing is left) use the whole FACGRP as geography
prmStationGrpToFacGrpFields.loc[prmStationGrpToFacGrpFields['FACGEO']=='','FACGEO'] = prmStationGrpToFacGrpFields['FACGRP']

prmStationGrpToFacGrpFields

Unnamed: 0,FACGEOGROUP,STATIONGROUP,FACGRP,FACATGROUP,FACVOLCLASS,FTCOMBOGROUP,FACGEO
35,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80NevSR36
36,COFACGEO,COB,I80SR36SR154Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80SR36SR154
37,COFACGEO,COC,I80SR154SR186Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80SR154SR186
38,COFACGEO,COD,I80SR186US40Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80SR186US40
39,COFACGEO,COY,I80US40WyoFreeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80US40Wyo
...,...,...,...,...,...,...,...
67,COFACGEO,CO5,I15Freeway,2-Transition,0-AllVolumes,Freeway,I15
68,COFACGEO,CO6,I15Freeway,345-Suburban/Urban/CBD,0-AllVolumes,Freeway,I15
69,COFACGEO,CO8,I215Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I215
70,COFACGEO,CO9,I70Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I70


In [39]:
#split up combined FT Groups

# (combined group, member group) pairs; a combined group has one pair per member
_ftComboPairs = [
    ('Arterial',           'Arterial'  ),
    ('Expressway',         'Expressway'),
    ('Freeway',            'Freeway'   ),
    ('ArterialExpressway', 'Arterial'  ),
    ('ArterialExpressway', 'Expressway'),
    ('ExpresswayFreeway',  'Expressway'),
    ('ExpresswayFreeway',  'Freeway'   ),
]
dFTComboToFTGroup = {
    'FTCOMBOGROUP': [combo for combo, member in _ftComboPairs],
    'FTGROUP':      [member for combo, member in _ftComboPairs],
}
dfFTComboToFTGroup = pd.DataFrame(dFTComboToFTGroup)
display(dfFTComboToFTGroup)

# Join on FTCOMBOGROUP: a row whose combined group has two members comes back
# twice, once per FTGROUP
prmStationGrpToFacGrpFields = prmStationGrpToFacGrpFields.merge(
    dfFTComboToFTGroup,
    on='FTCOMBOGROUP',
)
display(prmStationGrpToFacGrpFields)

Unnamed: 0,FTCOMBOGROUP,FTGROUP
0,Arterial,Arterial
1,Expressway,Expressway
2,Freeway,Freeway
3,ArterialExpressway,Arterial
4,ArterialExpressway,Expressway
5,ExpresswayFreeway,Expressway
6,ExpresswayFreeway,Freeway


Unnamed: 0,FACGEOGROUP,STATIONGROUP,FACGRP,FACATGROUP,FACVOLCLASS,FTCOMBOGROUP,FACGEO,FTGROUP
0,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80NevSR36,Freeway
1,COFACGEO,COB,I80SR36SR154Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80SR36SR154,Freeway
2,COFACGEO,COC,I80SR154SR186Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80SR154SR186,Freeway
3,COFACGEO,COD,I80SR186US40Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80SR186US40,Freeway
4,COFACGEO,COY,I80US40WyoFreeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80US40Wyo,Freeway
...,...,...,...,...,...,...,...,...
49,COFACGEO,CO2,BigCottonwood,0-AllAreaTypes,0-AllVolumes,ArterialExpressway,BigCottonwood,Expressway
50,COFACGEO,CO3,CedarBreaks,0-AllAreaTypes,0-AllVolumes,ArterialExpressway,CedarBreaks,Arterial
51,COFACGEO,CO3,CedarBreaks,0-AllAreaTypes,0-AllVolumes,ArterialExpressway,CedarBreaks,Expressway
52,COFACGEO,COV,WashingtonArterial,0-AllAreaTypes,0-AllVolumes,Arterial,Washington,Arterial


In [40]:
# Join the area-type lookup on FACATGROUP, adding an AREATYPE column
# (one output row per matching AREATYPE)
prmStationGrpToFacGrpFieldsAT = prmStationGrpToFacGrpFields.merge(
    prmFacATGrpToAreaType,
    how='inner',
    on='FACATGROUP',
)
prmStationGrpToFacGrpFieldsAT

Unnamed: 0,FACGEOGROUP,STATIONGROUP,FACGRP,FACATGROUP,FACVOLCLASS,FTCOMBOGROUP,FACGEO,FTGROUP,AREATYPE
0,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80NevSR36,Freeway,1
1,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80NevSR36,Freeway,2
2,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80NevSR36,Freeway,3
3,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80NevSR36,Freeway,4
4,COFACGEO,COA,I80NevSR36Freeway,0-AllAreaTypes,0-AllVolumes,Freeway,I80NevSR36,Freeway,5
...,...,...,...,...,...,...,...,...,...
209,COFACGEO,COT,StatewideArterialExpressway,3-Suburban,0-AllVolumes,ArterialExpressway,Statewide,Expressway,3
210,COFACGEO,COU,StatewideArterialExpressway,45-Urban/CBD,0-AllVolumes,ArterialExpressway,Statewide,Arterial,4
211,COFACGEO,COU,StatewideArterialExpressway,45-Urban/CBD,0-AllVolumes,ArterialExpressway,Statewide,Arterial,5
212,COFACGEO,COU,StatewideArterialExpressway,45-Urban/CBD,0-AllVolumes,ArterialExpressway,Statewide,Expressway,4


In [41]:
# Look up the volume group of every (FACGRP, FACVOLCLASS); a left join keeps
# rows that have no match
dfFacFieldLookup = prmStationGrpToFacGrpFields.merge(
    prmFacGrpVolClassToVolGrp,
    on=['FACGRP', 'FACVOLCLASS'],
    how='left',
)

# Rows in the "0-AllVolumes" class are assigned the 'All' volume group
_isAllVolumes = dfFacFieldLookup['FACVOLCLASS'] == "0-AllVolumes"
dfFacFieldLookup.loc[_isAllVolumes, 'VOLUMEGROUP'] = 'All'

# Add the volume bounds and the area types, keep the lookup columns, then sort
dfFacFieldLookup = (
    dfFacFieldLookup
    .merge(prmVolumeGroups, on='VOLUMEGROUP')
    .merge(prmFacATGrpToAreaType, on='FACATGROUP')
    [['FACGEO','FTGROUP','AREATYPE','VOLUMEFROM','VOLUMETO','STATIONGROUP','FACGEOGROUP']]
    .sort_values(by=['FACGEO','FTGROUP','AREATYPE','VOLUMEFROM','VOLUMETO','STATIONGROUP'])
)

# Show every row of the lookup, then set the row limit to 10 for the cells that follow
with pd.option_context('display.max_rows', dfFacFieldLookup.shape[0] + 1):
    display(dfFacFieldLookup)
pd.set_option('display.max_rows', 10)

Unnamed: 0,FACGEO,FTGROUP,AREATYPE,VOLUMEFROM,VOLUMETO,STATIONGROUP,FACGEOGROUP
125,AlpineLoop,Arterial,1,0,1000000,CO0,COFACGEO
126,AlpineLoop,Arterial,2,0,1000000,CO0,COFACGEO
127,AlpineLoop,Arterial,3,0,1000000,CO0,COFACGEO
128,AlpineLoop,Arterial,4,0,1000000,CO0,COFACGEO
129,AlpineLoop,Arterial,5,0,1000000,CO0,COFACGEO
130,AlpineLoop,Expressway,1,0,1000000,CO0,COFACGEO
131,AlpineLoop,Expressway,2,0,1000000,CO0,COFACGEO
132,AlpineLoop,Expressway,3,0,1000000,CO0,COFACGEO
133,AlpineLoop,Expressway,4,0,1000000,CO0,COFACGEO
134,AlpineLoop,Expressway,5,0,1000000,CO0,COFACGEO


In [47]:
# Write the lookup table to the results folder. The folder is created first if
# it is missing, because to_csv does not create directories and would fail on
# a fresh checkout without it.
os.makedirs(dirResults, exist_ok=True)
dfFacFieldLookup.to_csv(os.path.join(dirResults,'CCSFactors_GeoAtypeVol_Lookup.csv'))