#                 Initialize

In [1]:
# IMPORTANT!!
# This file contains several process that require different libraries. For some reason they are not all compatible in the python environment that I've been running on.
# So, BigQuery library is not compatible with arcpy which isn't compatible with the chart generating libraries.
# So, here are some toggles that can be used when running and then reloading the kernel or restarting with a different kernel

# REIMPORT DATA OR NOT from Google Biq Query -- IF TRUE MUST USE argicpro-py3-billenv
bReimportFromBQ = False

# GENERATE INTERACTIVE CHARTS
bGenerateInteractiveCharts = False

# PREPARE BASE SHAPEFILES OR NOT
bPrepareBaseShapefiles = False

In [2]:
import pandas as pd
import os
import numpy as np
# from IPython.display import display, Markdown

In [3]:
# directories
working_directory       = os.getcwd()
data_folder             = os.path.join(working_directory, "data"         )
sl_folder               = os.path.join(data_folder, "Streetlight-OD-Data")
intermediate_folder     = os.path.join(working_directory, "intermediate" )
results_folder          = os.path.join(working_directory, "results"      )
webapplayers_folder     = os.path.join(results_folder   , r"webapplayers" )
webapplayersshps_folder = os.path.join(results_folder   , r"webapplayers\shapefiles" )
webapplayerszips_folder = os.path.join(results_folder   , r"webapplayers\shapefiles_zips" )

# input
shp_TAZ__               = os.path.join(r"K:\TDM\0 - ModelDev\2 - Sandbox\v9.0Beta\01 - Input Dev\1_TAZ\Update_TAZ - 2021-11-08 - Update Subarea\0 - USTM_v2.1d - 2021-09-22\USTM_TAZ_2021_09_22.shp")
csvStreetLightTAZ       = os.path.join(data_folder, "StreetLight_TAZ_2019_09_22.csv")

# intermediate files
shp_MDIST               = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_MDIST.shp")
shp_LDIST               = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_LDIST.shp")

shp_TAZ___simple        = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_TAZ___simple.shp")
shp_MDIST_simple        = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_MDIST_simple.shp")
shp_LDIST_simple        = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_LDIST_simple.shp")

shp_TAZ___simple_pnt    = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_TAZ___simple_Pnt.shp")
shp_MDIST_simple_pnt    = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_MDIST_simple_Pnt.shp")
shp_LDIST_simple_pnt    = os.path.join(intermediate_folder, r"shapefiles\USTM_TAZ_2021_09_22_LDIST_simple_Pnt.shp")

# ease of use
daytype0 = '0: All Days (Mo-Su)'
daytype1 = '1: Weekday (Tu-Th)'
daytype2 = '2: Weekend Day (Sa-Su)'
dataper1 = '1. All year'
dataper2 = '2. Sep-Nov'
dataper3 = '3. Dec-Feb'
dataper4 = '4. Mar-May'
dataper5 = '5. Jun-Aug'
daypart0 = '0: All Day (12am-12am)'
daypart1 = '1: Early AM (12am-6am)'
daypart2 = '2: Peak AM (6am-9am)'
daypart3 = '3: Mid-Day (9am-3pm)'
daypart4 = '4: Peak PM (3pm-6pm)'
daypart5 = '5: Late PM (6pm-12am)'

dfDayType = pd.DataFrame({'daytype_code':[       0,       1,       2],
                          'day_type'    :[daytype0,daytype1,daytype2]})
display(dfDayType)

dfDataPer = pd.DataFrame({'dataper_code':[       1,       2,       3,       4,       5],
                          'data_period':[dataper1,dataper2,dataper3,dataper4,dataper5]})
display(dfDataPer)

dfDayPart = pd.DataFrame({'daypart_code':[       0,       1,       2,       3,       4,       5],
                          'day_part'    :[daypart0,daypart1,daypart2,daypart3,daypart4,daypart5]})
display(dfDayPart)

# big data field names
fnCounts = 'o_d_traffic_sample_trip_counts'
fnVolume = 'o_d_traffic_calibrated_trip_volume'

# show numbers with commas
pd.options.display.float_format = '{:,.0f}'.format

Unnamed: 0,daytype_code,day_type
0,0,0: All Days (Mo-Su)
1,1,1: Weekday (Tu-Th)
2,2,2: Weekend Day (Sa-Su)


Unnamed: 0,dataper_code,data_period
0,1,1. All year
1,2,2. Sep-Nov
2,3,3. Dec-Feb
3,4,4. Mar-May
4,5,5. Jun-Aug


Unnamed: 0,daypart_code,day_part
0,0,0: All Day (12am-12am)
1,1,1: Early AM (12am-6am)
2,2,2: Peak AM (6am-9am)
3,3,3: Mid-Day (9am-3pm)
4,4,4: Peak PM (3pm-6pm)
5,5,5: Late PM (6pm-12am)


In [None]:
#read in streetlight taz file and create a WF only dataframe
dfStreetLightTAZ        = pd.read_csv(csvStreetLightTAZ)
dfWFRCTAZtoSLTAZ        = dfStreetLightTAZ[dfStreetLightTAZ['SUBAREAID']==1]

In [4]:
# Special Generators (ID from TDM), WFRC TAZ, Type
dSpecGen = [
    ['ENSIGN'       , 1029, 'WFRC College'                     ,'No' ],
    ['WESTMIN'      , 1263, 'WFRC College'                     ,'No' ],
    ['UOFU_MAIN'    , 1051, 'WFRC College'                     ,'No' ],
    ['UOFU_MED'     , 1007, 'WFRC College'                     ,'No' ],
    ['UOFU_SAND'    , 1908, 'WFRC College'                     ,'Yes'],
    ['USU_BRIG'     ,   82, 'WFRC College'                     ,'Yes'],
    ['USU_KAY'      ,  760, 'WFRC College'                     ,'Yes'],
    ['USU_SL'       , 1577, 'WFRC College'                     ,'Yes'],
    ['WSU_MAIN'     ,  437, 'WFRC College'                     ,'Yes'],
    ['WSU_DAVIS'    ,  693, 'WFRC College'                     ,'No' ],
    ['WSU_FARM'     ,  780, 'WFRC College'                     ,'Yes'],
    ['WSU_WEST'     ,  521, 'WFRC College'                     ,'No' ],
    ['SLCC_MAIN'    , 1580, 'WFRC College'                     ,'Yes'],
    ['SLCC_SC'      , 1231, 'WFRC College'                     ,'No' ],
    ['SLCC_JD'      , 1776, 'WFRC College'                     ,'No' ],
    ['SLCC_MEAD'    , 1491, 'WFRC College'                     ,'No' ],
    ['SLCC_ML'      , 1886, 'WFRC College'                     ,'No' ],
    ['SLCC_LB'      , 1085, 'WFRC College'                     ,'Yes'],
    ['SLCC_AIRP'    ,  979, 'WFRC College'                     ,'No' ],
    ['SLCC_WEST'    ,  959, 'WFRC College'                     ,'No' ],
    ['SLCC_HM'      , 2031, 'WFRC College'                     ,'Yes'],
    ['SLCC_WVC'     , 1356, 'WFRC College'                     ,'Yes'],
    ['BYU'          , 2939, 'MAG College'                      ,'No' ],
    ['USU_OREM'     , 2888, 'MAG College'                      ,'Yes'],
    ['UVU_MAIN'     , 2848, 'MAG College'                      ,'No' ],
    ['UVU_GENEVA'   , 2882, 'MAG College'                      ,'No' ],
    ['UVU_THANKP'   , 2606, 'MAG College'                      ,'No' ],
    ['UVU_VINE'     , 2809, 'MAG College'                      ,'No' ],
    ['UVU_PAYSON'   , 3336, 'MAG College'                      ,'No' ],
    ['Lagoon'       ,  781, 'Special Generator - Trip Table'   ,'No' ],
    ['Airport'      ,  965, 'Special Generator - Trip Table'   ,'No' ],
    ['TempleSquare' , 1035, 'Special Generator - No Trip Table','No' ],
    ['SLC_Library'  , 1147, 'Special Generator - No Trip Table','No' ]
]

dfSpecGen = pd.DataFrame(dSpecGen, columns = ['SpecGen','SA_TAZID','Type','RegenFGDB'])

# export json for webapp
dfSpecGen['value'] = dfSpecGen['SpecGen']
dfSpecGen['label'] = dfSpecGen['SpecGen']
dfSpecGen.to_json(os.path.join(results_folder,'specgen.json'),orient='records')
#j = dfSpecGen.set_index('SpecGen').to_json(orient='index')
#print(j)
#f = open(os.path.join(results_folder,'specgen.json'), "w")
#f.write("[" + j + "]")
#f.close()
#dfSpecGenTOJSON.to_json(,orient='index')

# add TAZ to end of name to specify that it is the TAZ itself and not the special generator
# dfSpecGen['SpecGen'] = dfSpecGen['SpecGen'].astype(str) + " TAZ" 

# join to StreetLight TAZ to get associated SL_COTAZIDs for each TAZ
dfSpecGenWithSLTAZ = pd.DataFrame.merge(dfSpecGen,dfWFRCTAZtoSLTAZ[['SA_TAZID','SL_COTAZID']],on='SA_TAZID')

dSpecGenSL_ManualChange = [
    ['WSU_MAIN'  , '570284_1', 'SubtractFromSG'  ], # subtract southern sf residentail neighborhood
    ['WSU_MAIN'  , '570284_2', 'SubtractFromSG'  ], # subtract northern sf residentail neighborhood
    ['UVU_MAIN'  , '490632_1', 'SubtractFromSG'  ], # subtract southern sf residentail neighborhood 
    ['UVU_GENEVA', '490666_1', 'SubtractFromSG'  ], # subtract southern apartments
    ['UVU_GENEVA', '490666_2', 'SubtractFromSG'  ], # subtract industrial land
    ['UVU_GENEVA', '490666_3', 'SubtractFromSG'  ], # subtract northern apartments
    ['UVU_GENEVA', '490666_4', 'SubtractFromSG'  ], # subtract 
    ['Airport'   , '350042_0', 'SubtractFromDest']  # this zone shouldn't have much travel with airport zones. perhaps taxi-ing passengers
]

# generate lsit for use in SQL script
dfSpecGenList = dfSpecGenWithSLTAZ["SL_COTAZID"].tolist()

Unnamed: 0,SpecGen,SA_TAZID,Type,RegenFGDB,value,label,SL_COTAZID
44,BYU,2939,MAG College,No,BYU,BYU,490723_1
45,BYU,2939,MAG College,No,BYU,BYU,490723_2
46,BYU,2939,MAG College,No,BYU,BYU,490723_3
47,BYU,2939,MAG College,No,BYU,BYU,490723_4
48,BYU,2939,MAG College,No,BYU,BYU,490723_5
49,BYU,2939,MAG College,No,BYU,BYU,490723_6
50,BYU,2939,MAG College,No,BYU,BYU,490723_7
51,BYU,2939,MAG College,No,BYU,BYU,490723_8


In [None]:
dSpecGen_Insights = [
    ['ENSIGN'       , 1029, 'WFRC College'                     ,'No' ],
    ['WESTMIN'      , 1263, 'WFRC College'                     ,'No' ],
    ['UOFU_MAIN'    , 1051, 'WFRC College'                     ,'No' ],
    ['UOFU_MED'     , 1007, 'WFRC College'                     ,'No' ],
    ['UOFU_SAND'    , 1908, 'WFRC College'                     ,'Yes'],
    ['USU_BRIG'     ,   82, 'WFRC College'                     ,'Yes'],
    ['USU_KAY'      ,  760, 'WFRC College'                     ,'Yes'],
    ['USU_SL'       , 1577, 'WFRC College'                     ,'Yes'],
    ['WSU_MAIN'     ,  437, 'WFRC College'                     ,'Yes'],
    ['WSU_DAVIS'    ,  693, 'WFRC College'                     ,'No' ],
    ['WSU_FARM'     ,  780, 'WFRC College'                     ,'Yes'],
    ['WSU_WEST'     ,  521, 'WFRC College'                     ,'No' ],
    ['SLCC_MAIN'    , 1580, 'WFRC College'                     ,'Yes'],
    ['SLCC_SC'      , 1231, 'WFRC College'                     ,'No' ],
    ['SLCC_JD'      , 1776, 'WFRC College'                     ,'No' ],
    ['SLCC_MEAD'    , 1491, 'WFRC College'                     ,'No' ],
    ['SLCC_ML'      , 1886, 'WFRC College'                     ,'No' ],
    ['SLCC_LB'      , 1085, 'WFRC College'                     ,'Yes'],
    ['SLCC_AIRP'    ,  979, 'WFRC College'                     ,'No' ],
    ['SLCC_WEST'    ,  959, 'WFRC College'                     ,'No' ],
    ['SLCC_HM'      , 2031, 'WFRC College'                     ,'Yes'],
    ['SLCC_WVC'     , 1356, 'WFRC College'                     ,'Yes'],
    ['BYU'          , 2939, 'MAG College'                      ,'No' ],
    ['USU_OREM'     , 2888, 'MAG College'                      ,'Yes'],
    ['UVU_MAIN'     , 2848, 'MAG College'                      ,'No' ],
    ['UVU_GENEVA'   , 2882, 'MAG College'                      ,'No' ],
    ['UVU_THANKP'   , 2606, 'MAG College'                      ,'No' ],
    ['UVU_VINE'     , 2809, 'MAG College'                      ,'No' ],
    ['UVU_PAYSON'   , 3336, 'MAG College'                      ,'No' ],
    ['Lagoon'       ,  781, 'Special Generator - Trip Table'   ,'No' ],
    ['Airport'      ,  965, 'Special Generator - Trip Table'   ,'No' ],
    ['TempleSquare' , 1035, 'Special Generator - No Trip Table','No' ],
    ['SLC_Library'  , 1147, 'Special Generator - No Trip Table','No' ]
]

#                             Import Data

In [5]:
# create query of all subareaid to subareaid truck flows using join to taz table with subareaid field

# import only if TRUE, MUST BE RUN with bReimportFromBQ=True ATLEAST ONCE TO POPULATE CSVs
# otherwise read in CSVs that have already been created
if bReimportFromBQ:

    # google cloud big query libaries
    from google.cloud import bigquery
    from google.oauth2 import service_account

    key_path = r"C:\Users\bhereth\streetlight-temp-analysis-e2b201d26862.json"

    credentials = service_account.Credentials.from_service_account_file(
        key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )

    client = bigquery.Client(credentials=credentials, project=credentials.project_id,)

    dTables = [
        'ut-udot-adap-prod.streetlight_data.udot_personal_all_year_2019',
        'ut-udot-adap-prod.streetlight_data.udot_personal_fall_2019'    ,
        'ut-udot-adap-prod.streetlight_data.udot_personal_spring_2019'  ,
        'ut-udot-adap-prod.streetlight_data.udot_personal_summer_2019'  ,
        'ut-udot-adap-prod.streetlight_data.udot_personal_winter_2019'  ,
    ]

    strSQL_Select = "SELECT origin_zone_name, destination_zone_name, LEFT(day_type, 1) AS daytype_code, LEFT(day_part, 1) AS daypart_code, LEFT(data_period, 1) AS dataper_code, " + fnCounts + ", " + fnVolume

    # construct where clause. can use dfSpecGenList but need to replace [] with ()
    strSQL_Where = "WHERE (origin_zone_name IN " + str(dfSpecGenList).replace('[', '(').replace(']',')') + ") OR (destination_zone_name IN " + str(dfSpecGenList).replace('[', '(').replace(']',')') + ")"

    # initialize dataframe for appending
    dfSpecGenODData = pd.DataFrame()

    for table in dTables:
        strSQL = strSQL_Select + " FROM " + table + " " +strSQL_Where;
        # print(strSQL);

        # query Google Big Query
        dfQuery = client.query(strSQL).to_dataframe()
        dfQuery['daytype_code'] = dfQuery['daytype_code'].astype(np.int64)
        dfQuery['daypart_code'] = dfQuery['daypart_code'].astype(np.int64)
        dfQuery['dataper_code'] = dfQuery['dataper_code'].astype(np.int64)
        dfSpecGenODData = dfSpecGenODData.append(dfQuery)

    # write CSV for future use when import is turned off
    dfSpecGenODData.to_csv(os.path.join(intermediate_folder,'specgen_od_data.csv'))
    display("CSV Exported")

else:
    # read CSVs
    dfSpecGenODData = pd.read_csv(os.path.join(intermediate_folder,'specgen_od_data.csv'))
    display("CSV Read In")

display(dfSpecGenODData)

'CSV Read In'

Unnamed: 0.1,Unnamed: 0,origin_zone_name,destination_zone_name,daytype_code,daypart_code,dataper_code,o_d_traffic_sample_trip_counts,o_d_traffic_calibrated_trip_volume
0,0,490723_5,490721_0,2,3,1,13,4
1,1,490757_0,490666_1,2,0,1,13,4
2,2,351145_0,110200_0,2,3,1,13,4
3,3,110086_0,110199_1,2,3,1,13,4
4,4,570077_0,350060_1,2,0,1,13,4
...,...,...,...,...,...,...,...,...
5597128,881960,350326_1,350775_0,0,0,3,6,2
5597129,881961,490723_5,490935_0,0,4,3,6,2
5597130,881962,490723_1,490606_0,0,3,3,6,2
5597131,881963,350387_0,350060_1,0,3,3,6,2


#            Process Data

In [6]:
# Merge to Get Origin SpecGen
dfSLData = pd.DataFrame.merge(dfSpecGenWithSLTAZ,dfSpecGenODData,left_on='SL_COTAZID',right_on='origin_zone_name',how='right')
dfSLData = dfSLData.fillna("")
dfSLData = dfSLData.rename(columns=({'SpecGen':'SpecGenO'}))
dfSLData = dfSLData.drop(columns=(['SA_TAZID','Type','SL_COTAZID']))

# Merge to Get Destination SpecGen
dfSLData = pd.DataFrame.merge(dfSpecGenWithSLTAZ,dfSLData,left_on='SL_COTAZID',right_on='destination_zone_name',how='right')
dfSLData = dfSLData.fillna("")
dfSLData = dfSLData.rename(columns=({'SpecGen':'SpecGenD'}))
dfSLData = dfSLData.drop(columns=(['SA_TAZID','Type','SL_COTAZID']))

dfSLData['SpecGen']=""
dfSLData['Intrazonal']=0
dfSLData.loc[(dfSLData['SpecGenO']==dfSLData['SpecGenD']),'Intrazonal']=1

dfSLData.loc[(dfSLData['SpecGenO']!=""),'SpecGen']=dfSLData['SpecGenO']
dfSLData.loc[(dfSLData['SpecGenD']!=""),'SpecGen']=dfSLData['SpecGenD']

dfSLData


Unnamed: 0.1,SpecGenD,RegenFGDB_x,value_x,label_x,SpecGenO,RegenFGDB_y,value_y,label_y,Unnamed: 0,origin_zone_name,destination_zone_name,daytype_code,daypart_code,dataper_code,o_d_traffic_sample_trip_counts,o_d_traffic_calibrated_trip_volume,SpecGen,Intrazonal
0,,,,,BYU,No,BYU,BYU,0,490723_5,490721_0,2,3,1,13,4,BYU,0
1,UVU_GENEVA,No,UVU_GENEVA,UVU_GENEVA,,,,,1,490757_0,490666_1,2,0,1,13,4,UVU_GENEVA,0
2,Lagoon,No,Lagoon,Lagoon,,,,,2,351145_0,110200_0,2,3,1,13,4,Lagoon,0
3,WSU_FARM,Yes,WSU_FARM,WSU_FARM,,,,,3,110086_0,110199_1,2,3,1,13,4,WSU_FARM,0
4,Airport,No,Airport,Airport,,,,,4,570077_0,350060_1,2,0,1,13,4,Airport,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5597128,,,,,SLCC_SC,No,SLCC_SC,SLCC_SC,881960,350326_1,350775_0,0,0,3,6,2,SLCC_SC,0
5597129,,,,,BYU,No,BYU,BYU,881961,490723_5,490935_0,0,4,3,6,2,BYU,0
5597130,,,,,BYU,No,BYU,BYU,881962,490723_1,490606_0,0,3,3,6,2,BYU,0
5597131,Airport,No,Airport,Airport,,,,,881963,350387_0,350060_1,0,3,3,6,2,Airport,0


In [7]:
# show some stats
display(dfSLData.groupby(['SpecGen','Intrazonal']).agg(Counts=(fnCounts,'sum'),Volume=(fnVolume,'sum')))

# df with only intrazonals
dfSLData_Intrazonals = dfSLData[dfSLData['Intrazonal']==1]

# df with no intrazonals
dfSLData_noIntrazonals = dfSLData[dfSLData['Intrazonal']==0]

display(dfSLData_noIntrazonals)

Unnamed: 0_level_0,Unnamed: 1_level_0,Counts,Volume
SpecGen,Intrazonal,Unnamed: 2_level_1,Unnamed: 3_level_1
Airport,0,5715464,1949991
Airport,1,1012952,344616
BYU,0,4793520,1571103
BYU,1,690352,218295
ENSIGN,0,321684,102820
...,...,...,...
WSU_FARM,1,18568,6231
WSU_MAIN,0,1452596,469914
WSU_MAIN,1,61872,19567
WSU_WEST,0,588784,202372


Unnamed: 0.1,SpecGenD,RegenFGDB_x,value_x,label_x,SpecGenO,RegenFGDB_y,value_y,label_y,Unnamed: 0,origin_zone_name,destination_zone_name,daytype_code,daypart_code,dataper_code,o_d_traffic_sample_trip_counts,o_d_traffic_calibrated_trip_volume,SpecGen,Intrazonal
0,,,,,BYU,No,BYU,BYU,0,490723_5,490721_0,2,3,1,13,4,BYU,0
1,UVU_GENEVA,No,UVU_GENEVA,UVU_GENEVA,,,,,1,490757_0,490666_1,2,0,1,13,4,UVU_GENEVA,0
2,Lagoon,No,Lagoon,Lagoon,,,,,2,351145_0,110200_0,2,3,1,13,4,Lagoon,0
3,WSU_FARM,Yes,WSU_FARM,WSU_FARM,,,,,3,110086_0,110199_1,2,3,1,13,4,WSU_FARM,0
4,Airport,No,Airport,Airport,,,,,4,570077_0,350060_1,2,0,1,13,4,Airport,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5597128,,,,,SLCC_SC,No,SLCC_SC,SLCC_SC,881960,350326_1,350775_0,0,0,3,6,2,SLCC_SC,0
5597129,,,,,BYU,No,BYU,BYU,881961,490723_5,490935_0,0,4,3,6,2,BYU,0
5597130,,,,,BYU,No,BYU,BYU,881962,490723_1,490606_0,0,3,3,6,2,BYU,0
5597131,Airport,No,Airport,Airport,,,,,881963,350387_0,350060_1,0,3,3,6,2,Airport,0


In [8]:
#checking why BYU has intrazonals in webapp. this should be empty
dfSLData_noIntrazonals[(dfSLData_noIntrazonals['SpecGenO']=='BYU') & dfSLData_noIntrazonals['SpecGenD']=='BYU']

Unnamed: 0.1,SpecGenD,RegenFGDB_x,value_x,label_x,SpecGenO,RegenFGDB_y,value_y,label_y,Unnamed: 0,origin_zone_name,destination_zone_name,daytype_code,daypart_code,dataper_code,o_d_traffic_sample_trip_counts,o_d_traffic_calibrated_trip_volume,SpecGen,Intrazonal


## Create Percent Distribution by TAZ, MDIST, LDIST

In [9]:
dfSLData_noIntrazonals_GroupTotals = dfSLData_noIntrazonals.groupby(['SpecGen','daytype_code','daypart_code','dataper_code'],as_index=False).agg(TotalVolume=(fnVolume,'sum'))
dfSLData_noIntrazonals_withGroupTotals = pd.DataFrame.merge(dfSLData_noIntrazonals,dfSLData_noIntrazonals_GroupTotals,on=('SpecGen','daytype_code','daypart_code','dataper_code'))
dfSLData_noIntrazonals_withGroupTotals['PercentTotal'] = dfSLData_noIntrazonals_withGroupTotals[fnVolume] / dfSLData_noIntrazonals_withGroupTotals['TotalVolume']

# show numbers with commas
pd.options.display.float_format = '{:,.8f}'.format

display(dfSLData_noIntrazonals_withGroupTotals)

# show numbers with commas
pd.options.display.float_format = '{:,.0f}'.format

Unnamed: 0.1,SpecGenD,RegenFGDB_x,value_x,label_x,SpecGenO,RegenFGDB_y,value_y,label_y,Unnamed: 0,origin_zone_name,destination_zone_name,daytype_code,daypart_code,dataper_code,o_d_traffic_sample_trip_counts,o_d_traffic_calibrated_trip_volume,SpecGen,Intrazonal,TotalVolume,PercentTotal
0,,,,,BYU,No,BYU,BYU,0,490723_5,490721_0,2,3,1,13,3.62500000,BYU,0,15154.73076923,0.00023920
1,BYU,No,BYU,BYU,,,,,531,490781_0,490723_7,2,3,1,9,2.50961538,BYU,0,15154.73076923,0.00016560
2,BYU,No,BYU,BYU,,,,,629,490813_0,490723_1,2,3,1,6,1.67307692,BYU,0,15154.73076923,0.00011040
3,BYU,No,BYU,BYU,,,,,1347,490777_2,490723_6,2,3,1,45,12.54807692,BYU,0,15154.73076923,0.00082800
4,,,,,BYU,No,BYU,BYU,1382,490723_3,490721_0,2,3,1,32,8.92307692,BYU,0,15154.73076923,0.00058880
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5577739,UVU_PAYSON,No,UVU_PAYSON,UVU_PAYSON,,,,,670869,490933_0,491120_0,2,1,3,1,1.16000000,UVU_PAYSON,0,20.88000000,0.05555556
5577740,,,,,UVU_PAYSON,No,UVU_PAYSON,UVU_PAYSON,670879,491120_0,491230_0,2,1,3,1,1.16000000,UVU_PAYSON,0,20.88000000,0.05555556
5577741,UVU_PAYSON,No,UVU_PAYSON,UVU_PAYSON,,,,,765553,491082_0,491120_0,2,1,3,2,2.32000000,UVU_PAYSON,0,20.88000000,0.11111111
5577742,,,,,UVU_PAYSON,No,UVU_PAYSON,UVU_PAYSON,766170,491120_0,491145_0,2,1,3,1,1.16000000,UVU_PAYSON,0,20.88000000,0.05555556


In [10]:
dfSLDataAggCodesOnly = dfSLData_noIntrazonals.groupby(['SpecGen','daytype_code','daypart_code','dataper_code'], as_index=False).agg(Volume=(fnVolume,'sum'),Counts=(fnCounts,'sum'))

display(dfSLDataAggCodesOnly)

Unnamed: 0,SpecGen,daytype_code,daypart_code,dataper_code,Volume,Counts
0,Airport,0,0,1,66956,842716
1,Airport,0,0,2,61061,191605
2,Airport,0,0,3,65854,204376
3,Airport,0,0,4,65950,209222
4,Airport,0,0,5,74868,237513
...,...,...,...,...,...,...
2965,WSU_WEST,2,5,1,1491,5346
2966,WSU_WEST,2,5,2,1482,1329
2967,WSU_WEST,2,5,3,1289,1111
2968,WSU_WEST,2,5,4,1566,1404


In [11]:
dfSLDataAggDT     = pd.DataFrame.merge(dfSLDataAggCodesOnly    ,dfDayType,on='daytype_code')
dfSLDataAggDTDP   = pd.DataFrame.merge(dfSLDataAggDT           ,dfDayPart,on='daypart_code')
dfSLDataAggDTDPDP = pd.DataFrame.merge(dfSLDataAggDTDP         ,dfDataPer,on='dataper_code')
dfSLDataAgg = dfSLDataAggDTDPDP
dfSLDataAgg

Unnamed: 0,SpecGen,daytype_code,daypart_code,dataper_code,Volume,Counts,day_type,day_part,data_period
0,Airport,0,0,1,66956,842716,0: All Days (Mo-Su),0: All Day (12am-12am),1. All year
1,BYU,0,0,1,56597,712340,0: All Days (Mo-Su),0: All Day (12am-12am),1. All year
2,ENSIGN,0,0,1,3778,47546,0: All Days (Mo-Su),0: All Day (12am-12am),1. All year
3,Lagoon,0,0,1,9743,122630,0: All Days (Mo-Su),0: All Day (12am-12am),1. All year
4,SLCC_AIRP,0,0,1,11649,146619,0: All Days (Mo-Su),0: All Day (12am-12am),1. All year
...,...,...,...,...,...,...,...,...,...
2965,WESTMIN,2,5,5,420,391,2: Weekend Day (Sa-Su),5: Late PM (6pm-12am),5. Jun-Aug
2966,WSU_DAVIS,2,5,5,2326,2166,2: Weekend Day (Sa-Su),5: Late PM (6pm-12am),5. Jun-Aug
2967,WSU_FARM,2,5,5,9603,8941,2: Weekend Day (Sa-Su),5: Late PM (6pm-12am),5. Jun-Aug
2968,WSU_MAIN,2,5,5,2252,2097,2: Weekend Day (Sa-Su),5: Late PM (6pm-12am),5. Jun-Aug


In [12]:
dfSLDataAgg['data_period'].unique()

array(['1. All year', '2. Sep-Nov', '3. Dec-Feb', '4. Mar-May',
       '5. Jun-Aug'], dtype=object)

In [13]:
dfSLDailyTotals = dfSLDataAgg[(dfSLDataAgg['day_part']==daypart0)].copy()
dfSLTimeOfDayVolumes = dfSLDataAgg[(dfSLDataAgg['day_part']!=daypart0)].copy()

display(dfSLDailyTotals[dfSLDailyTotals['SpecGen']=='Airport'])
display(dfSLTimeOfDayVolumes[dfSLTimeOfDayVolumes['SpecGen']=='Airport'])

Unnamed: 0,SpecGen,daytype_code,daypart_code,dataper_code,Volume,Counts,day_type,day_part,data_period
0,Airport,0,0,1,66956,842716,0: All Days (Mo-Su),0: All Day (12am-12am),1. All year
33,Airport,1,0,1,69332,375349,1: Weekday (Tu-Th),0: All Day (12am-12am),1. All year
66,Airport,2,0,1,58781,210801,2: Weekend Day (Sa-Su),0: All Day (12am-12am),1. All year
594,Airport,0,0,2,61061,191605,0: All Days (Mo-Su),0: All Day (12am-12am),2. Sep-Nov
627,Airport,1,0,2,63801,85802,1: Weekday (Tu-Th),0: All Day (12am-12am),2. Sep-Nov
660,Airport,2,0,2,52917,47443,2: Weekend Day (Sa-Su),0: All Day (12am-12am),2. Sep-Nov
1188,Airport,0,0,3,65854,204376,0: All Days (Mo-Su),0: All Day (12am-12am),3. Dec-Feb
1221,Airport,1,0,3,66730,92041,1: Weekday (Tu-Th),0: All Day (12am-12am),3. Dec-Feb
1254,Airport,2,0,3,58251,50216,2: Weekend Day (Sa-Su),0: All Day (12am-12am),3. Dec-Feb
1782,Airport,0,0,4,65950,209222,0: All Days (Mo-Su),0: All Day (12am-12am),4. Mar-May


Unnamed: 0,SpecGen,daytype_code,daypart_code,dataper_code,Volume,Counts,day_type,day_part,data_period
99,Airport,0,1,1,6698,84307,0: All Days (Mo-Su),1: Early AM (12am-6am),1. All year
132,Airport,1,1,1,6746,36524,1: Weekday (Tu-Th),1: Early AM (12am-6am),1. All year
165,Airport,2,1,1,5914,21208,2: Weekend Day (Sa-Su),1: Early AM (12am-6am),1. All year
198,Airport,0,2,1,9746,122661,0: All Days (Mo-Su),2: Peak AM (6am-9am),1. All year
231,Airport,1,2,1,10097,54665,1: Weekday (Tu-Th),2: Peak AM (6am-9am),1. All year
...,...,...,...,...,...,...,...,...,...
2805,Airport,1,4,5,12596,16939,1: Weekday (Tu-Th),4: Peak PM (3pm-6pm),5. Jun-Aug
2838,Airport,2,4,5,9817,9140,2: Weekend Day (Sa-Su),4: Peak PM (3pm-6pm),5. Jun-Aug
2871,Airport,0,5,5,18322,58124,0: All Days (Mo-Su),5: Late PM (6pm-12am),5. Jun-Aug
2904,Airport,1,5,5,18719,25174,1: Weekday (Tu-Th),5: Late PM (6pm-12am),5. Jun-Aug


# Export Data

In [14]:
dfSpecGen

Unnamed: 0,SpecGen,SA_TAZID,Type,RegenFGDB,value,label
0,ENSIGN,1029,WFRC College,No,ENSIGN,ENSIGN
1,WESTMIN,1263,WFRC College,No,WESTMIN,WESTMIN
2,UOFU_MAIN,1051,WFRC College,No,UOFU_MAIN,UOFU_MAIN
3,UOFU_MED,1007,WFRC College,No,UOFU_MED,UOFU_MED
4,UOFU_SAND,1908,WFRC College,Yes,UOFU_SAND,UOFU_SAND
5,USU_BRIG,82,WFRC College,Yes,USU_BRIG,USU_BRIG
6,USU_KAY,760,WFRC College,Yes,USU_KAY,USU_KAY
7,USU_SL,1577,WFRC College,Yes,USU_SL,USU_SL
8,WSU_MAIN,437,WFRC College,Yes,WSU_MAIN,WSU_MAIN
9,WSU_DAVIS,693,WFRC College,No,WSU_DAVIS,WSU_DAVIS


In [15]:
# create data frames and jsons for each grouping

dfDayType['value'] = dfDayType['daytype_code']
dfDayType['label'] = dfDayType['day_type'].str[3:]
# display(dfDayType)

dfDayPart['value'] = dfDayPart['daypart_code']
dfDayPart['label'] = dfDayPart['day_part'].str[3:]
# display(dfDayPart)

dfDataPer['value'] = dfDataPer['dataper_code']
dfDataPer['label'] = dfDataPer['data_period'].str[3:]
# display(dfDataPer)

dfDayType.to_json(os.path.join(results_folder,'codes_daytype.json'),orient='records')
dfDayPart.to_json(os.path.join(results_folder,'codes_daypart.json'),orient='records')
dfDataPer.to_json(os.path.join(results_folder,'codes_dataper.json'),orient='records')

In [16]:
# export csv and json data

dfSLDailyTotals.to_csv(os.path.join(results_folder,"SpecGenTAZ_SLDailyTotals.csv"))
dfSLTimeOfDayVolumes.to_csv(os.path.join(results_folder,"SpecGenTAZ_SLTimeOfDayVolumes.csv"))

dfSLDailyTotals.to_json(os.path.join(results_folder,"SpecGenTAZ_SLDailyTotals.json"),orient='records')
dfSLTimeOfDayVolumes.to_json(os.path.join(results_folder,"SpecGenTAZ_SLTimeOfDayVolumes.json"),orient='records')

# Summary Tables and Charts by Special Generator

In [17]:
def getSeasonChartData(sg):
    df = dfSLDailyTotals[(dfSLDailyTotals['SpecGen']==sg)].copy()
    df['daytype_index'] = df['day_type'].str[0]
    df['dataper_index'] = df['data_period'].str[0]
    df = df[['day_type','data_period','Volume','dataper_index']]
    df = pd.pivot_table(df, values='Volume', index=['dataper_index'], columns=['day_type'], aggfunc=np.sum)
    df = df.reset_index()
    return df

def getTimeOfDayChartData(sg,dataper):
    df = dfSLTimeOfDayVolumes[(dfSLTimeOfDayVolumes['SpecGen']==sg) & (dfSLTimeOfDayVolumes['data_period']==dataper)].copy()
    df['daytype_index'] = df['day_type'].str[0]
    df['daypart_index'] = df['day_part'].str[0]
    df = df[['day_type','Volume','daypart_index']]
    df = pd.pivot_table(df, values='Volume', index=['daypart_index'], columns=['day_type'], aggfunc=np.sum)
    df = df.reset_index()
    return df

# initialize
dfSsnData = getSeasonChartData("Airport")
display(dfSsnData)

dfTodData = getTimeOfDayChartData("Airport","1. All year")
display(dfTodData)

day_type,dataper_index,0: All Days (Mo-Su),1: Weekday (Tu-Th),2: Weekend Day (Sa-Su)
0,1,66956,69332,58781
1,2,61061,63801,52917
2,3,65854,66730,58251
3,4,65950,68949,56904
4,5,74868,77915,66727


day_type,daypart_index,0: All Days (Mo-Su),1: Weekday (Tu-Th),2: Weekend Day (Sa-Su)
0,1,6698,6746,5914
1,2,9746,10097,8272
2,3,24055,25065,20909
3,4,10459,11106,8892
4,5,15997,16317,14795


In [18]:
if bGenerateInteractiveCharts: 
    import ipywidgets as widgets
    import bqplot as bq

    from IPython.display import display
    import math
    from bqplot import pyplot as plt

    # Special generators widget
    ddSpecGen = widgets.Dropdown(
        options=dfSLDataAgg['SpecGen'].unique(),
        # value='2',
        description='Special Generator:',
        disabled=False,
    )

    # data period widget
    ddDataPeriod = widgets.Dropdown(
        options=dfSLDataAgg['data_period'].unique(),
        # value='2',
        description='Season:',
        disabled=False,
    )



    button = widgets.Button(
        description='click me to raise an exception',
        layout={'width': '300px'}
    )

    # initialize

    fig_ssn = plt.figure(title=ddSpecGen.value + ' Seasonality',
                        layout=widgets.Layout(width='700px',height='400px'),
                        fig_margin={'top':50, 'bottom':50, 'left':150, 'right':150},
                        legend_location="top-left")

    bar_chart_ssn  = plt.bar(x = dfSsnData['dataper_index'].tolist(), y= [dfSsnData[daytype0].tolist(),dfSsnData[daytype1].tolist(),dfSsnData[daytype2].tolist()],
                            labels = dfSLDataAgg['day_type'].unique().tolist(),
                            display_legend=True)

    fig_ssn.axes[0].label = "All year - - - Sep-Nov - - - Dec-Feb - - - Mar-May - - - Jun-Aug"
    fig_ssn.axes[1].label = "Average Daily Trip Ends"


    fig_tod = plt.figure(title=ddSpecGen.value + ' Time of Day Distribution - ' + ddDataPeriod.value,
                        layout=widgets.Layout(width='700px',height='400px'),
                        fig_margin={'top':50, 'bottom':50, 'left':150, 'right':150},
                        legend_location="top-left")

    bar_chart_tod  = plt.bar(x = dfTodData['daypart_index'].tolist(), y= [dfTodData[daytype0].tolist(),dfTodData[daytype1].tolist(),dfTodData[daytype2].tolist()],
                            labels = dfSLDataAgg['day_type'].unique().tolist(),
                            display_legend=True)

    fig_tod.axes[0].label = "Early AM ------- AM --------- Midday --------- PM ------- Late EV"
    fig_tod.axes[1].label = "Average Period Trip Ends"

    fig_ssn.axes[0].tick_style  =  dict({'font-family': 'Verdana','font-size': '15px','font-weight': 'normal'})
    fig_tod.axes[0].tick_style  =  dict({'font-family': 'Verdana','font-size': '15px','font-weight': 'normal'})

    fig_ssn.axes[1].tick_style  =  dict({'font-family': 'Verdana','font-size': '9px','font-weight': 'normal'})
    fig_tod.axes[1].tick_style  =  dict({'font-family': 'Verdana','font-size': '9px','font-weight': 'normal'})

    fig_tod.axes[0].tick_values = ['A','B','C','D','E']

    bar_chart_ssn.type = "grouped"
    bar_chart_ssn.colors = ["blue","orange","green"]

    bar_chart_tod.type = "grouped"
    bar_chart_tod.colors = ["blue","orange","green"]

    ssn = widgets.VBox(
        children=(ddSpecGen,fig_ssn)
    )
    tod = widgets.VBox(
        children=(ddDataPeriod,fig_tod)
    )

    app = widgets.HBox(
        children=(ssn,tod)
    )



    # debug_view.capture(clear_output=True)
    def refreshApp(event):
        dfTodData = getTimeOfDayChartData(ddSpecGen.value,ddDataPeriod.value)
        dfSsnData = getSeasonChartData(ddSpecGen.value)

        y_max_ssn = math.ceil( dfSLDailyTotals[(dfSLDailyTotals['SpecGen']==ddSpecGen.value)].select_dtypes(include=['float']).max().max()/1000) * 1000
        y_max_tod = math.ceil( dfSLTimeOfDayVolumes[(dfSLTimeOfDayVolumes['SpecGen']==ddSpecGen.value)].select_dtypes(include=['float']).max().max()/1000) * 1000

        bar_chart_ssn.y = [dfSsnData[daytype0].tolist(),dfSsnData[daytype1].tolist(),dfSsnData[daytype2].tolist()]
        bar_chart_tod.y = [dfTodData[daytype0].tolist(),dfTodData[daytype1].tolist(),dfTodData[daytype2].tolist()]

        #plt.ylim(0, y_max_tod)
        fig_ssn.axes[1].scale.max = y_max_ssn + 15000 #add a bit to account for legend space
        fig_tod.axes[1].scale.max = y_max_tod + 10000 #add a bit to account for legend space

        
        fig_ssn.title = ddSpecGen.value + ' Seasonality'
        fig_tod.title = ddSpecGen.value + ' Time of Day Distribution - ' + ddDataPeriod.value[2:]
        #bar_chart_tod.colors = ["orange","blue","green"]
        app


    ddSpecGen.observe(refreshApp)
    ddDataPeriod.observe(refreshApp)


    display(app)

#### CURRENTLY IN TDM: Airport Special Generator Control Total 2019: 32,700 / Lagoon Special Generator Control Total 2019: 7,434

# Generate Spatial Layers for WebApp

In [19]:
import arcpy
#      import arcgis libraries
from arcgis.gis import *
gis = GIS()

In [20]:
# import into spatially-enabled DataFrame
sdfTAZ__ = pd.DataFrame.spatial.from_featureclass(shp_TAZ__)
# sdfTAZ__

In [21]:
sdfTAZ__.columns

Index(['FID', 'TAZID', 'SA_TAZID', 'CO_IDX', 'CO_TAZID', 'SUBAREAID', 'ACRES',
       'DEVACRES', 'DEVPBLEPCT', 'X', 'Y', 'ADJ_XY', 'CO_FIPS', 'CO_NAME',
       'CITY_NAME', 'DISTSUPER', 'DSUP_NAME', 'DISTLRG', 'DLRG_NAME',
       'DISTMED', 'DMED_NAME', 'DISTSML', 'DSML_NAME', 'SORT', 'TAZID_V21B',
       'SATAZ_V21B', 'COTAZ_V21B', 'SUBID_V21B', 'TAZID_V21A', 'SATAZ_V21A',
       'COTAZ_V21A', 'SUBID_V21A', 'SHAPE'],
      dtype='object')

In [22]:
dfStreetLightTAZ.columns

Index(['OID_', 'TAZID', 'SA_TAZID', 'CO_IDX', 'CO_TAZID', 'SL_SPLIT',
       'SL_TAZID', 'SL_COTAZID', 'SUBAREAID', 'ACRES', 'DEVACRES',
       'DEVPBLEPCT', 'X', 'Y', 'ADJ_XY', 'CO_FIPS', 'CO_NAME', 'CITY_NAME',
       'DISTSUPER', 'DSUP_NAME', 'DISTLRG', 'DLRG_NAME', 'DISTMED',
       'DMED_NAME', 'DISTSML', 'DSML_NAME', 'SORT', 'TAZID_V21B', 'SATAZ_V21B',
       'COTAZ_V21B', 'SUBID_V21B', 'TAZID_V21A', 'SATAZ_V21A', 'COTAZ_V21A',
       'SUBID_V21A'],
      dtype='object')

## Prepare Data

In [23]:
dfTazDist = dfStreetLightTAZ[['SL_COTAZID','CO_TAZID','DISTMED','DISTLRG']]
dfTazDist

Unnamed: 0,SL_COTAZID,CO_TAZID,DISTMED,DISTLRG
0,1003_0,1003,1,1
1,1005_0,1005,1,1
2,1006_0,1006,1,1
3,1015_0,1015,1,1
4,1010_0,1010,1,1
...,...,...,...,...
10223,21074_0,21074,5,2
10224,21359_2,21359,6,2
10225,21359_3,21359,6,2
10226,21298_0,21298,7,2


In [24]:
# check that all totals = 1
dfCheck = dfSLData_noIntrazonals_withGroupTotals.groupby(['SpecGen','daytype_code','daypart_code','dataper_code'],as_index=False).agg(SumPercentTotal=('PercentTotal','sum'))
display(dfCheck[(dfCheck['SumPercentTotal']>1.0001) | dfCheck['SumPercentTotal']<0.9999])

# show total trips
dfSLData_noIntrazonals_withGroupTotals['TotalVolume'].sum()

Unnamed: 0,SpecGen,daytype_code,daypart_code,dataper_code,SumPercentTotal


80368374062.98984

In [25]:
#checking why BYU has intrazonals in webapp. this should be empty
df = dfSLData_noIntrazonals_withGroupTotals[(dfSLData_noIntrazonals_withGroupTotals['SpecGen']=='BYU') & (dfSLData_noIntrazonals_withGroupTotals['daytype_code']==0) & (dfSLData_noIntrazonals_withGroupTotals['daypart_code']==0)  & (dfSLData_noIntrazonals_withGroupTotals['dataper_code']==1)]
print(df[[fnVolume]].sum().sum())
print(df[['PercentTotal']].sum().sum())

56596.87671232876
1.0000000000001714


In [26]:
# TAZ, MDIST, LDIST levels

dfSLDataMap = dfSLData_noIntrazonals_withGroupTotals.copy()
dfSLDataMap['TPRCode'] = dfSLDataMap['daytype_code'].astype(str) + dfSLDataMap['daypart_code'].astype(str)  +dfSLDataMap['dataper_code'].astype(str) 

dfSLDataMap['NonSG_SLTAZ'] = 0
dfSLDataMap.loc[(dfSLDataMap['SpecGenD']==dfSLDataMap['SpecGen']),'NonSG_SLTAZ'] = dfSLDataMap['origin_zone_name'     ]
dfSLDataMap.loc[(dfSLDataMap['SpecGenO']==dfSLDataMap['SpecGen']),'NonSG_SLTAZ'] = dfSLDataMap['destination_zone_name']

dfSLDataMap_NonSGSLTaz = dfSLDataMap[['SpecGen','TPRCode','NonSG_SLTAZ',fnVolume,'PercentTotal']]

display(dfSLDataMap_NonSGSLTaz)

Unnamed: 0,SpecGen,TPRCode,NonSG_SLTAZ,o_d_traffic_calibrated_trip_volume,PercentTotal
0,BYU,231,490721_0,4,0
1,BYU,231,490781_0,3,0
2,BYU,231,490813_0,2,0
3,BYU,231,490777_2,13,0
4,BYU,231,490721_0,9,0
...,...,...,...,...,...
5577739,UVU_PAYSON,213,490933_0,1,0
5577740,UVU_PAYSON,213,491230_0,1,0
5577741,UVU_PAYSON,213,491082_0,2,0
5577742,UVU_PAYSON,213,491145_0,1,0


In [27]:
display(dfSLDataMap_NonSGSLTaz[(dfSLDataMap_NonSGSLTaz['SpecGen']=='BYU') & (dfSLDataMap_NonSGSLTaz['TPRCode']=='001')][fnVolume].sum())

56596.87671232876

In [28]:
#display(dfSLDataMap)
dfSLDataMap_TazDist = pd.DataFrame.merge(dfSLDataMap_NonSGSLTaz, dfTazDist, left_on='NonSG_SLTAZ', right_on='SL_COTAZID', how='left')
display(dfSLDataMap_TazDist)

# show unmatched
display(dfSLDataMap_TazDist[dfSLDataMap_TazDist.isna().any(axis=1)])
display(dfSLDataMap_TazDist[dfSLDataMap_TazDist.isnull().any(axis=1)])

# fill na (external) with -1
dfSLDataMap_TazDist = dfSLDataMap_TazDist.fillna(-1)

# check unmatched again
display(dfSLDataMap_TazDist[dfSLDataMap_TazDist.isna().any(axis=1)])
display(dfSLDataMap_TazDist[dfSLDataMap_TazDist.isnull().any(axis=1)])


Unnamed: 0,SpecGen,TPRCode,NonSG_SLTAZ,o_d_traffic_calibrated_trip_volume,PercentTotal,SL_COTAZID,CO_TAZID,DISTMED,DISTLRG
0,BYU,231,490721_0,4,0,490721_0,490721,59,21
1,BYU,231,490781_0,3,0,490781_0,490781,59,21
2,BYU,231,490813_0,2,0,490813_0,490813,59,21
3,BYU,231,490777_2,13,0,490777_2,490777,59,21
4,BYU,231,490721_0,9,0,490721_0,490721,59,21
...,...,...,...,...,...,...,...,...,...
5577739,UVU_PAYSON,213,490933_0,1,0,490933_0,490933,60,22
5577740,UVU_PAYSON,213,491230_0,1,0,491230_0,491230,62,22
5577741,UVU_PAYSON,213,491082_0,2,0,491082_0,491082,61,22
5577742,UVU_PAYSON,213,491145_0,1,0,491145_0,491145,62,22


Unnamed: 0,SpecGen,TPRCode,NonSG_SLTAZ,o_d_traffic_calibrated_trip_volume,PercentTotal,SL_COTAZID,CO_TAZID,DISTMED,DISTLRG
201,BYU,231,18 TO WY 0080P,0,0,,,,
352,BYU,231,27 TO NV 0080N,0,0,,,,
623,BYU,231,18 TO WY 0080P,0,0,,,,
677,BYU,231,10 FR CO 0491N,0,0,,,,
832,BYU,231,24 TO ID 0084P,1,0,,,,
...,...,...,...,...,...,...,...,...,...
5575714,ENSIGN,243,23 TO ID 0015P,1,0,,,,
5576059,WSU_DAVIS,213,24 FR ID 0084N,1,0,,,,
5576157,SLCC_MEAD,223,27 FR NV 0080P,1,0,,,,
5577039,SLCC_MEAD,013,23 FR ID 0015N,1,0,,,,


Unnamed: 0,SpecGen,TPRCode,NonSG_SLTAZ,o_d_traffic_calibrated_trip_volume,PercentTotal,SL_COTAZID,CO_TAZID,DISTMED,DISTLRG
201,BYU,231,18 TO WY 0080P,0,0,,,,
352,BYU,231,27 TO NV 0080N,0,0,,,,
623,BYU,231,18 TO WY 0080P,0,0,,,,
677,BYU,231,10 FR CO 0491N,0,0,,,,
832,BYU,231,24 TO ID 0084P,1,0,,,,
...,...,...,...,...,...,...,...,...,...
5575714,ENSIGN,243,23 TO ID 0015P,1,0,,,,
5576059,WSU_DAVIS,213,24 FR ID 0084N,1,0,,,,
5576157,SLCC_MEAD,223,27 FR NV 0080P,1,0,,,,
5577039,SLCC_MEAD,013,23 FR ID 0015N,1,0,,,,


Unnamed: 0,SpecGen,TPRCode,NonSG_SLTAZ,o_d_traffic_calibrated_trip_volume,PercentTotal,SL_COTAZID,CO_TAZID,DISTMED,DISTLRG


Unnamed: 0,SpecGen,TPRCode,NonSG_SLTAZ,o_d_traffic_calibrated_trip_volume,PercentTotal,SL_COTAZID,CO_TAZID,DISTMED,DISTLRG


In [29]:
dfSLDataMap_TazDist[dfSLDataMap_TazDist['SpecGen']=='BYU']

Unnamed: 0,SpecGen,TPRCode,NonSG_SLTAZ,o_d_traffic_calibrated_trip_volume,PercentTotal,SL_COTAZID,CO_TAZID,DISTMED,DISTLRG
0,BYU,231,490721_0,4,0,490721_0,490721,59,21
1,BYU,231,490781_0,3,0,490781_0,490781,59,21
2,BYU,231,490813_0,2,0,490813_0,490813,59,21
3,BYU,231,490777_2,13,0,490777_2,490777,59,21
4,BYU,231,490721_0,9,0,490721_0,490721,59,21
...,...,...,...,...,...,...,...,...,...
5541733,BYU,213,490725_1,1,0,490725_1,490725,58,21
5541734,BYU,213,490688_0,1,0,490688_0,490688,58,21
5541735,BYU,213,490365_0,1,0,490365_0,490365,54,20
5541736,BYU,213,490712_1,1,0,490712_1,490712,58,21


In [30]:
dfTAZ__DataMap = dfSLDataMap_TazDist.groupby(['SpecGen','TPRCode','CO_TAZID'],as_index=False).agg(Volume=(fnVolume,'sum'),Percent=('PercentTotal','sum'),RecordCount=('SpecGen','size'))
dfMDistDataMap = dfSLDataMap_TazDist.groupby(['SpecGen','TPRCode','DISTMED' ],as_index=False).agg(Volume=(fnVolume,'sum'),Percent=('PercentTotal','sum'),RecordCount=('SpecGen','size'))
dfLDistDataMap = dfSLDataMap_TazDist.groupby(['SpecGen','TPRCode','DISTLRG' ],as_index=False).agg(Volume=(fnVolume,'sum'),Percent=('PercentTotal','sum'),RecordCount=('SpecGen','size'))

dfTAZ__DataMap['GEO'] = 'CO_TAZID'
dfMDistDataMap['GEO'] = 'DISTMED'
dfLDistDataMap['GEO'] = 'DISTLRG'

dfTAZ__DataMap = dfTAZ__DataMap.rename(columns={'CO_TAZID':'GEOID'})
dfMDistDataMap = dfMDistDataMap.rename(columns={'DISTMED' :'GEOID'})
dfLDistDataMap = dfLDistDataMap.rename(columns={'DISTLRG' :'GEOID'})

dfDataMap = pd.concat([dfTAZ__DataMap,dfMDistDataMap,dfLDistDataMap])

display(dfTAZ__DataMap)
display(dfMDistDataMap)
display(dfLDistDataMap)
display(dfDataMap)


Unnamed: 0,SpecGen,TPRCode,GEOID,Volume,Percent,RecordCount,GEO
0,Airport,001,-1,552,0,96,CO_TAZID
1,Airport,001,1001,1,0,4,CO_TAZID
2,Airport,001,1002,0,0,2,CO_TAZID
3,Airport,001,1005,1,0,4,CO_TAZID
4,Airport,001,1006,0,0,2,CO_TAZID
...,...,...,...,...,...,...,...
2481381,WSU_WEST,255,570418,2,0,1,CO_TAZID
2481382,WSU_WEST,255,570419,1,0,1,CO_TAZID
2481383,WSU_WEST,255,570420,1,0,1,CO_TAZID
2481384,WSU_WEST,255,570422,2,0,2,CO_TAZID


Unnamed: 0,SpecGen,TPRCode,GEOID,Volume,Percent,RecordCount,GEO
0,Airport,001,-1,552,0,96,DISTMED
1,Airport,001,1,539,0,831,DISTMED
2,Airport,001,2,573,0,1049,DISTMED
3,Airport,001,3,2131,0,1169,DISTMED
4,Airport,001,4,2698,0,1042,DISTMED
...,...,...,...,...,...,...,...
132960,WSU_WEST,255,46,1,0,1,DISTMED
132961,WSU_WEST,255,53,1,0,1,DISTMED
132962,WSU_WEST,255,58,2,0,2,DISTMED
132963,WSU_WEST,255,60,3,0,2,DISTMED


Unnamed: 0,SpecGen,TPRCode,GEOID,Volume,Percent,RecordCount,GEO
0,Airport,001,-1,552,0,96,DISTLRG
1,Airport,001,1,6125,0,4036,DISTLRG
2,Airport,001,2,209,0,748,DISTLRG
3,Airport,001,3,312,0,413,DISTLRG
4,Airport,001,4,1845,0,2809,DISTLRG
...,...,...,...,...,...,...,...
55219,WSU_WEST,255,17,3,0,3,DISTLRG
55220,WSU_WEST,255,20,1,0,1,DISTLRG
55221,WSU_WEST,255,21,2,0,2,DISTLRG
55222,WSU_WEST,255,22,3,0,2,DISTLRG


Unnamed: 0,SpecGen,TPRCode,GEOID,Volume,Percent,RecordCount,GEO
0,Airport,001,-1,552,0,96,CO_TAZID
1,Airport,001,1001,1,0,4,CO_TAZID
2,Airport,001,1002,0,0,2,CO_TAZID
3,Airport,001,1005,1,0,4,CO_TAZID
4,Airport,001,1006,0,0,2,CO_TAZID
...,...,...,...,...,...,...,...
55219,WSU_WEST,255,17,3,0,3,DISTLRG
55220,WSU_WEST,255,20,1,0,1,DISTLRG
55221,WSU_WEST,255,21,2,0,2,DISTLRG
55222,WSU_WEST,255,22,3,0,2,DISTLRG


In [31]:
# checks to see if totals add up
display(dfTAZ__DataMap['Volume'].sum())
display(dfMDistDataMap['Volume'].sum())
display(dfLDistDataMap['Volume'].sum())
display(dfDataMap     ['Volume'].sum()/3)

display(dfTAZ__DataMap['RecordCount'].sum())
display(dfMDistDataMap['RecordCount'].sum())
display(dfLDistDataMap['RecordCount'].sum())

display(dfSLDataMap   [fnVolume].sum())
display(dfDataMap     ['RecordCount'].sum()/3)

13100711.037696829

13100711.037696833

13100711.037696835

13100711.037696833

5577744

5577744

5577744

13100711.03769683

5577744.0

In [33]:
# pivot tables for each volume and percent

dfDataMap_volume_pivot = dfDataMap.copy()
dfDataMap_volume_pivot['TPRCodeNumber'] = "V_" + dfDataMap_volume_pivot['TPRCode'].astype('str')
dfDataMap_volume_pivot = dfDataMap_volume_pivot.pivot_table(index=('SpecGen','GEO','GEOID'),values='Volume',columns='TPRCodeNumber')
dfDataMap_volume_pivot = dfDataMap_volume_pivot.reset_index()
dfDataMap_volume_pivot = dfDataMap_volume_pivot.fillna(0)
display(dfDataMap_volume_pivot)
display(dfDataMap_volume_pivot.sum())

dfDataMap_percent_pivot = dfDataMap.copy()
dfDataMap_percent_pivot['TPRCodePercent'] = "P_" + dfDataMap_percent_pivot['TPRCode']
dfDataMap_percent_pivot = dfDataMap_percent_pivot.pivot_table(index=('SpecGen','GEO','GEOID'),values='Percent',columns='TPRCodePercent')
dfDataMap_percent_pivot = dfDataMap_percent_pivot.reset_index()
dfDataMap_percent_pivot = dfDataMap_percent_pivot.fillna(0)

# show float with decimals
pd.options.display.float_format = '{:,.4f}'.format

display(dfDataMap_percent_pivot)
display(dfDataMap_percent_pivot.sum())

# show numbers with commas
pd.options.display.float_format = '{:,.0f}'.format

# combine two tables
dfDataMapVolumePercent = pd.DataFrame.merge(dfDataMap_volume_pivot, dfDataMap_percent_pivot, on=('SpecGen','GEO','GEOID'))
display(dfDataMapVolumePercent)

TPRCodeNumber,SpecGen,GEO,GEOID,V_001,V_002,V_003,V_004,V_005,V_011,V_012,...,V_241,V_242,V_243,V_244,V_245,V_251,V_252,V_253,V_254,V_255
0,Airport,CO_TAZID,-1,552,552,411,450,791,44,47,...,92,78,75,66,146,120,104,101,116,159
1,Airport,CO_TAZID,1001,1,1,1,1,2,0,0,...,0,0,0,0,0,0,0,0,1,0
2,Airport,CO_TAZID,1002,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Airport,CO_TAZID,1005,1,0,1,2,2,0,0,...,1,0,1,0,2,0,0,0,0,0
4,Airport,CO_TAZID,1006,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88130,WSU_WEST,DISTMED,64,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88131,WSU_WEST,DISTMED,66,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
88132,WSU_WEST,DISTMED,68,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88133,WSU_WEST,DISTMED,71,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


TPRCodeNumber
SpecGen    AirportAirportAirportAirportAirportAirportAirp...
GEO        CO_TAZIDCO_TAZIDCO_TAZIDCO_TAZIDCO_TAZIDCO_TAZ...
GEOID                                         26,414,015,717
V_001                                              1,387,043
V_002                                              1,518,921
                                 ...                        
V_251                                                240,101
V_252                                                265,725
V_253                                                213,801
V_254                                                226,491
V_255                                                252,883
Length: 93, dtype: object

TPRCodePercent,SpecGen,GEO,GEOID,P_001,P_002,P_003,P_004,P_005,P_011,P_012,...,P_241,P_242,P_243,P_244,P_245,P_251,P_252,P_253,P_254,P_255
0,Airport,CO_TAZID,-1.0000,0.0082,0.0090,0.0062,0.0068,0.0106,0.0066,0.0082,...,0.0103,0.0095,0.0086,0.0075,0.0149,0.0081,0.0078,0.0073,0.0080,0.0092
1,Airport,CO_TAZID,1001.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0001,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0001,0.0000
2,Airport,CO_TAZID,1002.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
3,Airport,CO_TAZID,1005.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0001,0.0000,0.0001,0.0000,0.0002,0.0000,0.0000,0.0000,0.0000,0.0000
4,Airport,CO_TAZID,1006.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88130,WSU_WEST,DISTMED,64.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
88131,WSU_WEST,DISTMED,66.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0002,0.0000,0.0008,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
88132,WSU_WEST,DISTMED,68.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
88133,WSU_WEST,DISTMED,71.0000,0.0000,0.0001,0.0000,0.0000,0.0000,0.0000,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000


TPRCodePercent
SpecGen    AirportAirportAirportAirportAirportAirportAirp...
GEO        CO_TAZIDCO_TAZIDCO_TAZIDCO_TAZIDCO_TAZIDCO_TAZ...
GEOID                                       26,414,015,717.0
P_001                                                   99.0
P_002                                                   99.0
                                 ...                        
P_251                                                   99.0
P_252                                                   99.0
P_253                                                   99.0
P_254                                                   99.0
P_255                                                   99.0
Length: 93, dtype: object

Unnamed: 0,SpecGen,GEO,GEOID,V_001,V_002,V_003,V_004,V_005,V_011,V_012,...,P_241,P_242,P_243,P_244,P_245,P_251,P_252,P_253,P_254,P_255
0,Airport,CO_TAZID,-1,552,552,411,450,791,44,47,...,0,0,0,0,0,0,0,0,0,0
1,Airport,CO_TAZID,1001,1,1,1,1,2,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Airport,CO_TAZID,1002,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Airport,CO_TAZID,1005,1,0,1,2,2,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Airport,CO_TAZID,1006,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88130,WSU_WEST,DISTMED,64,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88131,WSU_WEST,DISTMED,66,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88132,WSU_WEST,DISTMED,68,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88133,WSU_WEST,DISTMED,71,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Prepare shapes

In [34]:
def deleteIfExists(obj):
    if arcpy.Exists(obj): arcpy.Delete_management(obj)

def zipdir(path, ziph):
    # ziph is zipfile handle
    for root, dirs, files in os.walk(path):
        for file in files:
            ziph.write(os.path.join(root, file), 
                        os.path.relpath(os.path.join(root, file), 
                                        os.path.join(path, '..')))

In [35]:
if bPrepareBaseShapefiles:
    # delete old files
    deleteIfExists(shp_MDIST)
    deleteIfExists(shp_LDIST)
    deleteIfExists(shp_TAZ___simple)
    deleteIfExists(shp_MDIST_simple)
    deleteIfExists(shp_LDIST_simple)
    deleteIfExists(shp_TAZ___simple_pnt)
    deleteIfExists(shp_LDIST_simple_pnt)
    deleteIfExists(shp_MDIST_simple_pnt)

    # create medium and large district shapefiles
    arcpy.management.Dissolve(shp_TAZ__, shp_MDIST, 'DISTMED')
    arcpy.management.Dissolve(shp_TAZ__, shp_LDIST, 'DISTLRG')

    # simplify
    arcpy.cartography.SimplifyPolygon(shp_TAZ__, shp_TAZ___simple, "EFFECTIVE_AREA", 100)
    arcpy.cartography.SimplifyPolygon(shp_MDIST, shp_MDIST_simple, "EFFECTIVE_AREA", 100)
    arcpy.cartography.SimplifyPolygon(shp_LDIST, shp_LDIST_simple, "EFFECTIVE_AREA", 100)

In [36]:
sdfTAZ__ = sdfTAZ__
sdfMDist = pd.DataFrame.spatial.from_featureclass(shp_MDIST_simple)
sdfLDist = pd.DataFrame.spatial.from_featureclass(shp_LDIST_simple)

# limit to just needed columns
sdfTAZ__ = sdfTAZ__[['CO_TAZID','SHAPE']]
sdfMDist = sdfMDist[['DISTMED' ,'SHAPE']]
sdfLDist = sdfLDist[['DISTLRG' ,'SHAPE']]

In [39]:
dfSpecGen[dfSpecGen['REGENFGDB']=='Yes']

KeyError: 'REGENFGDB'

In [38]:
dGeos = ['CO_TAZID','DISTMED','DISTLRG']
dSDFs = [  sdfTAZ__, sdfMDist, sdfLDist]

from zipfile import ZipFile

for sg in dSpecGen:

    # only run for specgen with Yes for FGBD
    if sg[3]=='No':
        continue

    print('Exporting ' + sg[0] + '...')

    _gdbname         = sg[0] + '.gdb'
    _zipname         = _gdbname + '.zip'
    _gdbnamewithpath = os.path.join(webapplayers_folder, _gdbname)
    _zipnamewithpath = os.path.join(webapplayers_folder, _zipname)
    #print(_zipnamewithpath)

    # if processing geodatabase doesn't exist, create it
    #print("Checking if " + _gdbname + " exists...")
    if not arcpy.Exists(os.path.join(webapplayers_folder, _gdbname)):
        #print("Creating " + _gdbname + "...")
        arcpy.management.CreateFileGDB(webapplayers_folder, _gdbname)


    for index, value in enumerate(dGeos):
        #print((index, value))
        #print(value)

        dfExportTable = dfDataMapVolumePercent[(dfDataMapVolumePercent['SpecGen']==sg[0]) & (dfDataMapVolumePercent['GEO']==value)]
        #display(dfExportTable)
        sdfExport = pd.DataFrame.merge(dSDFs[index], dfExportTable, left_on=value, right_on='GEOID', how='left')
        sdfExport = sdfExport.fillna(0)
        sdfExport = sdfExport.drop(columns=(['SpecGen','GEO','GEOID']))
        #display(sdfExport)

        #create geodatabases

        _featureclassname = os.path.join(_gdbnamewithpath   , sg[0] + "_" + value)
        
        #print ('Exporting ' + _featureclassname)
        deleteIfExists(_featureclassname)
        sdfExport.spatial.to_featureclass(_featureclassname)

        # create shapefiles and zipfiles
        
        #filenamebase = os.path.join(webapplayers_folder, sg[0] + "_" + value)
        #filenamebase_zip = os.path.join(webapplayerszips_folder, sg[0] + "_" + value)
        #filename_shp     = filenamebase + ".shp"
        #filename_prj     = filenamebase + ".prj"
        #filename_shpxml  = filenamebase + ".shp.xml"
        #filename_shx     = filenamebase + ".shx"
        #filename_shx     = filenamebase + ".shx"

        #print(filename_shp)
        #sdfExport.spatial.to_featureclass(filename_shp)
        
        # create a ZipFile object
        #zipObj = ZipFile(filenamebase_zip + '.zip','w')

        # Add multiple files to the zip
        #zipObj.write(filename_shp   )
        #zipObj.write(filename_prj   )
        #zipObj.write(filename_shpxml)
        #zipObj.write(filename_shx   )
        # close the Zip File
        #zipObj.close()

    # create a ZipFile object
    zipObj = ZipFile(_zipnamewithpath,'w', zipfile.ZIP_DEFLATED)
    zipdir(_gdbnamewithpath,zipObj)
    zipObj.close()


Exporting UOFU_SAND...
Exporting USU_BRIG...
Exporting USU_KAY...
Exporting USU_SL...
Exporting WSU_MAIN...
Exporting WSU_FARM...
Exporting SLCC_MAIN...
Exporting SLCC_LB...
Exporting SLCC_HM...
Exporting SLCC_WVC...
Exporting USU_OREM...
