In [1]:
import numpy as np
import os
import pandas as pd
import arcpy
import pyxlsb

# Import the required ArcGIS API for Python modules
import arcgis
from arcgis.gis import GIS
gis = arcgis.GIS()
from arcgis.geoanalytics import manage_data

from IPython.display import display, Markdown

In [2]:
dirWork         = os.getcwd()
dirInput        = os.path.join(dirWork, r'input'               )
dirIntermediate = os.path.join(dirWork, r'intermediate'        )
dirResults      = os.path.join(dirWork, r'results'             )
dirHHSurvey     = os.path.join(dirWork, r'0 - HH Travel Survey')

In [3]:
#globals
filename_HHSurvey            = os.path.join(dirHHSurvey, r'2012_HHSurvey - USTMv2.1a - 2021-07-13 - Master.xlsb')
filename_ExternalLookupTable = os.path.join(dirWork, r'2_skim_through_externals\ExternalLookup.csv')
filename_TAZ                 = os.path.join(dirWork, r'0 - HH Travel Survey\TAZ\USTM TAZ - v21a\TAZ_v21a_wNatCoGrps_NAD83_v2.shp')

sheetname_TripData = 'TripData_2021-07-06'


In [4]:
#import excel spreadsheet
df_TripData = pd.read_excel(filename_HHSurvey, sheet_name=sheetname_TripData, engine='pyxlsb')

In [5]:
#help
#?pd.read_excel

In [6]:
#display(df_TripData.columns.tolist())

In [7]:
df_TripData_IXXI = df_TripData[['record_id','trip_pur_t','weight','PA_AP','p_CO_TAZID','a_CO_TAZID']]

#filter so only IX and XI trips are left
df_TripData_IXXI = df_TripData_IXXI.loc[(df_TripData_IXXI['p_CO_TAZID']<0) | (df_TripData_IXXI['a_CO_TAZID']<0)]
df_TripData_IXXI = df_TripData_IXXI.loc[(df_TripData_IXXI['p_CO_TAZID']>0) | (df_TripData_IXXI['a_CO_TAZID']>0)]

display(df_TripData_IXXI)
display(df_TripData_IXXI[['weight']].sum())

Unnamed: 0,record_id,trip_pur_t,weight,PA_AP,p_CO_TAZID,a_CO_TAZID
1282,1311,HBW,19.457436,PA,50155,-808956
1283,1312,HBW,19.457436,AP,50155,-808956
1387,1418,NHBNW,56.887142,PA,530504,-808927
1388,1419,NHBNW,56.887142,PA,-808927,530256
1391,1422,NHBNW,56.887142,PA,530530,-808927
...,...,...,...,...,...,...
99197,103039,HBW,38.316773,PA,530285,-808927
99210,103052,HBW,31.184202,PA,530105,-808927
99253,103095,HBW,45.175257,AP,530153,-808927
99302,103144,HBW,18.536184,PA,530296,-808927


weight    49255.08282
dtype: float64

In [8]:
#read in lookup table
df_luExternals = pd.read_csv(filename_ExternalLookupTable)

In [9]:
df_luExternals

Unnamed: 0,ODPair,I,J,External,Time,Distance
0,31_8776,31.0,8776.0,12.0,2869.37,3027.30
1,31_8777,31.0,8777.0,12.0,2712.40,2846.01
2,31_8778,31.0,8778.0,12.0,2509.57,2625.02
3,31_8779,31.0,8779.0,12.0,2439.52,2583.07
4,31_8780,31.0,8780.0,12.0,2267.28,2447.13
...,...,...,...,...,...,...
3407818,8976_8771,8976.0,8771.0,23.0,520.47,493.43
3407819,8976_8772,8976.0,8772.0,23.0,523.07,495.76
3407820,8976_8773,8976.0,8773.0,23.0,532.31,489.34
3407821,8976_8774,8976.0,8774.0,23.0,559.14,498.78


In [10]:
sdf_TAZ = pd.DataFrame.spatial.from_featureclass(filename_TAZ)

In [11]:
df_luTAZ = sdf_TAZ[['TAZID','CO_TAZID']]
df_luTAZ

Unnamed: 0,TAZID,CO_TAZID
0,7067,50639
1,7069,50641
2,6478,50050
3,7091,50663
4,7087,50659
...,...,...
8939,8972,-808972
8940,8973,-808973
8941,8974,-808974
8942,8975,-808975


In [12]:
df_luExternals_wCoTAZID_i = pd.DataFrame.merge(df_luExternals, df_luTAZ, left_on='I', right_on='TAZID')
df_luExternals_wCoTAZID_i = df_luExternals_wCoTAZID_i.rename(columns={"CO_TAZID": "CO_TAZID_i","TAZID": "TAZID_i"})
df_luExternals_wCoTAZID_i

Unnamed: 0,ODPair,I,J,External,Time,Distance,TAZID_i,CO_TAZID_i
0,31_8776,31.0,8776.0,12.0,2869.37,3027.30,31,1001
1,31_8777,31.0,8777.0,12.0,2712.40,2846.01,31,1001
2,31_8778,31.0,8778.0,12.0,2509.57,2625.02,31,1001
3,31_8779,31.0,8779.0,12.0,2439.52,2583.07,31,1001
4,31_8780,31.0,8780.0,12.0,2267.28,2447.13,31,1001
...,...,...,...,...,...,...,...,...
3390832,8976_8771,8976.0,8771.0,23.0,520.47,493.43,8976,-808976
3390833,8976_8772,8976.0,8772.0,23.0,523.07,495.76,8976,-808976
3390834,8976_8773,8976.0,8773.0,23.0,532.31,489.34,8976,-808976
3390835,8976_8774,8976.0,8774.0,23.0,559.14,498.78,8976,-808976


In [13]:
df_luExternals_wCoTAZID_j = pd.DataFrame.merge(df_luExternals_wCoTAZID_i, df_luTAZ, left_on='J', right_on='TAZID')
df_luExternals_wCoTAZID_j = df_luExternals_wCoTAZID_j.rename(columns={"CO_TAZID": "CO_TAZID_j","TAZID": "TAZID_j"})
df_luExternals_wCoTAZID_j

Unnamed: 0,ODPair,I,J,External,Time,Distance,TAZID_i,CO_TAZID_i,TAZID_j,CO_TAZID_j
0,31_8776,31.0,8776.0,12.0,2869.37,3027.30,31,1001,8776,-808776
1,32_8776,32.0,8776.0,12.0,2867.64,3026.35,32,1002,8776,-808776
2,33_8776,33.0,8776.0,12.0,2867.51,3026.29,33,1003,8776,-808776
3,34_8776,34.0,8776.0,12.0,2867.23,3026.29,34,1004,8776,-808776
4,35_8776,35.0,8776.0,12.0,2866.58,3025.66,35,1005,8776,-808776
...,...,...,...,...,...,...,...,...,...,...
3373848,8972_8775,8972.0,8775.0,12.0,565.05,443.95,8972,-808972,8775,51310
3373849,8973_8775,8973.0,8775.0,2.0,524.01,513.90,8973,-808973,8775,51310
3373850,8974_8775,8974.0,8775.0,18.0,491.53,399.41,8974,-808974,8775,51310
3373851,8975_8775,8975.0,8775.0,10.0,545.74,468.97,8975,-808975,8775,51310


In [14]:
df_luExternals_wCoTAZID = df_luExternals_wCoTAZID_j[['TAZID_i','TAZID_j','CO_TAZID_i','CO_TAZID_j','External']]
df_luExternals_wCoTAZID

Unnamed: 0,TAZID_i,TAZID_j,CO_TAZID_i,CO_TAZID_j,External
0,31,8776,1001,-808776,12.0
1,32,8776,1002,-808776,12.0
2,33,8776,1003,-808776,12.0
3,34,8776,1004,-808776,12.0
4,35,8776,1005,-808776,12.0
...,...,...,...,...,...
3373848,8972,8775,-808972,51310,12.0
3373849,8973,8775,-808973,51310,2.0
3373850,8974,8775,-808974,51310,18.0
3373851,8975,8775,-808975,51310,10.0


In [15]:
#combine i and j dataframes to get all IXXI trips
df_TripData_IXXI_ExternalNum = pd.DataFrame.merge(df_TripData_IXXI, df_luExternals_wCoTAZID, left_on=('p_CO_TAZID','a_CO_TAZID'), right_on=('CO_TAZID_i','CO_TAZID_j'), how='left')
df_TripData_IXXI_ExternalNum = df_TripData_IXXI_ExternalNum[['record_id','trip_pur_t','weight','TAZID_i','TAZID_j','External','PA_AP']]

#Calculate Ps and As, replacing >8775 with external number
df_TripData_IXXI_ExternalNum['P'] = df_TripData_IXXI_ExternalNum.apply(lambda row: row['External'] if row['TAZID_i'] > 8775 else row['TAZID_i'], axis=1)
df_TripData_IXXI_ExternalNum['A'] = df_TripData_IXXI_ExternalNum.apply(lambda row: row['External'] if row['TAZID_j'] > 8775 else row['TAZID_j'], axis=1)

df_TripData_IXXI_ExternalNum = df_TripData_IXXI_ExternalNum[['record_id','trip_pur_t','weight','PA_AP','P','A']]
df_TripData_IXXI_ExternalNum

Unnamed: 0,record_id,trip_pur_t,weight,PA_AP,P,A
0,1311,HBW,19.457436,PA,6583.0,22.0
1,1312,HBW,19.457436,AP,6583.0,22.0
2,1418,NHBNW,56.887142,PA,7854.0,3.0
3,1419,NHBNW,56.887142,PA,3.0,7606.0
4,1422,NHBNW,56.887142,PA,7880.0,3.0
...,...,...,...,...,...,...
544,103039,HBW,38.316773,PA,7635.0,3.0
545,103052,HBW,31.184202,PA,7455.0,3.0
546,103095,HBW,45.175257,AP,7503.0,3.0
547,103144,HBW,18.536184,PA,7646.0,3.0


In [16]:
#check to make sure all rows have one external only

#XX
df_check1 = df_TripData_IXXI_ExternalNum[(df_TripData_IXXI_ExternalNum['P']<30) & (df_TripData_IXXI_ExternalNum['A']<30)]
display(df_check1)

#II
df_check2 = df_TripData_IXXI_ExternalNum[(df_TripData_IXXI_ExternalNum['P']>30) & (df_TripData_IXXI_ExternalNum['A']>30)]
display(df_check2)

#IX trips
df_check3 = df_TripData_IXXI_ExternalNum[(df_TripData_IXXI_ExternalNum['P']>30) & (df_TripData_IXXI_ExternalNum['A']<30)]
display(df_check3)

#XI trips
df_check4 = df_TripData_IXXI_ExternalNum[(df_TripData_IXXI_ExternalNum['P']<30) & (df_TripData_IXXI_ExternalNum['A']>30)]
display(df_check4)

#nulls
df_check5 = df_TripData_IXXI_ExternalNum[df_TripData_IXXI_ExternalNum.isna().any(axis=1)]
display(df_check5)

Unnamed: 0,record_id,trip_pur_t,weight,PA_AP,P,A


Unnamed: 0,record_id,trip_pur_t,weight,PA_AP,P,A


Unnamed: 0,record_id,trip_pur_t,weight,PA_AP,P,A
0,1311,HBW,19.457436,PA,6583.0,22.0
1,1312,HBW,19.457436,AP,6583.0,22.0
2,1418,NHBNW,56.887142,PA,7854.0,3.0
4,1422,NHBNW,56.887142,PA,7880.0,3.0
6,1426,NHBNW,56.887142,PA,7854.0,3.0
...,...,...,...,...,...,...
544,103039,HBW,38.316773,PA,7635.0,3.0
545,103052,HBW,31.184202,PA,7455.0,3.0
546,103095,HBW,45.175257,AP,7503.0,3.0
547,103144,HBW,18.536184,PA,7646.0,3.0


Unnamed: 0,record_id,trip_pur_t,weight,PA_AP,P,A
3,1419,NHBNW,56.887142,PA,3.0,7606.0
5,1423,NHBNW,56.887142,PA,3.0,7606.0
7,1427,NHBNW,56.887142,PA,3.0,7606.0
9,1431,NHBNW,56.887142,PA,3.0,7606.0
11,1435,NHBNW,56.887142,PA,3.0,7606.0
...,...,...,...,...,...,...
491,96364,NHBW,38.204073,PA,3.0,7584.0
521,99363,NHBW,34.231311,PA,3.0,7599.0
526,100177,NHBNW,17.726043,PA,3.0,7603.0
528,100182,NHBNW,17.726043,PA,3.0,7603.0


Unnamed: 0,record_id,trip_pur_t,weight,PA_AP,P,A
379,77717,NHBNW,107.696958,PA,,
380,77729,NHBNW,107.696958,PA,,


In [17]:
#check on two null rows

nullist = [77717,77729]

df_nullrecords = df_TripData[df_TripData['record_id'].isin(nullist)]
display(df_nullrecords[['record_id','p_CO_TAZID','a_CO_TAZID']])

Unnamed: 0,record_id,p_CO_TAZID,a_CO_TAZID
74785,77717,351731,-808964
74797,77729,351731,-808964


NOTE: Examining two null records, show that their quickest route goes through multiple externals, which were filtered out in the external lookup table process.

In [18]:
#remove na rows
df_TripData_IXXI_ExternalNum = df_TripData_IXXI_ExternalNum.dropna()

#create O/D columns
df_TripData_IXXI_ExternalNum['O'] = df_TripData_IXXI_ExternalNum.apply(lambda row: row['P'] if row['PA_AP'] == 'PA' else row['A'], axis=1)
df_TripData_IXXI_ExternalNum['D'] = df_TripData_IXXI_ExternalNum.apply(lambda row: row['A'] if row['PA_AP'] == 'PA' else row['P'], axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [19]:
#aggregate by purpose, p, and a
df_TripTable_IXXI_PA = df_TripData_IXXI_ExternalNum.groupby(['trip_pur_t','P','A'],as_index=False).aggregate({'weight':np.sum})

df_TripTable_IXXI_OD = df_TripData_IXXI_ExternalNum.groupby(['trip_pur_t','O','D'],as_index=False).aggregate({'weight':np.sum})

df_TripTable_IXXI_PA = df_TripTable_IXXI_PA.rename(columns={"weight": "trips"})
df_TripTable_IXXI_OD = df_TripTable_IXXI_OD.rename(columns={"weight": "trips"})

#change p/a and o/d to ints
df_TripTable_IXXI_PA['P'] = df_TripTable_IXXI_PA['P'].astype('int')
df_TripTable_IXXI_PA['A'] = df_TripTable_IXXI_PA['A'].astype('int')
df_TripTable_IXXI_OD['O'] = df_TripTable_IXXI_OD['O'].astype('int')
df_TripTable_IXXI_OD['D'] = df_TripTable_IXXI_OD['D'].astype('int')

display(df_TripTable_IXXI_PA)
display(df_TripTable_IXXI_PA[['trips']].sum())

display(df_TripTable_IXXI_OD)
display(df_TripTable_IXXI_OD[['trips']].sum())

Unnamed: 0,trip_pur_t,P,A,trips
0,HBC,1258,3,308.744921
1,HBO,163,23,144.540832
2,HBO,332,24,111.311628
3,HBO,1133,11,55.984236
4,HBO,1164,12,121.548928
...,...,...,...,...
267,NHBW,7654,3,31.123822
268,NHBW,7664,3,34.231311
269,NHBW,7665,3,229.906346
270,NHBW,7779,3,84.547725


trips    49039.688905
dtype: float64

Unnamed: 0,trip_pur_t,O,D,trips
0,HBC,3,1258,154.372460
1,HBC,1258,3,154.372460
2,HBO,3,3980,105.270124
3,HBO,3,7387,101.177824
4,HBO,3,7401,51.790447
...,...,...,...,...
319,NHBW,7575,3,38.204073
320,NHBW,7599,3,34.231311
321,NHBW,7632,3,76.058560
322,NHBW,7665,3,114.953173


trips    49039.688905
dtype: float64

In [20]:
list_TripPurposes = df_TripData_IXXI_ExternalNum['trip_pur_t'].unique().tolist()
display(list_TripPurposes)

for tp in list_TripPurposes:

    
    df_PA = df_TripTable_IXXI_PA[df_TripTable_IXXI_PA['trip_pur_t']==tp]
    df_OD = df_TripTable_IXXI_OD[df_TripTable_IXXI_OD['trip_pur_t']==tp]

    df_PA[['P','A','trips']].to_csv(os.path.join(dirResults, "IXXI_" + tp + "_PA_TripTable.csv"),index=False)
    df_OD[['O','D','trips']].to_csv(os.path.join(dirResults, "IXXI_" + tp + "_OD_TripTable.csv"),index=False)

    display(tp + " exported.")

['HBW', 'NHBNW', 'NHBW', 'HBO', 'HBShp', 'HBPb', 'HBSch', 'HBC']

'HBW exported.'

'NHBNW exported.'

'NHBW exported.'

'HBO exported.'

'HBShp exported.'

'HBPb exported.'

'HBSch exported.'

'HBC exported.'