In [1]:
import pandas as pd
import numpy as np
import openmatrix as omx
import os, sys, glob

In [2]:
os.chdir(r"C:\MTC_tmpy\TM2\New Zone\Airport")

In [3]:
# read and validate crosswalk
xwalk = pd.read_csv("TM2TAZ_Renum_Consec.csv")

# every TM1.5 and TM2 zone is accounted for
# factors sum up to 1 for every TM1.5 zone

missing_zone_IP = set(range(1, max(xwalk.IPZONE) + 1)) - set(xwalk.IPZONE)
missing_zone_OP = set(range(1, max(xwalk.OPZONE) + 1)) - set(xwalk.OPZONE)
assert len(missing_zone_IP) == 0, f'missing TM1.5 zones, {missing_zone_IP}'
assert len(missing_zone_OP) == 0, f'missing TM2 zones, {missing_zone_OP}'

assert (xwalk.groupby(['IPZONE'])['FACTOR'].sum().round(5).nunique() == 1 and
        xwalk.groupby(['IPZONE'])['FACTOR'].sum().round(5).max() == 1), 'factors do not add up to one for every TM1.5 zone'

In [4]:
max(xwalk.IPZONE), max(xwalk.OPZONE)

(4735, 3332)

In [5]:
xwalk.head()

Unnamed: 0,IPZONE,OPZONE,FACTOR
0,1,248,1.0
1,2,249,1.0
2,3,250,1.0
3,4,9,1.0
4,5,251,1.0


In [19]:
in_table_regex = "{year}_tripsAirPax{period}.omx"
out_omx_regex = "disagg/{year}_tripsAirPax{period}.omx"
modes = ["da","sr2","sr3"] # from tm2py model_config.toml

In [9]:
if not os.path.exists('disagg'):
    os.mkdir('disagg')

In [11]:
def read_omx_mats(fn):
    with omx.open_file(fn) as f:
        return f.list_matrices()

In [14]:
# open one file to inspect format
year = '2015'
time_period = 'EA'
read_omx_mats(in_table_regex.format(year = year, period = time_period))

['DA', 'S2', 'S3']

In [23]:
for year in ['2015','2040','2050']:

    for time_period in ['EA','AM','MD','PM','EV']:
        with omx.open_file(in_table_regex.format(year = year, period = time_period)) as f, omx.open_file(out_omx_regex.format(year = year, period=time_period), 'a') as f_out:
            for mode in modes:
                mat_name = mode.upper().replace('SR','S')
                arr = np.array(f[mat_name])
                # Disaggregate
                df = pd.DataFrame(arr)
                df.index.name = 'ORIG'
                df.columns.name = 'DEST'
                df = df.stack().rename('val').reset_index()
                df['ORIG'] += 1
                df['DEST'] += 1

                df_xwalk = df.merge(
                        xwalk, left_on = ['ORIG'], right_on = ['IPZONE'], how = 'left').merge(
                        xwalk, left_on = ['DEST'], right_on = ['IPZONE'], how = 'left', suffixes = ['_ORIG','_DEST'])

                OD_full_index = pd.MultiIndex.from_product([range(1,max(xwalk.OPZONE) + 1), range(1,max(xwalk.OPZONE) + 1)])

                df_xwalk['val_out'] = df_xwalk['val'] * df_xwalk.FACTOR_ORIG * df_xwalk.FACTOR_DEST
                val_new_zones = df_xwalk.groupby(['OPZONE_ORIG','OPZONE_DEST'])['val_out'].sum()
                arr_out = val_new_zones.reindex(OD_full_index, fill_value = 0).unstack().values 

                f_out[mat_name] = arr_out.round(2)

In [92]:
# validate results

for year in ['2015','2040','2050']:
    print(year)
    
    sums = []
    for time_period in ['EA','AM','MD','PM','EV']:
        with omx.open_file(in_table_regex.format(year = year, period = time_period)) as f:
            for mode in modes:
                mat_name = mode.upper().replace('SR','S')
                arr = np.array(f[mat_name])
                df = pd.DataFrame(arr).unstack().reset_index()
                df.columns = ['ORIG','DEST','val']
                df['ORIG'] += 1
                df['DEST'] += 1
                df.set_index(['ORIG','DEST'], inplace = True)
                sums.append(df)

    daily_totals = pd.DataFrame(np.add.reduce(sums))
    daily_totals.index = pd.MultiIndex.from_product([range(1,4736), range(1,4736)], names = ['ORIG','DEST'])
    daily_totals.columns = ['val']
    daily_totals.reset_index(inplace = True)
    
    daily_w_xwalk = daily_totals.merge(
                    xwalk, left_on = ['ORIG'], right_on = ['IPZONE'], how = 'left').merge(
                    xwalk, left_on = ['DEST'], right_on = ['IPZONE'], how = 'left', suffixes = ['_ORIG','_DEST'])
    
    daily_w_xwalk['val'] = daily_w_xwalk['val'] * daily_w_xwalk.FACTOR_ORIG * daily_w_xwalk.FACTOR_DEST
    
    OD_full_index = pd.MultiIndex.from_product([range(1,max(xwalk.OPZONE) + 1), range(1,max(xwalk.OPZONE) + 1)])
    
    trips_in_new_zones = daily_w_xwalk.groupby(['OPZONE_ORIG','OPZONE_DEST'])['val'].sum()
    demand_matrix = trips_in_new_zones.reindex(OD_full_index, fill_value = 0).unstack().values
    
    daily_sum = np.zeros((3332, 3332))
    for time_period in ['EA','AM','MD','PM','EV']:   
        with omx.open_file(out_omx_regex.format(year = year, period=time_period)) as f:
            print(f.list_matrices())
            for m in f.list_matrices():
                daily_sum += np.array(f[m])        

    print('total diff:', demand_matrix.sum() - daily_sum.sum(),
      'element max diff:',(demand_matrix - daily_sum).max(), 
      'mean diff:',(demand_matrix - daily_sum).mean())
        

2015
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
total diff: 1.21973290364258 element max diff: 4.289926757812509 mean diff: 1.0986383479693957e-07
2040
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
total diff: 0.920071823784383 element max diff: 5.970080566406068 mean diff: 8.287274911376192e-08
2050
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
['DA', 'S2', 'S3']
total diff: 0.6198931956896558 element max diff: 6.8399316406248545 mean diff: 5.583504670652487e-08
