In [1]:
import pandas as pd
import numpy as np
import openmatrix as omx
import os, sys, glob

In [2]:
os.chdir(r"C:\MTC_tmpy\TM2\New Zone\Link21_3332TAZs")

In [3]:
# read and validate crosswalk
xwalk = pd.read_csv("TM1_Disagg_Lookup_wExternals.csv")

# every TM1.5 and TM2 zone is accounted for
# factors sum up to 1 for every TM1.5 zone

missing_zone_IP = set(range(1, max(xwalk.IPZONE) + 1)) - set(xwalk.IPZONE)
missing_zone_OP = set(range(1, max(xwalk.OPZONE) + 1)) - set(xwalk.OPZONE)
assert len(missing_zone_IP) == 0, f'missing TM1.5 zones, {missing_zone_IP}'
assert len(missing_zone_OP) == 0, f'missing TM2 zones, {missing_zone_OP}'

assert (xwalk.groupby(['IPZONE'])['FACTOR'].sum().round(5).nunique() == 1 and
        xwalk.groupby(['IPZONE'])['FACTOR'].sum().round(5).max() == 1), 'factors do not add up to one for every TM1.5 zone'

In [4]:
max(xwalk.IPZONE), max(xwalk.OPZONE)

(1475, 3353)

In [5]:
xwalk.head()

Unnamed: 0,IPZONE,OPZONE,FACTOR
0,1,83,1.0
1,2,62,1.0
2,3,85,1.0
3,4,63,1.0
4,5,86,0.202189


In [6]:
from dbfread import DBF

In [10]:
segment_suffixes = ["LowInc", "MedInc", "HighInc", "XHighInc"]
shares = [0.28, 0.24, 0.21, 0.27]
in_table_regex = "{year}/tripsIx{period}.dbf"
out_omx_regex = "disagg/{year}/tripsIx{period}.omx"
modes = ["da","sr2","sr3"] # from tm2py model_config.toml

In [8]:
if not os.path.exists('disagg'):
    os.mkdir('disagg')

In [11]:
# open one file to inspect format
year = '2015'
time_period = 'EA'
df = pd.DataFrame(
            DBF(in_table_regex.format(year = year, period = time_period), load = True).records)

In [12]:
df.head()

Unnamed: 0,ORIG,DEST,DA,SR2,SR3
0,1,1,0.0,0.0,0.0
1,1,2,0.0,0.0,0.0
2,1,3,0.0,0.0,0.0
3,1,4,0.0,0.0,0.0
4,1,5,0.0,0.0,0.0


In [95]:
for year in ['2019','2040','2050']:
#for year in ['2015','2019','2040','2050']:
    if not os.path.exists(f'disagg/{year}'):
        os.mkdir(f'disagg/{year}')

    for time_period in ['EA','AM','MD','PM','EV']:
        df = pd.DataFrame(
            DBF(in_table_regex.format(year = year, period = time_period), load = True).records)
        
        if 'S2' in df.columns:
            df.rename(columns = {'S2':'SR2'}, inplace = True)
        if 'S3' in df.columns:
            df.rename(columns = {'S3':'SR3'}, inplace = True)
        
        
        df_w_xwalk = df.merge(
                xwalk, left_on = ['ORIG'], right_on = ['IPZONE'], how = 'left').merge(
                xwalk, left_on = ['DEST'], right_on = ['IPZONE'], how = 'left', suffixes = ['_ORIG','_DEST'])

        OD_full_index = pd.MultiIndex.from_product([range(1,max(xwalk.OPZONE) + 1), range(1,max(xwalk.OPZONE) + 1)])

        for mode in modes:

            df_w_xwalk[f'{mode}_new'] = df_w_xwalk[mode.upper()] * df_w_xwalk.FACTOR_ORIG * df_w_xwalk.FACTOR_DEST
            trips_in_new_zones = df_w_xwalk.groupby(['OPZONE_ORIG','OPZONE_DEST'])[f'{mode}_new'].sum()
            demand_matrix = trips_in_new_zones.reindex(OD_full_index, fill_value = 0).unstack().values

            with omx.open_file(out_omx_regex.format(year = year, period=time_period), 'a') as f_out:
                for suffix, share in zip(segment_suffixes, shares):
                    f_out[f'{mode}_{suffix}'] = demand_matrix * share

In [97]:
# validate results

#for year in ['2015']:
for year in ['2015','2019','2040','2050']:
    print(year)
    
    sums = []
    for time_period in ['EA','AM','MD','PM','EV']:
        df = pd.DataFrame(DBF(in_table_regex.format(year = year, period = time_period), load = True).records)
        if 'S2' in df.columns:
            df.rename(columns = {'S2':'SR2'}, inplace = True)
        if 'S3' in df.columns:
            df.rename(columns = {'S3':'SR3'}, inplace = True)
        df.set_index(['ORIG','DEST'], inplace = True)
        sums.append(df)

    daily_totals = pd.concat(sums, axis = 1).sum(level=[0], axis = 1).reset_index()
    
    daily_w_xwalk = daily_totals.merge(
                    xwalk, left_on = ['ORIG'], right_on = ['IPZONE'], how = 'left').merge(
                    xwalk, left_on = ['DEST'], right_on = ['IPZONE'], how = 'left', suffixes = ['_ORIG','_DEST'])

    OD_full_index = pd.MultiIndex.from_product([range(1,max(xwalk.OPZONE) + 1), range(1,max(xwalk.OPZONE) + 1)])
    
    for mode in modes:    
        daily_w_xwalk[f'{mode}_new'] = daily_w_xwalk[mode.upper()] * daily_w_xwalk.FACTOR_ORIG * daily_w_xwalk.FACTOR_DEST
        trips_in_new_zones = daily_w_xwalk.groupby(['OPZONE_ORIG','OPZONE_DEST'])[f'{mode}_new'].sum()
        demand_matrix = trips_in_new_zones.reindex(OD_full_index, fill_value = 0).unstack().values
        
        daily_sum = np.zeros((3353, 3353))
        for time_period in ['EA','AM','MD','PM','EV']:   

            with omx.open_file(out_omx_regex.format(year = year, period=time_period)) as f:
                for m in f.list_matrices():
                    if mode in m:
                        daily_sum += np.array(f[m])        

        print(mode, 'total diff:', demand_matrix.sum() - daily_sum.sum(),
              'element max diff:',(demand_matrix - daily_sum).max(), 
              'mean diff:',(demand_matrix - daily_sum).mean())
        

2015


  daily_totals = pd.concat(sums, axis = 1).sum(level=[0], axis = 1).reset_index()


da total diff: 0.0 element max diff: 4.547473508864641e-13 mean diff: -1.5723293891261588e-18
sr2 total diff: 0.0 element max diff: 2.2737367544323206e-13 mean diff: -1.1280905016532252e-19
sr3 total diff: 0.0 element max diff: 1.4210854715202004e-14 mean diff: -3.7864514974618485e-20
2019


  daily_totals = pd.concat(sums, axis = 1).sum(level=[0], axis = 1).reset_index()


da total diff: 1.1641532182693481e-10 element max diff: 9.094947017729282e-13 mean diff: -1.1891214632567353e-18
sr2 total diff: 0.0 element max diff: 2.2737367544323206e-13 mean diff: -1.5584119339544106e-19
sr3 total diff: 1.8189894035458565e-12 element max diff: 1.4210854715202004e-14 mean diff: -5.680518175815432e-20
2040


  daily_totals = pd.concat(sums, axis = 1).sum(level=[0], axis = 1).reset_index()


da total diff: -1.1641532182693481e-10 element max diff: 4.547473508864641e-13 mean diff: -1.9080255435230343e-18
sr2 total diff: 0.0 element max diff: 1.1368683772161603e-13 mean diff: -1.5750199082543943e-19
sr3 total diff: 1.8189894035458565e-12 element max diff: 1.4210854715202004e-14 mean diff: -5.60133192217348e-20
2050


  daily_totals = pd.concat(sums, axis = 1).sum(level=[0], axis = 1).reset_index()


da total diff: 0.0 element max diff: 4.547473508864641e-13 mean diff: -1.7922840984714682e-18
sr2 total diff: 0.0 element max diff: 5.684341886080802e-14 mean diff: -2.522202145979051e-19
sr3 total diff: 3.637978807091713e-12 element max diff: 1.1368683772161603e-13 mean diff: -1.1047577906047023e-20
