In [1]:
import openmatrix as omx
import pandas as pd
import numpy as np
import yaml
from utility import *
import warnings
warnings.filterwarnings("ignore")

#### Details
Jobs accessible from people's homes : 
    count of jobs accessible to each zone within isochrone thresholds (both actual and perceived). Weighted average by workforce in origin zone.
    PP metric: yes
    Multi or Single path : Single
    Modes : rail inclusive (Does this mean only rail OD pairs (IVTHWY + IVTCOM > 0) ?)
    Purpose split: No
    Period splits: Yes, average weekday and annual
    Geography: region, zone origin and destination (RDM, Super district, county) : Just the origin zone ?   
    
Non-work destinations accessible from people's homes
    count of Non-work-destinations accessible to each zone within isochrone thresholds (both actual and perceived). Weighted average by population in origin zone.
    PP metric: yes
    Multi or Single path : Single
    Modes : rail inclusive (Does this mean only rail OD pairs (IVTHWY + IVTCOM > 0) ?)
    Purpose split: No
    Period splits: Yes, average weekday and annual
    Geography: region, zone origin and destination (RDM, Super district, county) : Just the origin zone ?       

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']

skims_dir = _join(model_outputs_dir, "skims")
model_year = params['model_year']
ctramp_dir = params['ctramp_dir']
actual_tt_cores = params['total_travel_time']
perceived_tt_cores = params['perceived_travel_time']
time_periods = params['periods']
acc_egr = params['access_egress_modes']

concept_id = params['concept_id']
summary_columns = params['final_columns']

preprocess_dir = _join(ctramp_dir, '_pre_process_files')
perf_measure_columns = params['final_columns']
period = params['periods']
summary_dir = params['summary_dir']

best_path_skim_extension = params['best_path_skim_extension']

annual_transit_factor = params['annual_transit_factor']
annual_auto_factor = params['annual_auto_factor']

filename_extension = params['filename_extension']

In [3]:
#Read data

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

#get geogrpahies and priority population in the same file
geo_pp_cwks = pd.merge(geo_cwks, pp_perc, on = 'taz', how = 'left') 

#transbay od pairs
# transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

#non work destinations tazs
# non_work_tazs = pd.read_excel(_join(params['common_dir'], 'non_work_destinations.xlsx'))
#non_work_tazs = list(non_work_tazs['non_wrk_taz'])

#taz data for empoyments and resindent employments
tazData = pd.read_csv(_join(params['model_dir'], params['zone_file']))
tazDataTotemp = tazData[["ZONE", "TOTEMP", "RETEMPN", "FPSEMPN", "HEREMPN", "OTHEMPN", "AGREMPN", "MWTEMPN"]]
tazDataEmpres = tazData[["ZONE", "EMPRES"]]
tazDataPop = tazData[["ZONE", "TOTPOP"]]

#mat_core = params['connectivity_mat_core']
time_thresholds = params['accessibility_thresholds']

In [4]:
emp_data = pd.read_csv(_join(params['common_dir'], 'EmpBreakdown'+str(model_year)+'.csv'))
emp_data = emp_data[['TAZ', 'link21', 'jobs']]

In [5]:
emp_data = pd.pivot_table(emp_data, values='jobs', index=['TAZ'],
                       columns=['link21']).fillna(0)
emp_data['TOTEMP']= emp_data.sum(axis=1)
emp_data=emp_data.reset_index()
emp_data=emp_data.rename(columns={'TAZ':'ZONE'})

In [6]:
tazDataTotemp = emp_data

In [7]:
tazDataTotemp

link21,ZONE,CONS,FFRE,FIRE,GOVT,HIED,HMED,K12E,MFRG,PERS,RECS,RETL,RETR,SOCS,UTIL,WTWT,TOTEMP
0,1,24.0,3.0,548.0,252.0,6.0,9.0,0.0,23.0,17.0,806.0,2.0,0.0,0.0,1.0,15.0,1706.0
1,2,64.0,0.0,1036.0,0.0,0.0,90.0,489.0,79.0,52.0,39.0,383.0,224.0,58.0,0.0,0.0,2514.0
2,3,508.0,4.0,2321.0,753.0,84.0,888.0,107.0,224.0,243.0,3212.0,944.0,195.0,38.0,0.0,380.0,9901.0
3,4,74.0,1.0,167.5,96.0,106.0,5.0,19.0,5.0,17.0,185.0,40.0,0.0,17.0,0.0,16.0,748.5
4,5,665.0,2.0,517.5,782.0,18.0,16.0,182.0,257.0,556.0,1146.0,58.0,1191.0,6.0,0.0,404.0,5800.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,3328,158.0,0.0,94.0,144.0,6.0,48.0,249.0,78.0,51.0,108.0,54.0,562.0,67.0,0.0,28.0,1647.0
3176,3329,107.0,4.0,239.5,120.0,0.0,366.0,0.0,0.0,22.0,0.0,69.0,173.0,26.0,0.0,107.0,1233.5
3177,3330,113.0,2.0,258.0,28.0,12.0,86.0,8.0,17.0,128.0,198.0,340.0,80.0,48.0,0.0,31.0,1349.0
3178,3331,188.0,10.0,298.5,253.0,40.0,435.0,19.0,34.0,184.0,138.0,402.0,163.0,68.0,7.5,46.0,2286.0


In [8]:
iteration = params['iteration']
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
household_file = _join(ctramp_dir, 'main\\householdData_' + str(iteration) + '.csv')

hh = pd.read_csv(household_file, usecols = ['hh_id', 'taz', 'income'])
hh = hh.rename(columns = {'taz': 'home_zone'})

In [9]:
hh = assign_income_categories(hh)

In [10]:
hh = hh.groupby(['home_zone', 'income_bin'])['hh_id'].count().reset_index()

In [11]:
hh = hh[['home_zone', 'income_bin']]
hh = hh.rename(columns={'income_bin': 'Income'})

In [12]:
#ck = omx.open_file(transit_skim_files[0])
#ck.list_matrices()

all_tod_tt = pd.read_parquet(_join(preprocess_dir, 'tod_min_actual_travel_time.parquet'))
perc_tod_tt = pd.read_parquet(_join(preprocess_dir, 'tod_min_perceived_travel_time.parquet'))

In [13]:
employment_categories = {'TOTEMP': 'Total employment',
                         'CONS': 'Construction employment', 
                         'FFRE': 'Farming, Forestry, Resource Extraction', 
                         'FIRE': 'Finance, Insurance, Real Estate, Professional Services employment', 
                         'GOVT': 'Public Administration/Government employment',
                         'HIED': 'Higher Education and Educational Services employment',
                         'HMED': 'Health and Medical Services employment',
                         'K12E': 'K12 Education employment',
                         'MFRG': 'Manufacturing employment',
                         'PERS': 'Personal and Repair Services employment',
                         'RECS': 'Restaurants, Hotels/Motels, Recreation Services employment',
                         'RETL': 'Local Serving Retail (*NAICS identical to TM2.1) employment',
                         'RETR': 'Regional Retail (*NAICS identical to TM2.1) employment',
                         'SOCS': 'Social Services employment',
                         'UTIL': 'Utilities employment',
                         'WTWT': 'Wholesale Trade, Warehousing, Transportation employment'}

In [14]:
#employment_categories = {'TOTEMP': 'Total employment'}

In [15]:
#employment_categories = {'TOTEMP': 'Total employment'}
#time_periods = ['am']

In [16]:
for emp_cat in employment_categories:
    print(emp_cat, employment_categories[emp_cat])

TOTEMP Total employment
CONS Construction employment
FFRE Farming, Forestry, Resource Extraction
FIRE Finance, Insurance, Real Estate, Professional Services employment
GOVT Public Administration/Government employment
HIED Higher Education and Educational Services employment
HMED Health and Medical Services employment
K12E K12 Education employment
MFRG Manufacturing employment
PERS Personal and Repair Services employment
RECS Restaurants, Hotels/Motels, Recreation Services employment
RETL Local Serving Retail (*NAICS identical to TM2.1) employment
RETR Regional Retail (*NAICS identical to TM2.1) employment
SOCS Social Services employment
UTIL Utilities employment
WTWT Wholesale Trade, Warehousing, Transportation employment


In [17]:
#geo_pp_cwks

In [18]:
def get_accessibility_jobs(all_tod_tt, time_periods, time_thresholds, geo_pp_cwks, tt_verbose):

    acc_jobs = []
    acc_jobs_nwd = []
    df = []
    acc_jobs_pp = []
    acc_jobs_rdm = []
    acc_jobs_sd = []
    acc_jobs_cnty = []
    acc_jobs_rdm_pp = []
    
    for period in time_periods:
        
            
        for emp_cat in employment_categories:
            print(period, emp_cat)
            emp = employment_categories[emp_cat]

            timedaData = all_tod_tt[[period]].reset_index()
            #print(timedaData.columns)
            #timedaData = timedaData.fillna(0)
            timedaData[period] = timedaData[period]/100

            #total employment based on destination
            od_data = pd.merge(timedaData, tazDataTotemp, left_on='dest', right_on='ZONE', how = 'left')
            #print(od_data.columns)
            
            
            #for time thresholds - currently set in config files
            for threshold in time_thresholds:
                # print(f'processing {transit_file} for time threshold {threshold}')
                # create data for all destinations
                oddata_min = od_data.loc[od_data[period] <= threshold]
                oddata_min[emp_cat] = oddata_min[emp_cat].fillna(0)
                od_data_inc = pd.merge(oddata_min, hh, left_on='orig', right_on='home_zone', how = 'left')
                
                oddata_min_inc = od_data_inc.groupby(['orig', 'Income'])[emp_cat].sum().reset_index()
                
                # adding employed residents
                oddata_min_reg_inc = pd.merge(oddata_min_inc, tazDataEmpres, left_on= ['orig'], right_on =['ZONE'], how ='left')

                # regional 
                oddata_min_reg_inc = oddata_min_reg_inc.loc[oddata_min_reg_inc['EMPRES']>0]
                oddata_min_reg_inc['wt_empres'] = oddata_min_reg_inc[emp_cat] * oddata_min_reg_inc['EMPRES']
                
                df_temp_region = oddata_min_reg_inc.groupby(['Income'])['wt_empres','EMPRES'].sum().reset_index()
                df_temp_region['Value'] =  df_temp_region['wt_empres']/df_temp_region['EMPRES']                
                
                df_temp_region['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                df_temp_region['Period'] = period
                df_temp_region['Geography'] = 'Regional'
                df_temp_region['Population'] = 'Whole Population'
                df_temp_region['Zone_ID'] = 'Region'
                df_temp_region = df_temp_region[['Description', 'Population', 'Period', 'Income',
                                                 'Geography', 'Zone_ID', 'Value']]
                df_temp_region['Submetric'] = 'B1.1.1'
                df_temp_region['Total_Increment'] = ''
                
                
                # regional 
                oddata_min_all = oddata_min.groupby(['orig'])[emp_cat].sum().reset_index()
                oddata_min_all = pd.merge(oddata_min_all, tazDataEmpres, left_on= ['orig'], right_on =['ZONE'], how ='left')
                oddata_min_all = oddata_min_all.loc[oddata_min_all['EMPRES']>0]
                oddata_min_all['wt_empres'] = oddata_min_all[emp_cat] * oddata_min_all['EMPRES']
                job_acc_min_value = oddata_min_all['wt_empres'].sum()/oddata_min_all['EMPRES'].sum()

                #acc_jobs.append(job_acc_min)
                df_temp_region_all = pd.DataFrame({'Description': f"{emp} accessible within " + tt_verbose + 
                                                   " " + str(threshold) + " minutes",
                                                'Period': period,
                                                'Geography': 'Regional',
                                                'Population': 'Whole Population',
                                                'Zone_ID': 'Region',
                                                'Income': 'All',
                                                'Submetric': 'B1.1.1',
                                                'Total_Increment': '',
                                                'Value': job_acc_min_value}, index=[0])
                
                df_temp_region = pd.concat([df_temp_region, df_temp_region_all])
                df.append(df_temp_region)                

                # adding priority population and geographies
                oddata_min_geo = pd.merge(oddata_min, geo_pp_cwks, left_on= ['orig'], right_on =['taz'], how ='left')
                oddata_min_geo = pd.merge(oddata_min_geo, tazDataEmpres, left_on= ['orig'], right_on =['ZONE'], how ='left')
                oddata_min_geo = oddata_min_geo.loc[oddata_min_geo['EMPRES']>0]
                oddata_min_geo['wt_empres'] = oddata_min_geo[emp_cat] * oddata_min_geo['EMPRES']
                
                oddata_min_geo_inc = pd.merge(oddata_min_inc, geo_pp_cwks, left_on= ['orig'], right_on =['taz'], how ='left')
                oddata_min_geo_inc = pd.merge(oddata_min_geo_inc, tazDataEmpres, left_on= ['orig'], right_on =['ZONE'], how ='left')
                oddata_min_geo_inc = oddata_min_geo_inc.loc[oddata_min_geo_inc['EMPRES']>0]
                oddata_min_geo_inc['wt_empres'] = oddata_min_geo_inc[emp_cat] * oddata_min_geo_inc['EMPRES']

                
                 # for county
                #if 'county' in geo_pp_cwks.columns:
                oddata_min_cnty = oddata_min_geo.groupby(['county'])['wt_empres','EMPRES'].sum().reset_index()
                oddata_min_cnty['Value'] =  oddata_min_cnty['wt_empres']/oddata_min_cnty['EMPRES']
                oddata_min_cnty['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                oddata_min_cnty['Period'] = period
                oddata_min_cnty['Geography'] = 'County'
                oddata_min_cnty['Population'] = 'Whole Population'
                oddata_min_cnty = oddata_min_cnty.rename(columns={'county': 'Zone_ID'})
                oddata_min_cnty = oddata_min_cnty[['Description', 'Population', 'Period',
                                                 'Geography', 'Zone_ID', 'Value']]   
                oddata_min_cnty['Submetric'] = 'B1.1.2'
                oddata_min_cnty['Income'] = ''
                oddata_min_cnty['Total_Increment'] = ''
                acc_jobs_cnty.append(oddata_min_cnty)

                # for RDM zones
                #if 'rdm_zones' in geo_pp_cwks.columns:
                oddata_min_rdm = oddata_min_geo_inc.groupby(['rdm_zones', 'Income'])['wt_empres','EMPRES'].sum().reset_index()
                oddata_min_rdm['Value'] =  oddata_min_rdm['wt_empres']/oddata_min_rdm['EMPRES']
                oddata_min_rdm['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                oddata_min_rdm['Period'] = period
                oddata_min_rdm['Geography'] = 'RDM'
                oddata_min_rdm['Population'] = 'Whole Population'
                oddata_min_rdm = oddata_min_rdm.rename(columns={'rdm_zones' : 'Zone_ID'})
                oddata_min_rdm = oddata_min_rdm[['Description', 'Population', 'Period','Income',
                                                 'Geography', 'Zone_ID', 'Value']]
                oddata_min_rdm['Submetric'] = 'B1.1.3'
                oddata_min_rdm['Total_Increment'] = ''
                acc_jobs_rdm.append(oddata_min_rdm)
                
                oddata_min_rdm_all = oddata_min_geo.groupby(['rdm_zones'])['wt_empres','EMPRES'].sum().reset_index()
                oddata_min_rdm_all['Value'] =  oddata_min_rdm_all['wt_empres']/oddata_min_rdm_all['EMPRES']
                oddata_min_rdm_all['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                oddata_min_rdm_all['Period'] = period
                oddata_min_rdm_all['Geography'] = 'RDM'
                oddata_min_rdm_all['Population'] = 'Whole Population'
                oddata_min_rdm_all.rename(columns={'rdm_zones' : 'Zone_ID'}, inplace=True)
                oddata_min_rdm_all['Income'] = 'All'
                oddata_min_rdm_all = oddata_min_rdm_all[['Description', 'Population', 'Period', 'Income',
                                                 'Geography', 'Zone_ID', 'Value']]
                oddata_min_rdm_all['Submetric'] = 'B1.1.3'
                oddata_min_rdm_all['Total_Increment'] = ''
                acc_jobs_rdm.append(oddata_min_rdm_all)
                
                
                # for superdistrict
                #if 'super_district' in geo_pp_cwks.columns:
                oddata_min_sd = oddata_min_geo.groupby(['super_district'])['wt_empres','EMPRES'].sum().reset_index()
                oddata_min_sd['Value'] =  oddata_min_sd['wt_empres']/oddata_min_sd['EMPRES']
                oddata_min_sd['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                oddata_min_sd['Period'] = period
                oddata_min_sd['Population'] = 'Whole Population'
                oddata_min_sd['Geography'] = 'Super district'
                oddata_min_sd.rename(columns={'super_district': 'Zone_ID'}, inplace=True)
                oddata_min_sd = oddata_min_sd[['Description', 'Population', 'Period',
                                                 'Geography', 'Zone_ID', 'Value']]
                oddata_min_sd['Income'] = ''
                oddata_min_sd['Submetric'] = 'B1.1.4'
                oddata_min_sd['Total_Increment'] = ''
                acc_jobs_sd.append(oddata_min_sd)

                # for prioirty population
                #if 'pp_share' in geo_pp_cwks.columns:
                oddata_min_geo['priority_population'] = oddata_min_geo['pp_share'].apply(lambda x: 1 if x > 0 else 0)
                oddata_min_geo['wt_empres_pp'] = oddata_min_geo[emp_cat] * oddata_min_geo['EMPRES'] * oddata_min_geo['pp_share']/100
                oddata_min_geo['EMPRES_pp'] = oddata_min_geo['EMPRES'] * oddata_min_geo['pp_share']/100
                oddata_min_pp = oddata_min_geo[oddata_min_geo['priority_population']==1]
                oddata_min_pp = oddata_min_pp.groupby(['priority_population'])['wt_empres_pp','EMPRES_pp'].sum().reset_index()
                oddata_min_pp['Value'] = oddata_min_pp['wt_empres_pp']/oddata_min_pp['EMPRES_pp']
                oddata_min_pp = oddata_min_pp[['priority_population', 'Value']]

                oddata_min_comb = pd.concat([oddata_min_pp], ignore_index=False)
                oddata_min_comb = oddata_min_comb.drop(columns=['priority_population'])
                oddata_min_comb['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                oddata_min_comb['Period'] = period             
                oddata_min_comb['Population'] = 'Priority population'
                oddata_min_comb['Geography'] = 'Region'
                oddata_min_comb['Zone_ID'] = ''
                oddata_min_comb = oddata_min_comb[['Description', 'Population', 'Period', 
                                                 'Geography', 'Zone_ID', 'Value']]
                oddata_min_comb['Income'] = ''
                oddata_min_comb['Submetric'] = 'B1.1.5'
                oddata_min_comb['Total_Increment'] = ''
                acc_jobs_pp.append(oddata_min_comb)
                
                # RDM Zone Prioirty Population
                oddata_min_geo_inc['priority_population'] = oddata_min_geo_inc['pp_share'].apply(lambda x: 1 if x > 0 else 0) 
                oddata_min_geo_inc['wt_empres_pp'] = oddata_min_geo_inc[emp_cat] * oddata_min_geo_inc['EMPRES'] * oddata_min_geo_inc['pp_share']/100
                oddata_min_geo_inc['EMPRES_pp'] = oddata_min_geo_inc['EMPRES'] * oddata_min_geo_inc['pp_share']/100
                
                oddata_min_pp = oddata_min_geo_inc[oddata_min_geo_inc['priority_population']==1]
                oddata_min_pp_rdm = oddata_min_pp.groupby(['rdm_zones', 'Income'])['wt_empres_pp','EMPRES_pp'].sum().reset_index()
                oddata_min_pp_rdm['Value'] = oddata_min_pp_rdm['wt_empres_pp']/oddata_min_pp_rdm['EMPRES_pp']
                oddata_min_pp_rdm = oddata_min_pp_rdm[['rdm_zones', 'Income', 'Value']]
                oddata_min_pp_rdm.rename(columns={'rdm_zones' : 'Zone_ID'}, inplace=True)
                oddata_min_pp_rdm['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                oddata_min_pp_rdm['Period'] = period             
                oddata_min_pp_rdm['Population'] = 'Priority population'
                oddata_min_pp_rdm['Geography'] = 'RDM'
                oddata_min_pp_rdm = oddata_min_pp_rdm[['Description', 'Population', 'Period', 'Income',
                                                 'Geography', 'Zone_ID', 'Value']]
                oddata_min_pp_rdm['Submetric'] = 'B1.1.6'
                oddata_min_pp_rdm['Total_Increment'] = ''
                acc_jobs_rdm_pp.append(oddata_min_pp_rdm)
                
                oddata_min_pp_rdm_all = oddata_min_pp.groupby(['rdm_zones'])['wt_empres_pp','EMPRES_pp'].sum().reset_index()
                oddata_min_pp_rdm_all['Value'] = oddata_min_pp_rdm_all['wt_empres_pp']/oddata_min_pp_rdm_all['EMPRES_pp']
                oddata_min_pp_rdm_all = oddata_min_pp_rdm_all[['rdm_zones', 'Value']]
                oddata_min_pp_rdm_all.rename(columns={'rdm_zones' : 'Zone_ID'}, inplace=True)
                oddata_min_pp_rdm_all['Description'] = f"{emp} accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                oddata_min_pp_rdm_all['Period'] = period             
                oddata_min_pp_rdm_all['Population'] = 'Priority population'
                oddata_min_pp_rdm_all['Geography'] = 'RDM'
                oddata_min_pp_rdm_all = oddata_min_pp_rdm_all[['Description', 'Population', 'Period',
                                                 'Geography', 'Zone_ID', 'Value']]
                oddata_min_pp_rdm_all['Income'] = 'All'
                oddata_min_pp_rdm_all['Submetric'] = 'B1.1.6'
                oddata_min_pp_rdm_all['Total_Increment'] = ''
                acc_jobs_rdm_pp.append(oddata_min_pp_rdm_all)
                
            
                
                #break
            #break
        #break
            
    df_region = pd.concat(df).reset_index(drop=True)
    df_pp = pd.concat(acc_jobs_pp).reset_index(drop=True)
    df_rdm = pd.concat(acc_jobs_rdm).reset_index(drop=True)
    df_rdm = df_rdm.sort_values(by=['Zone_ID', 'Period', 'Description', 'Income'])
    df_sd = pd.concat(acc_jobs_sd).reset_index(drop=True)
    df_cnty = pd.concat(acc_jobs_cnty).reset_index(drop=True)
    df_rdm_pp = pd.concat(acc_jobs_rdm_pp).reset_index(drop=True)
    df_rdm_pp = df_rdm_pp.sort_values(by=['Zone_ID', 'Period', 'Description', 'Income'])
    
    return df_region, df_pp, df_rdm, df_sd, df_cnty, df_rdm_pp

In [19]:
df_region_act, df_pp_act, df_rdm_act, df_sd_act, df_cnty_act, df_rdm_pp_act = get_accessibility_jobs(all_tod_tt, time_periods, time_thresholds, 
                                                                  geo_pp_cwks, "actual")

am TOTEMP
am CONS
am FFRE
am FIRE
am GOVT
am HIED
am HMED
am K12E
am MFRG
am PERS
am RECS
am RETL
am RETR
am SOCS
am UTIL
am WTWT
md TOTEMP
md CONS
md FFRE
md FIRE
md GOVT
md HIED
md HMED
md K12E
md MFRG
md PERS
md RECS
md RETL
md RETR
md SOCS
md UTIL
md WTWT
pm TOTEMP
pm CONS
pm FFRE
pm FIRE
pm GOVT
pm HIED
pm HMED
pm K12E
pm MFRG
pm PERS
pm RECS
pm RETL
pm RETR
pm SOCS
pm UTIL
pm WTWT
ev TOTEMP
ev CONS
ev FFRE
ev FIRE
ev GOVT
ev HIED
ev HMED
ev K12E
ev MFRG
ev PERS
ev RECS
ev RETL
ev RETR
ev SOCS
ev UTIL
ev WTWT
ea TOTEMP
ea CONS
ea FFRE
ea FIRE
ea GOVT
ea HIED
ea HMED
ea K12E
ea MFRG
ea PERS
ea RECS
ea RETL
ea RETR
ea SOCS
ea UTIL
ea WTWT


In [20]:
#df_region_act.to_csv("b111_test.csv")

In [21]:
#df_rdm_pp_act

In [22]:
df_region_perc, df_pp_perc, df_rdm_perc, df_sd_perc, df_cnty_perc, df_rdm_pp_perc = get_accessibility_jobs(perc_tod_tt, time_periods, time_thresholds, 
                                                                  geo_pp_cwks, "perceived")

am TOTEMP
am CONS
am FFRE
am FIRE
am GOVT
am HIED
am HMED
am K12E
am MFRG
am PERS
am RECS
am RETL
am RETR
am SOCS
am UTIL
am WTWT
md TOTEMP
md CONS
md FFRE
md FIRE
md GOVT
md HIED
md HMED
md K12E
md MFRG
md PERS
md RECS
md RETL
md RETR
md SOCS
md UTIL
md WTWT
pm TOTEMP
pm CONS
pm FFRE
pm FIRE
pm GOVT
pm HIED
pm HMED
pm K12E
pm MFRG
pm PERS
pm RECS
pm RETL
pm RETR
pm SOCS
pm UTIL
pm WTWT
ev TOTEMP
ev CONS
ev FFRE
ev FIRE
ev GOVT
ev HIED
ev HMED
ev K12E
ev MFRG
ev PERS
ev RECS
ev RETL
ev RETR
ev SOCS
ev UTIL
ev WTWT
ea TOTEMP
ea CONS
ea FFRE
ea FIRE
ea GOVT
ea HIED
ea HMED
ea K12E
ea MFRG
ea PERS
ea RECS
ea RETL
ea RETR
ea SOCS
ea UTIL
ea WTWT


In [23]:
decimals = 0  
#df['column'] = df['column'].apply(lambda x: round(x, decimals))

In [24]:
comb_df_act = pd.concat([df_region_act, df_pp_act, df_rdm_act, df_sd_act, df_cnty_act, df_rdm_pp_act], ignore_index=True)
comb_df_act['Concept_ID'] = concept_id
comb_df_act['Metric_ID'] = 'B1.1'
comb_df_act['Metric_name'] = 'Jobs accessible from peoples homes - actual travel time'
comb_df_act['Origin_zone'] = ''
comb_df_act['Dest_zone'] = ''
comb_df_act['Purpose'] = ''
comb_df_act['Mode'] = ''
#comb_df_act['Income'] = ''
comb_df_act['Units'] = 'Jobs'
comb_df_act['Value'] = comb_df_act['Value'].apply(lambda x: round(x, decimals))
comb_df_act = comb_df_act[summary_columns]

In [25]:
unique_medtric_ids = comb_df_act['Submetric'].unique()
for mids in unique_medtric_ids:
    metric_name = '_jobs_accessible_from_home_actual_time_'
    dfs = comb_df_act.loc[comb_df_act['Submetric']==mids]
    dfs = dfs.reset_index(drop=True)
    dfs = dfs[perf_measure_columns]
    file_name = mids
    geography = '_' + dfs['Geography'][0].replace(' ', '_')
    dfs.to_csv(_join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'), index=None)
    print(len(dfs), file_name, dfs['Value'].sum())

comb_df_act.to_csv(_join(summary_dir, 'B1.1'+'_jobs_accessible_from_home_actual_time_'  + concept_id + '_region' +filename_extension + '.csv'), index=None)

1600 B1.1.1 118343488.0
320 B1.1.5 57767.0
345664 B1.1.3 23513299051.0
8208 B1.1.4 1671459.0
2000 B1.1.2 408550.0
164784 B1.1.6 15027603624.0


In [26]:
comb_df_perc = pd.concat([df_region_perc, df_pp_perc, df_rdm_perc, df_sd_perc, df_cnty_perc, df_rdm_pp_perc], ignore_index=True)
comb_df_perc['Concept_ID'] = concept_id
comb_df_perc['Metric_ID'] = 'B1.1'
comb_df_perc['Metric_name'] = 'Jobs accessible from peoples homes - perceived travel time'
comb_df_perc['Origin_zone'] = ''
comb_df_perc['Dest_zone'] = ''
comb_df_perc['Purpose'] = ''
comb_df_perc['Mode'] = ''
#comb_df_perc['Income'] = ''
comb_df_perc['Units'] = 'Jobs'
comb_df_perc['Value'] = comb_df_perc['Value'].apply(lambda x: round(x, decimals))
comb_df_perc = comb_df_perc[summary_columns]

In [27]:
unique_medtric_ids = comb_df_perc['Submetric'].unique()
for mids in unique_medtric_ids:
    metric_name = '_jobs_accessible_from_home_perceived_time_'
    dfs = comb_df_perc.loc[comb_df_perc['Submetric']==mids]
    dfs = dfs.reset_index(drop=True)
    dfs = dfs[perf_measure_columns]
    file_name = mids
    geography = '_' + dfs['Geography'][0].replace(' ', '_')
    dfs.to_csv(_join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'), index=None)
    print(len(dfs), file_name, dfs['Value'].sum())

comb_df_perc.to_csv(_join(summary_dir, 'B1.1' + metric_name + concept_id + '_region' + filename_extension + '.csv'), index=None)

1600 B1.1.1 77229243.0
320 B1.1.5 71266.0
325920 B1.1.3 15782154325.0
7952 B1.1.4 2074462.0
2032 B1.1.2 532439.0
159200 B1.1.6 10302644230.0


def get_non_work_destionations(all_tod_tt, time_periods, time_thresholds, geo_pp_cwks, non_work_dest_tazs, tt_verbose):

    acc_jobs = []
    df = []
    nwd_acc_min = []
    nwd_jobs_pp = []
    nwd_jobs_rdm = []
    nwd_jobs_sd = []
    nwd_jobs_cnty = []
    
    for period in time_periods:
        
        if period in all_tod_tt.columns:

            timedaData = all_tod_tt[[period]].reset_index()
            print(timedaData.columns)
            timedaData = timedaData.fillna(0)
            timedaData = timedaData.loc[timedaData[period]>0]
            timedaData[period] = timedaData[period]/100

            #total non work destinations based on destination
            od_data = pd.merge(timedaData, non_work_dest_tazs, left_on='dest', right_on='taz', how = 'inner')
            od_data.isna().sum()

            #for time thresholds - currently set in config files
            for threshold in time_thresholds:
                # print(f'processing {transit_file} for time threshold {threshold}')
                # create data for all destinations
                oddata_min = od_data.loc[od_data[period] <= threshold]
                oddata_min = oddata_min.groupby(['orig'])['non_work_dest'].sum().reset_index()

                # adding employed residents
                oddata_min = pd.merge(oddata_min, tazDataPop, left_on= ['orig'], right_on =['ZONE'], how ='left')

                # regional 
                oddata_min['wt_pop'] = oddata_min['non_work_dest'] * oddata_min['TOTPOP']
                nwd_acc_min = oddata_min['wt_pop'].sum()/oddata_min['non_work_dest'].sum()
                #acc_jobs.append(job_acc_min)

                df_temp_region = pd.DataFrame({'Description': "Non-work destinations accessible within " + tt_verbose + " " + str(threshold) + " minutes",
                                               'Population': 'Whole Population',
                                               'Period': period,
                                               'Geography': 'Regional total',
                                               'Zone_ID': 'Region',
                                               'Submetric': 'B1.3.1',
                                               'Total_Increment': '',
                                               'Value': nwd_acc_min}, index=[0])
                
                
                df.append(df_temp_region)

                # adding priority population and geographies
                oddata_min = pd.merge(oddata_min, geo_pp_cwks, left_on= ['orig'], right_on =['taz'], how ='left')

                # for prioirty population
                if 'pp_share' in geo_pp_cwks.columns:
                    oddata_min['priority_population'] = oddata_min['pp_share'].apply(lambda x: 1 if x > 0 else 0)
                    oddata_min['wt_pop_pp'] = oddata_min['non_work_dest'] * oddata_min['TOTPOP'] * oddata_min['pp_share']/100
                    oddata_min['pop_pp'] = oddata_min['TOTPOP'] * oddata_min['pp_share']/100
                    oddata_min_pp = oddata_min[oddata_min['priority_population']==1]
                    oddata_min_nonpp = oddata_min[oddata_min['priority_population']==0]
                    oddata_min_pp = oddata_min_pp.groupby(['priority_population'])['wt_pop_pp','pop_pp'].sum().reset_index()
                    oddata_min_pp['Value'] = oddata_min_pp['wt_pop_pp']/oddata_min_pp['pop_pp']
                    oddata_min_pp = oddata_min_pp[['priority_population', 'Value']]

                    oddata_min_nonpp = oddata_min_nonpp.groupby(['priority_population'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_nonpp['Value'] = oddata_min_nonpp['wt_pop']/oddata_min_nonpp['TOTPOP']
                    oddata_min_nonpp = oddata_min_nonpp[['priority_population', 'Value']]

                    oddata_min_comb = pd.concat([oddata_min_pp, oddata_min_nonpp], ignore_index=False)
                    oddata_min_comb = oddata_min_comb.loc[oddata_min_comb['priority_population'] == 1]
                    oddata_min_comb = oddata_min_comb.drop(columns=['priority_population'])
                    oddata_min_comb['Description'] = "Non-work destinations accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                    oddata_min_comb['Period'] = period
                    oddata_min_comb['Population'] = 'Priority population'
                    oddata_min_comb['Geography'] = 'Regional total'
                    oddata_min_comb['Zone_ID'] = 'Region'
                    oddata_min_comb = oddata_min_comb[['Description', 'Population', 'Period', 
                                                     'Geography', 'Zone_ID', 'Value']]
                    oddata_min_comb['Submetric'] = 'B1.3.2'
                    oddata_min_comb['Total_Increment'] = ''
                    nwd_jobs_pp.append(oddata_min_comb)

                # for RDM zones
                if 'rdm_zones' in geo_pp_cwks.columns:
                    oddata_min_rdm = oddata_min.groupby(['rdm_zones'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_rdm['Value'] =  oddata_min_rdm['wt_pop']/oddata_min_rdm['TOTPOP']
                    oddata_min_rdm['Description'] = "Non-work destinations accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                    oddata_min_rdm['Period'] = period
                    oddata_min_rdm['Geography'] = 'RDM'
                    oddata_min_rdm['Population'] = 'Whole Population'
                    oddata_min_rdm.rename(columns={'rdm_zones' : 'Zone_ID'}, inplace=True)
                    oddata_min_rdm = oddata_min_rdm[['Description', 'Population', 'Period',
                                                     'Geography', 'Zone_ID', 'Value']]
                    oddata_min_rdm['Submetric'] = 'B1.3.3'
                    oddata_min_rdm['Total_Increment'] = ''
                    nwd_jobs_rdm.append(oddata_min_rdm)

                # for superdistrict
                if 'super_district' in geo_pp_cwks.columns:
                    oddata_min_sd = oddata_min.groupby(['super_district'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_sd['Value'] =  oddata_min_sd['wt_pop']/oddata_min_sd['TOTPOP']
                    oddata_min_sd['Description'] = "Non-work destinations accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                    oddata_min_sd['Period'] = period
                    oddata_min_sd['Population'] = 'Whole Population'
                    oddata_min_sd['Geography'] = 'Superdistrict'
                    oddata_min_sd.rename(columns={'super_district': 'Zone_ID'}, inplace=True)
                    oddata_min_sd = oddata_min_sd[['Description', 'Population', 'Period',
                                                     'Geography', 'Zone_ID', 'Value']]
                    oddata_min_sd['Submetric'] = 'B1.3.4'
                    oddata_min_sd['Total_Increment'] = ''
                    nwd_jobs_sd.append(oddata_min_sd)

                # for county
                if 'county' in geo_pp_cwks.columns:
                    oddata_min_cnty = oddata_min.groupby(['county'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_cnty['Value'] =  oddata_min_cnty['wt_pop']/oddata_min_cnty['TOTPOP']
                    oddata_min_cnty['Description'] = "Non-work destinations accessible within " + tt_verbose + " " + str(threshold) + " minutes"
                    oddata_min_cnty['Period'] = period
                    oddata_min_cnty['Geography'] = 'County'
                    oddata_min_cnty['Population'] = 'Whole Population'
                    oddata_min_cnty.rename(columns={'county': 'Zone_ID'}, inplace=True)
                    oddata_min_cnty = oddata_min_cnty[['Description', 'Population', 'Period',
                                                     'Geography', 'Zone_ID', 'Value']]   
                    oddata_min_cnty['Submetric'] = 'B1.3.5'
                    oddata_min_cnty['Total_Increment'] = ''
                    nwd_jobs_cnty.append(oddata_min_cnty)
                        
        else:
            print(f'{period} travel time doesn\'t exist.')
            
    df_region = pd.concat(df).reset_index(drop=True)
    df_pp = pd.concat(nwd_jobs_pp).reset_index(drop=True)
    df_rdm = pd.concat(nwd_jobs_rdm).reset_index(drop=True)
    df_sd = pd.concat(nwd_jobs_sd).reset_index(drop=True)
    df_cnty = pd.concat(nwd_jobs_cnty).reset_index(drop=True)
    
    return df_region, df_pp, df_rdm, df_sd, df_cnty

df_region, df_pp, df_rdm, df_sd, df_cnty = get_non_work_destionations(all_tod_tt, time_periods, time_thresholds,
                                                                      geo_pp_cwks, non_work_tazs, "actual")

comb_df = pd.concat([df_region, df_pp, df_rdm, df_sd, df_cnty], ignore_index=True)

comb_df['Concept_ID'] = concept_id
comb_df['Metric_ID'] = 'B1.3'
comb_df['Metric_name'] = 'Non-work destinations accessible from people\'s homes'
comb_df['Origin_zone'] = ''
comb_df['Dest_zone'] = ''
comb_df['Purpose'] = ''
comb_df['Units'] = 'Destinations'
comb_df['Value'] = comb_df['Value'].apply(lambda x: round(x, decimals))

comb_df_nwd_act = comb_df[summary_columns]

df_region, df_pp, df_rdm, df_sd, df_cnty = get_non_work_destionations(perc_tod_tt, time_periods, time_thresholds,
                                                                      geo_pp_cwks, non_work_tazs, "perceived")

comb_df = pd.concat([df_region, df_pp, df_rdm, df_sd, df_cnty], ignore_index=True)

comb_df['Concept_ID'] = concept_id
comb_df['Metric_ID'] = 'B1.3'
comb_df['Metric_name'] = 'Non-work destinations accessible from people\'s homes'
comb_df['Origin_zone'] = ''
comb_df['Dest_zone'] = ''
comb_df['Purpose'] = ''
comb_df['Units'] = 'Destinations'
comb_df['Value'] = comb_df['Value'].apply(lambda x: round(x, decimals))

comb_df_nwd_perc = comb_df[summary_columns]

comb_df_nwd = pd.concat([comb_df_nwd_act, comb_df_nwd_perc], ignore_index=True)

In [28]:
#comb_df = pd.concat([comb_df_acc, comb_df_nwd], ignore_index=True)
#comb_df = comb_df[summary_columns]

In [29]:
#with pd.ExcelWriter(os.path.join(summary_outputs, 'concept-BY15.xlsx'), engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
#    comb_df_acc.to_excel(writer, sheet_name='B1.1', startcol=0, index=False)
#    comb_df_nwd.to_excel(writer, sheet_name='B1.3', startcol=0, index=False)

## Archive

def get_accessibility_jobs(transit_skim_files, mat_core, geo_pp_cwks, non_work_dest_tazs):

    acc_jobs = []
    acc_jobs_nwd = []
    df = []
    acc_jobs_pp = []
    acc_jobs_rdm = []
    acc_jobs_sd = []
    acc_jobs_cnty = []
    
    #for 25 transit skims files - this includes time periods
    for transit_file in transit_skim_files:
        
        transit_file_name = os.path.split(transit_file)[-1]
        
        for core in mat_core:
            # od data from skims
            timedaData = convertMat2Df(transit_file, core)
            timedaData = timedaData.loc[timedaData[core]>0]
            timedaData[core] = timedaData[core]/100
              
            #total employment based on destination
            od_data = pd.merge(timedaData, tazDataTotemp, left_on='dest', right_on='ZONE', how = 'left')
            od_data.columns

            #for time thresholds - currently set in config files
            for threshold in time_thresholds:
                print(f'processing {transit_file} for time threshold {threshold}')
                # create data for all destinations
                oddata_min = od_data.loc[od_data[core] <= threshold]
                oddata_min = oddata_min.groupby(['orig'])['TOTEMP'].sum().reset_index()

                # adding employed residents
                oddata_min = pd.merge(oddata_min, tazDataEmpres, left_on= ['orig'], right_on =['ZONE'], how ='left')

                # regional 
                oddata_min['wt_empres'] = oddata_min['TOTEMP'] * oddata_min['EMPRES']
                job_acc_min = oddata_min['wt_empres'].sum()/oddata_min['EMPRES'].sum()
                #acc_jobs.append(job_acc_min)
                
                df_temp_region = pd.DataFrame({'transit_file': os.path.split(transit_file)[-1], 'time_threshold': threshold, 'jobs_from_home': job_acc_min, 'time': core}, index=[0])
                df.append(df_temp_region)

                # adding priority population and geographies
                oddata_min = pd.merge(oddata_min, geo_pp_cwks, left_on= ['orig'], right_on =['taz'], how ='left')
                
                # for prioirty population
                if 'pp_share' in geo_pp_cwks.columns:
                    oddata_min['priority_population'] = oddata_min['pp_share'].apply(lambda x: 1 if x > 0 else 0)
                    oddata_min['wt_empres_pp'] = oddata_min['TOTEMP'] * oddata_min['EMPRES'] * oddata_min['pp_share']/100
                    oddata_min['EMPRES_pp'] = oddata_min['EMPRES'] * oddata_min['pp_share']/100
                    oddata_min_pp = oddata_min[oddata_min['priority_population']==1]
                    oddata_min_nonpp = oddata_min[oddata_min['priority_population']==0]
                    oddata_min_pp = oddata_min_pp.groupby(['priority_population'])['wt_empres_pp','EMPRES_pp'].sum().reset_index()
                    oddata_min_pp['jobs_from_home'] = oddata_min_pp['wt_empres_pp']/oddata_min_pp['EMPRES_pp']
                    oddata_min_pp = oddata_min_pp[['priority_population', 'jobs_from_home']]

                    oddata_min_nonpp = oddata_min_nonpp.groupby(['priority_population'])['wt_empres','EMPRES'].sum().reset_index()
                    oddata_min_nonpp['jobs_from_home'] = oddata_min_nonpp['wt_empres']/oddata_min_nonpp['EMPRES']
                    oddata_min_nonpp = oddata_min_nonpp[['priority_population', 'jobs_from_home']]

                    oddata_min_comb = pd.concat([oddata_min_pp, oddata_min_nonpp], ignore_index=False)
                    oddata_min_comb['time_threshold'] = threshold
                    oddata_min_comb['transit_file'] = transit_file_name
                    oddata_min_comb['time'] = core
                    acc_jobs_pp.append(oddata_min_comb)

                # for RDM zones
                if 'rdm_zones' in geo_pp_cwks.columns:
                    oddata_min_rdm = oddata_min.groupby(['rdm_zones'])['wt_empres','EMPRES'].sum().reset_index()
                    oddata_min_rdm['jobs_from_home'] =  oddata_min_rdm['wt_empres']/oddata_min_rdm['EMPRES']
                    oddata_min_rdm['time_threshold'] = threshold
                    oddata_min_rdm['transit_file'] = transit_file_name
                    oddata_min_rdm['time'] = core
                    oddata_min_rdm = oddata_min_rdm[['rdm_zones', 'jobs_from_home', 'time_threshold', 'transit_file', 'time']] 
                    acc_jobs_rdm.append(oddata_min_rdm)

                # for superdistrict
                if 'super_district' in geo_pp_cwks.columns:
                    oddata_min_sd = oddata_min.groupby(['super_district'])['wt_empres','EMPRES'].sum().reset_index()
                    oddata_min_sd['jobs_from_home'] =  oddata_min_sd['wt_empres']/oddata_min_sd['EMPRES']
                    oddata_min_sd['time_threshold'] = threshold
                    oddata_min_sd['transit_file'] = transit_file_name
                    oddata_min_sd['time'] = core
                    oddata_min_sd = oddata_min_sd[['super_district', 'jobs_from_home', 'time_threshold', 'transit_file', 'time']] 
                    acc_jobs_sd.append(oddata_min_sd)

                # for county
                if 'county' in geo_pp_cwks.columns:
                    oddata_min_cnty = oddata_min.groupby(['county'])['wt_empres','EMPRES'].sum().reset_index()
                    oddata_min_cnty['jobs_from_home'] =  oddata_min_cnty['wt_empres']/oddata_min_cnty['EMPRES']
                    oddata_min_cnty['time_threshold'] = threshold
                    oddata_min_cnty['transit_file'] = transit_file_name
                    oddata_min_cnty['time'] = core
                    oddata_min_cnty = oddata_min_cnty[['county', 'jobs_from_home', 'time_threshold', 'transit_file', 'time']] 
                    acc_jobs_cnty.append(oddata_min_cnty)
            
    df_region = pd.concat(df)
    df_pp = pd.concat(acc_jobs_pp)
    df_rdm = pd.concat(acc_jobs_rdm)
    df_sd = pd.concat(acc_jobs_sd)
    df_cnty = pd.concat(acc_jobs_cnty)
    
    return df_region, df_pp, df_rdm, df_sd, df_cnty

def get_non_work_destionations(transit_skim_files, mat_core, geo_pp_cwks, non_work_dest_tazs):

    acc_jobs = []
    nwd_jobs_nwd = []
    df = []
    nwd_jobs_pp = []
    nwd_jobs_rdm = []
    nwd_jobs_sd = []
    nwd_jobs_cnty = []
    
    
    #for 25 transit skims files - this includes time periods
    for transit_file in transit_skim_files:
        
        transit_file_name = os.path.split(transit_file)[-1]
        
        for core in mat_core:
            # od data from skims
            timedaData = convertMat2Df(transit_file, core)
            timedaData = timedaData.loc[timedaData[core]>0]
            timedaData[core] = timedaData[core]/100

            #total non work destinations based on destination
            od_data = pd.merge(timedaData, non_work_dest_tazs, left_on='dest', right_on='taz', how = 'inner')
            od_data.isna().sum()

            #for time thresholds - currently set in config files
            for threshold in time_thresholds:
                print(f'processing {transit_file} for time threshold {threshold}')
                # create data for all destinations
                oddata_min = od_data.loc[od_data[core] <= threshold]
                oddata_min = oddata_min.groupby(['orig'])['non_work_dest'].sum().reset_index()

                # adding employed residents
                oddata_min = pd.merge(oddata_min, tazDataPop, left_on= ['orig'], right_on =['ZONE'], how ='left')

                # regional 
                oddata_min['wt_pop'] = oddata_min['non_work_dest'] * oddata_min['TOTPOP']
                nwd_acc_min = oddata_min['wt_pop'].sum()/oddata_min['non_work_dest'].sum()
                #acc_jobs.append(job_acc_min)
                
                df_temp_region = pd.DataFrame({'transit_file': os.path.split(transit_file)[-1], 'time_threshold': threshold, 'nwd_from_home': nwd_acc_min, 'time': core}, index=[0])
                df.append(df_temp_region)

                # adding priority population and geographies
                oddata_min = pd.merge(oddata_min, geo_pp_cwks, left_on= ['orig'], right_on =['taz'], how ='left')
                
                # for prioirty population
                if 'pp_share' in geo_pp_cwks.columns:
                    oddata_min['priority_population'] = oddata_min['pp_share'].apply(lambda x: 1 if x > 0 else 0)
                    oddata_min['wt_pop_pp'] = oddata_min['non_work_dest'] * oddata_min['TOTPOP'] * oddata_min['pp_share']/100
                    oddata_min['pop_pp'] = oddata_min['TOTPOP'] * oddata_min['pp_share']/100
                    oddata_min_pp = oddata_min[oddata_min['priority_population']==1]
                    oddata_min_nonpp = oddata_min[oddata_min['priority_population']==0]
                    oddata_min_pp = oddata_min_pp.groupby(['priority_population'])['wt_pop_pp','pop_pp'].sum().reset_index()
                    oddata_min_pp['nwd_from_home'] = oddata_min_pp['wt_pop_pp']/oddata_min_pp['pop_pp']
                    oddata_min_pp = oddata_min_pp[['priority_population', 'nwd_from_home']]

                    oddata_min_nonpp = oddata_min_nonpp.groupby(['priority_population'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_nonpp['nwd_from_home'] = oddata_min_nonpp['wt_pop']/oddata_min_nonpp['TOTPOP']
                    oddata_min_nonpp = oddata_min_nonpp[['priority_population', 'nwd_from_home']]

                    oddata_min_comb = pd.concat([oddata_min_pp, oddata_min_nonpp], ignore_index=False)
                    oddata_min_comb['time_threshold'] = threshold
                    oddata_min_comb['transit_file'] = transit_file_name
                    oddata_min_comb['time'] = core
                    nwd_jobs_pp.append(oddata_min_comb)

                # for RDM zones
                if 'rdm_zones' in geo_pp_cwks.columns:
                    oddata_min_rdm = oddata_min.groupby(['rdm_zones'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_rdm['nwd_from_home'] =  oddata_min_rdm['wt_pop']/oddata_min_rdm['TOTPOP']
                    oddata_min_rdm['time_threshold'] = threshold
                    oddata_min_rdm['transit_file'] = transit_file_name
                    oddata_min_rdm['time'] = core
                    oddata_min_rdm = oddata_min_rdm[['rdm_zones', 'nwd_from_home', 'time_threshold', 'transit_file', 'time']] 
                    nwd_jobs_rdm.append(oddata_min_rdm)

                # for superdistrict
                if 'super_district' in geo_pp_cwks.columns:
                    oddata_min_sd = oddata_min.groupby(['super_district'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_sd['nwd_from_home'] =  oddata_min_sd['wt_pop']/oddata_min_sd['TOTPOP']
                    oddata_min_sd['time_threshold'] = threshold
                    oddata_min_sd['transit_file'] = transit_file_name
                    oddata_min_sd['time'] = core
                    oddata_min_sd = oddata_min_sd[['super_district', 'nwd_from_home', 'time_threshold', 'transit_file', 'time']] 
                    nwd_jobs_sd.append(oddata_min_sd)

                # for county
                if 'county' in geo_pp_cwks.columns:
                    oddata_min_cnty = oddata_min.groupby(['county'])['wt_pop','TOTPOP'].sum().reset_index()
                    oddata_min_cnty['nwd_from_home'] =  oddata_min_cnty['wt_pop']/oddata_min_cnty['TOTPOP']
                    oddata_min_cnty['time_threshold'] = threshold
                    oddata_min_cnty['transit_file'] = transit_file_name
                    oddata_min_cnty['time'] = core
                    oddata_min_cnty = oddata_min_cnty[['county', 'nwd_from_home', 'time_threshold', 'transit_file', 'time']] 
                    nwd_jobs_cnty.append(oddata_min_cnty)
            
    df_region = pd.concat(df)
    df_pp = pd.concat(nwd_jobs_pp)
    df_rdm = pd.concat(nwd_jobs_rdm)
    df_sd = pd.concat(nwd_jobs_sd)
    df_cnty = pd.concat(nwd_jobs_cnty)
    
    return df_region, df_pp, df_rdm, df_sd, df_cnty