In [None]:
import os
import warnings

import numpy as np
import openmatrix as omx
import pandas as pd
import yaml

from utility import *

warnings.filterwarnings("ignore")

In [None]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']

skims_dir = _join(model_outputs_dir, "skims")
landuse_dir = _join(model_outputs_dir, "landuse")

ctramp_dir = params['ctramp_dir']
summary_outputs = params['summary_dir']
mat_core = params['connectivity_mat_core']
time_thresholds = params['accessibility_thresholds']

concept_id = params['concept_id']
summary_columns = params['final_columns']

preprocess_dir = _join(ctramp_dir, '_pre_process_files')
perf_measure_columns = params['final_columns']
period = params['periods']
summary_dir = params['summary_dir']

best_path_skim_extension = params['best_path_skim_extension']

annual_transit_factor = params['annual_transit_factor']
annual_auto_factor = params['annual_auto_factor']

filename_extension = params['filename_extension']


mat_core = params['connectivity_mat_core']
time_thresholds = params['accessibility_thresholds']

travel_time_cores = params['total_travel_time']
time_periods = params['periods']
acc_egr = params['access_egress_modes']

In [None]:
#Read data

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

#get geogrpahies and priority population in the same file
geo_pp_cwks = pd.merge(geo_cwks, pp_perc, on = 'taz', how = 'left') 

#transbay od pairs
#transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

#non work destinations tazs
non_work_tazs = pd.read_excel(_join(params['common_dir'], 'non_work_destinations.xlsx'))
#non_work_tazs = list(non_work_tazs['non_wrk_taz'])

#taz data for empoyments and resindent employments
tazData = pd.read_csv(_join(params['model_dir'], params['zone_file']))
tazDataTotemp = tazData[["ZONE", "TOTEMP"]]
tazDataEmpres = tazData[["ZONE", "EMPRES"]]
tazDataPop = tazData[["ZONE", "TOTPOP"]]

In [None]:
all_tod_tt = pd.read_parquet(_join(preprocess_dir, 'tod_min_actual_travel_time.parquet'))
perc_tod_tt = pd.read_parquet(_join(preprocess_dir, 'tod_min_perceived_travel_time.parquet'))

In [None]:
def business_accessibile_employees(all_tod_tt, time_periods, time_thresholds, geo_pp_cwks, tt_verbose):

    acc_jobs = []
    acc_jobs_nwd = []
    df = []
    acc_jobs_pp = []
    acc_jobs_rdm = []
    acc_jobs_sd = []
    acc_jobs_cnty = []
    
    #for 25 transit skims files - this includes time periods
    for period in time_periods:
        
        if period in all_tod_tt.columns:
            
            timedaData = all_tod_tt[[period]].reset_index()
            print(timedaData.columns)
            #timedaData = timedaData.fillna(0)
            timedaData[period] = timedaData[period]/100
            
            #employed residents based on destination
            od_data = pd.merge(timedaData, tazDataEmpres, left_on='dest', right_on='ZONE', how = 'left')
            od_data['EMPRES'] = od_data['EMPRES'].fillna(0)

            #for time thresholds - currently set in config files
            for threshold in time_thresholds:
                #print(f'processing {transit_file} for time threshold {threshold}')
                # create data for all destinations
                oddata_min = od_data.loc[od_data[period] <= threshold]
                oddata_min = oddata_min.groupby(['orig'])['EMPRES'].sum().reset_index()

                # adding employed residents
                oddata_min = pd.merge(oddata_min, tazDataTotemp, left_on= ['orig'], right_on =['ZONE'], how ='left')

                # regional 
                oddata_min['wt_totemp'] = oddata_min['TOTEMP'] * oddata_min['EMPRES']
                job_acc_min = oddata_min['wt_totemp'].sum()/oddata_min['TOTEMP'].sum()
                #acc_jobs.append(job_acc_min)
                
                df_temp_region = pd.DataFrame({'Description': "Business access to potential employees within "+ tt_verbose + " "  + str(threshold) + " minutes",
                                               'Population': 'Whole Population',
                                               'Period': period,
                                               'Geography': 'Regional total',
                                               'Zone_ID': 'Region',
                                               'Submetric': 'C1.2.1',
                                               'Total_Increment': '',
                                               'Value': job_acc_min}, index=[0])
                df.append(df_temp_region)

                # adding priority population and geographies
                oddata_min = pd.merge(oddata_min, geo_pp_cwks, left_on= ['orig'], right_on =['taz'], how ='left')
                
                # for prioirty population
                """
                if 'pp_share' in geo_pp_cwks.columns:
                    oddata_min['priority_population'] = oddata_min['pp_share'].apply(lambda x: 1 if x > 0 else 0)
                    oddata_min['wt_empres_pp'] = oddata_min['TOTEMP'] * oddata_min['EMPRES'] * oddata_min['pp_share']/100
                    oddata_min['EMPRES_pp'] = oddata_min['EMPRES'] * oddata_min['pp_share']/100
                    oddata_min_pp = oddata_min[oddata_min['priority_population']==1]
                    oddata_min_nonpp = oddata_min[oddata_min['priority_population']==0]
                    oddata_min_pp = oddata_min_pp.groupby(['priority_population'])['wt_empres_pp','EMPRES_pp'].sum().reset_index()
                    oddata_min_pp['buss_acc_emp'] = oddata_min_pp['wt_empres_pp']/oddata_min_pp['EMPRES_pp']
                    oddata_min_pp = oddata_min_pp[['priority_population', 'buss_acc_emp']]

                    oddata_min_nonpp = oddata_min_nonpp.groupby(['priority_population'])['wt_empres','EMPRES'].sum().reset_index()
                    oddata_min_nonpp['buss_acc_emp'] = oddata_min_nonpp['wt_empres']/oddata_min_nonpp['EMPRES']
                    oddata_min_nonpp = oddata_min_nonpp[['priority_population', 'buss_acc_emp']]

                    oddata_min_comb = pd.concat([oddata_min_pp, oddata_min_nonpp], ignore_index=False)
                    oddata_min_comb['time_threshold'] = threshold
                    oddata_min_comb['transit_file'] = transit_file_name
                    oddata_min_comb['time'] = core
                    acc_jobs_pp.append(oddata_min_comb)
                """
                
                # for county
                if 'county' in geo_pp_cwks.columns:
                    oddata_min_cnty = oddata_min.groupby(['county'])['wt_totemp','TOTEMP'].sum().reset_index()
                    oddata_min_cnty['Value'] =  oddata_min_cnty['wt_totemp']/oddata_min_cnty['TOTEMP']
                    oddata_min_cnty['Description'] = "Business access to potential employees within " + tt_verbose + " " + str(threshold) + " minutes"
                    oddata_min_cnty['Period'] = period
                    oddata_min_cnty['Geography'] = 'County'
                    oddata_min_cnty['Population'] = 'Whole Population'
                    oddata_min_cnty.rename(columns={'county': 'Zone_ID'}, inplace=True)
                    oddata_min_cnty = oddata_min_cnty[['Description', 'Population', 'Period',
                                                     'Geography', 'Zone_ID', 'Value']]   
                    oddata_min_cnty['Submetric'] = 'C1.2.2'
                    oddata_min_cnty['Total_Increment'] = ''
                    acc_jobs_cnty.append(oddata_min_cnty)

                # for RDM zones
                if 'rdm_zones' in geo_pp_cwks.columns:
                    oddata_min_rdm = oddata_min.groupby(['rdm_zones'])['wt_totemp','TOTEMP'].sum().reset_index()
                    oddata_min_rdm['Value'] =  oddata_min_rdm['wt_totemp']/oddata_min_rdm['TOTEMP']
                    oddata_min_rdm['Description'] = "Business access to potential employees within " + tt_verbose + " " + str(threshold) + " minutes"
                    oddata_min_rdm['Period'] = period
                    oddata_min_rdm['Geography'] = 'RDM'
                    oddata_min_rdm['Population'] = 'Whole Population'
                    oddata_min_rdm.rename(columns={'rdm_zones' : 'Zone_ID'}, inplace=True)
                    oddata_min_rdm = oddata_min_rdm[['Description', 'Population', 'Period',
                                                     'Geography', 'Zone_ID', 'Value']]
                    oddata_min_rdm['Submetric'] = 'C1.2.3'
                    oddata_min_rdm['Total_Increment'] = ''
                    acc_jobs_rdm.append(oddata_min_rdm)

                # for superdistrict
                if 'super_district' in geo_pp_cwks.columns:
                    oddata_min_sd = oddata_min.groupby(['super_district'])['wt_totemp', 'TOTEMP'].sum().reset_index()
                    oddata_min_sd['Value'] =  oddata_min_sd['wt_totemp']/oddata_min_sd['TOTEMP']
                    oddata_min_sd['Description'] = "Business access to potential employees within " + tt_verbose + " " + str(threshold) + " minutes"
                    oddata_min_sd['Period'] = period
                    oddata_min_sd['Population'] = 'Whole Population'
                    oddata_min_sd['Geography'] = 'Superdistrict'
                    oddata_min_sd.rename(columns={'super_district': 'Zone_ID'}, inplace=True)
                    oddata_min_sd = oddata_min_sd[['Description', 'Population', 'Period',
                                                     'Geography', 'Zone_ID', 'Value']]
                    oddata_min_sd['Submetric'] = 'C1.2.4'
                    oddata_min_sd['Total_Increment'] = ''
                    acc_jobs_sd.append(oddata_min_sd)


                    
        else:
            print(f'{period} travel time doesn\'t exist.')
            
    df_region = pd.concat(df).reset_index(drop=True)
    #df_pp = pd.concat(acc_jobs_pp).reset_index(drop=True)
    df_rdm = pd.concat(acc_jobs_rdm).reset_index(drop=True)
    df_sd = pd.concat(acc_jobs_sd).reset_index(drop=True)
    df_cnty = pd.concat(acc_jobs_cnty).reset_index(drop=True)
    
    return df_region, df_rdm, df_sd, df_cnty

In [None]:
# Number of decimal places used when rounding the reported 'Value' column
# in the cells below (0 rounds to whole numbers).
decimals = 0

In [None]:
# Run the workforce-accessibility summary on the actual travel times.
df_region_act, df_rdm_act, df_sd_act, df_cnty_act = business_accessibile_employees(
    all_tod_tt=all_tod_tt,
    time_periods=time_periods,
    time_thresholds=time_thresholds,
    geo_pp_cwks=geo_pp_cwks,
    tt_verbose="actual",
)

In [None]:
# Stack the regional, RDM, superdistrict and county tables for the actual
# travel-time run, attach the metric metadata and round the reported values.
comb_df_act = (
    pd.concat([df_region_act, df_rdm_act, df_sd_act, df_cnty_act], ignore_index=True)
    .assign(
        Concept_ID=concept_id,
        Metric_ID='C1.2',
        Metric_name='Accessibility to workforce from employment opportunities - actual travel time',
        Origin_zone='',
        Dest_zone='',
        Purpose='',
        Units='Employees',
        Value=lambda stacked: stacked['Value'].apply(lambda v: round(v, decimals)),
        Income='',
        Mode='',
    )
)

# Keep only the configured output columns, in the configured order.
comb_df_act = comb_df_act[summary_columns]

In [None]:
# Write one CSV per submetric (i.e. per geography level) for the actual
# travel-time results, then one CSV holding all of them together.
metric_name = '_accessibility_to_workforce_actual_time_'
unique_medtric_ids = comb_df_act['Submetric'].unique()
for mids in unique_medtric_ids:
    dfs = comb_df_act.loc[comb_df_act['Submetric'] == mids, perf_measure_columns].reset_index(drop=True)
    file_name = mids
    # The geography label of the first row becomes part of the file name.
    geography = '_' + dfs['Geography'][0].replace(' ', '_')
    dfs.to_csv(
        _join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'),
        index=False,
    )
    print(len(dfs), file_name, dfs['Value'].sum(), metric_name)

comb_df_act.to_csv(
    _join(summary_dir, 'C1.2' + '_accessibility_to_workforce_actual_time_' + concept_id + '_region' + filename_extension + '.csv'),
    index=False,
)

In [None]:
# Run the workforce-accessibility summary on the perceived travel times.
df_region_perc, df_rdm_perc, df_sd_perc, df_cnty_perc = business_accessibile_employees(
    all_tod_tt=perc_tod_tt,
    time_periods=time_periods,
    time_thresholds=time_thresholds,
    geo_pp_cwks=geo_pp_cwks,
    tt_verbose="perceived",
)

In [None]:

# Stack the regional, RDM, superdistrict and county tables for the perceived
# travel-time run, attach the metric metadata and round the reported values.
comb_df_perc = (
    pd.concat([df_region_perc, df_rdm_perc, df_sd_perc, df_cnty_perc], ignore_index=True)
    .assign(
        Concept_ID=concept_id,
        Metric_ID='C1.2',
        Metric_name='Accessibility to workforce from employment opportunities - perceived travel time',
        Origin_zone='',
        Dest_zone='',
        Purpose='',
        Units='Employees',
        Value=lambda stacked: stacked['Value'].apply(lambda v: round(v, decimals)),
        Income='',
        Mode='',
    )
)

# Keep only the configured output columns, in the configured order.
comb_df_perc = comb_df_perc[summary_columns]

In [None]:
# Write one CSV per submetric (i.e. per geography level) for the perceived
# travel-time results, then one CSV holding all of them together.
metric_name = '_accessibility_to_workforce_perceived_time_'
unique_medtric_ids = comb_df_perc['Submetric'].unique()
for mids in unique_medtric_ids:
    dfs = comb_df_perc.loc[comb_df_perc['Submetric'] == mids, perf_measure_columns].reset_index(drop=True)
    file_name = dfs['Submetric'][0]
    # The geography label of the first row becomes part of the file name.
    geography = '_' + dfs['Geography'][0].replace(' ', '_')
    dfs.to_csv(
        _join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'),
        index=False,
    )
    print(len(dfs), file_name, dfs['Value'].sum())

comb_df_perc.to_csv(
    _join(summary_dir, 'C1.2' + '_accessibility_to_workforce_perceived_time_' + concept_id + '_region' + filename_extension + '.csv'),
    index=False,
)