In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the run configuration; every path and option used below comes from config.yaml.
with open('config.yaml', 'r') as config_file:
    params = yaml.safe_load(config_file)

# Short aliases for the os.path helpers.
_join, _dir, _norm = os.path.join, os.path.dirname, os.path.normpath

# --- directories and run identifiers ---
model_outputs_dir = params['model_dir']
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
# The pre-processed inputs sit in a fixed sub-folder of the CT-RAMP directory.
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

# --- output layout ---
perf_measure_columns = params['final_columns']
period = params['periods']
# Read from the same config key as summary_outputs above.
summary_dir = params['summary_dir']

# Appended to every exported file name, just before '.csv'.
filename_extension = params['filename_extension']

In [3]:
# Data input: the tour roster parquet in the pre-process folder.
# 'income_bin' is exposed as 'Income' so it matches the output column name.
tour_roster_path = _join(preprocess_dir, 'tour_roster.parquet')
df_tours = pd.read_parquet(tour_roster_path).rename(columns={'income_bin': 'Income'})

In [4]:
# Show the configured output column layout (params['final_columns']).
perf_measure_columns

['Concept_ID',
 'Metric_ID',
 'Metric_name',
 'Submetric',
 'Description',
 'Population',
 'Period',
 'Geography',
 'Zone_ID',
 'Origin_zone',
 'Dest_zone',
 'Purpose',
 'Mode',
 'Income',
 'Value',
 'Units',
 'Total_Increment']

### Regional Mode Share

In [5]:
#df_region_share = df_tours.groupby(['tour_mode', 'tour_purpose'])['tours'].sum().reset_index()

In [6]:
#regional mode share
# df_region_share['Value'] = df_region_share['tours']/df_region_share['tours'].sum()
# df_region_share = df_region_share.rename(columns={'tour_mode': 'Mode', 'tour_purpose': 'Purpose'})
# df_region_share = df_region_share[['Mode', 'Purpose', 'Value']]

# df_region_share['Concept_ID'] = concept_id
# df_region_share['Metric_ID'] = 'A3.3'
# df_region_share['Metric_name'] = 'Mode Shares'
# df_region_share['Submetric'] = 'A3.3.1'
# df_region_share['Description'] = 'Regional mode share'
# df_region_share['Population'] = 'Whole Population'
# df_region_share['Period'] = ''
# df_region_share['Geography'] = 'Region'
# df_region_share['Zone_ID'] = ''
# df_region_share['Origin_zone'] = ''
# df_region_share['Dest_zone'] = ''
# df_region_share['Dest_zone'] = ''
# df_region_share['Units'] = 'Percentage share'
# df_region_share['Total_Increment'] = 'Increment'

In [7]:
#df_region_period

### Regional mode share by period

In [8]:
# Regional mode share (A3.3.1): each mode's tours as a percentage of all tours
# in the same Period / tour_purpose / Income group.
region_keys = ['Period', 'tour_purpose', 'Income']

# Collapse the roster to one row per group and mode before summarising.
region_tours = df_tours.groupby(region_keys + ['Mode'])['tours'].sum().reset_index()

# summarize_all_combinations comes from utility; its aggregate is read from the 'Value' column.
# Numerator: tours per group and mode.
region_mode_tours = summarize_all_combinations(
    region_tours, groupby_columns=region_keys + ['Mode'], summary_column='tours'
).rename(columns={'Value': 'tours1'})

# Denominator: tours per group across all modes.
region_group_tours = summarize_all_combinations(
    region_tours, groupby_columns=list(region_keys), summary_column='tours'
).rename(columns={'Value': 'tours2'})

df_region_period = (
    region_mode_tours
    .merge(region_group_tours, on=region_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    .rename(columns={'tour_purpose': 'Purpose'})
    [['Mode', 'Period', 'Purpose', 'Income', 'Value']]
    # Constant descriptor columns for this submetric.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.1',
        Description='Regional mode share by time period, tour purpose and household income',
        Population='Whole Population',
        Geography='Region',
        Zone_ID='',
        Origin_zone='',
        Dest_zone='',
        Units='Percentage',
        Total_Increment='',
    )
)

### Mode Share in Transbay Split

In [9]:
# Transbay mode share (A3.3.2): the regional calculation restricted to tours
# whose transbay_od flag equals 1.
transbay_tours = df_tours.loc[df_tours['transbay_od'] == 1]
transbay_keys = ['Period', 'tour_purpose', 'Income']

# Numerator: tours per group and mode.
transbay_mode_tours = summarize_all_combinations(
    transbay_tours, groupby_columns=transbay_keys + ['Mode'], summary_column='tours'
).rename(columns={'Value': 'tours1'})

# Denominator: tours per group across all modes.
transbay_group_tours = summarize_all_combinations(
    transbay_tours, groupby_columns=list(transbay_keys), summary_column='tours'
).rename(columns={'Value': 'tours2'})

df_tb = (
    transbay_mode_tours
    .merge(transbay_group_tours, on=transbay_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    .rename(columns={'tour_purpose': 'Purpose'})
    [['Mode', 'Period', 'Purpose', 'Income', 'Value']]
    # Constant descriptor columns for this submetric.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.2',
        Zone_ID=1,
        Description='Mode share by time period, tour purpose and household income in transbay region',
        Population='Whole Population',
        Geography='Transbay region',
        Origin_zone='',
        Dest_zone='',
        Units='Percentage',
        Total_Increment='',
    )
)

### Mode Share by County

In [10]:
# County-to-county mode share (A3.3.3): each mode's tours as a percentage of
# all tours for the same origin county, destination county, Period,
# tour_purpose and Income.
county_keys = ['orig_county', 'dest_county', 'Period', 'tour_purpose', 'Income']

# Numerator: tours per group and mode.
county_mode_tours = summarize_all_combinations(
    df_tours, groupby_columns=county_keys + ['Mode'], summary_column='tours'
).rename(columns={'Value': 'tours1'})

# Denominator: tours per group across all modes.
county_group_tours = summarize_all_combinations(
    df_tours, groupby_columns=list(county_keys), summary_column='tours'
).rename(columns={'Value': 'tours2'})

df_cnty = (
    county_mode_tours
    .merge(county_group_tours, on=county_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    # The county pair is carried in the generic Origin_zone / Dest_zone columns.
    .rename(columns={
        'tour_purpose': 'Purpose',
        'orig_county': 'Origin_zone',
        'dest_county': 'Dest_zone',
    })
    [['Origin_zone', 'Dest_zone', 'Mode', 'Period', 'Purpose', 'Income', 'Value']]
    # Constant descriptor columns for this submetric.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.3',
        Description='Mode share by time period, tour purpose and household income in origin and destination county',
        Population='Whole Population',
        Geography='County',
        Zone_ID='',
        Units='Percentage',
        Total_Increment='',
    )
)

### Mode Share by RDM Zones

In [11]:
# RDM zone-to-zone mode share (A3.3.4): each mode's tours as a percentage of
# all tours for the same origin RDM zone, destination RDM zone, Period,
# tour_purpose and Income.
rdm_keys = ['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'tour_purpose', 'Income']

# Numerator: tours per group and mode.
rdm_mode_tours = summarize_all_combinations(
    df_tours, groupby_columns=rdm_keys + ['Mode'], summary_column='tours'
).rename(columns={'Value': 'tours1'})

# Denominator: tours per group across all modes.
rdm_group_tours = summarize_all_combinations(
    df_tours, groupby_columns=list(rdm_keys), summary_column='tours'
).rename(columns={'Value': 'tours2'})

df_rdm = (
    rdm_mode_tours
    .merge(rdm_group_tours, on=rdm_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    # The RDM zone pair is carried in the generic Origin_zone / Dest_zone columns.
    .rename(columns={
        'tour_purpose': 'Purpose',
        'orig_rdm_zones': 'Origin_zone',
        'dest_rdm_zones': 'Dest_zone',
    })
    [['Origin_zone', 'Dest_zone', 'Mode', 'Period', 'Purpose', 'Income', 'Value']]
    # Constant descriptor columns for this submetric.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.4',
        Description='Mode share by time period, tour purpose and household income in origin and destination RDM Zones',
        Population='Whole Population',
        Geography='RDM',
        Zone_ID='',
        Units='Percentage',
        Total_Increment='',
    )
)

### Mode Share by Super District

In [12]:
# Super-district mode share (A3.3.5): each mode's tours as a percentage of all
# tours for the same origin super district, destination super district,
# Period, tour_purpose and Income.
sd_keys = ['orig_super_dist', 'dest_super_dist', 'Period', 'tour_purpose', 'Income']

# Numerator: tours per group and mode.
sd_mode_tours = summarize_all_combinations(
    df_tours, groupby_columns=sd_keys + ['Mode'], summary_column='tours'
).rename(columns={'Value': 'tours1'})

# Denominator: tours per group across all modes.
sd_group_tours = summarize_all_combinations(
    df_tours, groupby_columns=list(sd_keys), summary_column='tours'
).rename(columns={'Value': 'tours2'})

df_sd = (
    sd_mode_tours
    .merge(sd_group_tours, on=sd_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    # rename() ignores mapping keys that are not present as columns.
    .rename(columns={
        'tour_mode': 'Mode',
        'tour_purpose': 'Purpose',
        'orig_super_dist': 'Origin_zone',
        'dest_super_dist': 'Dest_zone',
    })
    [['Origin_zone', 'Dest_zone', 'Mode', 'Period', 'Purpose', 'Income', 'Value']]
    # Constant descriptor columns for this submetric.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.5',
        Description='Mode share by time period, tour purpose and household income in origin and destination super districts',
        Population='Whole Population',
        Geography='Super district',
        Zone_ID='',
        Units='Percentage',
        Total_Increment='',
    )
)

### Mode Share for Priority Population

In [None]:
# Priority-population regional mode share (A3.3.6).

# Tours weighted by pp_share; the division by 100 treats pp_share as a percentage.
df_tours['pp_tours'] = df_tours['tours'] * df_tours['pp_share'] / 100

pp_keys = ['Period', 'tour_purpose', 'Income']

# Numerator: priority-population tours per group and mode.
pp_mode_tours = summarize_all_combinations(
    df_tours, groupby_columns=pp_keys + ['Mode'], summary_column='pp_tours'
).rename(columns={'Value': 'tours1'})

# Denominator: priority-population tours per group across all modes.
pp_group_tours = summarize_all_combinations(
    df_tours, groupby_columns=list(pp_keys), summary_column='pp_tours'
).rename(columns={'Value': 'tours2'})

df_pp = (
    pp_mode_tours
    .merge(pp_group_tours, on=pp_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    .rename(columns={'tour_purpose': 'Purpose'})
    [['Mode', 'Period', 'Purpose', 'Income', 'Value']]
    # Constant descriptor columns for this submetric.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.6',
        Description='Mode shares by time period and tour purpose',
        Population='Priority population',
        Geography='Region',
        Zone_ID='',
        Origin_zone='',
        Dest_zone='',
        Units='Percentage',
        Total_Increment='',
    )
)

### Mode Share for Priority Population in RDM zone

In [None]:
# Priority-population mode share between RDM zone pairs (A3.3.7).

# Tours weighted by pp_share; the division by 100 treats pp_share as a percentage.
df_tours['pp_tours'] = df_tours['tours'] * df_tours['pp_share'] / 100

pp_rdm_keys = ['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'tour_purpose', 'Income']

# Numerator: priority-population tours per group and mode.
pp_rdm_mode_tours = summarize_all_combinations(
    df_tours, groupby_columns=pp_rdm_keys + ['Mode'], summary_column='pp_tours'
).rename(columns={'Value': 'tours1'})

# Denominator: priority-population tours per group across all modes.
pp_rdm_group_tours = summarize_all_combinations(
    df_tours, groupby_columns=list(pp_rdm_keys), summary_column='pp_tours'
).rename(columns={'Value': 'tours2'})

df_pp_rdm = (
    pp_rdm_mode_tours
    .merge(pp_rdm_group_tours, on=pp_rdm_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    # The RDM zone pair is carried in the generic Origin_zone / Dest_zone columns.
    .rename(columns={
        'tour_purpose': 'Purpose',
        'orig_rdm_zones': 'Origin_zone',
        'dest_rdm_zones': 'Dest_zone',
    })
    [['Origin_zone', 'Dest_zone', 'Mode', 'Period', 'Purpose', 'Income', 'Value']]
    # Constant descriptor columns for this submetric.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.7',
        Description='Mode shares by time period and tour purpose between RDM zones',
        Population='Priority population',
        Geography='RDM',
        Zone_ID='',
        Units='Percentage',
        Total_Increment='',
    )
)

In [None]:
# Priority-population mode share by household income between RDM zone pairs
# (A3.3.8), with all periods and purposes combined.

# Tours weighted by pp_share; the division by 100 treats pp_share as a percentage.
df_tours['pp_tours'] = df_tours['tours'] * df_tours['pp_share'] / 100

pp_rdm_inc_keys = ['orig_rdm_zones', 'dest_rdm_zones', 'Income']

# Numerator: priority-population tours per zone pair, income group and mode.
df_temp1 = summarize_all_combinations(df_tours, groupby_columns=pp_rdm_inc_keys + ['Mode'],
                                      summary_column='pp_tours')
df_temp1 = df_temp1.rename(columns={'Value': 'tours1'})

# Denominator: priority-population tours per zone pair and income group, all modes.
df_temp2 = summarize_all_combinations(df_tours, groupby_columns=list(pp_rdm_inc_keys),
                                      summary_column='pp_tours')
df_temp2 = df_temp2.rename(columns={'Value': 'tours2'})

df_pp_rdm_inc = df_temp1.merge(df_temp2, on=pp_rdm_inc_keys, how='left')
df_pp_rdm_inc['Value'] = df_pp_rdm_inc['tours1'] * 100 / df_pp_rdm_inc['tours2']

# Fix: carry the RDM zone pair in Origin_zone / Dest_zone. Previously the zones
# stayed in orig_rdm_zones / dest_rdm_zones while Origin_zone / Dest_zone were
# set to '', so selecting the final output columns at export dropped the zone
# identifiers and left rows that could not be told apart.
df_pp_rdm_inc = df_pp_rdm_inc.rename(columns={'orig_rdm_zones': 'Origin_zone',
                                              'dest_rdm_zones': 'Dest_zone'})
df_pp_rdm_inc = df_pp_rdm_inc[['Origin_zone', 'Dest_zone', 'Mode', 'Income', 'Value']].copy()

# Constant descriptor columns for this submetric.
df_pp_rdm_inc['Concept_ID'] = concept_id
df_pp_rdm_inc['Metric_ID'] = 'A3.3'
df_pp_rdm_inc['Metric_name'] = 'Mode Shares'
df_pp_rdm_inc['Submetric'] = 'A3.3.8'
df_pp_rdm_inc['Description'] = 'Mode shares by Income between RDM zones'
df_pp_rdm_inc['Population'] = 'Priority population'
df_pp_rdm_inc['Geography'] = 'RDM'
df_pp_rdm_inc['Zone_ID'] = ''
# Purpose and Period are not part of this grouping, so they are left blank.
df_pp_rdm_inc['Purpose'] = ''
df_pp_rdm_inc['Period'] = ''
df_pp_rdm_inc['Units'] = 'Percentage'
df_pp_rdm_inc['Total_Increment'] = ''

In [None]:
# Whole-population mode share by household income between RDM zone pairs
# (A3.3.9), with all periods and purposes combined.
rdm_inc_keys = ['orig_rdm_zones', 'dest_rdm_zones', 'Income']

# Numerator: tours per zone pair, income group and mode.
rdm_inc_mode_tours = summarize_all_combinations(
    df_tours, groupby_columns=rdm_inc_keys + ['Mode'], summary_column='tours'
).rename(columns={'Value': 'tours1'})

# Denominator: tours per zone pair and income group across all modes.
rdm_inc_group_tours = summarize_all_combinations(
    df_tours, groupby_columns=list(rdm_inc_keys), summary_column='tours'
).rename(columns={'Value': 'tours2'})

df_rdm_inc = (
    rdm_inc_mode_tours
    .merge(rdm_inc_group_tours, on=rdm_inc_keys, how='left')
    .assign(Value=lambda shares: shares['tours1'] * 100 / shares['tours2'])
    # rename() ignores mapping keys that are not present as columns.
    .rename(columns={
        'tour_purpose': 'Purpose',
        'orig_rdm_zones': 'Origin_zone',
        'dest_rdm_zones': 'Dest_zone',
    })
    [['Origin_zone', 'Dest_zone', 'Mode', 'Income', 'Value']]
    # Constant descriptor columns; Period and Purpose are blank for this grouping.
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.3',
        Metric_name='Mode Shares',
        Submetric='A3.3.9',
        Description='Mode share by household income in origin and destination RDM Zones',
        Population='Whole Population',
        Geography='RDM',
        Zone_ID='',
        Period='',
        Purpose='',
        Units='Percentage',
        Total_Increment='',
    )
)

In [None]:
# Export: one CSV per submetric table, then one combined CSV.
metric_name = '_mode_shares_'

# Fix: df_rdm_inc (A3.3.9) is now in the list; it was computed but never written out.
all_dfs = [df_region_period, df_tb, df_cnty, df_rdm, df_sd,
           df_pp, df_pp_rdm, df_pp_rdm_inc, df_rdm_inc]

for summary_df in all_dfs:
    if summary_df.empty:
        # The file name is built from the first row's Submetric / Geography,
        # so an empty table cannot be named; report it instead of raising KeyError.
        print('Skipping empty mode share table (no rows to export)')
        continue

    # Keep only the configured output columns, in the configured order.
    summary_df = summary_df.reset_index(drop=True)[perf_measure_columns]
    file_name = summary_df['Submetric'].iloc[0]
    geography = '_' + summary_df['Geography'].iloc[0].replace(' ', '_')
    out_path = _join(summary_dir,
                     file_name + metric_name + concept_id + geography + filename_extension + '.csv')
    summary_df.to_csv(out_path, index=False)
    print(len(summary_df), file_name, summary_df['Metric_name'].iloc[0])

# Combined file. Its membership is unchanged from the original list.
# NOTE(review): df_pp_rdm, df_pp_rdm_inc and df_rdm_inc are not part of the
# combined file — confirm that this is intended.
combined_df = pd.concat([df_region_period, df_tb, df_cnty, df_rdm, df_sd, df_pp],
                        ignore_index=True)
# Use the same column order as the per-submetric files.
combined_df = combined_df[perf_measure_columns]
combined_df.to_csv(_join(summary_dir,
                         'A3.3' + metric_name + concept_id + '_region' + filename_extension + '.csv'),
                   index=False)

In [None]:
#df_shares = pd.concat([df_region_period, df_cnty, df_rdm, df_sd], ignore_index=False)

In [None]:
#df_shares.to_csv(_join())

In [None]:
#with pd.ExcelWriter(os.path.join(summary_outputs, 'concept-BY15.xlsx'), engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
#    df_region_period.to_excel(writer, sheet_name='A3.3.1', startcol=0, index=False)
#    df_tb.to_excel(writer, sheet_name='A3.3.2', startcol=0, index=False)
#    df_cnty.to_excel(writer, sheet_name='A3.3.3', startcol=0, index=False)
#    df_rdm.to_excel(writer, sheet_name='A3.3.4', startcol=0, index=False)
#    df_sd.to_excel(writer, sheet_name='A3.3.5', startcol=0, index=False)

In [None]:
#tab1 = df_tours.groupby(['tour_mode', 'transbay_od','tour_purpose', 'pp_share'])['flag'].count().reset_index()
#tab2 = df_tours.groupby(['tour_mode', 'start_hour'])['flag'].count().reset_index()
#tab3 = df_tours.groupby(['tour_mode', 'orig_county', 'dest_county'])['flag'].count().reset_index()
#tab4 = df_tours.groupby(['tour_mode', 'orig_super_dist', 'dest_super_dist'])['flag'].count().reset_index()
#tab4 = df_tours.groupby(['tour_mode', 'orig_rdm_zones', 'dest_rdm_zones'])['flag'].count().reset_index()
#tab1