In [1]:
import logging
import os
import shutil
import sys
from zipfile import ZipFile

import boto3
import pandas as pd

# Shared S3 client used by the download helpers defined in this notebook.
s3_client = boto3.client('s3')

# Send INFO-level (and above) log records to stdout so that they show up in
# the cell output, prefixed with a timestamp, the logger name and the level.
logging.basicConfig(
    stream=sys.stdout, level=logging.INFO,
    format='%(asctime)s %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


# Move the working directory one level up before importing the project-local
# `kpi` module. Every relative path used below ('kpis/...', 'tmp/...') is
# resolved against this new working directory.
# NOTE(review): presumably the notebook lives in a sub-folder of the project
# root -- confirm. The chdir is relative, so re-running this cell moves one
# more level up each time; restart the kernel before re-running it.
os.chdir('../')
import kpi

# Pseudo-code to process data

In [10]:
def download_s3(local_file_name,s3_bucket,s3_object_key):
    """
    Download one S3 object to a local file, drawing a progress bar on stdout.

    Parameters:
    ------------
    - local_file_name: str. Path of the local file to write (opened in 'wb' mode).
    - s3_bucket: str. Name of the S3 bucket.
    - s3_object_key: str. Key of the object inside the bucket.

    Returns:
    --------
    None

    Raises:
    -------
    Whatever the S3 client or ``open`` raises (e.g. the object does not
    exist, or the local directory is missing). If the transfer fails after
    the local file was created, the partial file is removed before the
    exception is re-raised, so a truncated file is never left behind.

    Reference:
    https://stackoverflow.com/questions/41827963/
    track-download-progress-of-s3-file-using-boto3-and-callbacks
    """
    bar_width = 50

    meta_data = s3_client.head_object(Bucket=s3_bucket, Key=s3_object_key)
    total_length = int(meta_data.get('ContentLength', 0))
    downloaded = 0

    def progress(chunk):
        # Called by the S3 client with the number of bytes just transferred.
        nonlocal downloaded
        downloaded += chunk
        if total_length > 0:
            done = min(bar_width, int(bar_width * downloaded / total_length))
        else:
            # Unknown or zero size: avoid dividing by zero, show a full bar.
            done = bar_width
        sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (bar_width - done)))
        sys.stdout.flush()

    logger.info(f'Downloading {s3_object_key}')
    try:
        with open(local_file_name, 'wb') as f:
            s3_client.download_fileobj(s3_bucket, s3_object_key, f, Callback=progress)
    except BaseException:
        # Callers use the mere existence of the local file as a "cached"
        # signal, so a truncated download must not survive a failure.
        if os.path.isfile(local_file_name):
            os.remove(local_file_name)
        raise
        

def download_kpis(scenario):
    """
    Download the KPI yaml file of a scenario from the 'carb-results' bucket.

    Parameters:
    ------------
    - scenario: str. scenario name. Used both as the S3 key prefix and in
                the local / remote file name.

    The file is written to ``kpis/kpi_<scenario>.yaml``.
    """
    download_s3(
        local_file_name="kpis/kpi_{}.yaml".format(scenario),
        s3_bucket='carb-results',
        s3_object_key="{}/kpi_{}.yaml".format(scenario, scenario),
    )
        

def download_data(scenario):
    """
    Download results (ActivitySim and Skims) of scenario in a tmp folder.

    Nothing is downloaded when the folder ``tmp/<scenario>`` already exists.
    Otherwise the ActivitySim archive is saved as
    ``tmp/<scenario>_asim_output.zip`` and extracted into ``tmp``, and the
    skims are saved as ``tmp/<scenario>/skims.omx``.

    Parameters:
    -------------
    - scenario: str. scenario name

    Returns:
    --------
    None

    """
    scenario_dir = 'tmp/{}'.format(scenario)

    # The scenario folder doubles as the "already downloaded" marker.
    if os.path.isdir(scenario_dir):
        return None

    s3_bucket = 'carb-results'

    # The archive is written straight into tmp/, which may not exist yet.
    os.makedirs('tmp', exist_ok=True)

    # Download Asim results
    asim_local_file = "tmp/{}_asim_output.zip".format(scenario)
    asim_s3_object_key = "{}/asim_outputs_2022.zip".format(scenario)
    download_s3(asim_local_file, s3_bucket, asim_s3_object_key)

    # Unzip Asim results
    with ZipFile(asim_local_file, 'r') as zipObj:
        zipObj.extractall('tmp')

    # Download Skims
    # NOTE(review): the archive presumably unpacks into tmp/<scenario>/;
    # create the folder anyway so the skims download cannot fail on a
    # missing directory.
    os.makedirs(scenario_dir, exist_ok=True)
    skims_local_file = "tmp/{}/skims.omx".format(scenario)
    skims_s3_object_key = "{}/skims.omx".format(scenario)
    download_s3(skims_local_file, s3_bucket, skims_s3_object_key)

    return None


def delete_data(scenario):
    """
    Deletes the downloaded data of the given scenario.

    Removes the folder ``tmp/<scenario>`` together with everything in it and
    the archive ``tmp/<scenario>_asim_output.zip``. Paths that are already
    missing are skipped, so the call is safe to repeat. The scenarios
    '01_base_000' and 'ex_1' are never deleted.

    Parameters:
    ------------
    - scenario: str. scenario name.

    Returns:
    --------
    None
    """
    if scenario in ('01_base_000', 'ex_1'):
        return None

    scenario_dir = 'tmp/{}'.format(scenario)
    archive = "tmp/{}_asim_output.zip".format(scenario)

    # os.rmdir only removes empty directories; the results folder is not empty.
    if os.path.isdir(scenario_dir):
        shutil.rmtree(scenario_dir)
    if os.path.isfile(archive):
        os.remove(archive)
    return None
    
def add_scenario_changes(df, policy_changes):
    """
    Attach the percentage change of every scenario to the policy results.

    Parameters:
    ------------
    df: pandas DataFrame. Policy results; joined on the index name 'policy'.
    policy_changes: dict. ``{policy: {scenario_id: percentage change}}``.

    Returns:
    --------
    Outer join of ``df`` and the changes. Columns present on both sides get
    the suffix ``_metric`` (values from ``df``) or ``_%change`` (values from
    ``policy_changes``).

    """
    # One row per policy, one column per scenario id.
    pct_by_policy = pd.DataFrame(policy_changes).T.rename_axis('policy')
    return df.join(
        pct_by_policy,
        how='outer',
        lsuffix='_metric',
        rsuffix="_%change",
    )


def scenario_elasticities(df):
    """
    Estimates the elasticity of every scenario.

    For each column ``<scenario>_metric`` the elasticity is

        (<scenario>_metric / base_line - 1) / <scenario>_%change

    and is stored in a new column ``<scenario>_elasticity``.

    Parameters:
    ------------
    df: pandas DataFrame with a ``base_line`` column and, for every
        scenario, a ``<scenario>_metric`` and a ``<scenario>_%change`` column.

    Returns:
    --------
    ``df`` joined with the ``<scenario>_elasticity`` columns.

    Raises:
    -------
    KeyError if ``base_line`` or the ``_%change`` column of a scenario is
    missing.
    """
    metric_suffix = '_metric'

    metric_cols = [c for c in df.columns
                   if isinstance(c, str) and c.endswith(metric_suffix)]
    # Derive the scenario id from the column name instead of assuming a
    # fixed width, so ids such as 'scenario_10' are handled correctly.
    scenario_ids = [c[:-len(metric_suffix)] for c in metric_cols]
    # Pair each metric with the change of the same scenario by name, not by
    # column position.
    change_cols = [s + '_%change' for s in scenario_ids]

    relative_change = df[metric_cols].div(df['base_line'], axis=0) - 1
    elasticity = relative_change.div(df[change_cols].values)
    elasticity.columns = [s + '_elasticity' for s in scenario_ids]
    return df.join(elasticity)


def mean_elasticity(df):
    """
    Estimates the mean elasticity.

    Averages, row by row, every column whose name contains '_elasticity'
    and stores the result in a new 'mean_elasticity' column. The column is
    added to ``df`` in place and the same object is returned.
    """
    is_elasticity = df.columns.str.contains('_elasticity')
    elasticity_cols = df.columns[is_elasticity]
    df['mean_elasticity'] = df[elasticity_cols].mean(axis=1)
    return df


def common_entries(dcts):
    """
    Zip function for dicts.

    Adapted from
    https://stackoverflow.com/questions/16458340/python-equivalent-of-zip-for-dictionaries
    to take a list of dicts and return a dictionary.

    Parameters:
    ------------
    dcts: list of dicts.

    Returns:
    --------
    dict mapping every key present in all the dicts to a tuple with that
    key's value in each dict (same order as ``dcts``). Keys follow the order
    of the first dict. An empty input gives an empty dict.
    """
    if not dcts:
        # Return an empty mapping so callers can iterate over .items().
        return {}

    first, others = dcts[0], dcts[1:]
    # Walk the first dict rather than a set so the key order is reproducible.
    shared_keys = [key for key in first if all(key in d for d in others)]
    return {key: tuple(d[key] for d in dcts) for key in shared_keys}


def kpis_scenario(policy, scenario, scenario_id):
    """
    Computes the kpi for the given policy, scenario and scenario_id

    The KPIs are read from ``kpis/kpi_<scenario>.yaml`` when that file can
    be downloaded or is already present locally. Otherwise the scenario
    data is downloaded, the KPIs are computed with the ``kpi`` module and
    saved to that same file so the next run can reuse them.

    Parameters:
    - policy: str. policy name
    - scenario: str. scenario name
    - scenario_id: str. scenario id

    Returns:
    --------
    dict. One entry per KPI: ``{kpi_name: DataFrame}``. Each DataFrame is
    indexed by ('policy', 'category') and has a single column named after
    the scenario id. KPIs that are not split by category get one row with
    category 'none'.
    """

    logger.info('Estimating KPIs for scenario: {}'.format(scenario))
    kpi_file = 'kpis/kpi_{}.yaml'.format(scenario)

    # Best effort: a missing remote file only means the KPIs have to be
    # computed locally. Say so instead of failing silently, and do not
    # swallow KeyboardInterrupt / SystemExit.
    try:
        download_kpis(scenario)
    except Exception as err:
        logger.warning(
            'Could not download KPIs for scenario {}: {}'.format(scenario, err))

    if os.path.isfile(kpi_file):
        metrics = kpi.read_yaml(kpi_file)
        metrics['policy'] = policy
        metrics['scenario_id'] = scenario_id

    else:
        download_data(scenario)
        # NOTE(review): the result is expected to already carry the 'policy'
        # and 'scenario_id' entries used below -- confirm in the kpi module.
        metrics = kpi.get_scenario_results(policy, scenario, scenario_id)
        # Save under the same name that is checked above, so the computed
        # results are found again on the next run.
        kpi.save_yaml(kpi_file, metrics)

    metadata_keys = ('policy', 'name', 'scenario_id')
    kpi_names = [key for key in metrics.keys() if key not in metadata_keys]
    column_name = '{}'.format(metrics['scenario_id'])
    dfs_dict = {}

    for kpi_name in kpi_names:
        value = metrics[kpi_name]
        try:
            # KPI split by category: {category: value}
            n_categories = len(value)
            categories = list(value.keys())
            baselines = list(value.values())

        except (TypeError, AttributeError):
            # Single-valued KPI (number, string, ...): one 'none' category.
            n_categories = 1
            categories = 'none'
            baselines = [value]

        df = pd.DataFrame({'policy': [metrics['policy']] * n_categories,
                           'category': categories,
                           column_name: baselines})

        dfs_dict[kpi_name] = df.set_index(['policy', 'category'])

    # NOTE(review): cleanup of the downloaded data is currently disabled.
    # delete_data(scenario)
    return dfs_dict

def save_df(name, df):
    """
    Saves a dataframe as ``kpis/summary/<name>.csv``.

    The ``kpis/summary`` folder is created when it does not exist yet.

    Parameters:
    ------------
    - name: str. File name without extension.
    - df: pandas DataFrame. Table to save (the index is written too).
    """
    summary_dir = 'kpis/summary'
    os.makedirs(summary_dir, exist_ok=True)
    df.to_csv('{}/{}.csv'.format(summary_dir, name))

In [20]:
# Illustrative example of the expected layout. Kept under its own names so
# that it is not silently overwritten by the real configuration below.
#   scenarios: {policy: {'base_line': run name, scenario_id: run name}}
#   changes:   {policy: {scenario_id: relative change of the policy lever}}
EXAMPLE_POLICY_SCENARIOS = {'policy_one': {'base_line':'ex_1',
                                           'scenario_1':'ex_2',
                                           'scenario_2':'ex_3'},
                            'policy_two': {'base_line':'ex_1',
                                           'scenario_1':'ex_4',
                                           'scenario_2':'ex_5'}}

EXAMPLE_POLICY_CHANGES = {'policy_one': {'scenario_1':0.25,
                                         'scenario_2':-0.25},
                          'policy_two': {'scenario_1':0.1,
                                         'scenario_2':0.25}}

# Runs analysed by this notebook.
# NOTE(review): for both policies 'base_line' is the run
# '16_share_tnc_price_+050', which is also 'scenario_1' of 'share_tnc_price'.
# That scenario therefore always gets an elasticity of 0, which is included
# in the mean elasticity. Confirm that this is the intended baseline.
policy_scenarios = {'share_tnc_price': {'base_line':'16_share_tnc_price_+050',
                                        'scenario_1':'16_share_tnc_price_+050',
                                        'scenario_2':'17_share_tnc_price_+025',
                                        'scenario_3':'18_share_tnc_price_-025',
                                        'scenario_4':'19_share_tnc_price_-050'},
                    'operating_cost': {'base_line':'16_share_tnc_price_+050',
                                       'scenario_1':'20_operating_cost_+100',
                                       'scenario_2':'21_operating_cost_+050',
                                       'scenario_3':'22_operating_cost_-025',
                                       'scenario_4':'23_operating_cost_-050'}
                   }

# Relative change applied in each scenario (0.5 means +50 %).
policy_changes = {'share_tnc_price': {'scenario_1':0.5,
                                      'scenario_2':0.25,
                                      'scenario_3':-0.25,
                                      'scenario_4':-0.50},
                  'operating_cost': {'scenario_1':1.0,
                                     'scenario_2':0.5,
                                     'scenario_3':-0.25,
                                     'scenario_4':-0.50}
                 }


if __name__ == '__main__':

    # Step 1: for every policy, compute the KPIs of each of its scenarios
    # and put them side by side (one column per scenario id) for every KPI
    # that all the scenarios have in common.
    metrics_list = []

    for policy, scenarios in policy_scenarios.items():

        per_scenario = [kpis_scenario(policy, s, s_id) for s_id, s in scenarios.items()]
        frames_by_kpi = common_entries(per_scenario)
        metrics_list.append(
            {kpi_name: pd.concat(frames, axis=1)
             for kpi_name, frames in frames_by_kpi.items()})

    # Step 2: for every KPI shared by all policies, stack the policies and
    # add the scenario changes, the scenario elasticities and their mean.
    dfs = {}
    for kpi_name, policy_frames in common_entries(metrics_list).items():
        kpi_table = pd.concat(policy_frames, axis=0)
        kpi_table = add_scenario_changes(kpi_table, policy_changes)
        kpi_table = scenario_elasticities(kpi_table)
        dfs[kpi_name] = mean_elasticity(kpi_table)

    # Step 3: write one csv file per KPI.
    for summary_name, summary_table in dfs.items():
        save_df(summary_name, summary_table)

2022-09-24 13:56:03,847 __main__ - INFO - Estimating KPIs for scenario: 16_share_tnc_price_+050
2022-09-24 13:56:04,170 __main__ - INFO - Downloading 16_share_tnc_price_+050/kpi_16_share_tnc_price_+050.yaml
2022-09-24 13:56:04,447 __main__ - INFO - Downloading 16_share_tnc_price_+050/kpi_16_share_tnc_price_+050.yaml
2022-09-24 13:56:04,702 __main__ - INFO - Downloading 17_share_tnc_price_+025/kpi_17_share_tnc_price_+025.yaml
2022-09-24 13:56:05,015 __main__ - INFO - Downloading 18_share_tnc_price_-025/kpi_18_share_tnc_price_-025.yaml
2022-09-24 13:56:05,307 __main__ - INFO - Downloading 19_share_tnc_price_-050/kpi_19_share_tnc_price_-050.yaml
2022-09-24 13:56:05,626 __main__ - INFO - Downloading 16_share_tnc_price_+050/kpi_16_share_tnc_price_+050.yaml
2022-09-24 13:56:05,939 __main__ - INFO - Downloading 20_operating_cost_+100/kpi_20_operating_cost_+100.yaml
2022-09-24 13:56:06,268 __main__ - INFO - Downloading 21_operating_cost_+050/kpi_21_operating_cost_+050.yaml
2022-09-24 13:56:06,

In [19]:
# Display the summary table stored under the 'mode_shares' key of `dfs`,
# the dict built by the pipeline cell above.
# NOTE(review): the execution counts suggest this cell was last run before
# the pipeline cell was re-run, so the output shown may be stale.
dfs['mode_shares']

Unnamed: 0_level_0,Unnamed: 1_level_0,base_line,scenario_1_metric,scenario_2_metric,scenario_3_metric,scenario_4_metric,scenario_1_%change,scenario_2_%change,scenario_3_%change,scenario_4_%change,scenario_1_elasticity,scenario_2_elasticity,scenario_3_elasticity,scenario_4_elasticity,mean_elasticity
policy,category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
share_tnc_price,Bike,0.013801,0.013801,0.013945,0.013783,0.01372,0.5,0.25,-0.25,-0.5,0.0,0.042002,0.005169,0.011734,0.014726
share_tnc_price,Drive Alone,0.451296,0.451296,0.446147,0.442646,0.436751,0.5,0.25,-0.25,-0.5,0.0,-0.045637,0.076669,0.064458,0.023873
share_tnc_price,Public Transit,0.076135,0.076135,0.075774,0.076566,0.077937,0.5,0.25,-0.25,-0.5,0.0,-0.018955,-0.022664,-0.047339,-0.022239
share_tnc_price,Shared Ride,0.327816,0.327816,0.323108,0.324127,0.323203,0.5,0.25,-0.25,-0.5,0.0,-0.057441,0.045012,0.028146,0.003929
share_tnc_price,TNC - Pooled,0.005194,0.005194,0.008364,0.019629,0.029977,0.5,0.25,-0.25,-0.5,0.0,2.440976,-11.116689,-9.543173,-4.554721
share_tnc_price,TNC - Ride Alone,0.037553,0.037553,0.035679,0.031558,0.029704,0.5,0.25,-0.25,-0.5,0.0,-0.199618,0.638561,0.41804,0.214246
share_tnc_price,Walk,0.088205,0.088205,0.096982,0.091691,0.088708,0.5,0.25,-0.25,-0.5,0.0,0.398015,-0.158065,-0.011407,0.057136
operating_cost,Bike,0.013801,0.01857,0.016035,0.012696,0.011829,1.0,0.5,-0.25,-0.5,0.34557,0.32376,0.320211,0.285688,0.318807
operating_cost,Drive Alone,0.451296,0.416628,0.432435,0.451932,0.45994,1.0,0.5,-0.25,-0.5,-0.076819,-0.083586,-0.005638,-0.038306,-0.051087
operating_cost,Public Transit,0.076135,0.099184,0.088192,0.069617,0.064034,1.0,0.5,-0.25,-0.5,0.302743,0.316731,0.342415,0.317862,0.319938
