In [12]:
from __future__ import (absolute_import, division, print_function, )
from future.standard_library import install_aliases
install_aliases()  # noqa: E402

import sys
import logging
import argparse

from activitysim.core import inject
from activitysim.core import tracing
from activitysim.core import config
from activitysim.core import pipeline
from activitysim.core import mp_tasks
from activitysim.core import chunk
# from activitysim.cli import run

logger = logging.getLogger('activitysim')

In [21]:
def log_settings():
    """Write the current value of a fixed list of run settings to the log.

    Each setting is looked up with ``config.setting`` and emitted at INFO
    level on the module-level ``logger`` as ``setting <name>: <value>``.
    """
    setting_names = (
        'households_sample_size',
        'chunk_size',
        'multiprocess',
        'num_processes',
        'resume_after',
    )

    for name in setting_names:
        value = config.setting(name)
        logger.info("setting %s: %s" % (name, value))

In [26]:
def cleanup_output_files():
    """Ask ``tracing.delete_output_files`` to clear each known output file type.

    Log files currently attached to the root logger through a
    ``logging.FileHandler`` are passed as ``ignore`` so they are left alone.
    """
    # collect the paths of log files that are still being written to
    open_logs = []
    for handler in logger.root.handlers:
        if isinstance(handler, logging.FileHandler):
            open_logs.append(handler.baseFilename)
    tracing.delete_output_files('log', ignore=open_logs)

    # remaining file types, requested in the same order as before
    for extension in ('h5', 'csv', 'txt', 'yaml', 'prof', 'omx'):
        tracing.delete_output_files(extension)

In [22]:
# Names of the injectables that a later cell resolves with inject.get_injectable.
injectables = ['data_dir','configs_dir','output_dir']

# Register the data and configs locations. 'output_dir' is not registered
# here — presumably it is supplied by a default elsewhere; TODO confirm.
inject.add_injectable('data_dir', 'data')
inject.add_injectable('configs_dir', ['configs', 'configs/configs'])

config.filter_warnings()
tracing.config_logger()

# Log the settings listed in log_settings() so the run configuration is visible.
log_settings()

t0 = tracing.print_elapsed_time()

INFO - activitysim - Read logging configuration from: configs/logging.yaml
INFO - activitysim - setting households_sample_size: 0
INFO - activitysim - setting chunk_size: 4000000000
INFO - activitysim - setting multiprocess: True
INFO - activitysim - setting num_processes: 30
INFO - activitysim - setting resume_after: _


In [23]:
# Rebinds `injectables` from a list of names to a {name: value} dict.
injectables = {k: inject.get_injectable(k) for k in injectables}

In [24]:
# Read the 'resume_after' setting, passing None as the fallback value.
resume_after = config.setting('resume_after', None)

In [27]:
# Only clear previous output files when resume_after is falsy.
if not resume_after:
    cleanup_output_files()

In [29]:
# Only logs a message when the 'multiprocess' setting is truthy; nothing is run here.
if config.setting('multiprocess', False):
    logger.info('run multiprocess simulation')

INFO - activitysim - run multiprocess simulation


In [31]:
# NOTE(review): the recorded run of this cell raised OSError because
# 'output/breadcrumbs.yaml' was missing. Presumably the breadcrumbs file is
# needed because a resume_after value was set — confirm against mp_tasks.
run_list = mp_tasks.get_run_list()

OSError: Could not find saved breadcrumbs file 'output/breadcrumbs.yaml'

In [33]:
# NOTE(review): `hdf` is not defined in any cell visible in this notebook, so
# this line depends on leftover kernel state and fails on a fresh kernel.
hdf.close()

In [38]:
import geopandas as gpd
import os 

In [39]:
os.getcwd()

'/home/ubuntu/ual_asim/activitysim/bay_area_mp'

In [40]:
# Load the blocks shapefile; the path is relative to the current working directory.
blocks = gpd.read_file('data/blocks.shp')

In [42]:
blocks.COUNTY.nunique()

9

## Reading OMX file 

In [4]:
import openmatrix as omx 

In [2]:
!omx-validate data/skims.omx

File contents: data/skims.omx
data/skims.omx (File) ''
Last modif.: 'Wed Jul 15 01:08:09 2020'
Object Tree: 
/ (RootGroup) ''
/data (Group) ''
/lookup (Group) ''


Check 1: Has OMX_VERSION attribute set to 0.2
  File version is 0.2: Pass

Check 2: Has SHAPE array attribute set to two item integer array

Check 3: Has data group for matrices
  Group: Pass
  Number of Matrices: 0
  Matrix names: []

Check 4: Matrix shape matches file shape

Check 5: Uses common data types (float or int) for matrices

Check 6: Matrices chunked for faster I/O

Check 7: Uses zlib compression if compression used

Check 8: Has NA attribute if desired (but not required)

Check 9: Has lookup group for labels/indexes if desired (but not required)
  Group: Pass
  Number of Lookups: 0
  Lookups names: []

Check 10: Lookup shapes are 1-d and match file shape

Check 11: Uses common data types (int or str) for lookups

Check 12: Has Lookup DIM attribute of 0 (row) or 1 (column) if

In [5]:
# Open the skims OMX file. NOTE(review): no visible cell closes this handle.
myfile = omx.open_file('data/skims.omx')

In [8]:
# The recorded output is None. The omx-validate report in this notebook lists
# zero matrices for the same file, so the file presumably has no shape set.
print(myfile.shape())

None


In [27]:
import pandas as pd 
import numpy as np

In [22]:
# Previously imputed BEAM skims, read directly from S3 (needs network access
# and, presumably, S3 credentials — confirm).
skims_imputed = pd.read_csv("s3://baus-data/spring_2019/beam_skims_imputed.csv")

In [20]:
# Raw BEAM skims (gzip-compressed CSV), read directly from S3.
beam_skims = pd.read_csv('s3://baus-data/spring_2019/30.skims-smart-23April2019-baseline.csv.gz')

In [23]:
beam_skims.head()

Unnamed: 0,hour,mode,origTaz,destTaz,travelTimeInS,generalizedTimeInS,cost,generalizedCost,distanceInM,numObservations,energy
0,16,CAR,1136,922,933.0,853.4,0.0,12.782407,28173.405,1,48377200.0
1,17,CAR,106,241,2592.0,2572.1,0.0,26.092245,26881.345,1,51728080.0
2,15,WALK,108,110,482.0,482.0,0.0,5.903581,625.999,1,33177.95
3,6,DRIVE_TRANSIT,360,390,816.0,1200.0,3.000917,7.587518,11641.535621,1,20135060.0
4,10,CAR,1289,1220,2976.0,2745.1,0.0,15.885995,70944.826,1,122730800.0


# To do: Find the code that imputed these skims and save it in a Jupyter notebook by itself. Try to maintain the same format.

In [28]:
# Infer the number of TAZs, assuming the skims hold exactly one row for every
# (hour, mode, origin, destination) combination. If that assumption does not
# hold, the square root is not a whole number and the assertion below fails.
num_hours = len(beam_skims['hour'].unique())
num_modes = len(beam_skims['mode'].unique())
num_od_pairs = len(beam_skims) / num_hours / num_modes
num_taz = np.sqrt(num_od_pairs)
# Report the intermediate values in the failure message so the cause is
# visible without re-inspecting each variable in separate cells.
assert num_taz.is_integer(), (
    "beam_skims is not a complete hour x mode x TAZ x TAZ table: "
    "%d rows / %d hours / %d modes = %s OD pairs (sqrt = %s)"
    % (len(beam_skims), num_hours, num_modes, num_od_pairs, num_taz))
num_taz = int(num_taz)

AssertionError: 

In [29]:
num_hours

24

In [30]:
num_modes

8

In [31]:
num_od_pairs

5312.635416666667

In [32]:
num_taz

72.88782763031607

In [33]:
#Load mtc skims, load raw beam skims 

In [None]:
#Source code: https://github.com/ual/activitysynth/blob/master/activitysynth/scripts/utils.py
def impute_missing_skims(mtc_skims, beam_skims_raw, expected_rows=2114116):
    """Fill gaps in the BEAM skims using MTC distances and AM-peak averages.

    Parameters
    ----------
    mtc_skims : object with ``to_frame(columns=...)``
        Must provide the columns ``orig``, ``dest`` and ``da_distance_AM``
        (treated as miles, per the conversion below).
    beam_skims_raw : object with ``to_frame()``
        The returned frame must provide ``generalizedTimeInS``,
        ``distanceInM``, ``from_zone_id``, ``to_zone_id``, ``gen_cost``,
        ``hour``, ``mode`` and ``numObservations``.
        NOTE(review): the raw BEAM CSV header shown earlier in this notebook
        uses ``origTaz`` / ``destTaz`` / ``generalizedCost``, so a rename is
        presumably applied before the data reaches this function — confirm.
    expected_rows : int or None, optional
        Number of rows the result must have. Defaults to 2114116, the value
        that was previously hard-coded. Pass ``None`` to skip the check.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(from_zone_id, to_zone_id)`` with one row per MTC
        zone pair. Holds ``dist`` (meters) plus ``gen_tt_<mode>`` and
        ``gen_cost_<mode>`` columns for each mode seen in hours 7-9.

    Raises
    ------
    AssertionError
        If ``expected_rows`` is given and the result length differs.
    """
    df = beam_skims_raw.to_frame()

    # seconds to minutes
    df['gen_tt'] = df['generalizedTimeInS'] / 60

    zone_pair = ['from_zone_id', 'to_zone_id']

    mtc = mtc_skims.to_frame(columns=['orig', 'dest', 'da_distance_AM'])
    mtc = mtc.rename(
        columns={'orig': 'from_zone_id', 'dest': 'to_zone_id'})
    mtc = mtc.set_index(zone_pair)

    # miles to meters
    mtc['dist'] = mtc['da_distance_AM'] * 1609.34

    # impute mtc zone-to-zone distances where zero-valued in beam skims
    zero_dist_mask = df['distanceInM'] == 0
    if zero_dist_mask.any():
        df.loc[zero_dist_mask, 'distanceInM'] = mtc.loc[
            pd.MultiIndex.from_frame(df.loc[zero_dist_mask, zone_pair]),
            'dist'].values

    # use MTC dists for all intra-taz distances
    intra_taz_mask = df['from_zone_id'] == df['to_zone_id']
    df.loc[intra_taz_mask, 'distanceInM'] = mtc.loc[
        pd.MultiIndex.from_frame(df.loc[intra_taz_mask, zone_pair]),
        'dist'].values

    # create morning peak lookup: per-mode mean impedance per meter
    df['gen_time_per_m'] = df['gen_tt'] / df['distanceInM']
    df['gen_cost_per_m'] = df['gen_cost'] / df['distanceInM']
    df.loc[df['hour'].isin([7, 8, 9]), 'period'] = 'AM'
    df_am = df[df['period'] == 'AM']
    # a zero distance yields +/-inf above; treat those as missing
    df_am = df_am.replace([np.inf, -np.inf], np.nan)
    # repeat each row numObservations times so the means below are weighted
    df_am = df_am.loc[df_am.index.repeat(df_am.numObservations)]
    am_lookup = df_am[[
        'mode', 'gen_time_per_m', 'gen_cost_per_m']].dropna().groupby(
            ['mode']).mean().reset_index()

    # morning averages
    df_am_avg = df_am[
        zone_pair + ['mode', 'gen_tt', 'gen_cost']].groupby(
        zone_pair + ['mode']).mean().reset_index()

    # long to wide: one '<impedance>_<mode>' column per combination
    df_am_pivot = df_am_avg.pivot_table(index=zone_pair, columns='mode')
    df_am_pivot.columns = ['_'.join(col) for col in df_am_pivot.columns.values]

    # combine with mtc-based dists; left join keeps every MTC zone pair
    merged = pd.merge(
        mtc[['dist']], df_am_pivot, left_index=True, right_index=True,
        how='left')

    # impute: missing impedance = distance * per-meter AM average for the mode
    impedance_lookup_cols = (
        ('gen_tt', 'gen_time_per_m'),
        ('gen_cost', 'gen_cost_per_m'),
    )
    for mode in am_lookup['mode'].values:
        for impedance, lookup_col in impedance_lookup_cols:
            colname = impedance + '_' + mode
            lookup_val = am_lookup.loc[
                am_lookup['mode'] == mode, lookup_col].values[0]
            missing = pd.isnull(merged[colname])
            merged.loc[missing, colname] = \
                merged.loc[missing, 'dist'] * lookup_val

    if expected_rows is not None:
        assert len(merged) == expected_rows, (
            "expected %s zone pairs, got %s" % (expected_rows, len(merged)))

    return merged