In [18]:
import logging

from activitysim.core import tracing
from activitysim.core import config
from activitysim.core import pipeline
from activitysim.core import simulate
from activitysim.core import inject
from activitysim.core import logit

# from .util import expressions
# from .util import estimation

logger = logging.getLogger(__name__)

In [19]:
import pandas as pd
import numpy as np

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
# The pandas default for this option is 'warn'.
pd.options.mode.chained_assignment = None

In [73]:
# Load households and persons from the bay_area_base data folder and work on a
# 10,000-person random sample. The seed is fixed so the sample (and the random
# job sectors below) are the same after Restart Kernel -> Run All.
SEED = 0
households = pd.read_csv('../bay_area_base/data/households.csv')
persons = pd.read_csv('../bay_area_base/data/persons.csv').sample(10000, random_state=SEED)

households = households.set_index('household_id')
persons = persons.set_index('person_id')

# Random job sector (0 or 1) per person.
# NOTE(review): presumably a stand-in until a real job_sector attribute exists - confirm.
persons['job_sector'] = np.random.default_rng(SEED).integers(0, 2, len(persons))

In [74]:
# Attach household attributes to each sampled person (inner join on household_id).
persons_merge = pd.merge(
    persons,
    households,
    how='inner',
    left_on='household_id',
    right_index=True,
)

# ## telework_rates_csv
# telework_option_rates = pd.DataFrame({'age_category':[0,0,0,1,1,1,2,2,2,3,3,3], 
#                       'income_category':[0,1,2,0,1,2,0,1,2,0,1,2],
#                       'rate':np.random.rand(12)})

# telework_frequency_rates = pd.DataFrame({'0_days':[0.476],'1_days':[0.395],'2-3_days':[0.103],'4+_days':[0.026]})

# telework_daily_rates = pd.DataFrame({'0_days':[0.0],'1_days':[0.2],'2-3_days':[0.5],'4+_days':[0.8]})

In [11]:
# Write the option-rates table to CSV without the index column.
# NOTE(review): on a fresh kernel `telework_option_rates` is not defined at this point -
# its only earlier definition is commented out, and it is next assigned by a later
# read_csv cell. This cell only works with leftover kernel state.
telework_option_rates.to_csv('telework_option_rates.csv', index = False)

In [58]:
# Annotation spec: lines starting with '#' in the CSV are treated as comments.
telework_option_anotate = pd.read_csv(
    filepath_or_buffer='annotate_telework_option.csv',
    comment='#',
)

In [59]:
# Rates table: category columns plus a 'rate' column.
telework_option_rates = pd.read_csv(filepath_or_buffer='telework_option_rates.csv')

In [60]:
# Display the annotation spec (Description / Target / Expression columns).
telework_option_anotate

Unnamed: 0,Description,Target,Expression
0,job_sector,job_sector,df.job_sector
1,age_category,age_category,"pd.cut(df.age, [-np.inf,25,45,np.inf], labels ..."
2,income_category,income_category,"pd.cut(df.income, [-np.inf,50000,150000,np.inf..."


In [61]:
# Every column of the rates table except the last one.
telework_option_rates.columns[:-1].tolist()

['job_sector', 'income_category', 'age_category']

## Telework as an Option

In [62]:
def find_index(array_, value):
    """
    Returns the index of the first row of array_ that equals value.
    If no row matches, returns NaN.

    Parameters:
    ------------
    - array_: 2-d array of shape (n, m). Rows are compared one by one against value.
    - value: 1d-array of shape (m,)

    Return:
    --------
    int row index of the first match, or np.nan when there is none
    (including when array_ has no rows or fewer than two dimensions).
    """
    rows = np.asarray(array_)
    if rows.ndim < 2:
        # Nothing that can be indexed as [i, :]; treated as "not found".
        return np.nan

    # Compare every row at once instead of looping until an IndexError.
    row_matches = np.all(rows == value, axis=1)
    hits = np.flatnonzero(row_matches)
    return int(hits[0]) if hits.size else np.nan

In [63]:
def find_rate(rates, category):
    """
    For each row of the rates table, look up its category combination in category.

    Parameters:
    ------------
    - rates: Pandas DataFrame with a 'rate' column; all other columns form the
        category combination of the row.
    - category: 2-d array of category combinations, one combination per row.

    Return:
    --------
    List with one entry per row of rates: the result of find_index for that
    row's combination (a row index of category, or NaN when it is not found).
    """
    rate_keys = rates.drop(columns='rate').to_numpy()
    return [find_index(category, key_row) for key_row in rate_keys]

def annotate(df, annotation):
    """ Annotates a dataframe with annotation

    The input frame is left untouched: the new columns are added to a copy,
    which is what gets returned.

    Parameters:
    ------------
    - df: Pandas DataFrame. Dataframe the expressions are evaluated against.
    - annotation: Pandas DataFrame. DataFrame with Expressions to annotate in Dataframe.
        This dataframe should have at least two columns:
        - Target: str.  Name of the new column to annotate.
        - Expression: str. Expression to evaluate with python eval. The names
          `pd`, `np` and `df` are available inside the expression.

    Return:
    --------
    Annotated copy of the DataFrame. Rows of annotation are applied in order,
    so an expression can use a Target created by an earlier row.
    """
    annotated = df.copy()
    # `df` points at the working copy so later expressions see earlier targets.
    eval_locals = {'pd': pd, 'np': np, 'df': annotated}

    for target, expression in zip(annotation['Target'], annotation['Expression']):
        # SECURITY: eval runs arbitrary Python taken from the annotation table;
        # only feed this function annotation files you trust.
        annotated[target] = eval(expression, {}, eval_locals)
    return annotated

def create_dict_rate(rates, category):
    """
    Build a {category index: rate} dictionary.

    Parameters:
    ------------
    - rates: Pandas DataFrame with the category columns and a 'rate' column.
    - category: 2-d array of category combinations, one combination per row.

    Return:
    --------
    dict mapping the position of each combination in category (as returned by
    find_rate) to its rate. Rows with any missing value, including combinations
    that were not found in category, are left out. The input table is not modified.
    """
    keyed_rates = rates.copy(deep=True)
    keyed_rates['category'] = find_rate(keyed_rates, category)

    matched = keyed_rates.dropna()
    rate_by_category = matched.set_index('category')['rate']
    return rate_by_category.to_dict()

In [79]:
# Filter and annotate: keep persons with ptype 1 or 2, then add the derived columns.
choosers = annotate(
    persons_merge.loc[persons_merge['ptype'].isin([1, 2])],
    telework_option_anotate,
)

In [84]:
# Processing: add telework_option_rate for each agent.
# The category columns are taken from the rates table, in its own column order.
# Rows are matched position by position, so choosers and rates must list the
# categories in the same order (the annotation Targets are ordered differently).
telework_rate_categories = [col for col in telework_option_rates.columns if col != 'rate']
category, category_index = np.unique(choosers[telework_rate_categories].to_numpy(), axis=0, return_inverse=True)
choosers['telework_option_category'] = np.ravel(category_index)  # keep the inverse 1-d
dict_cat_rate = create_dict_rate(telework_option_rates, category)  # category index -> rate
# map, not replace: replace would leave an unmatched category index in place and it
# would then be used as if it were a rate. Categories without a rate get rate 0.
choosers['telework_option_rate'] = choosers['telework_option_category'].map(dict_cat_rate).fillna(0.0)

In [227]:
# Simulation: two alternatives per chooser, column '0' (no option) and the option rate.
trace_label = 'telework_option'
probs = pd.DataFrame({
    '0': 1 - choosers['telework_option_rate'],
    'telework_option_rate': choosers['telework_option_rate'],
})
choices, rands = logit.make_choices(probs, trace_label=trace_label)

In [228]:
# Simulation result as a boolean flag; persons that were not choosers get False.
persons['telework_option'] = choices.reindex(persons.index, fill_value=0).astype(bool)

In [229]:
# Display the persons table, which now carries the telework_option column.
persons

Unnamed: 0_level_0,age,earning,edu,hispanic,hours,member_id,race_id,relate,sex,student,...,hispanic.1,p_hispanic,MAR,TAZ,ptype,pemploy,pstudent,home_x,home_y,telework_option
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6064841,68,0.0,17.0,0,0.0,1,2,0,2,0,...,0,no,2,1256,5,3,3,-122.007278,38.258727,False
6143674,20,9300.0,20.0,1,20.0,2,1,2,1,1,...,1,yes,5,1285,3,1,2,-122.010185,38.368708,False
5109975,42,106000.0,22.0,0,40.0,1,1,0,1,0,...,0,no,1,508,1,1,3,-121.961474,37.259705,False
2728636,5,0.0,1.0,1,0.0,7,1,10,2,0,...,1,yes,5,1292,8,4,1,-122.269008,38.177331,False
3739808,69,0.0,16.0,0,0.0,2,1,1,2,0,...,0,no,1,196,5,3,3,-122.490676,37.663261,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2932506,2,0.0,0.0,0,0.0,4,1,2,1,0,...,0,no,5,112,8,4,1,-122.397070,37.763187,False
3500421,60,37800.0,16.0,0,40.0,1,6,0,1,0,...,0,no,1,73,1,1,3,-122.431086,37.787800,True
367816,58,69500.0,21.0,0,62.0,2,1,1,2,0,...,0,no,1,818,1,1,3,-122.068624,37.644138,True
5036748,27,25000.0,18.0,0,40.0,1,1,0,2,0,...,0,no,3,475,1,1,3,-121.982645,37.296902,True


In [232]:
# Recode ptype to 4 (non-worker, which makes mandatory trips unavailable)
# wherever telework_option is True; everyone else keeps their ptype.
new_ptype = persons['ptype'].where(~persons['telework_option'], 4)

In [233]:
# Inspect the recoded person types.
new_ptype

person_id
6064841    5
6143674    3
5109975    1
2728636    8
3739808    5
          ..
2932506    8
3500421    4
367816     4
5036748    4
1143364    5
Name: ptype, Length: 100, dtype: int64

In [None]:
# @inject.step()
def telework_option(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):

    """
    Rate-based "telework as an option" model.

    Persons with ptype 1 or 2 are annotated with the category columns of the
    rates table, receive the rate of their category combination, and a
    two-alternative choice is drawn from that rate. The result is stored in the
    boolean column persons['telework_option'], and the persons table in the
    pipeline is replaced.

    Parameters:
    ------------
    - persons_merged: table object exposing to_frame(); source of the choosers.
    - persons: table object exposing to_frame(); receives the result column.
    - trace_hh_id: when truthy, the persons table is written to the trace output.
    - households, skim_dict, skim_stack, chunk_size, locutor: not used here.

    Settings (telework_option.yaml):
    ------------
    - annotation_file: CSV with Target / Expression columns (see annotate).
    - rates_file: CSV with the category columns and a 'rate' column.

    Returns:
    ---------
    None. The simulation result is written to the "persons" pipeline table.
    """

    trace_label = 'telework_option'

    # Read files
    model_settings = config.read_model_settings('telework_option.yaml')
    annotate_path = config.config_file_path(model_settings['annotation_file'])
    rates_path = config.config_file_path(model_settings['rates_file'])

    telework_option_anotate = pd.read_csv(annotate_path, comment='#')
    telework_option_rates = pd.read_csv(rates_path)

    # Choosers are full- or part-time workers only (ptype 1 or 2).
    # copy(): the annotated columns are written to this frame, not to a slice.
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.ptype.isin([1, 2])].copy()
    choosers = annotate(choosers, telework_option_anotate)

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    # Preprocessing: add rates to choosers.
    # Category columns come from the rates table, in its own column order: rows
    # are matched position by position, so both sides must use the same order.
    rate_columns = [col for col in telework_option_rates.columns if col != 'rate']
    category, category_index = np.unique(
        choosers[rate_columns].to_numpy(), axis=0, return_inverse=True)
    choosers['telework_option_category'] = np.ravel(category_index)  # keep the inverse 1-d
    dict_cat_rate = create_dict_rate(telework_option_rates, category)  # category index -> rate

    # map, not replace: replace would leave an unmatched category index in place
    # and it would then be used as if it were a rate.
    option_rate = choosers['telework_option_category'].map(dict_cat_rate)
    n_unmatched = int(option_rate.isna().sum())
    if n_unmatched:
        logger.warning("%s: %d choosers have a category without a rate; using rate 0",
                       trace_label, n_unmatched)
    choosers['telework_option_rate'] = option_rate.fillna(0.0)

    # Simulation: alternative 0 = no option, alternative 1 = telework is an option.
    probs = pd.DataFrame({
        '0': 1 - choosers['telework_option_rate'],
        'telework_option_rate': choosers['telework_option_rate'],
    })
    choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # Simulation result: who has telework as an option (non-choosers get False).
    persons = persons.to_frame()
    persons['telework_option'] = choices.reindex(persons.index).fillna(0).astype(bool)
    # NOTE(review): this recodes everyone who merely has the option to ptype 4;
    # confirm this is intended here and not only in the daily telework step.
    persons['ptype'] = persons['ptype'].mask(persons['telework_option'], 4)

    pipeline.replace_table("persons", persons)
    tracing.print_summary('telework_option', persons.telework_option, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)

## Telework Frequency

In [66]:
# One-row table of probabilities, one column per telework-frequency alternative.
telework_frequency_rates = pd.DataFrame({'0_days':[0.476],'1_days':[0.395],'2-3_days':[0.103],'4+_days':[0.026]})
# index=False: otherwise the row index is written as an extra unnamed column, which
# reads back as an additional alternative and shifts the choice positions.
telework_frequency_rates.to_csv('telework_frequency_rates.csv', index=False)

In [67]:
# Daily telework rate for each frequency alternative (one row per alternative).
telework_daily_rates = pd.DataFrame({'rate_category':['0_days', '1_days','2-3_days','4+_days'],'rate':[0.0,0.2, 0.5, 0.8]})
# index=False keeps the file to the two real columns (no extra unnamed index column).
telework_daily_rates.to_csv('telework_daily_rates.csv', index=False)

In [48]:
# {row label of telework_daily_rates: rate}
prob_telecommute = telework_daily_rates.loc[:, 'rate'].to_dict()

In [54]:
# Random True/False telework_option flag per merged person, then keep the True rows
# as choosers. The generator is seeded so the chooser set is the same on every re-run.
persons_merge['telework_option'] = np.random.default_rng(0).integers(0, 2, size=len(persons_merge)).astype(bool)
choosers = persons_merge[persons_merge['telework_option']]

In [55]:
# One identical probability row per chooser, indexed like the choosers.
# Built with a single repeat instead of concatenating len(choosers) frames,
# which also works when there are no choosers.
probs = pd.DataFrame(
    np.repeat(telework_frequency_rates.to_numpy(), len(choosers), axis=0),
    index=choosers.index,
    columns=telework_frequency_rates.columns,
)

In [56]:
# Simulation Telework Frequency
# Every chooser gets the same row of frequency probabilities. A single repeat
# replaces concatenating len(choosers) frames and also handles zero choosers.
probs = pd.DataFrame(
    np.repeat(telework_frequency_rates.to_numpy(), len(choosers), axis=0),
    index=choosers.index,
    columns=telework_frequency_rates.columns,
)
choices, rands = logit.make_choices(probs, trace_label='hi', )
choosers['telework_frequency'] = choices

In [58]:

# Simulation Telework Today
# Turn the chosen frequency into today's telework probability, then draw between
# column '0' (no telework) and that probability.
choosers['prob_telework_today'] = choosers['telework_frequency'].replace(prob_telecommute)
probs = pd.DataFrame({
    '0': 1 - choosers['prob_telework_today'],
    'prob_telework_today': choosers['prob_telework_today'],
})
choices, rands = logit.make_choices(probs, trace_label='hi')


In [62]:
# Boolean telework flag; persons that were not choosers get False.
persons['telework'] = choices.reindex(persons.index, fill_value=0).astype(bool)

In [64]:
# Number of persons flagged as teleworking (True counts as 1).
persons['telework'].sum()

8

In [65]:
# Total number of persons in the table.
len(persons)

100

In [None]:
def telework(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):

    """
    Rate-based daily telework model.

    For persons whose telework_option flag is True, a telework frequency is
    drawn from the frequency rates, the frequency is turned into a daily
    telework rate, and a second draw decides whether the person teleworks.
    The result is stored in the boolean column persons['telework'], and the
    persons table in the pipeline is replaced.

    Parameters:
    ------------
    - persons_merged: table object exposing to_frame(); must contain a boolean
        'telework_option' column.
    - persons: table object exposing to_frame(); receives the result column.
    - trace_hh_id: when truthy, the persons table is written to the trace output.
    - households, skim_dict, skim_stack, chunk_size, locutor: not used here.

    Settings (telework.yaml):
    ------------
    - frequency_rates: CSV whose rows hold the probability of each frequency
        alternative, one column per alternative.
    - daily_rates: CSV with a 'rate' column. Row i is used as the rate for the
        i-th frequency alternative, so the row order must match the column order
        of the frequency file.

    Returns:
    ---------
    None. The simulation result is written to the "persons" pipeline table.
    """

    trace_label = 'telework'

    # Read files
    model_settings = config.read_model_settings('telework.yaml')
    frequency_rates_path = config.config_file_path(model_settings['frequency_rates'])
    day_rates_path = config.config_file_path(model_settings['daily_rates'])

    telework_frequency_rates = pd.read_csv(frequency_rates_path, comment='#')
    telework_daily_rates = pd.read_csv(day_rates_path, comment='#')

    # Choosers: only those who have telework as an option.
    # copy(): result columns are written to this frame, not to a slice.
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers['telework_option']].copy()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    # Simulation Telework Frequency: the same probability row for every chooser.
    # A single repeat also works when there are no choosers.
    frequency_probs = pd.DataFrame(
        np.repeat(telework_frequency_rates.to_numpy(), len(choosers), axis=0),
        index=choosers.index,
        columns=telework_frequency_rates.columns,
    )
    choices, rands = logit.make_choices(frequency_probs, trace_label='telework_frequencies')
    choosers['telework_frequency'] = choices

    # Simulation Telework daily
    prob_telecommute = telework_daily_rates['rate'].to_dict()  # row label -> rate
    # map, not replace: replace would leave an unmatched frequency index in place
    # and it would then be used as if it were a rate.
    daily_rate = choosers['telework_frequency'].map(prob_telecommute)
    n_unmatched = int(daily_rate.isna().sum())
    if n_unmatched:
        logger.warning("%s: %d choosers have a frequency without a daily rate; using rate 0",
                       trace_label, n_unmatched)
    choosers['telework_rate'] = daily_rate.fillna(0.0)

    telework_probs = pd.DataFrame({
        '0': 1 - choosers['telework_rate'],
        'telework_rate': choosers['telework_rate'],
    })
    choices, rands = logit.make_choices(telework_probs, trace_label='telework_daily')

    # Simulation result: who teleworks (non-choosers get False); teleworkers are
    # recoded to ptype 4.
    persons = persons.to_frame()
    persons['telework'] = choices.reindex(persons.index).fillna(0).astype(bool)
    persons['ptype'] = persons['ptype'].mask(persons['telework'], 4)

    pipeline.replace_table("persons", persons)
    tracing.print_summary('telework', persons.telework, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)