Skip to content

Commit

Permalink
Merge branch 'master' into doc-updates-and-clean-up
Browse files Browse the repository at this point in the history
  • Loading branch information
bstabler committed Nov 6, 2016
2 parents 977f607 + 9000323 commit e1af27d
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 112 deletions.
102 changes: 48 additions & 54 deletions activitysim/cdap/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

logger = logging.getLogger(__name__)

DUMP = False
# FIXME - this allows us to turn some dev debug table dump code on and off - eventually remove?
# DUMP = False

_persons_index_ = 'PERID'
_hh_index_ = 'HHID'
Expand Down Expand Up @@ -145,16 +146,9 @@ def assign_cdap_rank(persons, trace_hh_id=None, trace_label=None):
p[_cdap_rank_] = rank
persons[_cdap_rank_] = p[_cdap_rank_] # assignment aligns on index values

# FIXME - as noted above, this is slow, the brute force code above is equivalent
# persons[_cdap_rank_] = persons\
# .sort_values(by=[_hh_id_, _cdap_rank_, _age_], ascending=[True, True, True])\
# .groupby(_hh_id_)[_hh_id_]\
# .rank(method='first', na_option='top')\
# .astype(int)

if DUMP:
tracing.trace_df(persons, '%s.DUMP.cdap_person_array' % trace_label,
transpose=False, slicer='NONE')
# if DUMP:
# tracing.trace_df(persons, '%s.DUMP.cdap_person_array' % trace_label,
# transpose=False, slicer='NONE')

if trace_hh_id:
tracing.trace_df(persons, '%s.cdap_rank' % trace_label)
Expand Down Expand Up @@ -193,9 +187,9 @@ def individual_utilities(
useful_columns = [_hh_id_, _ptype_, _cdap_rank_, _hh_size_]
indiv_utils[useful_columns] = persons[useful_columns]

if DUMP:
tracing.trace_df(indiv_utils, '%s.DUMP.indiv_utils' % trace_label,
transpose=False, slicer='NONE')
# if DUMP:
# tracing.trace_df(indiv_utils, '%s.DUMP.indiv_utils' % trace_label,
# transpose=False, slicer='NONE')

if trace_hh_id:
tracing.trace_df(individual_vars, '%s.individual_vars' % trace_label,
Expand Down Expand Up @@ -294,11 +288,11 @@ def build_cdap_spec(interaction_coefficients, hhsize,
spec: pandas.DataFrame
"""
if DUMP:
# dump the interaction_coefficients table because it has been preprocessed
tracing.trace_df(interaction_coefficients,
'%s.hhsize%d_interaction_coefficients' % (trace_label, hhsize),
transpose=False, slicer='NONE')
# if DUMP:
# # dump the interaction_coefficients table because it has been preprocessed
# tracing.trace_df(interaction_coefficients,
# '%s.hhsize%d_interaction_coefficients' % (trace_label, hhsize),
# transpose=False, slicer='NONE')

# cdap spec is same for all households of MAX_HHSIZE and greater
hhsize = min(hhsize, MAX_HHSIZE)
Expand Down Expand Up @@ -396,7 +390,7 @@ def build_cdap_spec(interaction_coefficients, hhsize,
# eval expression goes in the index
spec.set_index(expression_name, inplace=True)

if DUMP or trace_spec:
if trace_spec:
tracing.trace_df(spec, '%s.hhsize%d_spec' % (trace_label, hhsize),
transpose=False, slicer='NONE')

Expand All @@ -406,7 +400,7 @@ def build_cdap_spec(interaction_coefficients, hhsize,
spec[c] =\
spec[c].map(lambda x: d.get(x, x or 0.0)).fillna(0)

if DUMP or trace_spec:
if trace_spec:
tracing.trace_df(spec, '%s.hhsize%d_spec_patched' % (trace_label, hhsize),
transpose=False, slicer='NONE')

Expand Down Expand Up @@ -606,22 +600,22 @@ def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
# convert choice expressed as index into alternative name from util column label
choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

if DUMP:

if hhsize > 1:
tracing.trace_df(choosers, '%s.DUMP.hhsize%d_choosers' % (trace_label, hhsize),
transpose=False, slicer='NONE')
tracing.trace_df(vars, '%s.DUMP.hhsize%d_vars' % (trace_label, hhsize),
transpose=False, slicer='NONE')

tracing.trace_df(utils, '%s.DUMP.hhsize%d_utils' % (trace_label, hhsize),
transpose=False, slicer='NONE')

tracing.trace_df(probs, '%s.DUMP.hhsize%d_probs' % (trace_label, hhsize),
transpose=False, slicer='NONE')

tracing.trace_df(choices, '%s.DUMP.hhsize%d_activity_choices' % (trace_label, hhsize),
transpose=False, slicer='NONE')
# if DUMP:
#
# if hhsize > 1:
# tracing.trace_df(choosers, '%s.DUMP.hhsize%d_choosers' % (trace_label, hhsize),
# transpose=False, slicer='NONE')
# tracing.trace_df(vars, '%s.DUMP.hhsize%d_vars' % (trace_label, hhsize),
# transpose=False, slicer='NONE')
#
# tracing.trace_df(utils, '%s.DUMP.hhsize%d_utils' % (trace_label, hhsize),
# transpose=False, slicer='NONE')
#
# tracing.trace_df(probs, '%s.DUMP.hhsize%d_probs' % (trace_label, hhsize),
# transpose=False, slicer='NONE')
#
# tracing.trace_df(choices, '%s.DUMP.hhsize%d_activity_choices' % (trace_label, hhsize),
# transpose=False, slicer='NONE')

if trace_hh_id:

Expand Down Expand Up @@ -681,10 +675,10 @@ def unpack_cdap_indiv_activity_choices(persons, hh_choices,

cdap_indiv_activity_choices = indiv_activity['cdap_activity']

if DUMP:
tracing.trace_df(cdap_indiv_activity_choices,
'%s.DUMP.cdap_indiv_activity_choices' % trace_label,
transpose=False, slicer='NONE')
# if DUMP:
# tracing.trace_df(cdap_indiv_activity_choices,
# '%s.DUMP.cdap_indiv_activity_choices' % trace_label,
# transpose=False, slicer='NONE')

return cdap_indiv_activity_choices

Expand Down Expand Up @@ -743,14 +737,14 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
# convert choice from column index to activity name
choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)

if DUMP:
tracing.trace_df(proportions, '%s.DUMP.extra_proportions' % trace_label,
transpose=False, slicer='NONE')
tracing.trace_df(probs, '%s.DUMP.extra_probs' % trace_label,
transpose=False, slicer='NONE')
tracing.trace_df(choices, '%s.DUMP.extra_choices' % trace_label,
transpose=False,
slicer='NONE')
# if DUMP:
# tracing.trace_df(proportions, '%s.DUMP.extra_proportions' % trace_label,
# transpose=False, slicer='NONE')
# tracing.trace_df(probs, '%s.DUMP.extra_probs' % trace_label,
# transpose=False, slicer='NONE')
# tracing.trace_df(choices, '%s.DUMP.extra_choices' % trace_label,
# transpose=False,
# slicer='NONE')

if trace_hh_id:
tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
Expand Down Expand Up @@ -826,11 +820,11 @@ def _run_cdap(

cdap_results = persons[['cdap_rank', 'cdap_activity']]

if DUMP:
tracing.trace_df(hh_activity_choices, '%s.DUMP.hh_activity_choices' % trace_label,
transpose=False, slicer='NONE')
tracing.trace_df(cdap_results, '%s.DUMP.cdap_results' % trace_label,
transpose=False, slicer='NONE')
# if DUMP:
# tracing.trace_df(hh_activity_choices, '%s.DUMP.hh_activity_choices' % trace_label,
# transpose=False, slicer='NONE')
# tracing.trace_df(cdap_results, '%s.DUMP.cdap_results' % trace_label,
# transpose=False, slicer='NONE')

# return dataframe with two columns
return cdap_results
Expand Down
8 changes: 6 additions & 2 deletions activitysim/defaults/models/destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,17 @@ def destination_choice(set_random_seed,
# the segment is now available to switch between size terms
locals_d['segment'] = kludge_name

logger.info("Running segment '%s' of size %d" % (name, len(segment)))
# FIXME - no point in considering impossible alternatives
alternatives_segment = alternatives[alternatives[kludge_name] > 0]

logger.info("Running segment '%s' of %d tours %d alternatives" %
(name, len(segment), len(alternatives_segment)))

# name index so tracing knows how to slice
segment.index.name = 'tour_id'

choices = asim.interaction_simulate(segment,
alternatives,
alternatives_segment,
spec[[kludge_name]],
skims=skims,
locals_d=locals_d,
Expand Down
8 changes: 4 additions & 4 deletions activitysim/defaults/models/mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def tour_mode_choice_simulate(tours_merged,
trace_label=tracing.extend_trace_label(trace_label, tour_type),
trace_choice_name='tour_mode_choice')

tracing.print_summary('tour_mode_choice_simulate %s' % tour_type,
tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
choices, value_counts=True)

choices_list.append(choices)
Expand All @@ -230,7 +230,7 @@ def tour_mode_choice_simulate(tours_merged,

choices = pd.concat(choices_list)

tracing.print_summary('tour_mode_choice_simulate all tour type',
tracing.print_summary('tour_mode_choice_simulate all tour type choices',
choices, value_counts=True)

orca.add_column("tours", "mode", choices)
Expand Down Expand Up @@ -300,7 +300,7 @@ def trip_mode_choice_simulate(tours_merged,
trace_choice_name='trip_mode_choice')

# FIXME - no point in printing verbose value_counts now that we have tracing?
tracing.print_summary('trip_mode_choice_simulate %s' % tour_type,
tracing.print_summary('trip_mode_choice_simulate %s choices' % tour_type,
choices, value_counts=True)

choices_list.append(choices)
Expand All @@ -311,7 +311,7 @@ def trip_mode_choice_simulate(tours_merged,

choices = pd.concat(choices_list)

tracing.print_summary('trip_mode_choice_simulate all tour type',
tracing.print_summary('trip_mode_choice_simulate all tour type choices',
choices, value_counts=True)

# FIXME - is this a NOP if trips table doesn't exist
Expand Down
2 changes: 1 addition & 1 deletion activitysim/defaults/models/workplace_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def workplace_location_simulate(set_random_seed,

# for now I'm going to generate a workplace location for everyone -
# presumably it will not get used in downstream models for everyone -
# it should depend on CDAP and mandatory tour generation as to whethrer
# it should depend on CDAP and mandatory tour generation as to whether
# it gets used
choosers = persons_merged.to_frame()
alternatives = destination_size_terms.to_frame()
Expand Down
6 changes: 2 additions & 4 deletions activitysim/tests/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,17 +224,15 @@ def test_register_tours(capsys):
# household id should not be None
tracing.register_tours(df, trace_hh_id=None)

#
with pytest.raises(RuntimeError) as excinfo:
tracing.register_tours(df, trace_hh_id=5)
assert "register_tours called before register_persons" in str(excinfo.value)
tracing.register_tours(df, trace_hh_id=5)

out, err = capsys.readouterr()

# don't consume output
print out

assert "register_tours called with null trace_hh_id" in out
assert "no person ids registered for trace_hh_id 5" in out

close_handlers()

Expand Down
54 changes: 14 additions & 40 deletions activitysim/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@
logger = logging.getLogger(__name__)


def trace_logger():
return logger


def check_for_variability():
return orca.get_injectable('check_for_variability')

Expand Down Expand Up @@ -170,10 +166,10 @@ def print_summary(label, df, describe=False, value_counts=False):
logger.error("print_summary neither value_counts nor describe")

if value_counts:
print "\n%s choices value counts:\n%s\n" % (label, df.value_counts())
print "\n%s value counts:\n%s\n" % (label, df.value_counts())

if describe:
print "\n%s choices summary:\n%s\n" % (label, df.describe())
print "\n%s summary:\n%s\n" % (label, df.describe())


def register_households(df, trace_hh_id):
Expand Down Expand Up @@ -235,23 +231,23 @@ def register_tours(df, trace_hh_id):

# get list of persons in traced household (should already have been registered)
person_ids = orca.get_injectable("trace_person_ids")
trace_tour_ids = []

# since trace_hh_id is defined, we expect some trace_person_ids
if not person_ids:
logger.error("register_tours called before register_persons")
raise RuntimeError('register_tours called before register_persons')

traced_tours_df = slice_ids(df, person_ids, column='person_id')

trace_tour_ids = traced_tours_df.index.tolist()
if len(trace_tour_ids) == 0:
logger.warn("register_tours: person_ids %s not found." % person_ids)
if len(person_ids) == 0:
# trace_hh_id not in households table or register_persons was not called
logger.warn("no person ids registered for trace_hh_id %s" % trace_hh_id)
else:
# but if household_id is in households, then we expect some tours
traced_tours_df = slice_ids(df, person_ids, column='person_id')
trace_tour_ids = traced_tours_df.index.tolist()
if len(trace_tour_ids) == 0:
logger.info("register_tours: person_ids %s not found." % person_ids)
else:
logger.info("tracing tour_ids %s in %s tours" % (trace_tour_ids, len(df.index)))

orca.add_injectable("trace_tour_ids", trace_tour_ids)
logger.debug("register_tours injected trace_tour_ids %s" % trace_tour_ids)

logger.info("tracing tour_ids %s in %s tours" % (trace_tour_ids, len(df.index)))


def register_persons(df, trace_hh_id):
"""
Expand Down Expand Up @@ -471,7 +467,6 @@ def get_trace_target(df, slicer):
elif slicer == 'NONE':
target_ids = None
else:
logger.error("slice_canonically: bad slicer '%s'" % (slicer, ))
raise RuntimeError("slice_canonically: bad slicer '%s'" % (slicer, ))

if target_ids and not isinstance(target_ids, (list, tuple)):
Expand Down Expand Up @@ -575,27 +570,6 @@ def trace_df(df, label, slicer=None, columns=None,
column_labels=column_labels, transpose=transpose)


def trace_nan_values(df, label):
"""
Trace NaN values
Parameters
----------
df: pandas.DataFrame
data frame
label: str
tracer name
Returns
-------
Nothing
"""
df = slice_ids(df, orca.get_injectable('trace_person_ids'))
if np.isnan(df).any():
logger.warn("%s NaN values in %s" % (np.isnan(df).sum(), label))
write_df_csv(df, "%s.nan" % label)


def interaction_trace_rows(interaction_df, choosers):
"""
Trace model design for interaction_simulate
Expand Down
2 changes: 1 addition & 1 deletion sandbox/configs/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ households_sample_size: 1000
# zero prob for school location with unfiltered alternatives
# trace_hh_id: 862924

trace_hh_id: 238516
trace_hh_id: 2694596234

# trace origin, destination in accessibility calculation
# trace_od: [5, 11]
Expand Down
11 changes: 5 additions & 6 deletions sandbox/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def print_table_schema(table_names):


def log_memory_info(message):
tracing.trace_logger().debug("%s %s" % (message, asim.memory_info()))
logger.debug("%s %s" % (message, asim.memory_info()))


def set_random_seed():
Expand All @@ -103,8 +103,7 @@ def run_model(model_name):
orca.add_injectable("output_dir", 'output')
tracing.config_logger(os.path.join('configs', 'logging.yaml'))

logger = tracing.trace_logger()

logger = logging.getLogger('activitysim')

# pandas display options
pd.options.display.max_columns = 500
Expand All @@ -124,10 +123,10 @@ def run_model(model_name):
# hh_chunk_size = 50000)

inject_settings(config='example',
data='full',
households_sample_size=300000,
data='example',
households_sample_size=300,
preload_3d_skims=True,
chunk_size = 100000,
chunk_size = 0,
hh_chunk_size=0)

print_settings()
Expand Down

0 comments on commit e1af27d

Please sign in to comment.