Merge branch 'master' into doc-updates-and-clean-up

ActivitySim · Nov 6, 2016 · e1af27d · e1af27d
2 parents 977f607 + 9000323
commit e1af27d
Show file tree

Hide file tree

Showing 8 changed files with 81 additions and 112 deletions.
diff --git a/activitysim/cdap/cdap.py b/activitysim/cdap/cdap.py
@@ -17,7 +17,8 @@
 
 logger = logging.getLogger(__name__)
 
-DUMP = False
+# FIXME - this allows us to turn some dev debug table dump code on and off - eventually remove?
+# DUMP = False
 
 _persons_index_ = 'PERID'
 _hh_index_ = 'HHID'
@@ -145,16 +146,9 @@ def assign_cdap_rank(persons, trace_hh_id=None, trace_label=None):
     p[_cdap_rank_] = rank
     persons[_cdap_rank_] = p[_cdap_rank_]  # assignment aligns on index values
 
-    # FIXME - as noted above, this is slow, the brute force code above is equivalent
-    # persons[_cdap_rank_] = persons\
-    #     .sort_values(by=[_hh_id_, _cdap_rank_, _age_], ascending=[True, True, True])\
-    #     .groupby(_hh_id_)[_hh_id_]\
-    #     .rank(method='first', na_option='top')\
-    #     .astype(int)
-
-    if DUMP:
-        tracing.trace_df(persons, '%s.DUMP.cdap_person_array' % trace_label,
-                         transpose=False, slicer='NONE')
+    # if DUMP:
+    #     tracing.trace_df(persons, '%s.DUMP.cdap_person_array' % trace_label,
+    #                      transpose=False, slicer='NONE')
 
     if trace_hh_id:
         tracing.trace_df(persons, '%s.cdap_rank' % trace_label)
@@ -193,9 +187,9 @@ def individual_utilities(
     useful_columns = [_hh_id_, _ptype_, _cdap_rank_, _hh_size_]
     indiv_utils[useful_columns] = persons[useful_columns]
 
-    if DUMP:
-        tracing.trace_df(indiv_utils, '%s.DUMP.indiv_utils' % trace_label,
-                         transpose=False, slicer='NONE')
+    # if DUMP:
+    #     tracing.trace_df(indiv_utils, '%s.DUMP.indiv_utils' % trace_label,
+    #                      transpose=False, slicer='NONE')
 
     if trace_hh_id:
         tracing.trace_df(individual_vars, '%s.individual_vars' % trace_label,
@@ -294,11 +288,11 @@ def build_cdap_spec(interaction_coefficients, hhsize,
     spec: pandas.DataFrame
 
     """
-    if DUMP:
-        # dump the interaction_coefficients table because it has been preprocessed
-        tracing.trace_df(interaction_coefficients,
-                         '%s.hhsize%d_interaction_coefficients' % (trace_label, hhsize),
-                         transpose=False, slicer='NONE')
+    # if DUMP:
+    #     # dump the interaction_coefficients table because it has been preprocessed
+    #     tracing.trace_df(interaction_coefficients,
+    #                      '%s.hhsize%d_interaction_coefficients' % (trace_label, hhsize),
+    #                      transpose=False, slicer='NONE')
 
     # cdap spec is same for all households of MAX_HHSIZE and greater
     hhsize = min(hhsize, MAX_HHSIZE)
@@ -396,7 +390,7 @@ def build_cdap_spec(interaction_coefficients, hhsize,
     # eval expression goes in the index
     spec.set_index(expression_name, inplace=True)
 
-    if DUMP or trace_spec:
+    if trace_spec:
         tracing.trace_df(spec, '%s.hhsize%d_spec' % (trace_label, hhsize),
                          transpose=False, slicer='NONE')
 
@@ -406,7 +400,7 @@ def build_cdap_spec(interaction_coefficients, hhsize,
         spec[c] =\
             spec[c].map(lambda x: d.get(x, x or 0.0)).fillna(0)
 
-    if DUMP or trace_spec:
+    if trace_spec:
         tracing.trace_df(spec, '%s.hhsize%d_spec_patched' % (trace_label, hhsize),
                          transpose=False, slicer='NONE')
 
@@ -606,22 +600,22 @@ def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
     # convert choice expressed as index into alternative name from util column label
     choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)
 
-    if DUMP:
-
-        if hhsize > 1:
-            tracing.trace_df(choosers, '%s.DUMP.hhsize%d_choosers' % (trace_label, hhsize),
-                             transpose=False, slicer='NONE')
-            tracing.trace_df(vars, '%s.DUMP.hhsize%d_vars' % (trace_label, hhsize),
-                             transpose=False, slicer='NONE')
-
-        tracing.trace_df(utils, '%s.DUMP.hhsize%d_utils' % (trace_label, hhsize),
-                         transpose=False, slicer='NONE')
-
-        tracing.trace_df(probs, '%s.DUMP.hhsize%d_probs' % (trace_label, hhsize),
-                         transpose=False, slicer='NONE')
-
-        tracing.trace_df(choices, '%s.DUMP.hhsize%d_activity_choices' % (trace_label, hhsize),
-                         transpose=False, slicer='NONE')
+    # if DUMP:
+    #
+    #     if hhsize > 1:
+    #         tracing.trace_df(choosers, '%s.DUMP.hhsize%d_choosers' % (trace_label, hhsize),
+    #                          transpose=False, slicer='NONE')
+    #         tracing.trace_df(vars, '%s.DUMP.hhsize%d_vars' % (trace_label, hhsize),
+    #                          transpose=False, slicer='NONE')
+    #
+    #     tracing.trace_df(utils, '%s.DUMP.hhsize%d_utils' % (trace_label, hhsize),
+    #                      transpose=False, slicer='NONE')
+    #
+    #     tracing.trace_df(probs, '%s.DUMP.hhsize%d_probs' % (trace_label, hhsize),
+    #                      transpose=False, slicer='NONE')
+    #
+    #     tracing.trace_df(choices, '%s.DUMP.hhsize%d_activity_choices' % (trace_label, hhsize),
+    #                      transpose=False, slicer='NONE')
 
     if trace_hh_id:
 
@@ -681,10 +675,10 @@ def unpack_cdap_indiv_activity_choices(persons, hh_choices,
 
     cdap_indiv_activity_choices = indiv_activity['cdap_activity']
 
-    if DUMP:
-        tracing.trace_df(cdap_indiv_activity_choices,
-                         '%s.DUMP.cdap_indiv_activity_choices' % trace_label,
-                         transpose=False, slicer='NONE')
+    # if DUMP:
+    #     tracing.trace_df(cdap_indiv_activity_choices,
+    #                      '%s.DUMP.cdap_indiv_activity_choices' % trace_label,
+    #                      transpose=False, slicer='NONE')
 
     return cdap_indiv_activity_choices
 
@@ -743,14 +737,14 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
     # convert choice from column index to activity name
     choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)
 
-    if DUMP:
-        tracing.trace_df(proportions, '%s.DUMP.extra_proportions' % trace_label,
-                         transpose=False, slicer='NONE')
-        tracing.trace_df(probs, '%s.DUMP.extra_probs' % trace_label,
-                         transpose=False, slicer='NONE')
-        tracing.trace_df(choices, '%s.DUMP.extra_choices' % trace_label,
-                         transpose=False,
-                         slicer='NONE')
+    # if DUMP:
+    #     tracing.trace_df(proportions, '%s.DUMP.extra_proportions' % trace_label,
+    #                      transpose=False, slicer='NONE')
+    #     tracing.trace_df(probs, '%s.DUMP.extra_probs' % trace_label,
+    #                      transpose=False, slicer='NONE')
+    #     tracing.trace_df(choices, '%s.DUMP.extra_choices' % trace_label,
+    #                      transpose=False,
+    #                      slicer='NONE')
 
     if trace_hh_id:
         tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
@@ -826,11 +820,11 @@ def _run_cdap(
 
     cdap_results = persons[['cdap_rank', 'cdap_activity']]
 
-    if DUMP:
-        tracing.trace_df(hh_activity_choices, '%s.DUMP.hh_activity_choices' % trace_label,
-                         transpose=False, slicer='NONE')
-        tracing.trace_df(cdap_results, '%s.DUMP.cdap_results' % trace_label,
-                         transpose=False, slicer='NONE')
+    # if DUMP:
+    #     tracing.trace_df(hh_activity_choices, '%s.DUMP.hh_activity_choices' % trace_label,
+    #                      transpose=False, slicer='NONE')
+    #     tracing.trace_df(cdap_results, '%s.DUMP.cdap_results' % trace_label,
+    #                      transpose=False, slicer='NONE')
 
     # return dataframe with two columns
     return cdap_results

diff --git a/activitysim/defaults/models/destination.py b/activitysim/defaults/models/destination.py
@@ -80,13 +80,17 @@ def destination_choice(set_random_seed,
         # the segment is now available to switch between size terms
         locals_d['segment'] = kludge_name
 
-        logger.info("Running segment '%s' of size %d" % (name, len(segment)))
+        # FIXME - no point in considering impossible alternatives
+        alternatives_segment = alternatives[alternatives[kludge_name] > 0]
+
+        logger.info("Running segment '%s' of %d tours %d alternatives" %
+                    (name, len(segment), len(alternatives_segment)))
 
         # name index so tracing knows how to slice
         segment.index.name = 'tour_id'
 
         choices = asim.interaction_simulate(segment,
-                                            alternatives,
+                                            alternatives_segment,
                                             spec[[kludge_name]],
                                             skims=skims,
                                             locals_d=locals_d,

diff --git a/activitysim/defaults/models/mode.py b/activitysim/defaults/models/mode.py
@@ -219,7 +219,7 @@ def tour_mode_choice_simulate(tours_merged,
             trace_label=tracing.extend_trace_label(trace_label, tour_type),
             trace_choice_name='tour_mode_choice')
 
-        tracing.print_summary('tour_mode_choice_simulate %s' % tour_type,
+        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                               choices, value_counts=True)
 
         choices_list.append(choices)
@@ -230,7 +230,7 @@ def tour_mode_choice_simulate(tours_merged,
 
     choices = pd.concat(choices_list)
 
-    tracing.print_summary('tour_mode_choice_simulate all tour type',
+    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                           choices, value_counts=True)
 
     orca.add_column("tours", "mode", choices)
@@ -300,7 +300,7 @@ def trip_mode_choice_simulate(tours_merged,
             trace_choice_name='trip_mode_choice')
 
         # FIXME - no point in printing verbose value_counts now that we have tracing?
-        tracing.print_summary('trip_mode_choice_simulate %s' % tour_type,
+        tracing.print_summary('trip_mode_choice_simulate %s choices' % tour_type,
                               choices, value_counts=True)
 
         choices_list.append(choices)
@@ -311,7 +311,7 @@ def trip_mode_choice_simulate(tours_merged,
 
     choices = pd.concat(choices_list)
 
-    tracing.print_summary('trip_mode_choice_simulate all tour type',
+    tracing.print_summary('trip_mode_choice_simulate all tour type choices',
                           choices, value_counts=True)
 
     # FIXME - is this a NOP if trips table doesn't exist

diff --git a/activitysim/defaults/models/workplace_location.py b/activitysim/defaults/models/workplace_location.py
@@ -42,7 +42,7 @@ def workplace_location_simulate(set_random_seed,
 
     # for now I'm going to generate a workplace location for everyone -
     # presumably it will not get used in downstream models for everyone -
-    # it should depend on CDAP and mandatory tour generation as to whethrer
+    # it should depend on CDAP and mandatory tour generation as to whether
     # it gets used
     choosers = persons_merged.to_frame()
     alternatives = destination_size_terms.to_frame()

diff --git a/activitysim/tests/test_tracing.py b/activitysim/tests/test_tracing.py
@@ -224,17 +224,15 @@ def test_register_tours(capsys):
     # household id should not be None
     tracing.register_tours(df, trace_hh_id=None)
 
-    #
-    with pytest.raises(RuntimeError) as excinfo:
-        tracing.register_tours(df, trace_hh_id=5)
-    assert "register_tours called before register_persons" in str(excinfo.value)
+    tracing.register_tours(df, trace_hh_id=5)
 
     out, err = capsys.readouterr()
 
     # don't consume output
     print out
 
     assert "register_tours called with null trace_hh_id" in out
+    assert "no person ids registered for trace_hh_id 5" in out
 
     close_handlers()
 

diff --git a/activitysim/tracing.py b/activitysim/tracing.py
@@ -23,10 +23,6 @@
 logger = logging.getLogger(__name__)
 
 
-def trace_logger():
-    return logger
-
-
 def check_for_variability():
     return orca.get_injectable('check_for_variability')
 
@@ -170,10 +166,10 @@ def print_summary(label, df, describe=False, value_counts=False):
         logger.error("print_summary neither value_counts nor describe")
 
     if value_counts:
-        print "\n%s choices value counts:\n%s\n" % (label, df.value_counts())
+        print "\n%s value counts:\n%s\n" % (label, df.value_counts())
 
     if describe:
-        print "\n%s choices summary:\n%s\n" % (label, df.describe())
+        print "\n%s summary:\n%s\n" % (label, df.describe())
 
 
 def register_households(df, trace_hh_id):
@@ -235,23 +231,23 @@ def register_tours(df, trace_hh_id):
 
     # get list of persons in traced household (should already have been registered)
     person_ids = orca.get_injectable("trace_person_ids")
+    trace_tour_ids = []
 
-    # since trace_hh_id is defined, we expect some trace_person_ids
-    if not person_ids:
-        logger.error("register_tours called before register_persons")
-        raise RuntimeError('register_tours called before register_persons')
-
-    traced_tours_df = slice_ids(df, person_ids, column='person_id')
-
-    trace_tour_ids = traced_tours_df.index.tolist()
-    if len(trace_tour_ids) == 0:
-        logger.warn("register_tours: person_ids %s not found." % person_ids)
+    if len(person_ids) == 0:
+        # trace_hh_id not in households table or register_persons was not called
+        logger.warn("no person ids registered for trace_hh_id %s" % trace_hh_id)
+    else:
+        # but if household_id is in households, then we expect some tours
+        traced_tours_df = slice_ids(df, person_ids, column='person_id')
+        trace_tour_ids = traced_tours_df.index.tolist()
+        if len(trace_tour_ids) == 0:
+            logger.info("register_tours: person_ids %s not found." % person_ids)
+        else:
+            logger.info("tracing tour_ids %s in %s tours" % (trace_tour_ids, len(df.index)))
 
     orca.add_injectable("trace_tour_ids", trace_tour_ids)
     logger.debug("register_tours injected trace_tour_ids %s" % trace_tour_ids)
 
-    logger.info("tracing tour_ids %s in %s tours" % (trace_tour_ids, len(df.index)))
-
 
 def register_persons(df, trace_hh_id):
     """
@@ -471,7 +467,6 @@ def get_trace_target(df, slicer):
     elif slicer == 'NONE':
         target_ids = None
     else:
-        logger.error("slice_canonically: bad slicer '%s'" % (slicer, ))
         raise RuntimeError("slice_canonically: bad slicer '%s'" % (slicer, ))
 
     if target_ids and not isinstance(target_ids, (list, tuple)):
@@ -575,27 +570,6 @@ def trace_df(df, label, slicer=None, columns=None,
                   column_labels=column_labels, transpose=transpose)
 
 
-def trace_nan_values(df, label):
-    """
-    Trace NaN values
-
-    Parameters
-    ----------
-    df: pandas.DataFrame
-        data frame
-    label: str
-        tracer name
-
-    Returns
-    -------
-    Nothing
-    """
-    df = slice_ids(df, orca.get_injectable('trace_person_ids'))
-    if np.isnan(df).any():
-        logger.warn("%s NaN values in %s" % (np.isnan(df).sum(), label))
-        write_df_csv(df, "%s.nan" % label)
-
-
 def interaction_trace_rows(interaction_df, choosers):
     """
     Trace model design for interaction_simulate

diff --git a/sandbox/configs/settings.yaml b/sandbox/configs/settings.yaml
@@ -25,7 +25,7 @@ households_sample_size: 1000
 # zero prob for school location with unfiltered alternatives
 # trace_hh_id: 862924
 
-trace_hh_id:  238516
+trace_hh_id:  2694596234
 
 # trace origin, destination in accessibility calculation
 # trace_od: [5, 11]

diff --git a/sandbox/simulation.py b/sandbox/simulation.py
@@ -86,7 +86,7 @@ def print_table_schema(table_names):
 
 
 def log_memory_info(message):
-    tracing.trace_logger().debug("%s %s" % (message, asim.memory_info()))
+    logger.debug("%s %s" % (message, asim.memory_info()))
 
 
 def set_random_seed():
@@ -103,8 +103,7 @@ def run_model(model_name):
 orca.add_injectable("output_dir", 'output')
 tracing.config_logger(os.path.join('configs', 'logging.yaml'))
 
-logger = tracing.trace_logger()
-
+logger = logging.getLogger('activitysim')
 
 # pandas display options
 pd.options.display.max_columns = 500
@@ -124,10 +123,10 @@ def run_model(model_name):
 #                 hh_chunk_size = 50000)
 
 inject_settings(config='example',
-                data='full',
-                households_sample_size=300000,
+                data='example',
+                households_sample_size=300,
                 preload_3d_skims=True,
-                chunk_size = 100000,
+                chunk_size = 0,
                 hh_chunk_size=0)
 
 print_settings()