Merge pull request #76 from UDST/get_data_col_filter_belt_and_suspenders

quick fix to handle orca merge_tables bug where more columns are retu…
UDST · Dec 13, 2018 · 793c807 · 793c807
2 parents ec3ef21 + 84b7dcb
commit 793c807
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 6 deletions.
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name='urbansim_templates',
-    version='0.1.dev21',
+    version='0.1.dev22',
     description='UrbanSim extension for managing model steps',
     author='UrbanSim Inc.',
     author_email='info@urbansim.com',

diff --git a/urbansim_templates/__init__.py b/urbansim_templates/__init__.py
@@ -1 +1 @@
-version = __version__ = '0.1.dev21'
+version = __version__ = '0.1.dev22'
diff --git a/urbansim_templates/tests/test_utils.py b/urbansim_templates/tests/test_utils.py
@@ -63,7 +63,7 @@ def test_get_data(orca_session):
                         filters = ['age > 20', 'age < 50'],
                         extra_columns = 'zone_id')
 
-    assert(set(df.columns) == set(['tenure', 'pop', 'age', 'building_id', 'zone_id']))
+    assert(set(df.columns) == set(['tenure', 'pop', 'age', 'zone_id']))
     assert(len(df) == 2)
 
 
@@ -84,7 +84,7 @@ def test_get_data_bad_columns(orca_session):
     df = utils.get_data(tables = ['households', 'buildings'], 
                         model_expression = 'tenure ~ pop + potato')
 
-    assert(set(df.columns) == set(['tenure', 'pop', 'building_id']))
+    assert(set(df.columns) == set(['tenure', 'pop']))
 
 
 

diff --git a/urbansim_templates/utils.py b/urbansim_templates/utils.py
@@ -137,7 +137,8 @@ def get_data(tables, fallback_tables=None, filters=None, model_expression=None,
     Default behavior is for the output to include all columns. If a model_expression and/or
     extra_columns is provided, non-relevant columns will be dropped from the output.
     Relevant columns include any mentioned in the model expression, filters, or list of 
-    extras, plus join keys if the data is drawn from multiple tables.
+    extras. Join keys will *not* be included in the final output even if the data is drawn
+    from multiple tables, unless they appear in the model expression or filters as well.
     
     If a named column is not found in the source tables, it will just be skipped. This is 
     to support use cases where data is assembled separately for choosers and alternatives 
@@ -199,7 +200,11 @@ def get_data(tables, fallback_tables=None, filters=None, model_expression=None,
 
     else:
         df = orca.merge_tables(target=tables[0], tables=tables, columns=colnames)
-
+
+    if colnames is not None:
+        if len(df.columns) > len(colnames):
+            df = df[colnames]
+
     df = apply_filter_query(df, filters)
     return df