Skip to content

Commit

Permalink
Merge pull request #76 from UDST/get_data_col_filter_belt_and_suspenders
Browse files Browse the repository at this point in the history
quick fix to handle orca merge_tables bug where more columns are retu…
  • Loading branch information
mxndrwgrdnr committed Dec 13, 2018
2 parents ec3ef21 + 84b7dcb commit 793c807
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 6 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='urbansim_templates',
version='0.1.dev21',
version='0.1.dev22',
description='UrbanSim extension for managing model steps',
author='UrbanSim Inc.',
author_email='info@urbansim.com',
Expand Down
2 changes: 1 addition & 1 deletion urbansim_templates/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = __version__ = '0.1.dev21'
version = __version__ = '0.1.dev22'
4 changes: 2 additions & 2 deletions urbansim_templates/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_get_data(orca_session):
filters = ['age > 20', 'age < 50'],
extra_columns = 'zone_id')

assert(set(df.columns) == set(['tenure', 'pop', 'age', 'building_id', 'zone_id']))
assert(set(df.columns) == set(['tenure', 'pop', 'age', 'zone_id']))
assert(len(df) == 2)


Expand All @@ -84,7 +84,7 @@ def test_get_data_bad_columns(orca_session):
df = utils.get_data(tables = ['households', 'buildings'],
model_expression = 'tenure ~ pop + potato')

assert(set(df.columns) == set(['tenure', 'pop', 'building_id']))
assert(set(df.columns) == set(['tenure', 'pop']))



Expand Down
9 changes: 7 additions & 2 deletions urbansim_templates/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ def get_data(tables, fallback_tables=None, filters=None, model_expression=None,
Default behavior is for the output to include all columns. If a model_expression and/or
extra_columns is provided, non-relevant columns will be dropped from the output.
Relevant columns include any mentioned in the model expression, filters, or list of
extras, plus join keys if the data is drawn from multiple tables.
extras. Join keys will *not* be included in the final output even if the data is drawn
from multiple tables, unless they appear in the model expression or filters as well.
If a named column is not found in the source tables, it will just be skipped. This is
to support use cases where data is assembled separately for choosers and alternatives
Expand Down Expand Up @@ -199,7 +200,11 @@ def get_data(tables, fallback_tables=None, filters=None, model_expression=None,

else:
df = orca.merge_tables(target=tables[0], tables=tables, columns=colnames)


if colnames is not None:
if len(df.columns) > len(colnames):
df = df[colnames]

df = apply_filter_query(df, filters)
return df

Expand Down

0 comments on commit 793c807

Please sign in to comment.