From 824b457f98f2ed97e3f8a7871283605dc5cc23a6 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 29 Jul 2014 15:19:47 -0700 Subject: [PATCH 1/4] Add ability to get local columns from wrapped functions The .local_columns attribute on all wrapped tables should now work. On wrapped functions (both tables and sources), it will evaluate the function, store frame metadata, and return the frame columns. --- urbansim/sim/simulation.py | 48 ++++++++++++++++++++++----- urbansim/sim/tests/test_simulation.py | 22 ++++++++++++ 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/urbansim/sim/simulation.py b/urbansim/sim/simulation.py index 7a7cf1ab..1a2d0622 100644 --- a/urbansim/sim/simulation.py +++ b/urbansim/sim/simulation.py @@ -54,7 +54,15 @@ def columns(self): Columns in this table. """ - return list(self._frame.columns) + _list_columns_for_table(self.name) + return self.local_columns + _list_columns_for_table(self.name) + + @property + def local_columns(self): + """ + Columns that are part of the wrapped DataFrame. + + """ + return list(self._frame.columns) @property def index(self): @@ -159,11 +167,25 @@ def __init__(self, name, func): @property def columns(self): """ - Columns in this table. (May often be out of date.) + Columns in this table. (May contain only computed columns + if the wrapped function has not been called yet.) """ return self._columns + _list_columns_for_table(self.name) + @property + def local_columns(self): + """ + Only the columns contained in the DataFrame returned by the + wrapped function. (No registered columns included.) + + """ + if self._columns: + return self._columns + else: + self._call_func() + return self._columns + @property def index(self): """ @@ -173,6 +195,19 @@ def index(self): """ return self._index + def _call_func(self): + """ + Call the wrapped function and return the result. Also updates + attributes like columns, index, and length. 
+ + """ + kwargs = _collect_injectables(self._arg_list) + frame = self._func(**kwargs) + self._columns = list(frame.columns) + self._index = frame.index + self._len = len(frame) + return frame + def to_frame(self, columns=None): """ Make a DataFrame with the given columns. @@ -188,11 +223,7 @@ def to_frame(self, columns=None): frame : pandas.DataFrame """ - kwargs = _collect_injectables(self._arg_list) - frame = self._func(**kwargs) - self._columns = list(frame.columns) - self._index = frame.index - self._len = len(frame) + frame = self._call_func() return _DataFrameWrapper(self.name, frame).to_frame(columns) def get_column(self, column_name): @@ -249,8 +280,7 @@ def to_frame(self, columns=None): frame : pandas.DataFrame """ - kwargs = _collect_injectables(self._arg_list) - frame = self._func(**kwargs) + frame = self._call_func() add_table(self.name, frame) return _DataFrameWrapper(self.name, frame).to_frame(columns) diff --git a/urbansim/sim/tests/test_simulation.py b/urbansim/sim/tests/test_simulation.py index 388183a5..a3ec993e 100644 --- a/urbansim/sim/tests/test_simulation.py +++ b/urbansim/sim/tests/test_simulation.py @@ -34,6 +34,7 @@ def test_func(test_frame): table = sim.get_table('test_frame') assert table.columns == ['a', 'b'] + assert table.local_columns == ['a', 'b'] assert len(table) == 3 pdt.assert_index_equal(table.index, df.index) pdt.assert_series_equal(table.get_column('a'), df.a) @@ -289,9 +290,30 @@ def source(): test_df = table.to_frame() pdt.assert_frame_equal(test_df, df) + assert table.columns == list(df.columns) + assert len(table) == len(df) + pdt.assert_index_equal(table.index, df.index) table = sim.get_table('source') assert isinstance(table, sim._DataFrameWrapper) test_df = table.to_frame() pdt.assert_frame_equal(test_df, df) + + +def test_table_func_local_cols(clear_sim, df): + @sim.table('table') + def table(): + return df + sim.add_column('table', 'new', pd.Series(['a', 'b', 'c'], index=df.index)) + + assert 
sim.get_table('table').local_columns == ['a', 'b'] + + +def test_table_source_local_cols(clear_sim, df): + @sim.table_source('source') + def source(): + return df + sim.add_column('source', 'new', pd.Series(['a', 'b', 'c'], index=df.index)) + + assert sim.get_table('source').local_columns == ['a', 'b'] From e204569e37df06565696108e921004296fe7dba9 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 29 Jul 2014 15:54:16 -0700 Subject: [PATCH 2/4] Add .convert method to table source wrapper The .convert method evaluates the wrapped function, registers the DataFrame under the same name, and returns the new wrapped DataFrame. --- urbansim/sim/simulation.py | 14 +++++++++++--- urbansim/sim/tests/test_simulation.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/urbansim/sim/simulation.py b/urbansim/sim/simulation.py index 1a2d0622..7de99509 100644 --- a/urbansim/sim/simulation.py +++ b/urbansim/sim/simulation.py @@ -263,6 +263,16 @@ class _TableSourceWrapper(_TableFuncWrapper): func : callable """ + def convert(self): + """ + Evaluate the wrapped function, store the returned DataFrame as a + table, and return the new _DataFrameWrapper instance created. + + """ + frame = self._call_func() + add_table(self.name, frame) + return get_table(self.name) + def to_frame(self, columns=None): """ Make a DataFrame with the given columns. 
The first time this @@ -280,9 +290,7 @@ def to_frame(self, columns=None): frame : pandas.DataFrame """ - frame = self._call_func() - add_table(self.name, frame) - return _DataFrameWrapper(self.name, frame).to_frame(columns) + return self.convert().to_frame(columns) class _ColumnFuncWrapper(object): diff --git a/urbansim/sim/tests/test_simulation.py b/urbansim/sim/tests/test_simulation.py index a3ec993e..4b285cda 100644 --- a/urbansim/sim/tests/test_simulation.py +++ b/urbansim/sim/tests/test_simulation.py @@ -301,6 +301,22 @@ def source(): pdt.assert_frame_equal(test_df, df) +def test_table_source_convert(clear_sim, df): + @sim.table_source('source') + def source(): + return df + + table = sim.get_table('source') + assert isinstance(table, sim._TableSourceWrapper) + + table = table.convert() + assert isinstance(table, sim._DataFrameWrapper) + pdt.assert_frame_equal(table.to_frame(), df) + + table2 = sim.get_table('source') + assert table2 is table + + def test_table_func_local_cols(clear_sim, df): @sim.table('table') def table(): From 42facd58cc2d33bf0eeaf05422d3cfae22615bde Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 29 Jul 2014 16:08:27 -0700 Subject: [PATCH 3/4] add_table and add_table_source return their wrapped results --- urbansim/sim/simulation.py | 17 ++++++++++++++--- urbansim/sim/tests/test_simulation.py | 3 ++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/urbansim/sim/simulation.py b/urbansim/sim/simulation.py index 7de99509..2af478be 100644 --- a/urbansim/sim/simulation.py +++ b/urbansim/sim/simulation.py @@ -270,8 +270,7 @@ def convert(self): """ frame = self._call_func() - add_table(self.name, frame) - return get_table(self.name) + return add_table(self.name, frame) def to_frame(self, columns=None): """ @@ -429,6 +428,10 @@ def add_table(table_name, table): names will be matched to known tables, which will be injected when this function is called. 
+ Returns + ------- + wrapped : `_DataFrameWrapper` or `_TableFuncWrapper` + """ if isinstance(table, pd.DataFrame): table = _DataFrameWrapper(table_name, table) @@ -439,6 +442,8 @@ def add_table(table_name, table): _TABLES[table_name] = table + return table + def table(table_name): """ @@ -468,8 +473,14 @@ def add_table_source(table_name, func): Function argument names will be matched to known injectables, which will be injected when this function is called. + Returns + ------- + wrapped : `_TableSourceWrapper` + """ - _TABLES[table_name] = _TableSourceWrapper(table_name, func) + wrapped = _TableSourceWrapper(table_name, func) + _TABLES[table_name] = wrapped + return wrapped def table_source(table_name): diff --git a/urbansim/sim/tests/test_simulation.py b/urbansim/sim/tests/test_simulation.py index 4b285cda..6806b1bd 100644 --- a/urbansim/sim/tests/test_simulation.py +++ b/urbansim/sim/tests/test_simulation.py @@ -24,7 +24,7 @@ def df(): def test_tables(df, clear_sim): - sim.add_table('test_frame', df) + wrapped_df = sim.add_table('test_frame', df) @sim.table('test_func') def test_func(test_frame): @@ -33,6 +33,7 @@ def test_func(test_frame): assert set(sim.list_tables()) == {'test_frame', 'test_func'} table = sim.get_table('test_frame') + assert table is wrapped_df assert table.columns == ['a', 'b'] assert table.local_columns == ['a', 'b'] assert len(table) == 3 From d47122a7895ac9811834e78e64d13ef7375c1702 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 29 Jul 2014 16:11:17 -0700 Subject: [PATCH 4/4] Upgrade table wrappers to public classes These are going to be commonly used by users so they shouldn't be hidden as private classes in the sim module. 
--- urbansim/sim/simulation.py | 24 ++++++++++++------------ urbansim/sim/tests/test_mergetables.py | 12 ++++++------ urbansim/sim/tests/test_simulation.py | 8 ++++---- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/urbansim/sim/simulation.py b/urbansim/sim/simulation.py index 2af478be..e3991df7 100644 --- a/urbansim/sim/simulation.py +++ b/urbansim/sim/simulation.py @@ -32,7 +32,7 @@ class SimulationError(Exception): pass -class _DataFrameWrapper(object): +class DataFrameWrapper(object): """ Wraps a DataFrame so it can provide certain columns and handle computed columns. @@ -144,7 +144,7 @@ def __len__(self): return len(self._frame) -class _TableFuncWrapper(object): +class TableFuncWrapper(object): """ Wrap a function that provides a DataFrame. @@ -224,7 +224,7 @@ def to_frame(self, columns=None): """ frame = self._call_func() - return _DataFrameWrapper(self.name, frame).to_frame(columns) + return DataFrameWrapper(self.name, frame).to_frame(columns) def get_column(self, column_name): """ @@ -251,7 +251,7 @@ def __len__(self): return self._len -class _TableSourceWrapper(_TableFuncWrapper): +class TableSourceWrapper(TableFuncWrapper): """ Wraps a function that returns a DataFrame. After the function is evaluated the returned DataFrame replaces the function in the @@ -266,7 +266,7 @@ class _TableSourceWrapper(_TableFuncWrapper): def convert(self): """ Evaluate the wrapped function, store the returned DataFrame as a - table, and return the new _DataFrameWrapper instance created. + table, and return the new DataFrameWrapper instance created. 
""" frame = self._call_func() @@ -430,13 +430,13 @@ def add_table(table_name, table): Returns ------- - wrapped : `_DataFrameWrapper` or `_TableFuncWrapper` + wrapped : `DataFrameWrapper` or `TableFuncWrapper` """ if isinstance(table, pd.DataFrame): - table = _DataFrameWrapper(table_name, table) + table = DataFrameWrapper(table_name, table) elif isinstance(table, Callable): - table = _TableFuncWrapper(table_name, table) + table = TableFuncWrapper(table_name, table) else: raise TypeError('table must be DataFrame or function.') @@ -475,10 +475,10 @@ def add_table_source(table_name, func): Returns ------- - wrapped : `_TableSourceWrapper` + wrapped : `TableSourceWrapper` """ - wrapped = _TableSourceWrapper(table_name, func) + wrapped = TableSourceWrapper(table_name, func) _TABLES[table_name] = wrapped return wrapped @@ -506,7 +506,7 @@ def get_table(table_name): Returns ------- - table : _DataFrameWrapper or _TableFuncWrapper + table : `DataFrameWrapper`, `TableFuncWrapper`, or `TableSourceWrapper` """ if table_name in _TABLES: @@ -804,7 +804,7 @@ def merge_tables(target, tables, columns=None): ---------- target : str Name of the table onto which tables will be merged. - tables : list of _DataFrameWrapper or _TableFuncWrapper + tables : list of `DataFrameWrapper` or `TableFuncWrapper` All of the tables to merge. Should include the target table. 
columns : list of str, optional If given, columns will be mapped to `tables` and only those columns diff --git a/urbansim/sim/tests/test_mergetables.py b/urbansim/sim/tests/test_mergetables.py index ffa05aad..846a2f8d 100644 --- a/urbansim/sim/tests/test_mergetables.py +++ b/urbansim/sim/tests/test_mergetables.py @@ -9,7 +9,7 @@ @pytest.fixture def dfa(): - return sim._DataFrameWrapper('a', pd.DataFrame( + return sim.DataFrameWrapper('a', pd.DataFrame( {'a1': [1, 2, 3], 'a2': [4, 5, 6], 'a3': [7, 8, 9]}, @@ -18,7 +18,7 @@ def dfa(): @pytest.fixture def dfz(): - return sim._DataFrameWrapper('z', pd.DataFrame( + return sim.DataFrameWrapper('z', pd.DataFrame( {'z1': [90, 91], 'z2': [92, 93], 'z3': [94, 95], @@ -29,7 +29,7 @@ def dfz(): @pytest.fixture def dfb(): - return sim._DataFrameWrapper('b', pd.DataFrame( + return sim.DataFrameWrapper('b', pd.DataFrame( {'b1': range(10, 15), 'b2': range(15, 20), 'a_id': ['ac', 'ac', 'ab', 'aa', 'ab'], @@ -39,7 +39,7 @@ def dfb(): @pytest.fixture def dfc(): - return sim._DataFrameWrapper('c', pd.DataFrame( + return sim.DataFrameWrapper('c', pd.DataFrame( {'c1': range(20, 30), 'c2': range(30, 40), 'b_id': ['ba', 'bd', 'bb', 'bc', 'bb', 'ba', 'bb', 'bc', 'bd', 'bb']}, @@ -48,14 +48,14 @@ def dfc(): @pytest.fixture def dfg(): - return sim._DataFrameWrapper('g', pd.DataFrame( + return sim.DataFrameWrapper('g', pd.DataFrame( {'g1': [1, 2, 3]}, index=['ga', 'gb', 'gc'])) @pytest.fixture def dfh(): - return sim._DataFrameWrapper('h', pd.DataFrame( + return sim.DataFrameWrapper('h', pd.DataFrame( {'h1': range(10, 15), 'g_id': ['ga', 'gb', 'gc', 'ga', 'gb']}, index=['ha', 'hb', 'hc', 'hd', 'he'])) diff --git a/urbansim/sim/tests/test_simulation.py b/urbansim/sim/tests/test_simulation.py index 6806b1bd..015bfc58 100644 --- a/urbansim/sim/tests/test_simulation.py +++ b/urbansim/sim/tests/test_simulation.py @@ -287,7 +287,7 @@ def source(): return df table = sim.get_table('source') - assert isinstance(table, sim._TableSourceWrapper) + assert 
isinstance(table, sim.TableSourceWrapper) test_df = table.to_frame() pdt.assert_frame_equal(test_df, df) @@ -296,7 +296,7 @@ def source(): pdt.assert_index_equal(table.index, df.index) table = sim.get_table('source') - assert isinstance(table, sim._DataFrameWrapper) + assert isinstance(table, sim.DataFrameWrapper) test_df = table.to_frame() pdt.assert_frame_equal(test_df, df) @@ -308,10 +308,10 @@ def source(): return df table = sim.get_table('source') - assert isinstance(table, sim._TableSourceWrapper) + assert isinstance(table, sim.TableSourceWrapper) table = table.convert() - assert isinstance(table, sim._DataFrameWrapper) + assert isinstance(table, sim.DataFrameWrapper) pdt.assert_frame_equal(table.to_frame(), df) table2 = sim.get_table('source')