Skip to content

Commit

Permalink
Merge pull request #95 from UDST/save-data
Browse files Browse the repository at this point in the history
[0.2.dev2] Template for saving data
  • Loading branch information
smmaurer authored Mar 5, 2019
2 parents 1c6c649 + da826b3 commit 61e6f7c
Show file tree
Hide file tree
Showing 14 changed files with 512 additions and 110 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

## 0.2 (not yet released)

#### 0.2.dev2 (2019-03-04)

- adds template for saving data: `urbansim_templates.data.SaveTable()`
- renames `TableFromDisk()` to `urbansim_templates.data.LoadTable()`

#### 0.2.dev1 (2019-02-27)

- fixes a crash in small MNL simulation
Expand Down
21 changes: 14 additions & 7 deletions docs/source/data-io.rst → docs/source/data-templates.rst
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
Data I/O template APIs
======================
Data template APIs
==================

Data i/o templates let you set up automated model steps for loading data into Orca or saving outputs to disk.
Data templates help you set up model steps for loading data into `Orca <https://udst.github.io/orca>`__ or saving outputs to disk.

These templates follow the same principles as the statistical model steps. For example, to set up a data table, create an instance of the ``TableFromDisk`` class and set some properties: the table name, file type, path, and anything else that's needed.
These templates follow the same principles as the statistical model steps. For example, to set up a data table, create an instance of the ``LoadTable`` class and set some properties: the table name, file type, path, and anything else that's needed.

Registering this object with ModelManager saves it to disk as a YAML file and creates an Orca step with instructions to set up the table. "Running" the object/step registers the table with Orca, but doesn't read the data from disk yet — Orca loads data lazily as it's needed.

Data registration steps are run automatically when you initialize ModelManager.


Table from disk
---------------
Loading data
------------

.. autoclass:: urbansim_templates.io.TableFromDisk
.. autoclass:: urbansim_templates.data.LoadTable
:members:


Saving data
-----------

.. autoclass:: urbansim_templates.data.SaveTable
:members:
4 changes: 2 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ UrbanSim Templates provides building blocks for Orca-based simulation models. It

The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the `Orca <https://udst.github.io/orca>`__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions.

v0.2.dev1, released February 27, 2019
v0.2.dev2, released March 4, 2019


Contents
Expand All @@ -22,6 +22,6 @@ Contents
getting-started
modelmanager
model-steps
data-io
data-templates
utilities
development
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='urbansim_templates',
version='0.2.dev1',
version='0.2.dev2',
description='UrbanSim extension for managing model steps',
author='UrbanSim Inc.',
author_email='info@urbansim.com',
Expand Down
80 changes: 43 additions & 37 deletions tests/test_tables.py → tests/test_data_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import orca

from urbansim_templates import modelmanager
from urbansim_templates.io import TableFromDisk
from urbansim_templates.data import LoadTable
from urbansim_templates.utils import validate_template


Expand Down Expand Up @@ -45,18 +45,37 @@ def teardown():

def test_template_validity():
"""
Run the template through the standard validation check.
Run the templates through the standard validation check.
"""
assert validate_template(TableFromDisk)
assert validate_template(LoadTable)


def test_property_persistence(orca_session):
"""
Test persistence of properties across registration, saving, and reloading.
"""
pass
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv'
t.csv_index_cols = 'building_id'
t.extra_settings = {'make_data_awesome': True} # unfortunately not a valid setting
t.cache = False
t.cache_scope = 'iteration'
t.copy_col = False
t.name = 'buildings-csv'
t.tags = ['awesome', 'data']
t.autorun = False

d1 = t.to_dict()
modelmanager.register(t)
modelmanager.initialize()
d2 = modelmanager.get_step(t.name).to_dict()

assert d1 == d2
modelmanager.remove_step(t.name)


######################################
Expand All @@ -75,7 +94,7 @@ def test_validation_index_unique(orca_session):
d = {'id': [1,2,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index('id'))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
t.validate()


Expand All @@ -87,7 +106,7 @@ def test_validation_index_not_unique(orca_session):
d = {'id': [1,1,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index('id'))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
try:
t.validate()
except ValueError:
Expand All @@ -104,7 +123,7 @@ def test_validation_multiindex_unique(orca_session):
d = {'id': [1,1,1], 'sub_id': [1,2,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id']))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
t.validate()


Expand All @@ -117,7 +136,7 @@ def test_validation_multiindex_not_unique(orca_session):
d = {'id': [1,1,1], 'sub_id': [2,2,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d).set_index(['id', 'sub_id']))

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
try:
t.validate()
except ValueError:
Expand All @@ -134,7 +153,7 @@ def test_validation_unnamed_index(orca_session):
d = {'id': [1,1,3], 'value': [4,4,4]}
orca.add_table('tab', pd.DataFrame(d)) # generates auto index without a name

t = TableFromDisk(name='tab')
t = LoadTable(table='tab')
try:
t.validate()
except ValueError:
Expand All @@ -155,7 +174,7 @@ def test_validation_columns_vs_other_indexes(orca_session):
d = {'building_id': [1,2,3,4], 'value': [4,4,4,4]}
orca.add_table('buildings', pd.DataFrame(d).set_index('building_id'))

t = TableFromDisk(name='households')
t = LoadTable(table='households')
t.validate()


Expand All @@ -171,7 +190,7 @@ def test_validation_index_vs_other_columns(orca_session):
d = {'household_id': [1,2,3], 'building_id': [2,3,5]}
orca.add_table('households', pd.DataFrame(d).set_index('household_id'))

t = TableFromDisk(name='buildings')
t = LoadTable(table='buildings')
t.validate()


Expand All @@ -188,14 +207,11 @@ def test_validation_with_multiindexes(orca_session):
d = {'home_tract': [55,55,55], 'work_tract': [17,18,19], 'dist': [1,1,1]}
orca.add_table('distances', pd.DataFrame(d).set_index(['home_tract','work_tract']))

t = TableFromDisk(name='choice_table')
t = LoadTable(table='choice_table')
t.validate()


# test that parameters make it through a save
# test validation with stand-alone columns

# test loading an h5 file works
# test passing cache settings


Expand All @@ -208,8 +224,8 @@ def test_csv(orca_session, data):
Test loading data from a CSV file.
"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv'
t.csv_index_cols = 'building_id'
Expand All @@ -223,16 +239,16 @@ def test_csv(orca_session, data):
modelmanager.initialize()
assert 'buildings' in orca.list_tables()

modelmanager.remove_step('buildings')
modelmanager.remove_step(t.name)


def test_hdf(orca_session, data):
"""
Test loading data from an HDF file.
"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'hdf'
t.path = 'data/buildings.hdf'

Expand All @@ -245,16 +261,16 @@ def test_hdf(orca_session, data):
modelmanager.initialize()
assert 'buildings' in orca.list_tables()

modelmanager.remove_step('buildings')
modelmanager.remove_step(t.name)


def test_extra_settings(orca_session, data):
"""
Test loading data with extra settings, e.g. for compressed files.
"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv.gz'
t.csv_index_cols = 'building_id'
Expand All @@ -269,26 +285,16 @@ def test_extra_settings(orca_session, data):
modelmanager.initialize()
assert 'buildings' in orca.list_tables()

modelmanager.remove_step('buildings')


def test_windows_paths(orca_session, data):
"""
Test in Windows that a Windows-style path is properly normalized.
TO DO - implement
"""
pass
modelmanager.remove_step(t.name)


def test_without_autorun(orca_session, data):
"""
Confirm that disabling autorun works.
"""
t = TableFromDisk()
t.name = 'buildings'
t = LoadTable()
t.table = 'buildings'
t.source_type = 'csv'
t.path = 'data/buildings.csv'
t.csv_index_cols = 'building_id'
Expand All @@ -297,7 +303,7 @@ def test_without_autorun(orca_session, data):
modelmanager.register(t)
assert 'buildings' not in orca.list_tables()

modelmanager.remove_step('buildings')
modelmanager.remove_step(t.name)



Loading

0 comments on commit 61e6f7c

Please sign in to comment.