automatically load Crop info from disk if available
jcmgray committed Jun 19, 2019
1 parent d31363c commit 1eb2b1d
Showing 3 changed files with 62 additions and 10 deletions.
1 change: 1 addition & 0 deletions docs/changelog.rst
@@ -14,6 +14,7 @@ v0.3.2 (Unreleased)
- Add PBS support to :meth:`xyzpy.Crop.qsub_grow` for distributed crop growing
- Add :func:`xyzpy.save_merge_ds` for manually aggregating datasets to disk
- Add ``allow_incomplete=True`` option to :meth:`xyzpy.Crop.reap` for gathering data even if the crop is not fully grown (:issue:`7`)
- Make new :class:`~xyzpy.Crop` instances automatically load information from disk by default if they have already been prepared/sown (:issue:`7`)


.. _whats-new.0.3.1:
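A minimal sketch of the autoload behaviour described in the last changelog entry above, assuming a crop named 'fn' has already been sown (and possibly partially grown) under a hypothetical directory '/path/to/crops':

from xyzpy import Crop

# instantiating a Crop afresh by name and location should now pick up
# the previously sown settings from disk automatically
crop = Crop(name='fn', parent_dir='/path/to/crops')

# an incomplete crop can then be gathered directly; missing batches are
# represented with np.nan (the exact return type depends on whether a
# runner/harvester was saved alongside the crop)
results = crop.reap(allow_incomplete=True)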
36 changes: 35 additions & 1 deletion tests/test_gen/test_batch.py
@@ -6,7 +6,7 @@
import xarray as xr
from numpy.testing import assert_allclose

from xyzpy import combo_runner, combo_runner_to_ds
from xyzpy import combo_runner, combo_runner_to_ds, Runner, Harvester
from xyzpy.gen.batch import (
XYZError,
Crop,
@@ -288,3 +288,37 @@ def test_reap_to_ds_allow_incomplete(self, fn, var_names, var_dims):
ds = crop.reap_combos_to_ds(var_names=var_names, var_dims=var_dims,
allow_incomplete=True)
assert ds.identical(ds_exp)

def test_new_ds_crop_loads_info_incomplete(self):
def fn(a, b):
return xr.Dataset({'sum': a + b, 'diff': a - b})

with TemporaryDirectory() as tdir:
disk_ds = os.path.join(tdir, 'test.h5')

combos = dict(a=[1], b=[1, 2, 3])
runner = Runner(fn, var_names=None)
harvester = Harvester(runner, disk_ds)
crop = harvester.Crop(name='fn', batchsize=1, parent_dir=tdir)
crop.sow_combos(combos)
for i in range(1, 3):
crop.grow(i)

# try creating the crop afresh
c = Crop(name='fn', parent_dir=tdir)
# crop's harvester should be loaded from disk
assert c.harvester is not None
assert c.harvester is not harvester
ds = c.reap(allow_incomplete=True)
assert isinstance(ds, xr.Dataset)
assert ds['diff'].isnull().sum() == 1
assert harvester.full_ds['diff'].isnull().sum() == 1

# try creating crop from harvester
c = harvester.Crop('fn', parent_dir=tdir)
# crop's harvester should still be harvester
assert c.harvester is not None
assert c.harvester is harvester
ds = c.reap(allow_incomplete=True)
assert isinstance(ds, xr.Dataset)
assert ds['diff'].isnull().sum() == 1
35 changes: 26 additions & 9 deletions xyzpy/gen/batch.py
@@ -141,6 +141,15 @@ def calc_clean_up_default_res(crop, clean_up, allow_incomplete):
return clean_up, default_result


def check_ready_to_reap(crop, allow_incomplete, wait):
if not (allow_incomplete or wait or crop.is_ready_to_reap()):
raise XYZError("This crop is not ready to reap yet - results are "
"missing. You can reap only finished batches by setting"
" ``allow_incomplete=True``, but be aware this will "
"represent all missing batches with ``np.nan`` and thus"
" might effect data-types.")


class Crop(object):
"""Encapsulates all the details describing a single 'crop', that is,
its location, name, and batch size/number. Also allows tracking of
@@ -214,6 +223,10 @@ def __init__(self, *,
self.location, self.name, self.parent_dir = \
parse_crop_details(self._fn, self.name, self.parent_dir)

# try loading crop information if it exists
if autoload and self.is_prepared():
self._sync_info_from_disk()

# Save function so it can be automatically loaded with all deps?
if (fn is None) and (save_fn is True):
raise ValueError("Must specify a function for it to be saved!")
@@ -301,7 +314,7 @@ def load_info(self):
else:
return joblib.load(sfile)

def _sync_info_from_disk(self):
def _sync_info_from_disk(self, only_missing=True):
"""Load information about the saved cases.
"""
settings = self.load_info()
@@ -314,10 +327,18 @@ def _sync_info_from_disk(self):
runner_pkl = settings['runner']
runner = None if runner_pkl is None else pickle.loads(runner_pkl)

self._fn, self.runner, self.harvester = \
fn, runner, harvester = \
parse_fn_runner_harvester(None, runner, harvester)

self.load_function()
# if the crop already has a harvester/runner (e.g. it was instantiated
# from one), by default don't overwrite it with the version from disk
if (self.runner is None) or (not only_missing):
self.runner = runner
if (self.harvester is None) or (not only_missing):
self.harvester = harvester

if self.fn is None:
self.load_function()

def save_function_to_disk(self):
"""Save the base function to disk using cloudpickle
@@ -510,9 +531,7 @@ def reap_combos(self, wait=False, clean_up=None, allow_incomplete=False):
results : nested tuple
'N-dimensional' tuple containing the results.
"""
if not (allow_incomplete or wait or self.is_ready_to_reap()):
raise XYZError("This crop is not ready to reap "
"yet - results are missing.")
check_ready_to_reap(self, allow_incomplete, wait)

clean_up, default_result = calc_clean_up_default_res(
self, clean_up, allow_incomplete
@@ -579,9 +598,7 @@ def reap_combos_to_ds(self,
xarray.Dataset
Multidimensional labelled dataset containing all the results.
"""
if not (allow_incomplete or wait or self.is_ready_to_reap()):
raise XYZError("This crop is not ready to reap "
"yet - results are missing.")
check_ready_to_reap(self, allow_incomplete, wait)

clean_up, default_result = calc_clean_up_default_res(
self, clean_up, allow_incomplete
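Both reap methods now delegate their readiness check to the shared check_ready_to_reap helper above, so reaping an unfinished crop raises the more informative XYZError. A small sketch of how calling code might handle this, using a placeholder crop name and directory:

from xyzpy.gen.batch import Crop, XYZError

crop = Crop(name='fn', parent_dir='/path/to/crops')

try:
    # raises XYZError while any batches are still missing
    results = crop.reap_combos()
except XYZError:
    # gather only the finished batches; missing results become np.nan,
    # which may change (e.g. upcast) the resulting data-types
    results = crop.reap_combos(allow_incomplete=True)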
