Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add experiment flag to pickle only on checkpoints #224

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions flambe/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def __init__(self,
max_failures: int = 1,
stop_on_failure: bool = True,
merge_plot: bool = True,
user_provider: Callable[[], str] = None) -> None:
user_provider: Callable[[], str] = None,
pickle_checkpoints: bool = False) -> None:
super().__init__(env=env, user_provider=user_provider)
self.name = name

Expand Down Expand Up @@ -152,6 +153,7 @@ def __init__(self,
raise TypeError("Pipeline argument is not of type Dict[str, Schema]. "
f"Got {type(pipeline).__name__} instead")
self.pipeline = pipeline
self.pickle_checkpoints = pickle_checkpoints

def process_resources(
self,
Expand Down Expand Up @@ -363,7 +365,8 @@ def trial_name_creator(trial):
'global_vars': resources,
'verbose': verbose,
'custom_modules': list(self.extensions.keys()),
'debug': debug}
'debug': debug,
'pickle_checkpoints': self.pickle_checkpoints}
# Filter out the tensorboard logger as we handle
# general and tensorboard-specific logging ourselves
tune_loggers = list(filter(lambda l: l != tf2_compat_logger and # noqa: E741
Expand Down
15 changes: 12 additions & 3 deletions flambe/experiment/tune_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import shutil

import ray
import torch
import dill

from flambe.compile import load_state_from_file, Schema, Component
from flambe.compile.extensions import setup_default_modules, import_modules
Expand Down Expand Up @@ -36,6 +38,7 @@ def _setup(self, config: Dict):
self.verbose = config['verbose']
self.hyper_params = config['hyper_params']
self.debug = config['debug']
self.pickle_checkpoints = config['pickle_checkpoints']

with TrialLogging(log_dir=self.logdir,
verbose=self.verbose,
Expand Down Expand Up @@ -152,13 +155,19 @@ def _train(self) -> Dict:
def _save(self, checkpoint_dir: str) -> str:
"""Subclasses should override this to implement save()."""
path = os.path.join(checkpoint_dir, "checkpoint.flambe")
self.block.save(path, overwrite=True)
if self.pickle_checkpoints:
torch.save(self.block, path, pickle_module=dill)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no overwrite here?

Also still no way of selecting torch instead of dill?

else:
self.block.save(path, overwrite=True)
return path

def _restore(self, checkpoint: str) -> None:
    """Restore the block from a checkpoint file.

    Mirrors ``_save``: if ``self.pickle_checkpoints`` is set the
    checkpoint is a dill-pickled object and is loaded with
    ``torch.load``; otherwise it is a flambe state file whose state
    is loaded into the existing block.

    Parameters
    ----------
    checkpoint : str
        Path to the checkpoint file produced by ``_save``.

    """
    if self.pickle_checkpoints:
        # Bug fix: torch.load takes `pickle_module`, not
        # `pickle_protocol` — the latter only exists on torch.save
        # and expects an int, not a module. Use dill to match the
        # pickle module used in `_save`.
        self.block = torch.load(checkpoint, pickle_module=dill)
    else:
        state = load_state_from_file(checkpoint)
        self.block.load_state(state)

def _stop(self):
"""Subclasses should override this for any cleanup on stop."""
Expand Down