-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
273 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
"""Interfaces for data storage backend.""" | ||
|
||
class Cursor:
    """Interface that abstracts the cursor object returned from databases."""

    def __init__(self):
        pass

    def count(self):
        """
        Return the number of items in this cursor.

        :return: int
        :raises NotImplementedError: must be overridden by a subclass.
        """
        # Bug fix: the original `raise NotImplemented()` raised a TypeError
        # ('NotImplementedType' object is not callable) because
        # NotImplemented is a constant, not an exception class.
        raise NotImplementedError()

    def __iter__(self):
        """Iterate over the items of this cursor.

        :raises NotImplementedError: must be overridden by a subclass.
        """
        raise NotImplementedError()
|
||
|
||
class DataStorage:
    """
    Interface for data backends.

    Defines the API for various data stores --- databases, file stores,
    etc. --- that sacred supports.
    """

    def __init__(self):
        pass

    def get_run(self, run_id):
        """
        Return the run associated with the id.

        :param run_id: id of the run to fetch.
        :raises NotImplementedError: must be overridden by a subclass.
        """
        # Bug fix: `raise NotImplemented()` raised a TypeError because
        # NotImplemented is a constant, not an exception class.
        raise NotImplementedError()

    def get_runs(self, sort_by=None, sort_direction=None,
                 start=0, limit=None, query=None):
        """
        Return all runs that match the query.

        :param query: filter specification; effective default is
            ``{"type": "and", "filters": []}`` (i.e. match everything).
        :raises NotImplementedError: must be overridden by a subclass.
        """
        # Bug fix: the original used a mutable dict as the default value
        # for `query`, which is shared between all calls. Use None as the
        # sentinel and let implementations substitute the empty query.
        raise NotImplementedError()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
"""Implements backend storage interface for sacred's file store.""" | ||
|
||
import datetime | ||
import os | ||
import json | ||
|
||
from sacredboard.app.data.datastorage import Cursor, DataStorage | ||
|
||
CONFIG_JSON = "config.json" | ||
RUN_JSON = "run.json" | ||
INFO_JSON = "info.json" | ||
|
||
|
||
def _path_to_file(basepath, run_id, file_name): | ||
return os.path.join(basepath, str(run_id), file_name) | ||
|
||
|
||
def _path_to_config(basepath, run_id):
    """Return the path to the run's config.json file."""
    return _path_to_file(basepath, run_id, CONFIG_JSON)
|
||
|
||
def _path_to_info(basepath, run_id):
    """Return the path to the run's info.json file."""
    return _path_to_file(basepath, run_id, INFO_JSON)
|
||
|
||
def _path_to_run(basepath, run_id):
    """Return the path to the run's run.json file."""
    # Consistency fix: the sibling helpers (_path_to_config, _path_to_info)
    # all route through _path_to_file; this one hand-rolled os.path.join.
    return _path_to_file(basepath, run_id, RUN_JSON)
|
||
|
||
def _read_json(path_to_json): | ||
with open(path_to_json) as f: | ||
return json.load(f) | ||
|
||
|
||
def _create_run(run_id, runjson, configjson, infojson): | ||
runjson["_id"] = run_id | ||
runjson["config"] = configjson | ||
runjson["info"] = infojson | ||
|
||
# TODO probably want a smarter way of detecting | ||
# which values have type "time." | ||
for k in ["start_time", "stop_time", "heartbeat"]: | ||
runjson[k] = datetime.datetime.strptime(runjson[k], | ||
'%Y-%m-%dT%H:%M:%S.%f') | ||
return runjson | ||
|
||
|
||
class FileStoreCursor(Cursor):
    """Implements the cursor for file stores."""

    def __init__(self, count, iterable):
        """
        :param count: number of runs this cursor reports via count().
        :param iterable: iterable of run dicts to iterate over.
        """
        # Fix: the original skipped the base-class initializer; always
        # chain to super() in a subclass __init__.
        super().__init__()
        self.iterable = iterable
        self._count = count

    def count(self):
        """
        Return the number of runs in this query.

        :return: int
        """
        return self._count

    def __iter__(self):
        """Iterate over the runs supplied at construction time."""
        return iter(self.iterable)
|
||
|
||
class FileStorage(DataStorage):
    """Object to interface with one of sacred's file stores."""

    # Directory entries in the file store that are not runs.
    _NOT_A_RUN = frozenset({"_sources"})

    def __init__(self, path_to_dir):
        """
        :param path_to_dir: path to the file store ("~" is expanded).
        """
        super().__init__()
        self.path_to_dir = os.path.expanduser(path_to_dir)

    def get_run(self, run_id):
        """
        Return the run associated with a particular `run_id`.

        :param run_id: id of the run (the name of its directory).
        :return: dict
        :raises FileNotFoundError: when one of the run's JSON files
            is missing.
        """
        config = _read_json(_path_to_config(self.path_to_dir, run_id))
        run = _read_json(_path_to_run(self.path_to_dir, run_id))
        info = _read_json(_path_to_info(self.path_to_dir, run_id))
        return _create_run(run_id, run, config, info)

    def get_runs(self, sort_by=None, sort_direction=None,
                 start=0, limit=None, query=None):
        """
        Return all runs in the file store.

        If a run is corrupt --- e.g. missing files --- it is skipped.

        :param sort_by: NotImplemented
        :param sort_direction: NotImplemented
        :param start: NotImplemented
        :param limit: NotImplemented
        :param query: NotImplemented; effective default is
            {"type": "and", "filters": []}. (Fix: the original used a
            mutable dict as the default argument, shared between calls.)
        :return: FileStoreCursor
        """
        # Fix: exclude non-run entries (e.g. "_sources") up front so that
        # the reported count agrees with what the iterator yields; the
        # original counted every directory entry including the blacklist.
        run_ids = [run_id for run_id in os.listdir(self.path_to_dir)
                   if run_id not in self._NOT_A_RUN]

        def run_iterator():
            for run_id in run_ids:
                try:
                    yield self.get_run(run_id)
                except FileNotFoundError:
                    # An incomplete experiment is a corrupt experiment.
                    # Skip it for now.
                    pass

        # NOTE(review): count may still overstate the number of yielded
        # runs when corrupt runs are skipped by the iterator.
        count = len(run_ids)
        return FileStoreCursor(count, run_iterator())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
"""Tests for the file storage backend.""" | ||
|
||
# coding=utf-8 | ||
import bson | ||
import pytest | ||
import json | ||
import tempfile | ||
import os | ||
|
||
from sacredboard.app.data.filestorage import FileStorage | ||
|
||
|
||
def create_tmp_datastore():
    """
    Create a temporary directory emulating sacred's file store layout.

    Rather than mocking the file system, this actually creates some temporary
    files that emulate the file store system in Sacred. Unfortunately, Sacred
    and Sacredboard are completely decoupled, which makes it impossible to
    ensure that this standard is upheld throughout the sacred system.

    :return: path to the temporary file store directory.
    """
    config = {"length": None, "n_input": 255, "batch_size": None,
              "dataset_path": "./german-nouns.hdf5", "validation_ds": "validation",
              "log_dir": "./log/rnn500_dropout0.5_lrate1e-4_minibatch_1000steps",
              "seed": 144363069, "dropout_keep_probability": 0.5,
              "max_character_ord": 255, "training_ds": "training", "num_classes": 3,
              "training_steps": 1000, "learning_rate": 0.0001, "hidden_size": 500}

    run = {"status": "COMPLETED",
           "_id": "57f9efb2e4b8490d19d7c30e",
           "resources": [],
           "host": {"os": "Linux",
                    "os_info": "Linux-3.16.0-38-generic-x86_64-with-LinuxMint-17.2-rafaela",
                    "cpu": "Intel(R) Core(TM) i3 CPU M 370 @ 2.40GHz",
                    "python_version": "3.4.3",
                    "python_compiler": "GCC 4.8.4",
                    "cpu_count": 4,
                    "hostname": "ntbacer"},
           "experiment": {"doc": None, "sources": [[
               "/home/martin/mnt/noun-classification/train_model.py",
               "86aaa9b81d6e32a181598ed78bb1d7a1"]],
               "dependencies": [["h5py", "2.6.0"],
                                ["numpy", "1.11.2"],
                                ["sacred", "0.6.10"]],
               "name": "German nouns"},
           "result": 2403.52, "artifacts": [], "comment": "",
           # N.B. time formatting is different between mongodb and file store.
           "start_time": "2017-06-02T07:13:05.305845",
           "stop_time": "2017-06-02T07:14:02.455460",
           "heartbeat": "2017-06-02T07:14:02.452597",
           "captured_out": "Output: \n"}
    info = {"info": "present"}

    experiment_dir = tempfile.mkdtemp()
    run_dir = os.path.join(experiment_dir, "42")  # experiment number 42
    os.mkdir(run_dir)

    # Write each payload to its conventionally-named JSON file.
    for file_name, payload in (("config.json", config),
                               ("run.json", run),
                               ("info.json", info)):
        with open(os.path.join(run_dir, file_name), 'w') as json_file:
            json.dump(payload, json_file)

    return experiment_dir
|
||
@pytest.fixture
def tmpfilestore() -> FileStorage:
    """Fixture that prepares a file store in /tmp for dependency injection."""
    # Fix: the original local was named `dir`, shadowing the builtin.
    store_path = create_tmp_datastore()
    return FileStorage(store_path)
|
||
def test_get_run(tmpfilestore: FileStorage):
    """Tests the get_run function."""
    run42 = tmpfilestore.get_run(42)

    expected_keys = ("info", "resources", "host", "experiment", "result",
                     "artifacts", "comment", "start_time", "stop_time",
                     "heartbeat", "captured_out", "config")
    for expected_key in expected_keys:
        assert expected_key in run42
|
||
def test_get_runs(tmpfilestore: FileStorage):
    """Tests the get_runs function."""
    runs = list(tmpfilestore.get_runs())

    # The fixture store contains exactly one (well-formed) run.
    assert len(runs) == 1

    only_run = runs[0]
    expected_keys = ("info", "resources", "host", "experiment", "result",
                     "artifacts", "comment", "start_time", "stop_time",
                     "heartbeat", "captured_out", "config")
    for expected_key in expected_keys:
        assert expected_key in only_run