In [1]:
from __future__ import print_function
from openpathsampling.experimental.storage.ops_storage import OPSStorage, ops_class_info, ops_schema
from openpathsampling.experimental.storage.sql_backend import SQLStorageBackend
import numpy as np
from openpathsampling.experimental.storage.serialization_helpers import get_uuid, set_uuid

import openpathsampling as paths

import collections

from openpathsampling.experimental.storage.storable_functions import (
    StorableFunction, StorageFunctionHandler
)

In [2]:
import logging
import sys

# Route every log record (DEBUG and above, from all loggers) to stdout so the
# storage/backend activity appears in the cell outputs below.
root = logging.getLogger()
root.setLevel(logging.DEBUG)

# Re-running this cell must not stack up handlers: each extra handler would
# print every log line one more time. Remove the handler installed by a
# previous run of this cell (identified by its name) before adding a new one.
for old_handler in list(root.handlers):
    if getattr(old_handler, 'name', None) == 'notebook-stdout':
        root.removeHandler(old_handler)

ch = logging.StreamHandler(sys.stdout)
ch.name = 'notebook-stdout'
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
root.addHandler(ch)

## Playing around with a storable function

In [3]:
def get_x(snap):
    """Return ``snap.xyz[0][0]`` after a deliberate two-second pause.

    The pause makes an actual evaluation easy to tell apart from a cached
    lookup in the ``%%time`` cells that follow.
    """
    # Imported inside the function body, as in the original; presumably so
    # the function stays self-contained when it is serialized -- confirm.
    import time

    time.sleep(2)
    first_entry = snap.xyz[0]
    return first_entry[0]

In [4]:
cv = StorableFunction(get_x, result_type='float')

In [5]:
import openpathsampling.engines.toy as toys
snap = toys.Snapshot(coordinates=np.array([[3.0, 0.0]]))

In [6]:
%%time
cv(snap)

2020-07-27 15:58:54,852 - openpathsampling.experimental.storage.storable_functions - DEBUG - <bound method StorableFunction._get_cached of <openpathsampling.experimental.storage.storable_functions.StorableFunction object at 0x113d34d30>>
2020-07-27 15:58:54,855 - openpathsampling.experimental.storage.storable_functions - DEBUG - <bound method StorableFunction._get_storage of <openpathsampling.experimental.storage.storable_functions.StorableFunction object at 0x113d34d30>>
2020-07-27 15:58:54,895 - openpathsampling.experimental.storage.storable_functions - DEBUG - <bound method StorableFunction._eval of <openpathsampling.experimental.storage.storable_functions.StorableFunction object at 0x113d34d30>>
CPU times: user 42.5 ms, sys: 2.08 ms, total: 44.6 ms
Wall time: 2.05 s


3.0

In [7]:
%%time
cv(snap)

2020-07-27 15:58:56,912 - openpathsampling.experimental.storage.storable_functions - DEBUG - <bound method StorableFunction._get_cached of <openpathsampling.experimental.storage.storable_functions.StorableFunction object at 0x113d34d30>>
CPU times: user 1.23 ms, sys: 804 µs, total: 2.03 ms
Wall time: 2.61 ms


3.0

## Manually registering storable function with SQL backend

In [8]:
# Open "test.sql" in write mode (SQL echo off) and wrap the backend in an
# OPSStorage that uses the standard OPS schema and class info.
backend = SQLStorageBackend("test.sql", mode='w', echo=False)
storage = OPSStorage.from_backend(backend=backend,
                                  schema=ops_schema,
                                  class_info=ops_class_info)

2020-07-27 15:58:56,952 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table uuid
2020-07-27 15:58:56,953 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table tables
2020-07-27 15:58:56,959 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table samples
2020-07-27 15:58:56,966 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table sample_sets
2020-07-27 15:58:56,975 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table trajectories
2020-07-27 15:58:56,982 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table move_changes
2020-07-27 15:58:56,993 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table steps
2020-07-27 15:58:57,008 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table details
2020-07-27 15:58:57,015 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table storable_f

In [9]:
uuid = get_uuid(cv)

In [10]:
# Evaluate once so the local cache holds the result for `snap`. Only the last
# expression of a cell is displayed, so no dummy assignment is needed -- and
# assigning to `_` would shadow IPython's "last output" variable.
cv(snap)
cv.local_cache.result_dict

2020-07-27 15:58:57,085 - openpathsampling.experimental.storage.storable_functions - DEBUG - <bound method StorableFunction._get_cached of <openpathsampling.experimental.storage.storable_functions.StorableFunction object at 0x113d34d30>>


{'104474954970302820103606700453371314192': 3.0}

In [11]:
backend.register_storable_function(table_name=uuid, result_type=cv.result_type)

2020-07-27 15:58:57,104 - openpathsampling.experimental.storage.sql_backend - INFO - Registering storable function: UUID: 104474954970302820103606700453371314188 (float)


In [12]:
backend.add_storable_function_results(table_name=uuid,
                                      result_dict=cv.local_cache.result_dict)

2020-07-27 15:58:57,151 - openpathsampling.experimental.storage.sql_backend - DEBUG - Found 0 UUIDs


In [13]:
backend.load_storable_function_results(uuid, [get_uuid(snap)])

2020-07-27 15:58:57,208 - openpathsampling.experimental.storage.sql_backend - DEBUG - Found 1 UUIDs


{'104474954970302820103606700453371314192': 3.0}

In [14]:
backend.load_storable_function_table(get_uuid(cv))

{'104474954970302820103606700453371314192': 3.0}

## New backend; use storable func with storage

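This section is roughly the full integration: the storable function is saved and loaded through the `OPSStorage` object instead of being registered with the backend by hand.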

In [15]:
# The storage created in the previous section is still attached to
# "test.sql"; close it before the file is re-created in write mode, mirroring
# the explicit `storage.close()` this notebook uses before its read-only reopen.
storage.close()

backend = SQLStorageBackend("test.sql", mode='w')
storage = OPSStorage.from_backend(
    backend=backend,
    schema=ops_schema,
    class_info=ops_class_info
)

2020-07-27 15:58:57,258 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table uuid
2020-07-27 15:58:57,260 - openpathsampling.experimental.storage.sql_backend - INFO - Add schema table tables


In [16]:
storage.save(cv)

2020-07-27 15:58:57,276 - openpathsampling.experimental.storage.storage - DEBUG - Starting save
2020-07-27 15:58:57,278 - openpathsampling.experimental.storage.sql_backend - DEBUG - Looking for 1 UUIDs
2020-07-27 15:58:57,279 - openpathsampling.experimental.storage.sql_backend - DEBUG - New block of 1 UUIDs
2020-07-27 15:58:57,296 - openpathsampling.experimental.storage.sql_backend - DEBUG - Found 0 UUIDs
2020-07-27 15:58:57,297 - openpathsampling.experimental.storage.storage - DEBUG - Listing all objects to save
2020-07-27 15:58:57,301 - openpathsampling.experimental.storage.storage - DEBUG - Checking if objects already exist in database
2020-07-27 15:58:57,302 - openpathsampling.experimental.storage.sql_backend - DEBUG - Looking for 2 UUIDs
2020-07-27 15:58:57,304 - openpathsampling.experimental.storage.sql_backend - DEBUG - New block of 2 UUIDs
2020-07-27 15:58:57,312 - openpathsampling.experimental.storage.sql_backend - DEBUG - Found 0 UUIDs
2020-07-27 15:58:57,314 - openpathsampli

TypeError: Schema registration problem. Your schema may already have tables of the same names.

In [None]:
list(storage.simulation_objects)

In [None]:
backend.metadata.tables.keys()

In [None]:
# Look the function up by UUID in the handler's canonical functions, the same
# way the "Gather operation" cells do, instead of indexing a `functions` list
# that no other cell in this notebook uses.
# NOTE(review): assumes `cv` has been saved to this storage -- confirm.
storage._sf_handler.canonical_functions[get_uuid(cv)].local_cache.result_dict

In [None]:
backend.load_storable_function_results(uuid, [get_uuid(snap)])

In [None]:
storage.close()

In [None]:
backend = SQLStorageBackend("test.sql", mode='r')
storage = OPSStorage.from_backend(
    backend=backend,
    schema=ops_schema,
    class_info=ops_class_info
)

In [None]:
cv_reloaded = storage.load([get_uuid(cv)])[0]

In [None]:
cv_reloaded.local_cache.result_dict

In [None]:
cv_reloaded.local_cache != cv.local_cache

In [None]:
%%time
cv_reloaded.local_cache.clear()
cv_reloaded(snap)

In [None]:
# TODO: tests need a test of a bad load as well (missing should show up)

In [None]:
cv_reloaded.local_cache.clear()
cv_reloaded.preload_cache()

In [None]:
%%time
cv_reloaded(snap)

## Store/load multiple values at once

This checks that the backend is able to load multiple UUIDs at once.

In [None]:
# The read-only storage opened above is still attached to "test.sql"; close
# it before the file is re-created in write mode.
storage.close()

backend = SQLStorageBackend("test.sql", mode='w')
storage = OPSStorage.from_backend(
    backend=backend,
    schema=ops_schema,
    class_info=ops_class_info
)

In [None]:
new_snap = toys.Snapshot(coordinates=np.array([[2.0, 0.0]]))

In [None]:
%%time
# Empty the in-memory cache so the call below cannot be answered from it.
cv.local_cache.clear()
# NOTE(review): resets the private handler reference; presumably this detaches
# `cv` from the storage it was saved to earlier -- confirm.
cv._handler = None
# Called with a list of two snapshots rather than a single snapshot.
cv([snap, new_snap])

In [None]:
%%time
# get using cache
cv([snap, new_snap])

In [None]:
storage.save(cv)

In [None]:
%%time
# get using storage
cv.local_cache.clear()
cv([snap, new_snap])

## Save multiple times

The second time we save things, `find_uuid` skips over the already-saved function. This means that we need another mechanism for storing mutable information (such as the `StorableFunctionResults`) to disk.

This is part of what the `StorableFunctionHandler` manages. Since the handler contains the function, it can store things to disk with an update.

In [None]:
# Close the storage left over from the previous section before "test.sql" is
# re-created in write mode.
storage.close()

backend = SQLStorageBackend("test.sql", mode='w')
storage = OPSStorage.from_backend(
    backend=backend,
    schema=ops_schema,
    class_info=ops_class_info
)
# New function object; it is saved indirectly, through the volume that uses it.
cv = StorableFunction(get_x, result_type='float')
vol = paths.CVDefinedVolume(cv, lambda_min=0.0, lambda_max=1.0)
# Evaluate once before the first save, so one result already exists at that point.
cv(snap)
storage.save(vol)

In [None]:
new_snap = toys.Snapshot(coordinates=np.array([[2.0, 0.0]]))
cv(new_snap)

In [None]:
storage.backend.load_storable_function_table(get_uuid(cv))

In [None]:
storage.save(vol)

In [None]:
storage.backend.load_storable_function_table(get_uuid(cv))

In [None]:
storage.save_function_results()

In [None]:
storage.backend.load_storable_function_table(get_uuid(cv))

In [None]:
# Cached results of the first canonical function known to the handler.
next(iter(storage._sf_handler.canonical_functions.values())).local_cache.result_dict

## Mock of multiple computers creating and returning results

In [None]:
cv = StorableFunction(get_x, result_type='float')
cv(snap)

### Computer 1

We mock the serialization and transfer over the network by doing a `to_dict`/`from_dict` cycle, which will create a new instance in memory with the same UUID.

In [None]:
cv_1 = StorableFunction.from_dict(cv.to_dict())
set_uuid(cv_1, get_uuid(cv))

In [None]:
print("\n".join([repr(cv), repr(cv_1)]))

In [None]:
cv_1.local_cache.result_dict

In [None]:
snap_1 = toys.Snapshot(coordinates=np.array([[4.0, 0.0]]))

In [None]:
%%time
cv_1(snap_1)

In [None]:
cv_1.local_cache.result_dict

### Computer 2

In [None]:
cv_2 = StorableFunction.from_dict(cv.to_dict())
set_uuid(cv_2, get_uuid(cv))

In [None]:
print("\n".join([repr(cv), repr(cv_1), repr(cv_2)]))

In [None]:
snap_2 = toys.Snapshot(coordinates=np.array([[5.0, 0.0]]))

In [None]:
cv_2(snap_2)

In [None]:
cv_2.local_cache.result_dict

### Gather operation

Note that for the real thing, we'll need to ensure that the serializers/deserializers (be they from `dask` or custom) correctly recreate the objects.

Attention: The save command must be issued separately for each copy of the CV. This is because the CVs have the same UUID, and we use sets/dicts to combine repeats of the same UUID.

In [None]:
# Close the storage left over from the "Save multiple times" section before
# "test.sql" is re-created in write mode for the gather step.
storage.close()

backend = SQLStorageBackend("test.sql", mode='w')
storage = OPSStorage.from_backend(
    backend=backend,
    schema=ops_schema,
    class_info=ops_class_info
)

In [None]:
storage.save([cv])

In [None]:
storage.save([cv_1])

In [None]:
storage.save([cv_2])

In [None]:
cv_2.local_cache.parent

In [None]:
cv.local_cache.result_dict

In [None]:
storage._sf_handler.canonical_functions

In [None]:
# One UUID per line: the original function, the reloaded one, and both copies.
# NOTE(review): `cv_reloaded` was loaded before `cv` was re-created in later
# sections, so it may carry a different UUID than the other three -- confirm
# that this is intended.
print("\n".join(str(obj.__uuid__) for obj in (cv, cv_reloaded, cv_1, cv_2)))

In [None]:
results = storage._sf_handler.canonical_functions[get_uuid(cv)].local_cache.result_dict

In [None]:
assert len(results) == 3

In [None]:
results

In [None]:
# should be Dict[str, List[StorableFunction]]
# dict length 1; the single value in dict is list length 3
storage._sf_handler.all_functions

In [None]:
# A single UUID is registered, and three function instances share it.
assert len(storage._sf_handler.all_functions) == 1
assert len(next(iter(storage._sf_handler.all_functions.values()))) == 3

In [None]:
storage._sf_handler.clear_non_canonical()

In [None]:
storage._sf_handler.all_functions

In [None]:
# Still a single UUID, now with only one function instance registered for it.
assert len(storage._sf_handler.all_functions) == 1
assert len(next(iter(storage._sf_handler.all_functions.values()))) == 1