In [1]:
import openpathsampling as paths
from openpathsampling.engines import features

from openpathsampling.experimental.simstore.serialization_helpers import get_uuid

from openpathsampling.experimental.storage.snapshots import (
    schema_from_entries, schema_for_snapshot, replace_schema_dimensions,
    snapshot_registration_info
)

import simtk.unit

## Parsing types

### Abstract

In [2]:
# Abstract schema for the statics/kinetics features; both containers are
# registered as lazily loaded attributes of the snapshot, and dimension
# placeholders such as {n_atoms} are left unfilled at this stage.
openmm_schema = schema_from_entries(
    [features.statics, features.kinetics],
    lazies=['statics', 'kinetics'],
)
openmm_schema

{'statics': [('coordinates',
   'simtk(unit.nanometer)*ndarray.float32({n_atoms},{n_spatial})'),
  ('box_vectors',
   'simtk(unit.nanometer)*ndarray.float32({n_spatial},{n_spatial})'),
  ('engine', 'uuid')],
 'kinetics': [('velocities',
   'simtk(unit.nanometer/unit.picosecond)*ndarray.float32({n_atoms},{n_spatial})'),
  ('engine', 'uuid')],
 'snapshot': [('statics', 'lazy'),
  ('kinetics', 'lazy'),
  ('is_reversed', 'bool')]}

In [3]:
string = openmm_schema['kinetics'][0][1]
string

'simtk(unit.nanometer/unit.picosecond)*ndarray.float32({n_atoms},{n_spatial})'

In [4]:
import re

In [5]:
# Split a type string of the form ``simtk(<unit expr>)*<base type>`` into the
# unit expression (group 1) and the wrapped base type (group 2), where the base
# type must start with ``ndarray`` or ``float``.
# Raw string: "\(", "\)" and "\*" are invalid escape sequences in a normal
# string literal and trigger a DeprecationWarning/SyntaxWarning.
pattern = re.compile(r"simtk\((.*)\)\*((ndarray|float).*)")

In [6]:
match = pattern.match(string)

In [7]:
match.group(1), match.group(2)

('unit.nanometer/unit.picosecond', 'ndarray.float32({n_atoms},{n_spatial})')

In [8]:
float_string = "simtk(unit.nanometer/unit.picosecond**2)*float"

In [9]:
match = pattern.match(float_string)
match.group(0)

'simtk(unit.nanometer/unit.picosecond**2)*float'

In [10]:
match.group(1), match.group(2)

('unit.nanometer/unit.picosecond**2', 'float')

### From snapshot

In [11]:
import pkg_resources
# Resolve the on-disk path of the PDB test file packaged under
# openpathsampling.tests.
pdb = pkg_resources.resource_filename('openpathsampling.tests',
                                      'test_data/ala_small_traj.pdb')
# Build the snapshot used throughout the rest of the notebook from that PDB.
snap = paths.engines.openmm.snapshot_from_pdb(pdb)

In [12]:
schema_for_snapshot(snap)

{'statics': [('coordinates',
   'simtk(unit.nanometer)*ndarray.float32({n_atoms},{n_spatial})'),
  ('box_vectors',
   'simtk(unit.nanometer)*ndarray.float32({n_spatial},{n_spatial})'),
  ('engine', 'uuid')],
 'kinetics': [('velocities',
   'simtk(unit.nanometer/unit.picosecond)*ndarray.float32({n_atoms},{n_spatial})'),
  ('engine', 'uuid')],
 'snapshot': [('statics', 'lazy'),
  ('kinetics', 'lazy'),
  ('is_reversed', 'bool'),
  ('engine', 'uuid')]}

In [13]:
schema, info = snapshot_registration_info(snap, 0)

In [14]:
assert len(info) == 3

In [15]:
info[0]

ClassInfo(table=snapshot0, cls=<class 'openpathsampling.engines.openmm.snapshot.Snapshot'>, lookup_result=('84082411518742377705054655985724948562', <class 'openpathsampling.engines.openmm.snapshot.Snapshot'>), find_uuids=None)

In [16]:
info[1]

ClassInfo(table=statics0, cls=<class 'openpathsampling.engines.features.shared.StaticContainer'>, lookup_result=('84082411518742377705054655985724948562', <class 'openpathsampling.engines.features.shared.StaticContainer'>), find_uuids=None)

In [17]:
info[2]

ClassInfo(table=kinetics0, cls=<class 'openpathsampling.engines.features.shared.KineticContainer'>, lookup_result=('84082411518742377705054655985724948562', <class 'openpathsampling.engines.features.shared.KineticContainer'>), find_uuids=None)

In [18]:
schema

{'statics0': [('coordinates',
   'simtk(unit.nanometer)*ndarray.float32({n_atoms},{n_spatial})'),
  ('box_vectors',
   'simtk(unit.nanometer)*ndarray.float32({n_spatial},{n_spatial})'),
  ('engine', 'uuid')],
 'kinetics0': [('velocities',
   'simtk(unit.nanometer/unit.picosecond)*ndarray.float32({n_atoms},{n_spatial})'),
  ('engine', 'uuid')],
 'snapshot0': [('statics', 'lazy'),
  ('kinetics', 'lazy'),
  ('is_reversed', 'bool'),
  ('engine', 'uuid')]}

## Using handler

In [19]:
from openpathsampling.experimental.storage.simtk_unit import SimtkQuantityHandler

In [20]:
handler = SimtkQuantityHandler.from_type_string(float_string)

In [21]:
from simtk import unit
# Test quantity in SI units. The handler was built from `float_string`
# (nanometer/picosecond**2), and 1 m/s**2 == 1e9 nm / 1e24 ps**2 == 1e-15
# in those units, so serializing `q` should give ~1e-15.
q = 1.0 * unit.meter / unit.second**2

In [22]:
handler.serialize(q)

9.999999999999999e-16

In [23]:
handler.deserialize(handler.serialize(q))

Quantity(value=9.999999999999999e-16, unit=nanometer/(picosecond**2))

## Using storage

In [24]:
from openpathsampling.experimental.simstore import SQLStorageBackend
from openpathsampling.experimental.storage import Storage

In [25]:
backend = SQLStorageBackend('test.db', mode='w')
storage = Storage.from_backend(backend)

In [26]:
import logging
import sys

def enable_logging(level=logging.DEBUG):
    """Send records from the root logger to stdout.

    Safe to call repeatedly (e.g. when the notebook cell is re-run): the
    stdout handler installed by an earlier call is reused instead of a
    second one being stacked on top, so each record is printed only once.

    Parameters
    ----------
    level : int
        Logging level applied to both the root logger and the stdout
        handler. Defaults to ``logging.DEBUG``.
    """
    root = logging.getLogger()
    root.setLevel(level)

    # Handlers created here are tagged so later calls can find and reuse them.
    marker = "_enable_logging_stdout"
    ch = next((h for h in root.handlers if getattr(h, marker, False)), None)
    if ch is None:
        ch = logging.StreamHandler(sys.stdout)
        setattr(ch, marker, True)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        root.addHandler(ch)
    # Always apply the requested level, including to a reused handler.
    ch.setLevel(level)

### Checking `is_special`

In [27]:
assert storage.class_info.is_special(snap.statics)

In [28]:
assert storage.class_info.is_special(snap.kinetics)

### Saving

In [29]:
# `handlers` is only used by the commented-out line below, which added
# SimtkQuantityHandler by hand for the case where it is not already one of the
# default attribute handlers. NOTE(review): the handler list printed after
# reloading the storage already contains it, so this is presumably no longer
# needed -- confirm before deleting.
handlers = storage.class_info.attribute_handlers
#storage.class_info.attribute_handlers = list(set(handlers) | {SimtkQuantityHandler})

In [30]:
storage.save(snap)

In [31]:
storage.class_info['statics0'].serializer

<openpathsampling.experimental.simstore.serialization.SchemaSerializer at 0x7fb65fa28e80>

In [32]:
storage.class_info['statics0'].serializer.attribute_handlers

{'coordinates': <bound method SimtkQuantityHandler.serialize of <openpathsampling.experimental.storage.simtk_unit.SimtkQuantityHandler object at 0x7fb65fa28fd0>>,
 'box_vectors': <bound method SimtkQuantityHandler.serialize of <openpathsampling.experimental.storage.simtk_unit.SimtkQuantityHandler object at 0x7fb65fa284a8>>,
 'engine': <function openpathsampling.experimental.simstore.uuids.get_uuid(obj)>}

In [33]:
ser = storage.class_info['statics0'].serializer(snap.statics)

In [34]:
ser['box_vectors']

b'\x9f\xcd&@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9f\xcd&@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9f\xcd&@'

In [35]:
# Round trip: rebuild the statics container from its serialized form.
# `existing` maps the engine's UUID to the engine object, and the deserializer
# is given a shallow copy of `ser` so that `ser` itself is left untouched.
existing = {get_uuid(snap.engine): snap.engine}
ser_copy = dict(ser)
deser = storage.class_info['statics0'].deserializer(
    get_uuid(snap.statics),
    ser_copy,
    existing,
)

In [36]:
deser.coordinates

Quantity(value=array([[-0.3479,  0.1589,  0.1044],
       [-0.3046,  0.0591,  0.0972],
       [-0.3382,  0.0141,  0.0038],
       ...,
       [-0.93  ,  1.1963,  1.1873],
       [-0.9368,  1.2558,  1.262 ],
       [-0.8732,  1.1257,  1.2181]], dtype=float32), unit=nanometer)

In [37]:
storage.close()

### Loading

In [38]:
# Reset the class-level registry of known storages before re-opening the file
# (presumably so test.db is read back from disk rather than reused -- TODO
# confirm), then open it read-only.
Storage._known_storages = {}
backend = SQLStorageBackend("test.db", mode='r')
storage = Storage.from_backend(backend)
# The handler can't be added after this point, since type registration happens
# earlier. NOTE(review): the original comment was cut off after "must be inte";
# presumably the handler has to be registered before the storage is created --
# confirm.
#storage.class_info.attribute_handlers = list(set(handlers) | {SimtkQuantityHandler})

In [39]:
backend.table_to_class

{'samples': openpathsampling.sample.Sample,
 'sample_sets': openpathsampling.sample.SampleSet,
 'trajectories': openpathsampling.engines.trajectory.Trajectory,
 'move_changes': openpathsampling.movechange.MoveChange,
 'steps': openpathsampling.pathsimulators.path_simulator.MCStep,
 'details': openpathsampling.pathmover.Details,
 'storable_functions': openpathsampling.experimental.simstore.storable_functions.StorableFunction,
 'simulation_objects': openpathsampling.netcdfplus.base.StorableObject,
 'storage_objects': openpathsampling.experimental.simstore.storage.GeneralStorage,
 'statics0': openpathsampling.engines.features.shared.StaticContainer,
 'kinetics0': openpathsampling.engines.features.shared.KineticContainer,
 'snapshot0': openpathsampling.engines.openmm.snapshot.Snapshot}

In [40]:
storage.class_info.attribute_handlers

[openpathsampling.experimental.simstore.attribute_handlers.NDArrayHandler,
 openpathsampling.experimental.simstore.attribute_handlers.StandardHandler,
 openpathsampling.experimental.storage.simtk_unit.SimtkQuantityHandler]

In [41]:
storage.class_info['statics0'].serializer

<openpathsampling.experimental.simstore.serialization.SchemaSerializer at 0x7fb65fa31ac8>

In [42]:
# Same round trip as before, now using the deserializer of the re-opened
# storage. `ser` and `existing` are reused from the cells run before the
# first storage was closed.
ser_copy = dict(ser)
deser = storage.class_info['statics0'].deserializer(
    get_uuid(snap.statics),
    ser_copy,
    existing,
)

In [43]:
deser.box_vectors

Quantity(value=array([[2.6063, 0.    , 0.    ],
       [0.    , 2.6063, 0.    ],
       [0.    , 0.    , 2.6063]], dtype=float32), unit=nanometer)

In [44]:
storage.class_info['statics0'].deserializer.attribute_handlers

{'coordinates': <bound method SimtkQuantityHandler.deserialize of <openpathsampling.experimental.storage.simtk_unit.SimtkQuantityHandler object at 0x7fb65e2bf5c0>>,
 'box_vectors': <bound method SimtkQuantityHandler.deserialize of <openpathsampling.experimental.storage.simtk_unit.SimtkQuantityHandler object at 0x7fb65e2bf390>>,
 'engine': <function openpathsampling.experimental.simstore.serialization_helpers.search_caches(key, cache_list, raise_error=True)>}

In [45]:
storage.backend.known_types

{'uuid': ('uuid', None),
 'lazy': ('lazy', None),
 'list_uuid': ('list_uuid', None),
 'str': ('str', None),
 'json': ('json', None),
 'json_obj': ('json_obj', None),
 'int': ('int', None),
 'float': ('float', None),
 'function': ('function', None),
 'ndarray': ('ndarray', None),
 'bool': ('bool', None),
 'simtk(unit.nanometer)*ndarray.float32(1651,3)': ('ndarray', None),
 'simtk(unit.nanometer)*ndarray.float32(3,3)': ('ndarray', None),
 'simtk(unit.nanometer/unit.picosecond)*ndarray.float32(1651,3)': ('ndarray',
  None)}

In [46]:
# Take the first snapshot stored in the re-opened file.
snap = list(storage.snapshots)[0]

NOTE: So far we only have proxies for the statics/kinetics!

In [47]:
vars(snap)

{'__uuid__': 84082411518742377705054655985724948568,
 '_lazy': {<openpathsampling.netcdfplus.proxy.DelayedLoader at 0x7fb65d8ed588>: <LazyLoader for StaticContainer UUID 84082411518742377705054655985724948564>,
  <openpathsampling.netcdfplus.proxy.DelayedLoader at 0x7fb65d8ed5c0>: <LazyLoader for KineticContainer UUID 84082411518742377705054655985724948566>},
 '_reversed': None,
 'is_reversed': False,
 'engine': <openpathsampling.engines.openmm.tools.FileEngine at 0x7fb65e29c0b8>}

In [48]:
# unfortunately, get_uuid seems to trigger loading; want to fix that;
# anything other than repr, uuid, type should trigger getattr; need to play
# with lazies some more
get_uuid(snap.kinetics)

'84082411518742377705054655985724948566'

In [49]:
xyz = snap.xyz

In [50]:
snap.coordinates

Quantity(value=array([[-0.3479,  0.1589,  0.1044],
       [-0.3046,  0.0591,  0.0972],
       [-0.3382,  0.0141,  0.0038],
       ...,
       [-0.93  ,  1.1963,  1.1873],
       [-0.9368,  1.2558,  1.262 ],
       [-0.8732,  1.1257,  1.2181]], dtype=float32), unit=nanometer)

In [51]:
assert isinstance(snap.box_vectors, simtk.unit.Quantity)

In [52]:
snap.xyz

array([[-0.3479,  0.1589,  0.1044],
       [-0.3046,  0.0591,  0.0972],
       [-0.3382,  0.0141,  0.0038],
       ...,
       [-0.93  ,  1.1963,  1.1873],
       [-0.9368,  1.2558,  1.262 ],
       [-0.8732,  1.1257,  1.2181]], dtype=float32)

In [53]:
type(snap.kinetics)

openpathsampling.experimental.simstore.proxy.make_lazy_class.<locals>.LazyLoader

In [54]:
storage.class_info['statics0']

ClassInfo(table=statics0, cls=<class 'openpathsampling.engines.features.shared.StaticContainer'>, lookup_result=('84082411518742377705054655985724948562', <class 'openpathsampling.engines.features.shared.StaticContainer'>), find_uuids=<openpathsampling.experimental.simstore.serialization_helpers.SchemaFindUUIDs object at 0x7fb65fa07588>)

### Re-saving

In [55]:
Storage._known_storages = {}
backend = SQLStorageBackend("test.db", mode='r')
storage = Storage.from_backend(backend)

In [56]:
storage.backend.metadata.tables['kinetics0']

Table('kinetics0', MetaData(bind=Engine(sqlite:///test.db)), Column('idx', INTEGER(), table=<kinetics0>, primary_key=True, nullable=False), Column('uuid', VARCHAR(), table=<kinetics0>), Column('velocities', BLOB(), table=<kinetics0>), Column('engine', VARCHAR(), table=<kinetics0>), schema=None)

In [57]:
# Take the first snapshot from the read-only storage; it is re-saved below.
snap = list(storage.snapshots)[0]

In [58]:
# Open a second database in write mode as the target for re-saving.
# Note that `backend` is rebound here and now refers to test2.db; the
# read-only storage for test.db is still reachable through `storage`.
backend = SQLStorageBackend("test2.db", mode='w')
new_storage = Storage.from_backend(backend)

In [59]:
enable_logging()

In [60]:
vars(snap)

{'__uuid__': 84082411518742377705054655985724948568,
 '_lazy': {<openpathsampling.netcdfplus.proxy.DelayedLoader at 0x7fb65d8ed588>: <LazyLoader for StaticContainer UUID 84082411518742377705054655985724948564>,
  <openpathsampling.netcdfplus.proxy.DelayedLoader at 0x7fb65d8ed5c0>: <LazyLoader for KineticContainer UUID 84082411518742377705054655985724948566>},
 '_reversed': None,
 'is_reversed': False,
 'engine': <openpathsampling.engines.openmm.tools.FileEngine at 0x7fb65e2e0b00>}

In [61]:
new_storage.save(snap)

2020-12-07 18:25:54,652 - openpathsampling.experimental.simstore.storage - DEBUG - Starting save
2020-12-07 18:25:54,665 - openpathsampling.experimental.simstore.sql_backend - DEBUG - Looking for 1 UUIDs
2020-12-07 18:25:54,669 - openpathsampling.experimental.simstore.sql_backend - DEBUG - New block of 1 UUIDs
2020-12-07 18:25:54,679 - openpathsampling.experimental.simstore.sql_backend - DEBUG - Found 0 UUIDs
2020-12-07 18:25:54,680 - openpathsampling.experimental.simstore.storage - DEBUG - Listing all objects to save
2020-12-07 18:25:54,750 - openpathsampling.experimental.simstore.storage - DEBUG - Found 3 objects
2020-12-07 18:25:54,751 - openpathsampling.experimental.simstore.storage - DEBUG - {'84082411518742377705054655985724948568': <openpathsampling.engines.openmm.snapshot.Snapshot object at 0x7fb65e154208>, '84082411518742377705054655985724948562': <openpathsampling.engines.openmm.tools.FileEngine object at 0x7fb65e2e0b00>, '84082411518742377705054655985724948494': <openpathsam

2020-12-07 18:25:54,964 - openpathsampling.experimental.simstore.storage - DEBUG - Identifying classes for 0 lazy proxies
2020-12-07 18:25:54,964 - openpathsampling.experimental.simstore.sql_backend - DEBUG - Looking for 0 UUIDs
2020-12-07 18:25:54,967 - openpathsampling.experimental.simstore.sql_backend - DEBUG - New block of 0 UUIDs
2020-12-07 18:25:54,971 - openpathsampling.experimental.simstore.sql_backend - DEBUG - Found 0 UUIDs
2020-12-07 18:25:54,973 - openpathsampling.experimental.simstore.storage - DEBUG - Reconstructing from 0 objects
2020-12-07 18:25:54,975 - openpathsampling.experimental.simstore.storage - DEBUG - Checking if objects already exist in database
2020-12-07 18:25:54,979 - openpathsampling.experimental.simstore.sql_backend - DEBUG - Looking for 5 UUIDs
2020-12-07 18:25:54,979 - openpathsampling.experimental.simstore.sql_backend - DEBUG - New block of 5 UUIDs
2020-12-07 18:25:54,982 - openpathsampling.experimental.simstore.sql_backend - DEBUG - Found 0 UUIDs
2020

In [62]:
vars(snap)

{'__uuid__': 84082411518742377705054655985724948568,
 '_lazy': {<openpathsampling.netcdfplus.proxy.DelayedLoader at 0x7fb65d8ed588>: <openpathsampling.engines.features.shared.StaticContainer object at 0x7fb65f9322e8>,
  <openpathsampling.netcdfplus.proxy.DelayedLoader at 0x7fb65d8ed5c0>: <openpathsampling.engines.features.shared.KineticContainer object at 0x7fb65f9335c0>},
 '_reversed': None,
 'is_reversed': False,
 'engine': <openpathsampling.engines.openmm.tools.FileEngine at 0x7fb65e2e0b00>}