### PyEmma Featurizer Support

In [1]:
import openpathsampling as paths
import numpy as np

In [2]:
#! lazy
import pyemma.coordinates as coor

In [3]:
#! lazy
ref_storage = paths.Storage('engine_store_test.nc', mode='r')

11-02-17 23:12:05 openpathsampling.netcdfplus.netcdfplus INFO     Open existing netCDF file 'engine_store_test.nc' for reading - reading from existing file
11-02-17 23:12:05 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover'] and instatiated with ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover']
11-02-17 23:12:05 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['samples', 'movepath'] and instatiated with ['samples', 'movepath']
11-02-17 23:12:05 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['simulation', 'mccycle', 'previous', 'active', 'change'] and instatiated with ['simulation', 'mccycle', 'previous', 'active', 'change']
11-02-17 23:12:05 openpathsampling.netcdfplus.util INFO     Ran load_indices in time 0.001828
11-02-17 23:12:05 openpathsampling.storage.storage INFO     Opening a

In [5]:
#! lazy
storage = paths.Storage('delete.nc', 'w')
storage.trajectories.save(ref_storage.trajectories[0])

11-02-17 23:12:08 openpathsampling.netcdfplus.netcdfplus INFO     Create new netCDF file 'delete.nc' for writing - deleting existing file
11-02-17 23:12:08 openpathsampling.netcdfplus.netcdfplus INFO     Setup netCDF file and create variables
11-02-17 23:12:08 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover'] and instatiated with ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover']
11-02-17 23:12:08 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['samples', 'movepath'] and instatiated with ['samples', 'movepath']
11-02-17 23:12:08 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['simulation', 'mccycle', 'previous', 'active', 'change'] and instatiated with ['simulation', 'mccycle', 'previous', 'active', 'change']
11-02-17 23:12:08 openpathsampling.netcdfplus.netcdfplus INFO     Initializi

221422574805173574613592029305145131028L

Import a PyEmma Coordinates Module

Using of pyemma featurizers or general other complex code requires a little trick to be storable. Since storing of code only works if we are not dependend on the context (scope) we need to wrap the construction of our featurizer in a function, that gets all it needs from the global scope as a parameter

In [6]:
def pyemma_generator(f):
    f.add_inverse_distances(f.pairs(f.select_Backbone()))

In [7]:
cv = paths.collectivevariable.PyEMMAFeaturizerCV(
    'pyemma', 
    pyemma_generator, 
    topology=ref_storage.snapshots[0].topology
).with_diskcache()

Now use this featurizer generating function to build a collective variable out of it. All we need for that is a name as usual, the generating function, the list of parameters - here only the topology and at best a test snapshot, a template.

In [8]:
cv(ref_storage.trajectories[0]);

Let's save it to the storage

In [9]:
#! lazy
print storage.save(cv)

(store.attributes[Attribute] : 1 object(s), 20, 40349013809773564465302081709801472176L)


and apply the featurizer to a trajectory

In [10]:
cv(storage.trajectories[0]);

Sync to make sure the cache is written to the netCDF file.

In [11]:
cv(storage.snapshots.all());

In [12]:
py_cv = storage.cvs['pyemma']

In [13]:
store = storage.stores['cv%d' % storage.idx(py_cv)]
nc_var = store.variables['value']

In [14]:
assert(nc_var.shape[1] == 15)
print nc_var.shape[1]

15


In [15]:
assert(nc_var.var_type == 'numpy.float32')
print nc_var.var_type

numpy.float32


In [16]:
#! ignore
print storage.variables['attributes_json'][:]

[ u'{"_cls":"PyEMMAFeaturizerCV","_dict":{"topology":{"_store":"topologies","_hex_uuid":"0xa6946fccc13c11e68416000000000002L"},"name":"pyemma","featurizer":{"_marshal":"YwEAAAABAAAAAwAAAEMAAABzIAAAAHwAAGoAAHwAAGoBAHwAAGoCAIMAAIMBAIMBAAFkAABTKAEAAABOKAMAAAB0FQAAAGFkZF9pbnZlcnNlX2Rpc3RhbmNlc3QFAAAAcGFpcnN0DwAAAHNlbGVjdF9CYWNrYm9uZSgBAAAAdAEAAABmKAAAAAAoAAAAAHMeAAAAPGlweXRob24taW5wdXQtNi00Nzc0ZDhlZGRkMDA+dBAAAABweWVtbWFfZ2VuZXJhdG9yAQAAAHMCAAAAAAE=","_module_vars":[],"_global_vars":[]},"kwargs":{}}}']


In [17]:
py_cv_idx = storage.idx(py_cv)
print py_cv_idx
py_emma_feat = storage.vars['attributes_json'][py_cv_idx]

0


In [18]:
erg = py_emma_feat(storage.snapshots);

In [19]:
#! lazy
print erg[:,2:4]

[[ 2.68972969  2.06547379]
 [ 2.66780734  2.04628825]
 [ 2.61396885  2.00944018]
 [ 2.61284137  2.00203228]
 [ 2.6829567   2.04546595]
 [ 2.73087001  2.06900215]
 [ 2.79744959  2.09469533]
 [ 2.79462361  2.09850192]
 [ 2.74107337  2.08592916]
 [ 2.75102925  2.09090185]]


In [20]:
storage.close()
ref_storage.close()

In [21]:
#! lazy
storage = paths.Storage('delete.nc', 'r')

11-02-17 23:12:18 openpathsampling.netcdfplus.netcdfplus INFO     Open existing netCDF file 'delete.nc' for reading - reading from existing file
11-02-17 23:12:18 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover'] and instatiated with ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover']
11-02-17 23:12:18 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['samples', 'movepath'] and instatiated with ['samples', 'movepath']
11-02-17 23:12:18 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['simulation', 'mccycle', 'previous', 'active', 'change'] and instatiated with ['simulation', 'mccycle', 'previous', 'active', 'change']
11-02-17 23:12:18 openpathsampling.netcdfplus.util INFO     Ran load_indices in time 0.000899


In [24]:
cv = storage.cvs[0]

Make sure that we get the same result

In [25]:
assert np.allclose(erg, cv(storage.snapshots))

In [26]:
storage.close()