# Computations
Compute the reference values that the pytest assertions are compared against.

In [1]:
import numpy as np
import jax.numpy as jnp
import os
import pickle

In [2]:
import sys
sys.path.append('../')  # since MDRefine is in a different folder

In [3]:
from MDRefine import load_data

## test for data_loading

### copy data and reduce them (less frames and observables)

just to test

#### load data and save as pickle

In [4]:
from MDRefine import load_data

In [5]:
infos = {}
infos['global'] = {'path_directory': 'DATA_test', 'system_names': ['AAAA', 'CAAU']}

for name in infos['global']['system_names']:
    infos[name] = {}
    infos[name]['g_exp'] = ['NOEs', ('uNOEs','<')]
    infos[name]['obs'] = ['NOEs', 'uNOEs']

infos['global']['temperature'] = 1 # namely, energies are in unit of k_B T (default value)

data = load_data(infos, stride=2)




loading data from directory...
loading  AAAA
loading  CAAU
done


In [6]:
with open('DATA_test/data_stride2.pkl', 'wb') as f:
    pickle.dump(vars(data), f)

### test load_data

compare output of load_data with the pre-loaded data object, which is stored as pickle

In [3]:
from MDRefine import load_data

In [4]:
infos = {}

# Firstly, define global properties, valid for all the systems:

infos['global'] = {'path_directory': 'DATA_test', 'system_names': ['AAAA', 'CAAU']}

# Then, define properties which are specific of each system, like experimental data and observables

for name in infos['global']['system_names']:
    infos[name] = {}
    
    # experimental observables (average and uncertainty), corresponding to 'file_name'.npy in DATA/system_name/g_exp/
    # uNOEs values are upper bounds, so specify '<' with ('uNOEs','<')
    infos[name]['g_exp'] = ['NOEs', ('uNOEs','<')]
    
    # observables from MD simulations, corresponding to 'file_name'.npy in DATA/system_name/observables/
    # they must correspond also to items of infos[name]['g_exp']
    infos[name]['obs'] = ['NOEs', 'uNOEs']

# If some properties are the same for all the systems, you can store them just once in infos['global']

infos['global']['temperature'] = 1 # namely, energies are in unit of k_B T (default value)
# (in this case, you could do this also for 'g_exp' and 'obs')

In [5]:
data = load_data(infos)



loading data from directory...
loading  AAAA
loading  CAAU
done


In [6]:
vars(data.mol['AAAA'])

{'temperature': 1,
 'gexp': {'NOEs': array([[7.74312661e-04, 4.56174227e-04],
         [8.17622013e-05, 7.09253186e-05]]),
  'uNOEs': array([[1.58193730e-04, 2.39877632e-05],
         [7.22476158e-05, 9.51458553e-06]])},
 'names': {'NOEs': array([["A1-H1'", 'A1-H8'],
         ["A1-H1'", 'A2-H8']], dtype='<U7'),
  'uNOEs': array([["A1-H1'", "A2-H4'"],
         ["A1-H1'", "A3-H1'"]], dtype='<U7')},
 'ref': {'NOEs': '=', 'uNOEs': '<'},
 'g': {'NOEs': memmap([[3.78438242e-04, 6.64434046e-05],
          [8.24084855e-04, 5.37090818e-05],
          [4.24279075e-04, 8.45648974e-05],
          [5.57313382e-04, 6.57341152e-05],
          [3.65299667e-04, 3.69220197e-06],
          [2.89047486e-03, 1.00485659e-05],
          [4.10240405e-04, 8.74714679e-05],
          [3.12228408e-03, 1.53735527e-05],
          [4.78437869e-03, 1.16904630e-05],
          [2.48920987e-04, 1.28376223e-06],
          [2.85325595e-03, 1.08458007e-05],
          [7.43237499e-04, 2.90249372e-05],
          [2.33285697e

In [31]:
with open('DATA_test/data_stride2.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

In [32]:
loaded_data

{'properties': <MDRefine.data_loading.datapropertiesclass at 0x7f6aa861d198>,
 'mol': {'AAAA': <MDRefine.data_loading.data_class at 0x7f6aa8604080>,
  'CAAU': <MDRefine.data_loading.data_class at 0x7f6aa8619a90>}}

In [8]:
# The pickle stores `vars(data)`; the per-system objects live under the 'mol' key
# (older pickles used 'sys'), so accept either layout instead of hard-coding 'sys'.
loaded_systems = loaded_data['mol'] if 'mol' in loaded_data else loaded_data['sys']

for s in infos['global']['system_names']:
    my_dict1 = vars(data.mol[s])
    my_dict2 = vars(loaded_systems[s])

    for k in my_dict1.keys():
        if k in ['gexp', 'names', 'g']:
            # compare the key sets first, so that an extra or missing observable
            # type cannot slip through unnoticed
            assert my_dict1[k].keys() == my_dict2[k].keys()
            # iterate over the keys of the attribute being compared
            # (not always those of `gexp`)
            for k2 in my_dict1[k].keys():
                assert np.array_equal(my_dict1[k][k2], my_dict2[k][k2])

In [15]:
for k2 in my_dict1['names'].keys():

    my_array1 = my_dict1['names'][k2]
    my_array2 = my_dict2['names'][k2]

    x1, y1 = np.shape(my_array1)
    x2, y2 = np.shape(my_array2)

    assert x1 == x2
    assert y1 == y2

    for ix in range(x1):
        for iy in range(y1):
            assert my_array1[ix][iy] == my_array2[ix][iy]

In [12]:
dict(my_dict1['names']['uNOEs'])

{"C1-H1'": "A2-H3'"}

In [55]:
vars(data.properties).keys()

dict_keys(['system_names'])

In [66]:
vars(loaded_data['properties'])

{'system_names': ['AAAA', 'CAAU']}

In [67]:
dir(loaded_data['properties'])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'system_names',
 'tot_n_experiments']

In [70]:
data.properties.system_names

['AAAA', 'CAAU']

In [7]:
data.properties.tot_n_experiments(data)

8

In [9]:
class my_data():
    """Minimal stand-in for a data object: it only exposes the per-system dictionary.

    The dictionary is reachable both as `mol` (the attribute filled in below) and
    as `sys` (the attribute name originally created here).
    """
    def __init__(self):
        self.mol = {}
        # alias kept so that code still reading `.sys` sees the same dictionary
        self.sys = self.mol

my_loaded_data = my_data()
# the unpickled dict keeps the systems under 'mol' (older pickles used 'sys')
loaded_systems = loaded_data['mol'] if 'mol' in loaded_data else loaded_data['sys']
my_loaded_data.mol['AAAA'] = loaded_systems['AAAA']
my_loaded_data.mol['CAAU'] = loaded_systems['CAAU']


In [10]:
loaded_data['properties'].tot_n_experiments(my_loaded_data)

8

In [74]:
# assert list(vars(data.properties).keys()) == list(vars(loaded_data['properties']).keys())
# The freshly loaded properties object and the unpickled one must expose the same attribute names.
assert dir(data.properties) == dir(loaded_data['properties'])

assert data.properties.system_names == loaded_data['properties'].system_names
# NOTE(review): `loaded_data` is indexed like a dict in this cell, whereas `tot_n_experiments`
# receives a data object on the left-hand side; passing the dict itself is presumably what
# raises the AttributeError recorded for this cell -- confirm, and pass an object that
# exposes the loaded systems as an attribute instead.
assert data.properties.tot_n_experiments(data) == loaded_data['properties'].tot_n_experiments(loaded_data)

AttributeError: 'dict' object has no attribute 'sys'

In [77]:
# loaded_data['properties'].tot_n_experiments(loaded_data)

loaded_data

{'_global_': <MDRefine.data_loading.data_global_class at 0x7f3eb86abcc0>,
 'sys': {'AAAA': <MDRefine.data_loading.data_class at 0x7f3eb86abc18>,
  'CAAU': <MDRefine.data_loading.data_class at 0x7f3eb86ab940>}}

In [59]:
data.properties.__dir__()

['system_names',
 '__module__',
 '__doc__',
 '__init__',
 'tot_n_experiments',
 '__dict__',
 '__weakref__',
 '__repr__',
 '__hash__',
 '__str__',
 '__getattribute__',
 '__setattr__',
 '__delattr__',
 '__lt__',
 '__le__',
 '__eq__',
 '__ne__',
 '__gt__',
 '__ge__',
 '__new__',
 '__reduce_ex__',
 '__reduce__',
 '__subclasshook__',
 '__init_subclass__',
 '__format__',
 '__sizeof__',
 '__dir__',
 '__class__']

In [12]:
assert vars(data.mol['AAAA']) == vars(loaded_dict['sys']['AAAA'])

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

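#### A plain `assert dict1 == dict2` does not work
because `==` between two dictionaries containing numpy arrays compares the arrays element-wise, and the truth value of the resulting array is ambiguous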

In [15]:
my_dict1 = {'a': np.array([1.5, 2.5]), 'b': np.array([1.2, 2.4, 3.6])}
my_dict2 = {'a': np.array([1.5, 2.5]), 'b': np.array([1.2, 2.4, 3.6])}

assert (my_dict1 == my_dict2).all()

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

this works:

In [16]:
assert (my_dict1['a'] == my_dict2['a']).all()

#### So, let's compare the two objects attribute by attribute

In [30]:
vars(data.mol['AAAA'])

{'temperature': 1,
 'gexp': {'NOEs': array([[7.74312661e-04, 4.56174227e-04],
         [8.17622013e-05, 7.09253186e-05]]),
  'uNOEs': array([[1.58193730e-04, 2.39877632e-05],
         [7.22476158e-05, 9.51458553e-06]])},
 'names': {'NOEs': array([["A1-H1'", 'A1-H8'],
         ["A1-H1'", 'A2-H8']], dtype='<U7'),
  'uNOEs': array([["A1-H1'", "A2-H4'"],
         ["A1-H1'", "A3-H1'"]], dtype='<U7')},
 'ref': {'NOEs': '=', 'uNOEs': '<'},
 'g': {'NOEs': memmap([[3.78438242e-04, 6.64434046e-05],
          [8.24084855e-04, 5.37090818e-05],
          [4.24279075e-04, 8.45648974e-05],
          [5.57313382e-04, 6.57341152e-05],
          [3.65299667e-04, 3.69220197e-06],
          [2.89047486e-03, 1.00485659e-05],
          [4.10240405e-04, 8.74714679e-05],
          [3.12228408e-03, 1.53735527e-05],
          [4.78437869e-03, 1.16904630e-05],
          [2.48920987e-04, 1.28376223e-06],
          [2.85325595e-03, 1.08458007e-05],
          [7.43237499e-04, 2.90249372e-05],
          [2.33285697e

In [28]:
vars(data.mol['AAAA']).keys()


dict_keys(['temperature', 'gexp', 'names', 'ref', 'g', 'weights', 'n_experiments', 'n_frames'])

In [35]:
my_dict1['n_experiments']

{'NOEs': 2, 'uNOEs': 2}

In [33]:
s = 'AAAA'

my_dict1 = vars(data.mol[s])
my_dict2 = vars(loaded_dict['sys'][s])

for k in my_dict1.keys():

    if k in ['temperature', 'ref', 'n_experiments', 'n_frames']:
        assert my_dict1[k] == my_dict2[k]

    elif k in ['gexp', 'names', 'g']:
        for k2 in data.mol[s].gexp.keys():
            assert (my_dict1[k][k2] == my_dict2[k][k2]).all()

    elif k in ['weights']:
        assert (my_dict1[k] == my_dict2[k]).all()



In [39]:
# both objects must expose the same attributes, in the same order
assert list(my_dict1.keys()) == list(my_dict2.keys())

In [41]:
my_dict1

{'temperature': 1,
 'gexp': {'NOEs': array([[7.74312661e-04, 4.56174227e-04],
         [8.17622013e-05, 7.09253186e-05]]),
  'uNOEs': array([[1.58193730e-04, 2.39877632e-05],
         [7.22476158e-05, 9.51458553e-06]])},
 'names': {'NOEs': array([["A1-H1'", 'A1-H8'],
         ["A1-H1'", 'A2-H8']], dtype='<U7'),
  'uNOEs': array([["A1-H1'", "A2-H4'"],
         ["A1-H1'", "A3-H1'"]], dtype='<U7')},
 'ref': {'NOEs': '=', 'uNOEs': '<'},
 'g': {'NOEs': memmap([[3.78438242e-04, 6.64434046e-05],
          [8.24084855e-04, 5.37090818e-05],
          [4.24279075e-04, 8.45648974e-05],
          [5.57313382e-04, 6.57341152e-05],
          [3.65299667e-04, 3.69220197e-06],
          [2.89047486e-03, 1.00485659e-05],
          [4.10240405e-04, 8.74714679e-05],
          [3.12228408e-03, 1.53735527e-05],
          [4.78437869e-03, 1.16904630e-05],
          [2.48920987e-04, 1.28376223e-06],
          [2.85325595e-03, 1.08458007e-05],
          [7.43237499e-04, 2.90249372e-05],
          [2.33285697e

### include also forward model and force-field correction

save complete pickle with stride=2 as data_complete_stride2.pkl

In [7]:
infos = {'global': {
    'path_directory': 'DATA_test',
    'system_names': ['AAAA', 'CAAU'],
    'g_exp': ['backbone1_gamma_3J', 'backbone2_beta_epsilon_3J', 'sugar_3J', 'NOEs' , ('uNOEs', '<')],
    'forward_qs': ['backbone1_gamma', 'backbone2_beta_epsilon','sugar'],
    'obs': ['NOEs', 'uNOEs'],
    'forward_coeffs': 'original_fm_coeffs'}}

stride = 2

In [8]:
def forward_model_fun(fm_coeffs, forward_qs, selected_obs=None):
    """Map the forward quantities to observables with a quadratic polynomial in their cosine.

    Parameters
    ----------
    fm_coeffs : indexable with at least 9 entries
        Coefficients (A, B, C) for 'backbone1_gamma' (entries 0-2),
        'backbone2_beta_epsilon' (entries 3-5) and 'sugar' (entries 6-8).
    forward_qs : dict
        Arrays whose cosine is taken, keyed by 'backbone1_gamma',
        'backbone2_beta_epsilon' and 'sugar'.
    selected_obs : dict or None
        If given, `selected_obs[name + '_3J']` holds the column indices to keep
        for each `name` in `forward_qs`.

    Returns
    -------
    dict
        `A*cos**2 + B*cos + C` for each of the three quantities, keyed by the
        name of the quantity followed by '_3J'.
    """
    # the cosine is the only quantity entering the forward model
    cosines = {name: jnp.cos(values) for name, values in forward_qs.items()}

    # keep only the requested columns, if a selection is given
    if selected_obs is not None:
        cosines = {name: cos[:, selected_obs[name + '_3J']] for name, cos in cosines.items()}

    def quadratic_in_cos(name, first):
        # A*cos^2 + B*cos + C with (A, B, C) = fm_coeffs[first:first + 3]
        cos = cosines[name]
        return fm_coeffs[first]*cos**2 + fm_coeffs[first + 1]*cos + fm_coeffs[first + 2]

    return {
        'backbone1_gamma_3J': quadratic_in_cos('backbone1_gamma', 0),
        'backbone2_beta_epsilon_3J': quadratic_in_cos('backbone2_beta_epsilon', 3),
        'sugar_3J': quadratic_in_cos('sugar', 6)}

In [9]:
infos['global']['forward_model'] = forward_model_fun

In [10]:
import jax.numpy as jnp

In [11]:
infos['global']['names_ff_pars'] = ['sin alpha', 'cos alpha']

def ff_correction(pars, f):
    """Return `pars` contracted with column sums of `f`.

    Columns 0, 1, 2 of `f` are summed into the first term and columns 6, 7, 8
    into the second one; the result is `pars @ summed.T`, i.e. one value per
    row of `f`.
    """
    column_pairs = ([0, 6], [1, 7], [2, 8])

    summed = f[:, column_pairs[0]]
    for pair in column_pairs[1:]:
        summed = summed + f[:, pair]

    return jnp.matmul(pars, summed.T)

def ff_correction_hexamers(pars, f):
    """Return `pars` contracted with column sums of `f`.

    Columns 0-4 of `f` are summed into the first term and columns 10-14 into
    the second one; the result is `pars @ summed.T`, i.e. one value per row
    of `f`.
    """
    n_terms = 5

    summed = f[:, [0, 10]]
    for shift in range(1, n_terms):
        summed = summed + f[:, [shift, 10 + shift]]

    return jnp.matmul(pars, summed.T)

infos['global']['ff_correction'] = ff_correction

In [12]:
data = load_data(infos, stride=stride)

loading data from directory...
loading  AAAA
loading  CAAU
done


In [14]:
del data.mol['AAAA'].forward_model
del data.mol['CAAU'].forward_model

In [15]:
del data.mol['AAAA'].ff_correction
del data.mol['CAAU'].ff_correction

In [57]:
def my_forward_model(a, b, c=None):
    """Call `infos['global']['forward_model']`, tolerating models without a third argument.

    The forward model is first called as `forward_model(a, b, c)`. If that call
    fails and `c` is None, it is retried as `forward_model(a, b)`.

    Raises
    ------
    AssertionError
        If the three-argument call fails while `c` is not None.
    """
    try:
        out = infos['global']['forward_model'](a, b, c)
    except Exception as err:
        # `except Exception` rather than a bare `except:`, so that
        # KeyboardInterrupt and SystemExit are never swallowed here
        if c is not None:
            # raised explicitly (not via `assert`) so the check survives `python -O`
            raise AssertionError('you have selected_obs but the forward model is not suitably defined!') from err
        out = infos['global']['forward_model'](a, b)
    return out

data.mol['AAAA'].forward_model = my_forward_model  # info['forward_model']
data.mol['CAAU'].forward_model = my_forward_model  # info['forward_model']

In [27]:
with open('DATA_test/data_complete_stride2.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

In [73]:
dir(loaded_data['_global_'])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'forward_coeffs_0',
 'names_ff_pars',
 'system_names',
 'tot_n_experiments']

In [69]:
list(loaded_data['_global_'].forward_coeffs_0)
list(loaded_data['_global_'].forward_coeffs_0.keys())

['A_gamma',
 'B_gamma',
 'C_gamma',
 'A_beta',
 'B_beta',
 'C_beta',
 'A_sugar',
 'B_sugar',
 'C_sugar']

In [67]:
# comparing two lists with `==` already yields a single bool, so there is no `.all()` to call
assert list(loaded_data['properties'].forward_coeffs_0) == list(data.properties.forward_coeffs_0)
assert list(loaded_data['properties'].forward_coeffs_0.keys()) == list(data.properties.forward_coeffs_0.keys())

In [70]:
loaded_data['properties'].names_ff_pars

['sin alpha', 'cos alpha']

### test on data for alchemical calculations

In [17]:
infos = {'global': {'temperature': 2.476, 'path_directory': 'DATA_test'}}

cycle_names = ['A1']

names = {}
for name in cycle_names:
    names[name] = []
    for string in ['AS','AD','MS','MD']:
        names[name].append((name + '_' + string))

infos['global']['cycle_names'] = names
infos['global']['system_names'] = [s2 for s in list(names.values()) for s2 in s]

# force-field correction terms

n_charges = 5

infos['global']['names_ff_pars'] = ['DQ %i' % (i+1) for i in range(n_charges)] + ['cos eta']

columns = []
for i in range(n_charges):
    columns.append('DQ %i' % (i+1))
    columns.append('DQ %i%i' % (i+1,i+1))
for i in range(n_charges):
    for j in range(i+1,n_charges):
        columns.append('DQ %i%i' % (i+1,j+1))
columns.append('cos eta')

# only methylated (M) systems have a force-field correction

for name in infos['global']['system_names']: infos[name] = {}

for name in infos['global']['cycle_names'].keys():
    for s in ['D','S']:
        infos[name + '_M' + s]['ff_terms'] = columns

In [18]:
names_charges = ['N6', 'H61', 'N1', 'C10', 'H101/2/3']

In [19]:
def ff_correction(phi, ff_terms):
    """Contract `ff_terms` with a polynomial vector built from `phi`.

    The vector holds, in this order: `phi[i]` and `phi[i]**2` for each of the
    first 5 entries of `phi`, the products `phi[i]*phi[j]` for `i < j` among
    those 5 entries, and finally `-phi[-1]`. The result is
    `ff_terms @ phi_vector`.
    """
    n_charges = 5

    linear_and_squared = [term for i in range(n_charges) for term in (phi[i], phi[i]**2)]
    cross_products = [phi[i]*phi[j] for i in range(n_charges) for j in range(i + 1, n_charges)]
    phi_vector = jnp.array(linear_and_squared + cross_products + [-phi[-1]])

    return jnp.matmul(ff_terms, phi_vector)

In [20]:
for k in infos['global']['system_names']:
    if k[-2] == 'M': 
        infos[k]['ff_correction'] = ff_correction

In [21]:
data = load_data(infos, stride=2)

loading data from directory...
loading  A1_AS
loading  A1_AD
loading  A1_MS
loading  A1_MD
done


In [22]:
del data.mol['A1_MS'].ff_correction
del data.mol['A1_MD'].ff_correction

In [24]:
with open('DATA_test/data_alchemical_stride2.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

In [25]:
loaded_data

{'properties': <MDRefine.data_loading.datapropertiesclass at 0x7f6aaaf6cd30>,
 'mol': {'A1_AS': <MDRefine.data_loading.data_class at 0x7f6aa8619668>,
  'A1_AD': <MDRefine.data_loading.data_class at 0x7f6aa8619be0>,
  'A1_MS': <MDRefine.data_loading.data_class at 0x7f6aa8619e80>,
  'A1_MD': <MDRefine.data_loading.data_class at 0x7f6aa8619e10>},
 'cycle': {'A1': <MDRefine.data_loading.data_cycle_class at 0x7f6aa8619c50>}}

In [105]:
vars(data.cycle['A1'])

{'gexp_DDG': [6.3, 0.5], 'temperature': 2.476}

In [104]:
data.cycle

{'A1': <MDRefine.data_loading.data_cycle_class at 0x7f32c0554390>}

In [103]:
loaded_data['cycle']['A1'].temperature

loaded_data['cycle']['A1'].gexp_DDG

[6.3, 0.5]

## test for loss_and_minimizer

### compute_new_weights, compute_D_KL and l2_regularization

In [4]:
from MDRefine import compute_new_weights, compute_D_KL, l2_regularization

In [5]:
w0 = np.array([0.5, 0.5])
correction = np.array([0, 1])

w, logZ = compute_new_weights(w0, correction)

print(w, logZ)



[0.73105858 0.26894142] -0.3798854930417225


In [6]:
compute_D_KL(weights_P=w, correction_ff=1/2*correction, temperature=2, logZ_P=logZ)

DeviceArray(0.31265014, dtype=float64)

In [7]:
pars = np.array([1.2, 1.5])

l2_regularization(pars)

(DeviceArray(3.69, dtype=float64), array([2.4, 3. ]))

### test for compute_DeltaDeltaG_terms

In [18]:
from MDRefine import load_data

In [20]:
infos = {'global': {'temperature': 2.476, 'path_directory': 'DATA_test'}}

cycle_names = ['A1']

names = {}
for name in cycle_names:
    names[name] = []
    for string in ['AS','AD','MS','MD']:
        names[name].append((name + '_' + string))

infos['global']['cycle_names'] = names
infos['global']['system_names'] = [s2 for s in list(names.values()) for s2 in s]

# force-field correction terms

n_charges = 5

infos['global']['names_ff_pars'] = ['DQ %i' % (i+1) for i in range(n_charges)] + ['cos eta']

columns = []
for i in range(n_charges):
    columns.append('DQ %i' % (i+1))
    columns.append('DQ %i%i' % (i+1,i+1))
for i in range(n_charges):
    for j in range(i+1,n_charges):
        columns.append('DQ %i%i' % (i+1,j+1))
columns.append('cos eta')

# only methylated (M) systems have a force-field correction

for name in infos['global']['system_names']: infos[name] = {}

for name in infos['global']['cycle_names'].keys():
    for s in ['D', 'S']:
        infos[name + '_M' + s]['ff_terms'] = columns

names_charges = ['N6', 'H61', 'N1', 'C10', 'H101/2/3']

def ff_correction(phi, ff_terms):
    """Contract `ff_terms` with a polynomial vector built from `phi`.

    The vector holds, in this order: `phi[i]` and `phi[i]**2` for each of the
    first 5 entries of `phi`, the products `phi[i]*phi[j]` for `i < j` among
    those 5 entries, and finally `-phi[-1]`. The result is
    `ff_terms @ phi_vector`.
    """
    n_charges = 5

    linear_and_squared = [term for i in range(n_charges) for term in (phi[i], phi[i]**2)]
    cross_products = [phi[i]*phi[j] for i in range(n_charges) for j in range(i + 1, n_charges)]
    phi_vector = jnp.array(linear_and_squared + cross_products + [-phi[-1]])

    return jnp.matmul(ff_terms, phi_vector)

for k in infos['global']['system_names']:
    if k[-2] == 'M': 
        infos[k]['ff_correction'] = ff_correction

In [22]:
data = load_data(infos)

loading data from directory...
loading  A1_AS
loading  A1_AD
loading  A1_MS
loading  A1_MD
done


In [23]:
from MDRefine import compute_DeltaDeltaG_terms

In [27]:
out2 = compute_DeltaDeltaG_terms(data, logZ_P={'A1_MS': 1., 'A1_MD': 1.5})

In [30]:
out = ({'A1_MS': 255.7655459570046, 'A1_MD': 256.2379948027602},
 {'A1': 135.84140982133923},
 67.92070491066961)

In [28]:
assert out2 == out

In [None]:
# Compare the computed terms with the reference values component by component, up to
# floating-point tolerance (a bare `assert out2[0][k]` would only check truthiness).
assert out2[0].keys() == out[0].keys()

for k in out2[0].keys():
    assert np.isclose(out2[0][k], out[0][k])

assert out2[1].keys() == out[1].keys()

for k in out2[1].keys():
    assert np.isclose(out2[1][k], out[1][k])

assert np.isclose(out2[2], out[2])

### load the data before the following steps

In [8]:
infos = {'global': {
    'path_directory': 'DATA_test',
    'system_names': ['AAAA', 'CAAU'],
    'g_exp': ['backbone1_gamma_3J', 'backbone2_beta_epsilon_3J', 'sugar_3J', 'NOEs' , ('uNOEs', '<')],
    'forward_qs': ['backbone1_gamma', 'backbone2_beta_epsilon','sugar'],
    'obs': ['NOEs', 'uNOEs'],
    'forward_coeffs': 'original_fm_coeffs'}}

def forward_model_fun(fm_coeffs, forward_qs, selected_obs=None):
    """Map the forward quantities to observables with a quadratic polynomial in their cosine.

    Parameters
    ----------
    fm_coeffs : indexable with at least 9 entries
        Coefficients (A, B, C) for 'backbone1_gamma' (entries 0-2),
        'backbone2_beta_epsilon' (entries 3-5) and 'sugar' (entries 6-8).
    forward_qs : dict
        Arrays whose cosine is taken, keyed by 'backbone1_gamma',
        'backbone2_beta_epsilon' and 'sugar'.
    selected_obs : dict or None
        If given, `selected_obs[name + '_3J']` holds the column indices to keep
        for each `name` in `forward_qs`.

    Returns
    -------
    dict
        `A*cos**2 + B*cos + C` for each of the three quantities, keyed by the
        name of the quantity followed by '_3J'.
    """
    # the cosine is the only quantity entering the forward model
    cosines = {name: jnp.cos(values) for name, values in forward_qs.items()}

    # keep only the requested columns, if a selection is given
    if selected_obs is not None:
        cosines = {name: cos[:, selected_obs[name + '_3J']] for name, cos in cosines.items()}

    def quadratic_in_cos(name, first):
        # A*cos^2 + B*cos + C with (A, B, C) = fm_coeffs[first:first + 3]
        cos = cosines[name]
        return fm_coeffs[first]*cos**2 + fm_coeffs[first + 1]*cos + fm_coeffs[first + 2]

    return {
        'backbone1_gamma_3J': quadratic_in_cos('backbone1_gamma', 0),
        'backbone2_beta_epsilon_3J': quadratic_in_cos('backbone2_beta_epsilon', 3),
        'sugar_3J': quadratic_in_cos('sugar', 6)}

infos['global']['forward_model'] = forward_model_fun
infos['global']['names_ff_pars'] = ['sin alpha', 'cos alpha']

def ff_correction(pars, f):
    """Return `pars` contracted with column sums of `f`.

    Columns 0, 1, 2 of `f` are summed into the first term and columns 6, 7, 8
    into the second one; the result is `pars @ summed.T`, i.e. one value per
    row of `f`.
    """
    column_pairs = ([0, 6], [1, 7], [2, 8])

    summed = f[:, column_pairs[0]]
    for pair in column_pairs[1:]:
        summed = summed + f[:, pair]

    return jnp.matmul(pars, summed.T)

infos['global']['ff_correction'] = ff_correction

data = load_data(infos)

loading data from directory...
loading  AAAA
loading  CAAU
done


### test for compute_chi2

In [9]:
from MDRefine import compute_chi2

In [10]:
out = compute_chi2(data.mol['AAAA'].ref, data.mol['AAAA'].weights, data.mol['AAAA'].g, data.mol['AAAA'].gexp)

In [42]:
out

({'backbone1_gamma_3J': DeviceArray([2.2820567 , 2.37008063], dtype=float64),
  'backbone2_beta_epsilon_3J': DeviceArray([6.39268088, 3.86126331], dtype=float64),
  'sugar_3J': DeviceArray([3.71089481, 4.77456358], dtype=float64),
  'NOEs': DeviceArray([1.87342536e-03, 4.30196379e-05], dtype=float64),
  'uNOEs': DeviceArray([1.33028693e-05, 5.82998086e-06], dtype=float64)},
 {'backbone1_gamma_3J': DeviceArray(1.08493846, dtype=float64),
  'backbone2_beta_epsilon_3J': DeviceArray(1.88280674, dtype=float64),
  'sugar_3J': DeviceArray(2.14070494, dtype=float64),
  'NOEs': DeviceArray(6.1036602, dtype=float64),
  'uNOEs': DeviceArray(0., dtype=float64)},
 {'backbone1_gamma_3J': DeviceArray([-1.0119622 ,  0.24672042], dtype=float64),
  'backbone2_beta_epsilon_3J': DeviceArray([-1.37154608,  0.0408422 ], dtype=float64),
  'sugar_3J': DeviceArray([1.14059654, 0.91637572], dtype=float64),
  'NOEs': DeviceArray([ 2.40941428, -0.54624448], dtype=float64),
  'uNOEs': DeviceArray([0., 0.], dtype=f

In [43]:
out_test = ({'backbone1_gamma_3J': np.array([2.2820567 , 2.37008063]),
  'backbone2_beta_epsilon_3J': np.array([6.39268088, 3.86126331]),
  'sugar_3J': np.array([3.71089481, 4.77456358]),
  'NOEs': np.array([1.87342536e-03, 4.30196379e-05]),
  'uNOEs': np.array([1.33028693e-05, 5.82998086e-06])},
 {'backbone1_gamma_3J': np.array(1.08493846),
  'backbone2_beta_epsilon_3J': np.array(1.88280674),
  'sugar_3J': np.array(2.14070494),
  'NOEs': np.array(6.1036602),
  'uNOEs': np.array(0.)},
 {'backbone1_gamma_3J': np.array([-1.0119622 ,  0.24672042]),
  'backbone2_beta_epsilon_3J': np.array([-1.37154608,  0.0408422 ]),
  'sugar_3J': np.array([1.14059654, 0.91637572]),
  'NOEs': np.array([ 2.40941428, -0.54624448]),
  'uNOEs': np.array([0., 0.])},
 np.array(11.21211034))

### test for gamma_function

In [9]:
from MDRefine import gamma_function

In [28]:
flatten_g = np.hstack([data.mol['AAAA'].g[k] for k in data.mol['AAAA'].n_experiments.keys()])
flatten_gexp = np.vstack([data.mol['AAAA'].gexp[k] for k in data.mol['AAAA'].n_experiments.keys()])


In [50]:
flatten_gexp.shape

(10, 2)

In [31]:
lambdas = np.array([0.02276649, 0.92055914, 0.54435632, 0.28184011, 0.75414035,
       0.75551687, 0.47772936, 0.8749338, 0.7059772 , 0.96640172])

In [32]:
alpha = 1.5

out = gamma_function(lambdas, flatten_g, flatten_gexp, data.mol['AAAA'].weights, alpha, True)

In [33]:
out[0]

DeviceArray(6.27214047, dtype=float64)

In [63]:
out

(DeviceArray(6.27231308, dtype=float64),
 array([ 3.34791024e-01,  3.63254555e+00,  6.39012045e+00,  1.29484769e+00,
         4.05246153e+00,  1.92475534e+00, -8.35131574e-06,  5.11595544e-05,
         1.48046374e-04,  7.04939569e-05]),
 DeviceArray([3.54204586e+00, 1.47434153e+00, 3.89708214e+00,
              3.45636268e+00, 4.92762134e-01, 4.02511408e+00,
              7.82813097e-04, 3.06092488e-05, 1.01479652e-05,
              1.75379015e-06], dtype=float64))

In [None]:
out_test = ((6.27231308),
 np.array([ 3.34791024e-01,  3.63254555e+00,  6.39012045e+00,  1.29484769e+00,
         4.05246153e+00,  1.92475534e+00, -8.35131574e-06,  5.11595544e-05,
         1.48046374e-04,  7.04939569e-05]),
 np.array([3.54204586e+00, 1.47434153e+00, 3.89708214e+00,
              3.45636268e+00, 4.92762134e-01, 4.02511408e+00,
              7.82813097e-04, 3.06092488e-05, 1.01479652e-05,
              1.75379015e-06]))

### test for loss_function

In [6]:
from MDRefine import loss_function, minimizer

In [7]:
alpha = 1.5

In [8]:
# beta = gamma = +infinity; `np.inf` is used because the `np.infty` alias
# was removed in NumPy 2.0
beta = +np.inf
gamma = +np.inf

# zero-dimensional array
pars_ff_fm = np.zeros([])

regularization = None

In [8]:
# Collect the comparison symbols stored in `ref` for every observable type of every system.
my_list = []
for k in data.properties.system_names:
    my_list = my_list + list(data.mol[k].ref.values())

# Bounds are built only if at least one observable type is an inequality ('<', '>' or '><');
# otherwise `bounds` is None.
if ('>' in my_list) or ('<' in my_list) or ('><' in my_list):

    # bounds[name_sys] is a flat list with one (lower, upper) pair per column of g[name_type],
    # concatenated in the order of `n_experiments.keys()`
    bounds = {}

    for name_sys in data.properties.system_names:
        bounds[name_sys] = []
        for name_type in data.mol[name_sys].n_experiments.keys():
            if name_type in data.mol[name_sys].ref.keys():
                if data.mol[name_sys].ref[name_type] == '=':
                    # equality: unbounded on both sides
                    bounds[name_sys] = bounds[name_sys] + [(-np.inf, +np.inf)]*data.mol[name_sys].g[name_type].shape[1]
                elif data.mol[name_sys].ref[name_type] == '<':
                    # '<': non-negative values only
                    bounds[name_sys] = bounds[name_sys] + [(0, +np.inf)]*data.mol[name_sys].g[name_type].shape[1]
                elif data.mol[name_sys].ref[name_type] == '>':
                    # '>': non-positive values only
                    bounds[name_sys] = bounds[name_sys] + [(-np.inf, 0)]*data.mol[name_sys].g[name_type].shape[1]
            # name_type is not a key of `ref`: drop its last 6 characters and look that name up
            # (presumably a suffix such as ' LOWER'/' UPPER', cf. the commented-out line below -- TODO confirm)
            elif data.mol[name_sys].ref[name_type[:-6]] == '><':
                bounds[name_sys] = bounds[name_sys] + [(-np.inf, 0)]*data.mol[name_sys].g[name_type].shape[1]
                # bounds = bounds + [[0,+np.inf]]*data.g[name_sys][name_type+' LOWER'].shape[1]
else:
    bounds = None

In [27]:
data.mol['AAAA'].gexp

{'backbone1_gamma_3J': array([[3.8, 1.5],
        [2. , 1.5]]),
 'backbone2_beta_epsilon_3J': array([[8.45, 1.5 ],
        [3.8 , 1.5 ]]),
 'sugar_3J': array([[2. , 1.5],
        [3.4, 1.5]]),
 'NOEs': array([[7.74312661e-04, 4.56174227e-04],
        [8.17622013e-05, 7.09253186e-05]])}

In [21]:
data.mol['AAAA'].gexp

{'backbone1_gamma_3J': array([[3.8, 1.5],
        [2. , 1.5]]),
 'backbone2_beta_epsilon_3J': array([[8.45, 1.5 ],
        [3.8 , 1.5 ]]),
 'sugar_3J': array([[2. , 1.5],
        [3.4, 1.5]]),
 'NOEs': array([[7.74312661e-04, 4.56174227e-04],
        [8.17622013e-05, 7.09253186e-05]])}

In [8]:
out = loss_function(pars_ff_fm, data, regularization, alpha)

out_test = 0.9795744972945789

assert out == out_test

AssertionError: 

In [26]:
lambdas

NameError: name 'lambdas' is not defined

In [19]:
out = loss_function(None, data, regularization, alpha, if_save=True)  # , bounds=bounds)

vars(out)

{'loss': 1.2945948672167045,
 'minis': {'AAAA':       fun: -0.6537553044397795
   hess_inv: array([[1, 0, 0, 0, 0, 0, 0, 0],
         [0, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 1]])
        jac: array([ 1.40092771e-04, -1.47990682e-04,  1.81891425e-04, -1.30458846e-04,
         -2.28204309e-04, -4.49754484e-04, -3.88718328e-08,  5.60720698e-09])
    message: 'Optimization terminated successfully.'
       nfev: 3
        nit: 1
       njev: 3
     status: 0
    success: True
          x: array([-1.43841397e-01, -3.74331000e-03, -1.41605966e-01, -3.06426301e-02,
         -1.62745831e-02,  2.46488051e-01,  3.89578404e+02, -3.17660021e+03]),
  'CAAU':       fun: -0.2093079403713568
   hess_inv: array([[ 9.73604194e-01,  2.97885537e-01, -2.14022936e-02,
           7.84921513e-03, -1.49685433e-02, -4.01655

In [10]:
print(out.loss, out.loss_explicit)

0.9795742646405314 3.071400229762583


In [14]:
vars(out).keys()

dict_keys(['loss', 'minis', 'loss_explicit', 'D_KL_alpha', 'abs_difference', 'av_g', 'chi2', 'logZ_new', 'weights_new'])

In [16]:
out.minis['AAAA'].x

array([-1.45173008e-01, -1.80978387e-02, -1.29233238e-01, -5.89017852e-03,
        4.91468556e-02,  2.67252544e-01,  3.04451660e-03, -1.29453418e-04])

In [29]:
my_lambdas = out.minis['AAAA'].x

my_lambdas

array([ 0.29539736,  0.0079259 ,  4.77419326,  0.31817716,  0.13310911,
       -4.40441813,  1.02989413, -0.23891025])

In [30]:
out = loss_function(None, data, regularization, alpha, if_save=True, fixed_lambdas=my_lambdas)

vars(out)

TypeError: dot_general requires contracting dimensions to have the same shape, got [8] and [0].

In [27]:
my_lambdas = out.min_lambdas

In [28]:
print(out.abs_difference)

{'AAAA': DeviceArray(8.3641795e-08, dtype=float64), 'CAAU': DeviceArray(6.33082126e-07, dtype=float64)}


In [23]:
print(out.loss, out.loss_explicit)

1.2945948672167045 1.294595583940625


In [17]:
vars(data.mol['AAAA'])

{'temperature': 1.0,
 'gexp': {'backbone1_gamma_3J': array([[3.8, 1.5],
         [2. , 1.5]]),
  'backbone2_beta_epsilon_3J': array([[8.45, 1.5 ],
         [3.8 , 1.5 ]]),
  'sugar_3J': array([[2. , 1.5],
         [3.4, 1.5]]),
  'NOEs': array([[7.74312661e-04, 4.56174227e-04],
         [8.17622013e-05, 7.09253186e-05]])},
 'names': {'backbone1_gamma_3J': array([['0-1H5H4'],
         ['0-2H5H4']], dtype='<U7'),
  'backbone2_beta_epsilon_3J': array([['0-H3P'],
         ['1-1H5P']], dtype='<U6'),
  'sugar_3J': array([['0-H1H2'],
         ['0-H2H3']], dtype='<U6'),
  'NOEs': array([["A1-H1'", 'A1-H8'],
         ["A1-H1'", 'A2-H8']], dtype='<U7')},
 'ref': {'backbone1_gamma_3J': '=',
  'backbone2_beta_epsilon_3J': '=',
  'sugar_3J': '=',
  'NOEs': '='},
 'g': {'NOEs': memmap([[3.78438242e-04, 6.64434046e-05],
          [8.24084855e-04, 5.37090818e-05],
          [4.24279075e-04, 8.45648974e-05],
          [5.57313382e-04, 6.57341152e-05],
          [3.65299667e-04, 3.69220197e-06],
       

In [22]:
print(out.loss, out.loss_explicit)

1.2945948672167045 1.294595583940625


In [17]:
lambdas

NameError: name 'lambdas' is not defined

In [26]:
out = minimizer(data, alpha=1.5)

In [19]:
vars(out).keys()

dict_keys(['loss', 'min_lambdas', 'minis', 'time', 'D_KL_alpha', 'abs_difference', 'av_g', 'chi2', 'logZ_new', 'weights_new'])

In [20]:
out.min_lambdas

{'AAAA': {'backbone1_gamma_3J': DeviceArray([-0.14383808, -0.00373141], dtype=float64),
  'backbone2_beta_epsilon_3J': DeviceArray([-0.14171542, -0.03066674], dtype=float64),
  'sugar_3J': DeviceArray([-0.0162459 ,  0.24643319], dtype=float64),
  'NOEs': DeviceArray([  389.57840425, -3176.60020902], dtype=float64)},
 'CAAU': {'backbone1_gamma_3J': DeviceArray([0.11518549, 0.00292463], dtype=float64),
  'backbone2_beta_epsilon_3J': DeviceArray([-0.02531414, -0.03830517], dtype=float64),
  'sugar_3J': DeviceArray([0.18129277, 0.01856438], dtype=float64),
  'NOEs': DeviceArray([  178.06783987, -1665.02939879], dtype=float64)}}

In [20]:
out.abs_difference

{'AAAA': DeviceArray(4.63608732e-07, dtype=float64),
 'CAAU': DeviceArray(4.80920487e-06, dtype=float64)}

In [24]:
print(out.abs_difference)

{'AAAA': DeviceArray(1.66750715, dtype=float64), 'CAAU': DeviceArray(0.42253844, dtype=float64)}


In [13]:
vars(out).keys()

dict_keys(['loss', 'minis', 'loss_explicit', 'D_KL_alpha', 'abs_difference', 'av_g', 'chi2', 'logZ_new', 'weights_new'])

In [15]:
out.loss

for k in out.minis.keys():
    out.minis[k].fun
    out.minis[k].jac # array
    out.minis[k].hess_inv # array
    out.minis[k].nfev
    out.minis[k].status
    out.minis[k].status # boolean
    out.minis[k].x # array

out.loss_explicit
out.D_KL_alpha # dict
out.abs_difference # dict




0.9795744972945789

In [38]:
out.abs_difference

{'AAAA': DeviceArray(1.66750701, dtype=float64),
 'CAAU': DeviceArray(0.42432127, dtype=float64)}

In [39]:
out.av_g

{'AAAA': {'backbone1_gamma_3J': DeviceArray([3.30983059, 1.93880851], dtype=float64),
  'backbone2_beta_epsilon_3J': DeviceArray([8.01428114, 3.78043748], dtype=float64),
  'sugar_3J': DeviceArray([2.1655801 , 4.30178665], dtype=float64),
  'NOEs': DeviceArray([1.57755869e-03, 4.74169667e-05], dtype=float64),
  'uNOEs': DeviceArray([1.27004099e-05, 2.81632871e-06], dtype=float64)},
 'CAAU': {'backbone1_gamma_3J': DeviceArray([1.99908652, 3.76260489], dtype=float64),
  'backbone2_beta_epsilon_3J': DeviceArray([3.60637195, 2.07788195], dtype=float64),
  'sugar_3J': DeviceArray([1.60420163, 4.59034891], dtype=float64),
  'NOEs': DeviceArray([6.81823468e-04, 2.89600933e-05], dtype=float64),
  'uNOEs': DeviceArray([4.43725944e-06, 1.22278801e-05], dtype=float64)}}

In [24]:
out.minis['AAAA'].keys()

dict_keys(['fun', 'jac', 'hess_inv', 'nfev', 'njev', 'status', 'success', 'message', 'x', 'nit'])

In [32]:
out.minis['AAAA'].x

array([-1.45172739e-01, -1.80982654e-02, -1.29232939e-01, -5.88983853e-03,
        4.91465321e-02,  2.67252139e-01,  3.04496095e-03, -1.29472416e-04,
       -5.47116642e-04, -2.60681573e-04])

### test for minimizer

In [11]:
from MDRefine import minimizer

In [12]:
# Run `minimizer` on the loaded `data` with alpha=1.5, leaving every other argument at its default.
result = minimizer(data, alpha=1.5)

In [9]:
# List the attribute names stored on the `result` object returned by `minimizer`.
vars(result).keys()

dict_keys(['loss', 'min_lambdas', 'minis', 'time', 'D_KL_alpha', 'abs_difference', 'av_g', 'chi2', 'logZ_new', 'weights_new'])

Also save the output of `scipy.optimize.minimize` as a plain dict, in order to avoid warnings.

In [4]:
# Load the reference result stored as a pickle.
# The context manager closes the file handle, which the bare `open(...)` call left open.
# NOTE: pickle can execute arbitrary code on load - only use it on trusted, locally generated files.
with open('DATA_test/result1.pkl', 'rb') as f:
    test_result = pickle.load(f)

In [5]:
# Convert the minimisation record of each system into a plain dict.
for system_name in ('AAAA', 'CAAU'):
    test_result['minis'][system_name] = dict(test_result['minis'][system_name])

In [11]:
# Load the reference result stored as a pickle.
# The context manager closes the file handle, which the bare `open(...)` call left open.
# NOTE: pickle can execute arbitrary code on load - only use it on trusted, locally generated files.
with open('DATA_test/result1.pkl', 'rb') as f:
    loaded_result = pickle.load(f)

### test for select_traintest

In [5]:
from MDRefine import minimizer, select_traintest

In [6]:
def forward_model_regularization(coeffs, coeffs_0):
    """Quadratic penalty on the deviation of `coeffs` from the reference values `coeffs_0`.

    `coeffs` is indexed by position (0..8) and is read as three consecutive (A, B, C)
    triplets, for 'gamma', 'beta' and 'sugar' respectively; `coeffs_0` is indexed by
    keys such as 'A_gamma', 'B_gamma', 'C_gamma'.

    For each triplet, with dX = coeffs[...] - coeffs_0['X_<label>'], the contribution is

        3/8*dA**2 + 1/2*dB**2 + dC**2 + dA*dC

    and the function returns the sum of the three contributions.
    """
    terms = []

    for group, label in enumerate(('gamma', 'beta', 'sugar')):
        first = 3*group

        delta_a = coeffs[first] - coeffs_0['A_' + label]
        delta_b = coeffs[first + 1] - coeffs_0['B_' + label]
        delta_c = coeffs[first + 2] - coeffs_0['C_' + label]

        terms += [3/8*delta_a**2, 1/2*delta_b**2, delta_c**2, delta_a*delta_c]

    # left-to-right accumulation, starting from the first term
    return sum(terms[1:], terms[0])

regularization = {'force_field_reg': 'KL divergence', 'forward_model_reg': forward_model_regularization}

In [9]:
# Split `data` into training and test sets with a fixed random state.
out = select_traintest(data, random_state=0)
print(out)

# the first two entries of the returned tuple are the training and the test data objects
data_train, data_test = out[:2]

Input random_state employed both for test_obs and test_frames


  w = data_mol.weights[test_frames_mol]


(<MDRefine.loss_and_minimizer.select_traintest.<locals>.my_data_traintest object at 0x7f73efaaeef0>, <MDRefine.loss_and_minimizer.select_traintest.<locals>.my_data_traintest object at 0x7f73efaaef28>, {'AAAA': {'backbone2_beta_epsilon_3J': DeviceArray([0, 1], dtype=int64), 'backbone1_gamma_3J': DeviceArray([], dtype=int64), 'sugar_3J': DeviceArray([], dtype=int64), 'NOEs': DeviceArray([], dtype=int64)}, 'CAAU': {'backbone2_beta_epsilon_3J': DeviceArray([0], dtype=int64), 'sugar_3J': DeviceArray([0], dtype=int64), 'backbone1_gamma_3J': DeviceArray([], dtype=int64), 'NOEs': DeviceArray([], dtype=int64)}}, {'AAAA': DeviceArray([ 1,  3,  6, 15, 23, 26, 43, 47, 53, 54, 56, 58, 60, 62, 70,
             71, 74, 85, 94, 95], dtype=int64), 'CAAU': DeviceArray([  0,   2,  10,  23,  33,  34,  36,  37,  43,  46,  56,  58,
              59,  68,  71,  78,  86,  95,  96, 100], dtype=int64)})


In [28]:
from MDRefine import compute_chi2

In [29]:
# Evaluate compute_chi2 on the training set of system 'AAAA'
# (the displayed result is a 4-tuple: three dicts keyed by observable name and a scalar).
compute_chi2(
    data_train.mol['AAAA'].ref,
    data_train.mol['AAAA'].weights,
    data_train.mol['AAAA'].g,
    data_train.mol['AAAA'].gexp,
)

({'backbone1_gamma_3J': DeviceArray([2.30906413, 2.47600636], dtype=float64),
  'sugar_3J': DeviceArray([3.88398218, 4.82629187], dtype=float64),
  'NOEs': DeviceArray([1.88726155e-03, 4.26039734e-05], dtype=float64)},
 {'backbone1_gamma_3J': DeviceArray(1.08865414, dtype=float64),
  'sugar_3J': DeviceArray(2.48164327, dtype=float64),
  'NOEs': DeviceArray(6.25717668, dtype=float64)},
 {'backbone1_gamma_3J': DeviceArray([-0.99395724,  0.31733758], dtype=float64),
  'sugar_3J': DeviceArray([1.25598812, 0.95086125], dtype=float64),
  'NOEs': DeviceArray([ 2.43974521, -0.55210507], dtype=float64)},
 DeviceArray(9.82747409, dtype=float64))

In [12]:
# Display the `gexp_new` attribute of the test set for system 'CAAU' (dict keyed by observable name).
# NOTE(review): presumably the experimental values of the observables held out for testing - confirm.
data_test.mol['CAAU'].gexp_new

{'backbone2_beta_epsilon_3J': array([[3.7, 1.5]]),
 'sugar_3J': array([[1. , 1.5]])}

In [11]:
# Run `minimizer` on the training set with the three hyperparameters alpha, beta, gamma all set to 1,
# passing the test set as well through `data_test`.
minimizer(data_train, regularization=regularization, alpha=1, beta=1, gamma=1, data_test=data_test)

    # Signature of `minimizer`, kept here for reference:
    #    original_data, *, regularization: dict = None, alpha: float = +numpy.inf, beta: float = +numpy.inf, gamma: float = +numpy.inf,
    #     gtol: float = 1e-3, gtol_inn: float = 1e-3, data_test: dict = None, starting_pars: numpy.ndarray = None):


New evaluation:
loss:  0.7612440289861
gradient:  [-0.54068472  0.11795326 -0.0109484   0.02555865  0.03171804 -0.002445
 -0.0050247  -0.01178146  0.2707445   0.33788134  0.45826581] 

New evaluation:
loss:  1.6187981299096614
gradient:  [ 2.10244658 -0.15234032 -0.11672741 -0.08500067 -0.24836887  0.00528501
 -0.01244037 -0.0167208  -0.78274413 -0.45834792 -1.39662136] 

New evaluation:
loss:  0.6868960863143665
gradient:  [ 0.17109476  0.07232597 -0.03295275  0.00326952 -0.02556601 -0.00020869
 -0.00510116 -0.00916136  0.05826983  0.17863232  0.07502135] 

New evaluation:
loss:  0.7391789973139598
gradient:  [-0.5559358  -0.02594162  0.04794161  0.03102466  0.1432578   0.0127858
  0.00505491  0.0227649  -0.23087195 -0.18205785 -0.40692797] 

New evaluation:
loss:  0.674267797388161
gradient:  [-0.05390883  0.0412979  -0.00810748  0.01194915  0.02635007  0.00376384
 -0.00198807  0.00061825 -0.02889941  0.06952625 -0.07126729] 

New evaluation:
loss:  0.6711860848306833
gradient:  [ 0.

<MDRefine.loss_and_minimizer.minimizer.<locals>.Result_class at 0x7f73ec040ef0>

### test for MDRefinement

In [14]:
from MDRefine import MDRefinement

In [15]:
def forward_model_regularization(coeffs, coeffs_0):
    """Quadratic penalty on the deviation of `coeffs` from the reference values `coeffs_0`.

    `coeffs` is indexed by position (0..8), `coeffs_0` by keys 'A_*', 'B_*', 'C_*' with
    suffixes 'gamma', 'beta' and 'sugar'. For each of the three groups, with
    dX = coeffs[...] - coeffs_0['X_<suffix>'], the contribution is
    3/8*dA**2 + 1/2*dB**2 + dC**2 + dA*dC; the three contributions are summed.

    NOTE(review): this is a verbatim duplicate of the function defined in the
    'test for select_traintest' section; the later definition shadows the earlier one.
    """
    regularization = (
    3/8*(coeffs[0]-coeffs_0['A_gamma'])**2+1/2*(coeffs[1]-coeffs_0['B_gamma'])**2+(coeffs[2]-coeffs_0['C_gamma'])**2+(coeffs[0]-coeffs_0['A_gamma'])*(coeffs[2]-coeffs_0['C_gamma'])+
    3/8*(coeffs[3]-coeffs_0['A_beta'])**2+1/2*(coeffs[4]-coeffs_0['B_beta'])**2+(coeffs[5]-coeffs_0['C_beta'])**2+(coeffs[3]-coeffs_0['A_beta'])*(coeffs[5]-coeffs_0['C_beta'])+
    3/8*(coeffs[6]-coeffs_0['A_sugar'])**2+1/2*(coeffs[7]-coeffs_0['B_sugar'])**2+(coeffs[8]-coeffs_0['C_sugar'])**2+(coeffs[6]-coeffs_0['A_sugar'])*(coeffs[8]-coeffs_0['C_sugar']))

    return regularization

# force-field regularization selected by name, forward-model regularization given as the callable above
regularization = {'force_field_reg': 'KL divergence', 'forward_model_reg': forward_model_regularization}

In [21]:
# Run MDRefinement from `infos`, starting the hyperparameter search from alpha = beta = gamma = 1
# and writing the results under DATA_test/.
out = MDRefinement(
    infos,
    regularization=regularization,
    starting_alpha=1,
    starting_beta=1,
    starting_gamma=1,
    which_set='validation',
    results_folder_name='DATA_test/results',
)

loading data from directory...
loading  AAAA
loading  CAAU
done

search for optimal hyperparameters ...
Input random_state employed both for test_obs and test_frames


  w = data_mol.weights[test_frames_mol]


Input random_state employed both for test_obs and test_frames
Input random_state employed both for test_obs and test_frames
Input random_state employed both for test_obs and test_frames
Input random_state employed both for test_obs and test_frames

log10 hyperpars:  [('alpha', 0.0), ('beta', 0.0), ('gamma', 0.0)]
av. chi2:  4.697786266513191
av. gradient:  [-3.05034953 -0.42527657  0.68407875]

log10 hyperpars:  [('alpha', 0.9668579060932906), ('beta', 0.13479832635946298), ('gamma', -0.2168298887044528)]
av. chi2:  5.316094771690143
av. gradient:  [ 3.77295022 -0.4521357   0.14764627]

log10 hyperpars:  [('alpha', 0.36924104848643996), ('beta', 0.05147920397144987), ('gamma', -0.08280688914458248)]
av. chi2:  4.18724522883611
av. gradient:  [ 0.14268008 -0.07403945  0.20080806]

optimal hyperparameters: alpha: 2.340135734692994 beta: 1.1258465568636136 gamma: 0.8264053324749188

refinement with optimal hyperparameters...
New evaluation:
loss:  1.693658375638443
gradient:  [-1.40800486

In [28]:
# A `bytes` message is decoded to `str` before being printed.
v = b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'

print(type(v))

if isinstance(v, bytes):
    print(v.decode('utf-8'))

<class 'bytes'>
CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL


In [27]:
# Paths of all the entries of DATA_test/ whose name starts with 'results'.
# os.listdir returns entries in arbitrary order, so sort them: path_list[0] and path_list[1]
# then refer to the same folders on every run and on every machine.
path_list = ['DATA_test/' + s + '/' for s in sorted(os.listdir('DATA_test/')) if s.startswith('results')]


In [28]:
# Check that the weights saved in the first two results folders agree.
for s in ['ff_AAAA', 'ff_CAAU', 'new_AAAA', 'new_CAAU']:

    my_vec0 = np.load(path_list[0] + 'weights_%s.npy' % s)
    my_vec1 = np.load(path_list[1] + 'weights_%s.npy' % s)

    # guard against silent broadcasting of arrays with different shapes
    assert my_vec0.shape == my_vec1.shape

    # Square each difference BEFORE summing. The previous form, np.sum(a - b)**2, squared the
    # sum of signed differences, so positive and negative deviations cancelled out
    # (e.g. two weight vectors with the same total always passed, however different).
    assert np.sum((my_vec0 - my_vec1)**2) < 1e-5


In [25]:
import pandas

In [56]:
# Check that the csv files saved in the first two results folders agree.
for s in ['hyper_search', 'min_lambdas', 'result']:

    # for 'result', skip the 'time' column; for the other files read all the columns
    usecols = (lambda x: x != 'time') if s == 'result' else None

    my_vec0 = np.array(pandas.read_csv(path_list[0] + s, index_col=0, usecols=usecols))
    my_vec1 = np.array(pandas.read_csv(path_list[1] + s, index_col=0, usecols=usecols))

    # guard against silent broadcasting of arrays with different shapes
    assert my_vec0.shape == my_vec1.shape

    # Square each difference BEFORE summing. The previous form, np.sum(a - b)**2, squared the
    # sum of signed differences, so positive and negative deviations cancelled out.
    assert np.sum((my_vec0 - my_vec1)**2) < 1e-5

In [69]:
# Check that the 'input' files saved in the first two results folders agree.
# All the columns are read: this cell no longer depends on the `usecols` variable
# left over from the loop of the previous cell (hidden state).
my_df0 = pandas.read_csv(path_list[0] + 'input', index_col=0)
my_df1 = pandas.read_csv(path_list[1] + 'input', index_col=0)

assert list(my_df0.columns) == list(my_df1.columns)

for s in list(my_df0.columns):
    # .iloc[0] selects the first row by position, whatever the index labels are
    assert my_df0[s].iloc[0] == my_df1[s].iloc[0]

stride
starting_alpha
starting_beta
starting_gamma
random_states
which_set
gtol
ftol


In [31]:
def relative_sq_deviation(reference, other):
    """Compare `other` against `reference`, treating zero reference entries separately.

    Where `reference` is zero, `other` must be equal to it (AssertionError otherwise).
    Where `reference` is non-zero, the returned value is
    sum((other - reference)**2 / reference) over those entries.

    Returns None when `reference` has no non-zero entry.
    """
    reference = np.asarray(reference)
    other = np.asarray(other)

    is_zero = reference == 0

    if is_zero.any():
        # np.array_equal gives a single boolean; a bare `assert a == b` on arrays raises
        # "truth value of an array ... is ambiguous" as soon as there are several entries
        assert np.array_equal(reference[is_zero], other[is_zero])

    is_nonzero = ~is_zero

    if is_nonzero.any():
        return np.sum((other[is_nonzero] - reference[is_nonzero])**2/reference[is_nonzero])

    return None


v1 = np.random.random((3,4))
v2 = np.random.random((3,4))

deviation = relative_sq_deviation(v1, v2)

if deviation is not None:
    print(deviation)

[[0 0]
 [0 1]
 [0 2]
 [0 3]
 [1 0]
 [1 1]
 [1 2]
 [1 3]
 [2 0]
 [2 1]
 [2 2]
 [2 3]]
[[0.52922059 0.25758993 0.20693372 0.83827828]
 [0.5290291  0.898943   0.82404715 0.27793765]
 [0.35514264 0.15783067 0.32902135 0.49281009]]
[0.52922059 0.25758993 0.20693372 0.83827828 0.5290291  0.898943
 0.82404715 0.27793765 0.35514264 0.15783067 0.32902135 0.49281009]
5.100697732913282
