In [None]:
import torch
from langevin_sampling.samplers import LangevinDynamics, MetropolisAdjustedLangevin
import numpy as np
import matplotlib.pyplot as plt
from math import floor
import copy
from autoimpute.imputations import MiceImputer, MultipleImputer, SingleImputer
import pandas as pd
import dill
from tqdm import tqdm
from mdfeature.KramersRateEvaluator import KramersRateEvaluator
# Seed both random number generators used in this notebook.
np.random.seed(10)
torch.manual_seed(10)

# All tensors live on the CPU.
device = torch.device('cpu')
# torch.set_default_tensor_type is deprecated in recent PyTorch releases;
# setting the default dtype to float32 is the supported equivalent of 'torch.FloatTensor'.
torch.set_default_dtype(torch.float32)

def double_well_negative_log(x, h=1.75, c=2):
    """Negative log-density (potential) of a symmetric one-dimensional double well.

    Evaluates ``U(x) = -(1/4) * h**4 * x**2 + (1/2) * c**2 * x**4``. For non-zero
    ``h`` and ``c`` this has a local maximum at ``x = 0`` and two minima at
    ``x = +/- h**2 / (2 * c)``.

    Parameters
    ----------
    x : scalar, numpy array or torch tensor
        Position(s) at which to evaluate the potential; only ``**``, ``*`` and
        ``+`` are applied, so any type supporting those works.
    h : float, optional
        Controls the strength of the quadratic (destabilising) term. Default 1.75.
    c : float, optional
        Controls the strength of the quartic (confining) term. Default 2.

    Returns
    -------
    Same type/shape as ``x``
        The potential evaluated element-wise.
    """
    return -(1/4)*(x**2)*(h**4) + (1/2)*(c**2)*(x**4)

In [None]:
# To (re)create the cache from freshly generated samples:
#     with open("double_well_samples.pickle", "wb") as samples_file:
#         dill.dump(est_samples, file=samples_file)
#
# NOTE: unpickling can execute arbitrary code; only load cache files you produced yourself.
# The context manager makes sure the file handle is closed after loading.
with open("double_well_samples.pickle", "rb") as samples_file:
    est_samples = dill.load(samples_file)


In [None]:
# Starting point of the chain: one standard-normal draw with gradient tracking enabled.
x = torch.randn([1], requires_grad=True, device=device)
max_itr = int(3e6)
burn_in_samples = 50

# True  -> Metropolis-adjusted Langevin sampler
# False -> plain Langevin dynamics sampler
MALA = True

# Arguments shared by both sampler variants.
sampler_kwargs = dict(max_itr=max_itr, device=device)

if MALA is True:
    sampler = MetropolisAdjustedLangevin(
        x, double_well_negative_log, lr=1e-1, lr_final=4e-2, **sampler_kwargs
    )
else:
    sampler = LangevinDynamics(
        x, double_well_negative_log, lr=0.5, lr_final=1e-2, **sampler_kwargs
    )

# Draw max_itr samples, recording the loss reported alongside each one.
hist_samples, loss_log = [], []
for _ in tqdm(range(max_itr)):
    est, loss = sampler.sample()
    hist_samples.append(est.cpu().numpy())
    loss_log.append(loss)

# Collapse to a 1-D array and drop the first burn_in_samples draws.
est_samples = np.array(hist_samples).flatten()[burn_in_samples:]

# Normalised histogram of the sampled positions.
fig = plt.figure(dpi=150, figsize=(9, 4))
plt.hist(est_samples, bins=200, density=True)
plt.xlabel('x coordinate')
plt.ylabel('Probability Density')
# The layout has to be adjusted before the figure is rendered; calling tight_layout
# after show() would act on a new, empty figure. A single show() is sufficient.
plt.tight_layout()
plt.show()

In [None]:
import scipy.interpolate as interpolate

def free_energy_estimate(samples, bins=200, min_count=50, dx=0.001, sigma=0.03):
    """Estimate a smoothed free-energy profile from one-dimensional samples.

    The samples are histogrammed, sparsely populated bins are discarded, the
    free energy is taken as the negative log of the normalised counts, and the
    result is interpolated onto a fine grid and smoothed with a Gaussian kernel.

    Parameters
    ----------
    samples : array-like
        One-dimensional sample values.
    bins : int, optional
        Number of histogram bins. Default 200.
    min_count : int, optional
        Bins with ``min_count`` or fewer samples are discarded. Default 50.
    dx : float, optional
        Spacing of the fine grid used for interpolation and smoothing. Default 0.001.
    sigma : float, optional
        Standard deviation of the Gaussian smoothing kernel. Default 0.03.

    Returns
    -------
    smoothed_F : numpy.ndarray
        Smoothed free energy (dimensionless, i.e. ``-log`` of a probability).
    smoothed_range : numpy.ndarray
        Coordinates at which ``smoothed_F`` is given; ``3 * sigma`` is trimmed
        from each end of the populated range.
    """
    # histogram: np.histogram returns `bins` counts but `bins + 1` edges, so the
    # coordinate attached to each count is the bin centre rather than an edge.
    counts, bin_edges = np.histogram(samples, bins=bins)
    bin_centres = 0.5 * (bin_edges[:-1] + bin_edges[1:])
    well_sampled = counts > min_count
    robust_counts = counts[well_sampled]
    robust_coordinates = bin_centres[well_sampled]

    # negative log of the counts, normalised by the total number of samples
    normalised_counts = robust_counts / np.sum(counts)
    with np.errstate(divide='ignore'):
        free_energy = - np.log(normalised_counts)

    # nan imputation: infinite energies are marked as missing and filled by interpolation
    free_energy[np.isinf(free_energy)] = np.nan

    df = pd.DataFrame({'CV': robust_coordinates, 'F': free_energy})
    si = MiceImputer(return_list=True, strategy={"F": "interpolate"}, n=1)
    # presumably a list of (index, DataFrame) pairs; take the frame of the single imputation
    output = si.fit_transform(df)[0][1]

    # smoothing: resample on a uniform grid, then convolve with a Gaussian
    interpolated_F = interpolate.interp1d(output.CV, output.F, fill_value='extrapolate')
    smoothed_range = np.arange(min(output.CV), max(output.CV), dx)
    sampled_F = interpolated_F(smoothed_range)

    gx = np.arange(-3 * sigma, 3 * sigma, dx)
    gaussian = (1/(np.sqrt(2*np.pi * sigma**2))) * np.exp(-(gx / sigma) ** 2 / 2)
    smoothed_F = np.convolve(sampled_F, gaussian, mode="same") * dx

    # Keep only points further than 3 sigma from both ends, where the kernel
    # lies entirely inside the sampled range.
    interior = (
        (np.abs(smoothed_range - smoothed_range.max()) > 3 * sigma)
        & (np.abs(smoothed_range - smoothed_range.min()) > 3 * sigma)
    )

    return smoothed_F[interior], smoothed_range[interior]

free_energy, coordinates = free_energy_estimate(samples=est_samples)

# The estimate is only defined up to an additive constant. Align it with the analytic
# potential at the grid point closest to x = 0; the middle array index is only at
# x = 0 when the sampled range happens to be symmetric.
origin_index = np.argmin(np.abs(coordinates))
linear_shift = free_energy[origin_index] - double_well_negative_log(coordinates[origin_index])

fig = plt.figure(figsize=(15,10))
plt.plot(coordinates, free_energy - linear_shift, 'k', label='Estimated free energy')
plt.xlabel('x', fontsize=16)
plt.ylabel('Estimated Free Energy', fontsize=16)

# linspace gives exactly 1000 points; arange with a floating-point step may give one more.
x_range = np.linspace(min(coordinates), max(coordinates), 1000, endpoint=False)
plt.plot(x_range, double_well_negative_log(x_range), label='Analytic double well')
plt.legend()
plt.show()

In [None]:
# Options passed through to the evaluator via its `options` argument.
clustering_options = {
    'k': 200,
    'stride': 5,
    'max_iter': 150,
    'max_centers': 20,
    'metric': 'euclidean',
    'n_jobs': None,
    'dmin': 0.002,
}

kre = KramersRateEvaluator(verbose=True)

# Fit on the sampled trajectory. step_size equals the sampler's lr_final (4e-2).
kre.fit(
    est_samples,
    beta=1,
    sigmaD=0.075,
    sigmaF=0.01,
    step_size=4e-2,
    lag=1,
    bins=300,
    impute_free_energy_nans=False,
    minima_prominance=1.1,
    include_endpoint_minima=False,
    cluster_type='kmeans',
    options=clustering_options,
)

Mean first-passage time (MFPT)

$$
\tau = \int_{x_{min}=-\frac{h^2}{2c}}^{x_{max}=0} \frac{dy}{D(y)} \exp{\left(\beta F(y)\right)} \int_{-\infty}^{y} dz \exp{\left(-\beta F(z)\right)}
$$

Kramers' rate

$$
\nu = \frac{1}{2\tau} 
$$

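(Note that the rate of crossing the barrier is $1/2$ the rate of arrival at the barrier top, $1/\tau$: a trajectory that reaches the top is equally likely to fall into either well.)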

In [None]:
from scipy import integrate
from scipy.interpolate import interp1d

# Thermodynamic parameters
R = 0.0083144621 # Universal Gas Constant kJ/K/mol
temperature = 300  # presumably in kelvin, to match the units of R
beta = 1.0/(temperature*R) # units (kJ/mol)**(-1)
gamma = 1

# End points of the outer integral: left-well minimum and x = 0
x_left_min = - 0.765625 # - h^2 / (2*c)
x_max = 0

# Continuous representation of the estimated free-energy profile
free_energy_interpolate = interp1d(coordinates, free_energy)

def diffusion_coefficient(y, beta, gamma):
    """Return the position-independent diffusion coefficient ``1 / (beta * gamma)``.

    ``y`` is part of the signature but does not enter the result.
    """
    beta_gamma = beta * gamma
    return 1 / beta_gamma

def kramers_integrand(z, y, beta):
    """Integrand of the double integral for the mean first-passage time.

    Returns ``exp(beta * F(y)) * exp(-beta * F(z)) / D(y)`` where ``F`` is the
    module-level ``free_energy_interpolate`` and ``D`` is ``diffusion_coefficient``
    evaluated with the module-level ``gamma``.
    """
    outer_weight = np.exp(beta * free_energy_interpolate(y))
    inner_weight = np.exp(-beta * free_energy_interpolate(z))
    return outer_weight * inner_weight / (diffusion_coefficient(y, beta, gamma))

def lim0(y, beta):
    """Integration limits ``[lower, upper]`` for the inner variable at a given ``y``.

    The lower limit is formally -infinity; it is truncated to a fixed finite value.
    ``beta`` is accepted but unused.
    """
    truncated_lower_limit = -1.22
    return [truncated_lower_limit, y]
            
# Limits of the outer integration variable y: from the left-well minimum to x_max = 0.
lim1 = [x_left_min, x_max]

print(f'With a uniform diffusion coefficient of {diffusion_coefficient(0, beta, gamma)}, we obtain a Kramers rate of:')
# The ranges are listed in the same order as the integrand's leading arguments (z, then y):
# lim0 bounds z and is a callable because its upper bound is the current y; lim1 bounds y.
# NOTE(review): assigning to `_` shadows IPython's `_` (last-output) variable in this kernel.
tau, _ = integrate.nquad(kramers_integrand, [lim0, lim1], args=(beta,))

# The crossing rate is half the inverse of tau.
rate = 1/(2*tau)

print(rate)

In [None]:
import pyemma

def relabel_trajectory_by_state_chronology(traj, state_centers):
    """Relabel a discrete trajectory so that state indices follow the order of the state centres.

    After relabelling, state ``0`` is the state with the smallest centre, state ``1``
    the next smallest, and so on.

    Parameters
    ----------
    traj : sequence of int (typically a numpy integer array)
        Discrete trajectory whose entries index into ``state_centers``.
        It is modified in place.
    state_centers : array-like
        One centre per state, in the original state numbering.

    Returns
    -------
    The same ``traj`` object, relabelled.
    """
    # rank_of_state[s] is the position of state s once the centres are sorted ascending
    rank_of_state = np.argsort(np.argsort(state_centers))

    # relabel every frame with a single vectorised lookup instead of a Python-level
    # loop; writing through traj[:] keeps the in-place behaviour
    traj[:] = rank_of_state[traj]

    return traj

# Echo the sample array.
print(est_samples)

# Discretise the samples into 150 k-means states.
cluster = pyemma.coordinates.cluster_kmeans(est_samples, k=150, max_iter=100)
cluster_centers = cluster.clustercenters.flatten()
sorted_state_centers = np.sort(cluster_centers)

# Renumber the states so that their indices increase with the position of the state centre.
discrete_traj = relabel_trajectory_by_state_chronology(cluster.dtrajs[0], cluster_centers)

In [None]:
# Implied timescales of the discretised trajectory (the plot call is left disabled).
its = pyemma.msm.its(discrete_traj)
#pyemma.plots.plot_implied_timescales(its)

# First 1000 frames of the state trajectory.
plt.plot(discrete_traj[:1000])

In [None]:
# Coordinate windows used as source and target sets for the MFPT:
# the first lies in the left well, the second in the right well.
initial_range, final_range = (-0.80, -0.72), (0.72, 0.80)

def compute_states_for_range(given_range, sorted_state_centers):
    """Return the indices of all states whose Voronoi cell overlaps a coordinate range.

    Parameters
    ----------
    given_range : sequence
        ``given_range[0]`` is the lower and ``given_range[1]`` the upper end of the range.
    sorted_state_centers : sequence of float
        State centres in ascending order.

    Returns
    -------
    numpy.ndarray
        Consecutive state indices, from the first cell not entirely below the
        range up to the last cell not entirely above it.
    """
    centers = np.asarray(sorted_state_centers)

    # Cell boundaries are the midpoints between neighbouring centres.
    boundaries = (centers[1:] + centers[:-1]) / 2

    range_low = given_range[0]
    range_high = given_range[1]

    # A boundary below the range marks one cell lying entirely below it;
    # likewise a boundary above the range marks one cell entirely above it.
    cells_below = np.count_nonzero(boundaries < range_low)
    cells_above = np.count_nonzero(boundaries > range_high)

    return np.arange(cells_below, len(centers) - cells_above, 1)

# Markov state model of the discretised trajectory at a lag of one step.
msm = pyemma.msm.estimate_markov_model(discrete_traj, lag=1)

# Source / target state sets covering the two coordinate windows.
initial_states = compute_states_for_range(initial_range, sorted_state_centers)
final_states = compute_states_for_range(final_range, sorted_state_centers)

# Mean first-passage time from the initial to the final set.
mfpt = msm.mfpt(A=initial_states, B=final_states)
print(mfpt)

In [None]:
# Step used to convert the MFPT to time units; 4e-2 equals the lr_final of the
# MALA sampler and the step_size passed to kre.fit.
learning_rate = 4e-2
real_mfpt = mfpt * learning_rate
# Crossing rate is half the inverse MFPT.
kramers_rate = 1/(2*real_mfpt)
print(f"The estimated Kramers rate for this system is {round(kramers_rate, 3)}/s")
# NOTE(review): 1.933805 is an unexplained constant — presumably the Kramers rate obtained
# for gamma = 1; confirm its origin. This assignment also overwrites the global `gamma`
# that kramers_integrand reads.
gamma = 1.933805/kramers_rate
print(f"This implies that the gamma value is {round(gamma, 2)}")
# `beta` is the module-level value 1/(temperature*R) defined with the integration constants.
print(f"And that the diffusion coefficient is {round(1/(beta*gamma), 5)}")

## Computing Diffusion Coefficients Through Kramers-Moyal Expansion

$$
c^{(n)}(x, \tau) = \int dx' \left(x' - x\right)^n p\left(x', \tau \vert x \right) = \left< \left(x(t+\tau)-x(t)\right)^n\right>_{x(t)=x}
$$

$$
c^{(n)}(x_i, \tau) = \sum_{j=1}^{N_{bin}} \left(x_j - x_i\right)^n p_{ij}^{(\tau)}
$$

$$
c^{(1)}(X, \tau) \approx D^{(1)}(X) \tau
$$

$$
c^{(2)}(X, \tau) \approx 2 D^{(2)}(X) \tau + \left(D^{(1)}(X) \tau \right)^2
$$

$$
D^{(2)}(x_i) \approx \frac{c^{(2)}(x_i, \tau) - c^{(1)}(x_i, \tau)^2}{2 \tau}
$$

In [None]:
def calculate_c(X, n, P):
    """Compute the n-th conditional moment of the displacement for every state.

    For each state ``i`` this evaluates ``sum_j (X[j] - X[i])**n * P[i, j]``.

    Parameters
    ----------
    X : array-like, shape (k,)
        State positions.
    n : int
        Order of the moment.
    P : numpy.ndarray, shape (k, k)
        Transition matrix; ``P[i, j]`` weights the displacement from state ``i`` to ``j``.

    Returns
    -------
    numpy.ndarray, shape (k,)
        The n-th moment for each state.
    """
    X = np.asarray(X)
    # displacement[i, j] = X[j] - X[i], built by broadcasting instead of a Python loop
    displacement = X[np.newaxis, :] - X[:, np.newaxis]
    return np.sum(displacement**n * P, axis=1)


def compute_drift_and_diffusion(traj, lag, step, k):
    """Plot the ratio ``D4 / D2**2`` of Kramers-Moyal coefficients against state position.

    The trajectory is discretised with k-means, a Markov state model is estimated
    at the given lag, and the moments of the displacement are computed from its
    transition matrix. The curve is drawn on the current matplotlib axes;
    nothing is returned.

    Parameters
    ----------
    traj : array-like
        Continuous trajectory passed to the k-means clustering.
    lag : int
        MSM lag in trajectory steps.
    step : float
        Time per trajectory step; ``tau = lag * step``.
    k : int
        Number of k-means clusters.
    """
    # Discretise and renumber the states by increasing centre position.
    clustering = pyemma.coordinates.cluster_kmeans(traj, k=k, max_iter=100)
    centers = clustering.clustercenters.flatten()
    dtraj = relabel_trajectory_by_state_chronology(clustering.dtrajs[0], centers)
    X = np.sort(centers)

    transition_matrix = pyemma.msm.estimate_markov_model(dtraj, lag=lag).transition_matrix

    # First, second and fourth moments of the displacement.
    c1, c2, c4 = (calculate_c(X, order, transition_matrix) for order in (1, 2, 4))

    tau = lag * step

    D2 = (c2 - c1 **2)/(2*tau)
    D4 = (1/(4*3*2*tau))*c4

    plt.plot(X, D4/(D2**2))

# Ratio D4 / D2**2 for a 30-state discretisation at a lag of one step.
compute_drift_and_diffusion(
    est_samples,
    lag=1,
    step=4e-2,
    k=30,
)

In [None]:
def compute_drift_and_diffusion_2(traj, lag, step, k, ax, label=None):
    """Plot the second Kramers-Moyal coefficient ``D2`` against state position on ``ax``.

    The trajectory is discretised with k-means, a Markov state model is estimated
    at the given lag, and ``D2 = (c2 - c1**2) / (2 * tau)`` is computed from the
    first two moments of the displacement.

    Parameters
    ----------
    traj : array-like
        Continuous trajectory passed to the k-means clustering.
    lag : int
        MSM lag in trajectory steps.
    step : float
        Time per trajectory step; ``tau = lag * step``.
    k : int
        Number of k-means clusters.
    ax : matplotlib axes
        Axes to draw on.
    label : str, optional
        Legend label. Defaults to a label naming both ``k`` and ``lag`` so that
        curves remain distinguishable whichever of the two is being varied.

    Returns
    -------
    None
    """
    cluster = pyemma.coordinates.cluster_kmeans(traj, k=k, max_iter=100)
    discrete_traj = cluster.dtrajs[0]
    cluster_centers = cluster.clustercenters.flatten()
    # renumber states by increasing centre position so they line up with the sorted X
    discrete_traj = relabel_trajectory_by_state_chronology(discrete_traj, cluster_centers)
    X = np.sort(cluster_centers)

    msm = pyemma.msm.estimate_markov_model(discrete_traj, lag=lag)

    c1 = calculate_c(X, 1, msm.transition_matrix)
    c2 = calculate_c(X, 2, msm.transition_matrix)

    tau = lag * step

    D2 = (c2 - c1 **2)/(2*tau)
    if label is None:
        # a label with only k gives identical legend entries when the lag is swept
        label = f"k={k}, lag={lag}"
    ax.plot(X, D2, label=label)

    return

In [None]:
# Sweep the MSM lag at a fixed number of clusters (k=50), one curve per lag.
lags = [1,2,3,4,5,7,9,13,17]

# One colour per curve, taken from the middle of the PuBuGn colormap.
cm = plt.cm.PuBuGn(np.linspace(0.25, 0.75, len(lags)))
ax = plt.subplot(111)
ax.set_prop_cycle('color', list(cm))

for lag in lags:
    compute_drift_and_diffusion_2(est_samples, lag=lag, step=4e-2, k=50, ax=ax)

plt.xlabel("x", fontsize=25)
plt.ylabel(r"$D^{(2)}(x)$", fontsize=25)
plt.legend(fontsize=18)
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)

In [None]:
# Sweep the number of k-means clusters at a fixed lag of 5 steps, one curve per k.
k = [10,15,20,30,50,70,90,120,150]
# Size the colour cycle and drive the loop from `k` itself, so this cell does not
# depend on a `lags` list of matching length defined elsewhere.
cm = plt.cm.PuBuGn(np.linspace(0.25, 0.75, len(k)))
ax = plt.subplot(111)
ax.set_prop_cycle('color', list(cm))
for n_clusters in k:
    compute_drift_and_diffusion_2(est_samples, lag=5, step=4e-2, k=n_clusters, ax=ax)
plt.legend(fontsize=18)
plt.xlabel("x", fontsize=25)
plt.ylabel(r"$D^{(2)}(x)$", fontsize=25)
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)

In [None]:
# FIXME: peak_Ds and std_Ds are not defined in the visible part of this notebook;
# on a fresh kernel this cell raises NameError unless they are created elsewhere.
plt.plot(peak_Ds)
plt.plot(std_Ds)

In [None]:
# FIXME: `axs` is not defined in the visible part of this notebook, and the visible
# compute_drift_and_diffusion(traj, lag, step, k) accepts neither `axs` nor `idx`.
compute_drift_and_diffusion(est_samples, lag=1, step=4e-2, k=10, axs=axs, idx=0)

In [None]:
# Small 4-state example that prints every intermediate quantity of the D2 estimate.
tau = 1 * 4e-2

# Discretise into four k-means states, numbered by increasing centre position.
cluster = pyemma.coordinates.cluster_kmeans(est_samples, k=4, max_iter=100)
cluster_centers = cluster.clustercenters.flatten()
discrete_traj = relabel_trajectory_by_state_chronology(cluster.dtrajs[0], cluster_centers)
X = np.sort(cluster_centers)

# MSM at a lag of one step, then the first two displacement moments.
msm = pyemma.msm.estimate_markov_model(discrete_traj, lag=1)
c1, c2 = (calculate_c(X, order, msm.transition_matrix) for order in (1, 2))
D2 = (c2 - c1 **2)/(2*tau)

for title, quantity in (
    ("sorted centers", X),
    ("transition matrix", msm.transition_matrix),
    ("c1", c1),
    ("c2", c2),
    ("D2", D2),
):
    print(title, quantity)

In [None]:
# Recorded output of the 4-state diagnostic (pasted by hand). It is kept as comments
# because the raw text is not valid Python and would raise a SyntaxError when run.
#
# sorted centers [-0.8242084  -0.31454545  0.3092177   0.8194292 ]
# transition matrix [[0.74613474 0.23868719 0.0143054  0.00087267]
#  [0.31079655 0.49564999 0.17528846 0.01826499]
#  [0.01844829 0.17360506 0.49550507 0.31244158]
#  [0.00085115 0.01368135 0.23630303 0.74916447]]
# c1 [ 0.13929848 -0.02835096  0.03021309 -0.13747782]
# c2 [0.08273556 0.1724196  0.1725795  0.08140567]

In [None]:
from mdfeature.KramersRateEvaluator import KramersRateEvaluator

In [None]:
# Fresh evaluator instance with verbose output enabled; rebinds the name `kre`.
kre = KramersRateEvaluator(verbose=True)

In [None]:
# Lags 1..30 (in trajectory steps).
lag_values = list(range(1, 31))

# Options passed through to the evaluator via its `options` argument.
clustering_options = {
    'k': 50,
    'stride': 5,
    'max_iter': 150,
    'max_centers': 1000,
    'metric': 'euclidean',
    'n_jobs': None,
    'dmin': 0.002,
}

# NOTE(review): `beta` is passed positionally and is whatever the kernel currently holds
# (1/(temperature*R) if the integration cell was run) — confirm this is the intended value.
kre.fit(
    est_samples,
    beta,
    sigmaD=0.002,
    sigmaF=0.00025,
    lags=lag_values,
    bins=200,
    step_size=4e-2,
    minima_prominance=0.5,
    cluster_type='kmeans',
    options=clustering_options,
)