In [None]:
import autograd.numpy as np
import pandas as pd
import dill
import pyemma
from math import floor
from autograd import grad
from mdfeature.KramersRateEvaluator import KramersRateEvaluator

Data restore

In [None]:
# Restore a previously simulated double-well trajectory from disk so the
# expensive simulation does not have to be re-run.
# To (re)create the cache after running the double-well simulation cell:
# with open("double_well_traj.pickle", "wb") as traj_file:
#     dill.dump(double_well_traj, file=traj_file)
# SECURITY: dill/pickle can execute arbitrary code while loading -- only
# restore files that you created yourself.
with open("double_well_traj.pickle", "rb") as traj_file:  # context manager closes the handle
    double_well_traj = dill.load(traj_file)

# Toy Problems in Potential Energy Landscapes

In this notebook we look at the following toy potentials:
1. Double-Well (1D)
2. Quadruple-Well (2D)
3. Ring Double-Well (2D)
4. Muller-Brown Potential Energy Surface (2D)

In [None]:
def cart2pol(x, y):
    """Convert Cartesian coordinates to polar coordinates.

    Parameters
    ----------
    x, y : scalar or array-like
        Cartesian coordinates.

    Returns
    -------
    tuple
        ``(r, theta)`` where ``r = sqrt(x**2 + y**2)`` and
        ``theta = arctan2(y, x)``.
    """
    radius = np.sqrt(x**2 + y**2)
    angle = np.arctan2(y, x)
    return radius, angle

In [None]:
# Shallow well (1D)
def shallow_well_potential(x):
    """Harmonic single-well potential ``V(x) = 0.01 * x**2``.

    Works elementwise on arrays as well as on scalars.
    """
    stiffness = 0.01
    return stiffness * x**2

# Double well (1D)
def double_well_potential(x, h=2, c=2):
    """Symmetric quartic double-well potential.

    ``V(x) = -(1/4) * h**4 * x**2 + (1/2) * c**2 * x**4``

    Setting dV/dx = 0 gives minima at ``x = +/- h**2 / (2*c)`` with value
    ``-h**8 / (32 * c**2)`` and a local maximum ``V(0) = 0``; for the defaults
    (h = c = 2) the minima sit at x = +/-1 with V = -2.

    Parameters
    ----------
    x : scalar or array-like
        Position(s) at which to evaluate the potential.
    h, c : float, optional
        Shape parameters of the quadratic and quartic terms. They default to
        the values that were previously hard-coded, so existing one-argument
        calls behave exactly as before.

    Returns
    -------
    Same shape as ``x``.
    """
    return -(1/4)*(x**2)*(h**4) + (1/2)*(c**2)*(x**4)

# Quadruple Well (2D)
def quadruple_well_potential(x, h=2, c=2):
    """Separable 2D potential with four wells.

    The potential is the sum of the same quartic double-well term applied
    independently to ``x[0]`` and ``x[1]``:

    ``V(x) = W(x[0]) + W(x[1])``, with
    ``W(q) = -(1/4) * h**4 * q**2 + (1/2) * c**2 * q**4``.

    Parameters
    ----------
    x : sequence or array of length 2
        Position ``(x[0], x[1])``.
    h, c : float, optional
        Shape parameters of the one-dimensional term. They default to the
        values that were previously hard-coded, so existing one-argument
        calls behave exactly as before.

    Returns
    -------
    Scalar potential energy at ``x``.
    """
    def _double_well_term(q):
        # One-dimensional quartic double well along a single coordinate.
        return -(1/4)*(q**2)*(h**4) + (1/2)*(c**2)*(q**4)

    return _double_well_term(x[0]) + _double_well_term(x[1])

# Ring Double Well (2D)
def ring_double_well_potential(x):
    """2D potential with a radial confining term and two Gaussian wells.

    ``V = exp(r / r0) / r
          - d * exp(-|x - p1|**2 / (2 * w**2))
          - d * exp(-|x - p2|**2 / (2 * w**2))``

    where ``r = sqrt(x[0]**2 + x[1]**2)``, ``p1 = (r0, 0)`` and
    ``p2 = (r0*cos(theta0), r0*sin(theta0))``. The radial term is minimal at
    ``r = r0`` and is singular at the origin (division by ``r``).

    Parameters
    ----------
    x : sequence or array of length 2
        Position ``(x[0], x[1])``.

    Returns
    -------
    Scalar potential energy at ``x``.
    """
    theta0 = np.pi  # angular position of the second well
    r0 = 1          # ring radius
    w = 0.2         # Gaussian width of each well
    d = 5           # depth of each well

    # Only the radius is needed; the polar angle computed by the previous
    # version was never used, so the arctan2 evaluation has been dropped.
    r = np.sqrt(x[0]**2 + x[1]**2)

    ring_term = (1/r) * np.exp(r/r0)
    first_well = d * np.exp(-((x[0]-r0)**2 + (x[1])**2)/(2*w**2))
    second_well = d * np.exp(-((x[0]-r0*np.cos(theta0))**2 + (x[1]-r0*np.sin(theta0))**2)/(2*w**2))

    return ring_term - first_well - second_well

# Muller-Brown Potential (2D)
def muller_brown_potential(x):
    """Evaluate the four-term Muller-Brown potential at ``x = (x[0], x[1])``.

    ``V = sum_k A_k * exp(a_k*(x-x0_k)**2 + b_k*(x-x0_k)*(y-y0_k) + c_k*(y-y0_k)**2)``
    """
    # One row per exponential term: (A, a, b, c, x0, y0).
    terms = (
        (-200, -1, 0, -10, 1, 0),
        (-100, -1, 0, -10, 0, 0.5),
        (-170, -6.5, 11, -6.5, -0.5, 1.5),
        (15, 0.7, 0.6, 0.7, -1, 1),
    )

    total = 0
    for amplitude, a_k, b_k, c_k, x_k, y_k in terms:
        dx = x[0] - x_k
        dy = x[1] - y_k
        total += amplitude*np.exp(a_k*(dx)**2 + b_k*(dx)*(dy) + c_k*(dy)**2)

    return total

Functions for free energy plots

In [None]:
def free_energy_estimate(samples, beta, minimum_counts=50):
    """Estimate a 1D free-energy profile from samples by Boltzmann inversion.

    The samples are histogrammed into 200 bins; bins holding no more than
    ``minimum_counts`` samples are discarded as statistically unreliable and
    the free energy of each remaining bin is ``-(1/beta) * log(p)``, where
    ``p`` is the bin count divided by the total number of samples.

    Parameters
    ----------
    samples : array-like
        Sampled coordinate values.
    beta : float
        Inverse temperature.
    minimum_counts : int, optional
        A bin is kept only if its count is strictly greater than this.

    Returns
    -------
    free_energy : ndarray
        Free energy of each retained bin.
    robust_coordinates : ndarray
        Centre of each retained bin. (Earlier versions returned the left bin
        edge, which offset the whole profile by half a bin width.)
    """
    # histogram: 200 counts and 201 bin edges
    counts, bin_edges = np.histogram(samples, bins=200)
    bin_centres = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    well_sampled = counts > minimum_counts
    robust_counts = counts[well_sampled]
    robust_coordinates = bin_centres[well_sampled]

    # log of the normalised counts (normalised by ALL samples, including
    # those in the discarded bins)
    normalised_counts = robust_counts / np.sum(counts)
    with np.errstate(divide='ignore'):
        free_energy = - (1/beta)* np.log(normalised_counts)

    return free_energy, robust_coordinates

def plot_free_energy_estimate(potential, samples, beta, minimum_counts=50):
    """Plot the histogram-based free-energy estimate against the true potential.

    The estimated curve is shifted vertically so that its middle retained bin
    coincides with ``potential(0)``.

    Parameters
    ----------
    potential : callable
        Reference potential; it is called with ``0`` and with a 1D array.
    samples : array-like
        Sampled coordinate values, forwarded to ``free_energy_estimate``.
    beta : float
        Inverse temperature.
    minimum_counts : int, optional
        Forwarded to ``free_energy_estimate``.
    """
    estimated_free_energy, coordinates = free_energy_estimate(samples, beta, minimum_counts)

    # Vertical offset: pin the central retained bin to potential(0).
    centre_index = floor(len(estimated_free_energy)/2)
    linear_shift = estimated_free_energy[centre_index] - potential(0)

    plt.figure(figsize=(6,6))

    # Estimated profile (black line).
    plt.plot(coordinates, estimated_free_energy - linear_shift, 'k', label='estimated')
    plt.xlabel('x', fontsize=16)
    plt.ylabel('F', fontsize=16)

    # Reference potential on a 1000-step grid spanning the retained bins.
    lower = min(coordinates)
    upper = max(coordinates)
    x_range = np.arange(lower, upper, (upper-lower)/1000)
    plt.plot(x_range, potential(x_range), label='actual')

    plt.legend()
    plt.title('Free Energy Surface', fontsize=16)

In [None]:
def project_points_to_line(points, coords, theta):
    """Project 2D points onto a line and return their signed positions along it.

    Parameters
    ----------
    points : ndarray, shape (n, 2)
        Points to project.
    coords : ndarray, shape (2,)
        A point ``(x0, y0)`` that the line passes through; it is the origin
        of the returned coordinate.
    theta : float
        Orientation of the line in radians (``theta = 0`` is parallel to the
        x axis, ``theta = pi/2`` is parallel to the y axis).

    Returns
    -------
    ndarray, shape (n,)
        Scalar coordinate of each point along the line direction.
    """
    anchor = coords
    tip = coords + np.array([np.cos(theta), np.sin(theta)])

    direction = tip - anchor
    offsets = points - anchor

    return np.dot(offsets, direction) / np.dot(direction, direction)

In [None]:
def plot_free_energy_surface(samples, beta, slice_centre, slice_angle, minimum_counts=50):
    """Project 2D samples onto a line and plot the resulting 1D free energy.

    Draws three panels: a 2D histogram of the samples with the slice line
    overlaid, a histogram of the projected samples, and the free-energy
    estimate along the slice.

    Parameters
    ----------
    samples : sequence of arrays
        2D samples; they are concatenated along the first axis.
    beta : float
        Inverse temperature.
    slice_centre : sequence of length 2
        Point the slice line passes through.
    slice_angle : float
        Orientation of the slice line in radians. The overlay is drawn as
        ``y = tan(slice_angle) * x + c``.
    minimum_counts : int, optional
        Forwarded to ``free_energy_estimate``.

    Returns
    -------
    ndarray
        The samples projected onto the slice line.
    """
    all_points = np.concatenate(samples)
    projected_samples = project_points_to_line(all_points, np.array(slice_centre), slice_angle)
    free_energy, coordinates = free_energy_estimate(projected_samples, beta, minimum_counts)

    fig, axs = plt.subplots(1, 3)
    fig.set_size_inches(18,5)
    density_ax, projection_ax, energy_ax = axs[0], axs[1], axs[2]

    # Panel 1: sample density with the slice centre (red cross) and line.
    density_ax.hist2d(all_points[:,0], all_points[:,1], bins=300)
    density_ax.plot(slice_centre[0], slice_centre[1], 'rx', markersize=12)

    max_x = max(all_points[:,0])
    min_x = min(all_points[:,0])
    x_range = np.arange(min_x, max_x, (max_x-min_x)/1000)
    slope = np.tan(slice_angle)
    intercept = slice_centre[1] - slope * slice_centre[0]
    density_ax.plot(x_range, slope * x_range + intercept, 'r')

    # Panel 2: distribution of the projected coordinate.
    projection_ax.hist(projected_samples, bins=100)

    # Panel 3: free energy along the slice.
    energy_ax.plot(coordinates, free_energy)
    plt.show()

    return projected_samples

Evaluation functions

In [None]:
def relabel_trajectory_by_coordinate_chronology(traj, state_centers):
    """Relabel discrete states so labels increase with the state's coordinate.

    State ``s`` is mapped to the rank of ``state_centers[s]`` among all
    centres, so after relabelling state 0 has the smallest centre, state 1
    the next smallest, and so on.

    Parameters
    ----------
    traj : array-like of int
        Discrete trajectory of state labels indexing ``state_centers``.
    state_centers : array-like
        Coordinate of each state's centre.

    Returns
    -------
    ndarray
        Relabelled trajectory with the same shape and dtype as ``traj``.
        The input is left untouched: earlier versions overwrote ``traj`` in
        place using a per-frame Python loop, which was slow for long
        trajectories and silently modified the caller's array.
    """
    # argsort of argsort gives, for every state, its rank by centre position.
    rank_of_state = np.argsort(np.argsort(state_centers))

    labels = np.asarray(traj)
    # Single vectorised lookup instead of a Python loop over every frame.
    return rank_of_state[labels].astype(labels.dtype, copy=False)

In [None]:
def compute_discrete_trajectory(trajectory, k=30):
    """Discretise a trajectory with k-means and order the states by position.

    Parameters
    ----------
    trajectory : array-like
        Trajectory passed to ``pyemma.coordinates.cluster_kmeans``.
    k : int, optional
        Number of k-means clusters.

    Returns
    -------
    discrete_traj
        First discrete trajectory of the clustering, relabelled so that state
        indices follow the sorted order of the cluster centres.
    cluster_centers : ndarray
        Flattened cluster centres, sorted in ascending order.
    """
    kmeans = pyemma.coordinates.cluster_kmeans(trajectory, k=k)
    raw_labels = kmeans.dtrajs[0]
    flat_centers = kmeans.clustercenters.flatten()

    # Make label i refer to the i-th smallest centre, matching the sorted
    # centres returned alongside.
    ordered_labels = relabel_trajectory_by_coordinate_chronology(raw_labels, flat_centers)

    return ordered_labels, np.sort(flat_centers)

def calculate_cni(i, X, n, P):
    """Return the n-th conditional moment of the displacement out of state ``i``.

    Computes ``sum_j (X[j] - X[i])**n * P[i, j]``.

    Parameters
    ----------
    i : int
        Index of the starting state.
    X : sequence
        State coordinates.
    n : int
        Order of the moment.
    P : ndarray
        Matrix indexed as ``P[i, j]``, weighting the displacement from state
        ``i`` to state ``j``.
    """
    weighted_displacements = []
    for j in range(len(X)):
        weighted_displacements.append((X[j] - X[i])** n * P[i,j])
    return np.sum(weighted_displacements)

def calculate_c(X, n, P):
    """Return the n-th conditional displacement moment for every state.

    Element ``i`` of the result is ``calculate_cni(i, X, n, P)``.
    """
    moments = []
    for state in range(len(X)):
        moments.append(calculate_cni(state, X, n, P))
    return np.array(moments)

In [None]:
def correlation_coefficients_check(beta, potential, discrete_traj, cluster_centers, lag, time_step):
    """Compare displacement moments from an MSM with values derived from the potential.

    A Markov state model is estimated from ``discrete_traj`` at the given
    lag. Its transition matrix yields the first and second conditional
    displacement moments (C1, C2) per cluster centre, from which
    ``D1 = C1 / tau`` and ``D2 = (C2 - C1**2) / (2 * tau)`` are formed, with
    ``tau = lag * time_step``. These are plotted in a 2x2 grid against
    "theory" curves built from ``D1 = -beta * dV/dx`` and a constant
    ``D2 = 2``.
    NOTE(review): whether these theory expressions match the sampler's
    conventions cannot be confirmed from this notebook -- verify.

    Parameters
    ----------
    beta : float
        Inverse temperature.
    potential : callable
        Scalar potential, differentiated with ``autograd.grad``.
    discrete_traj : array-like of int
        Discrete trajectory used to estimate the MSM.
    cluster_centers : array-like
        Coordinate of each discrete state.
    lag : int
        MSM lag time in steps.
    time_step : float
        Time per step.
    """
    tau = lag * time_step

    msm = pyemma.msm.estimate_markov_model(discrete_traj, lag)
    transition_matrix = msm.transition_matrix

    # Grid spanning the cluster centres for the theory curves.
    x_min = min(cluster_centers)
    x_max = max(cluster_centers)
    x_range = np.arange(x_min, x_max, (x_max-x_min)/1000)
    grad_potential = grad(potential)

    # Theory curves.
    D1_theory = -beta * np.array([grad_potential(x) for x in x_range])
    D2_theory = np.array([2] * len(x_range))
    C1_theory = tau * D1_theory
    C2_theory = 2 * D2_theory * tau + C1_theory ** 2

    # Estimates from the MSM transition matrix.
    C1_exp = calculate_c(cluster_centers, 1, transition_matrix)
    C2_exp = calculate_c(cluster_centers, 2, transition_matrix)
    D1_exp = C1_exp / tau
    D2_exp = (C2_exp - C1_exp ** 2)/(2*tau)

    fig, axs = plt.subplots(2, 2)
    fig.set_size_inches(12,8)

    # (axis, title, theory curve, MSM estimate) for each panel.
    panels = (
        (axs[0,0], 'C1', C1_theory, C1_exp),
        (axs[0,1], 'C2', C2_theory, C2_exp),
        (axs[1,0], 'D1', D1_theory, D1_exp),
        (axs[1,1], 'D2', D2_theory, D2_exp),
    )
    for ax, title, theory_curve, msm_estimate in panels:
        ax.set_title(title)
        ax.plot(x_range, theory_curve, label='theory')
        ax.plot(cluster_centers, msm_estimate, label='exp')
        ax.legend()

    plt.show()

In [None]:
def compute_analytic_kramers_rate(potential, beta, initial_x, final_x, D, n_points=1001):
    """Kramers escape rate from ``initial_x`` to ``final_x`` by direct quadrature.

    The previous body could not run: it contained an unfinished assignment
    (a syntax error), referenced ``self`` and an unimported ``integrate``,
    and returned nothing. This version keeps the same two integrals but
    evaluates them on the supplied ``potential``:

    ``rate = 1 / (well_integral * barrier_integral)``

    * ``well_integral``    = integral of ``exp(-beta * V(x))`` between
      ``initial_x`` and the midpoint of ``initial_x`` and ``final_x``;
    * ``barrier_integral`` = integral of ``exp(+beta * V(x)) / D`` between
      ``initial_x`` and ``final_x``.

    NOTE(review): the integration limits mirror the index ranges of the
    original fragment, reinterpreted as coordinates; the well integral
    therefore covers only the half of the well facing the barrier. Confirm
    that this is the intended definition before relying on absolute rates.

    Parameters
    ----------
    potential : callable
        Potential (free energy) accepting a 1D array of positions.
    beta : float
        Inverse temperature.
    initial_x, final_x : float
        Coordinates of the starting and target positions.
    D : float
        Diffusion coefficient, assumed constant.
    n_points : int, optional
        Number of quadrature points used for each integral.

    Returns
    -------
    float
        Estimated transition rate.

    Raises
    ------
    ValueError
        If ``initial_x == final_x`` or ``n_points < 3``.
    """
    # Local import: scipy is not imported at notebook level.
    from scipy import integrate

    if initial_x == final_x:
        raise ValueError("initial_x and final_x must differ")
    if n_points < 3:
        raise ValueError("n_points must be at least 3")

    mid_x = 0.5 * (initial_x + final_x)
    # Ascending grids regardless of the direction of the transition.
    well_grid = np.linspace(min(initial_x, mid_x), max(initial_x, mid_x), n_points)
    barrier_grid = np.linspace(min(initial_x, final_x), max(initial_x, final_x), n_points)

    well_integrand = np.exp(-beta * potential(well_grid))
    # assuming constant diffusion coefficient
    barrier_integrand = np.exp(beta * potential(barrier_grid)) / D

    # x must be passed by keyword: recent SciPy releases make it keyword-only.
    well_integral = integrate.simpson(well_integrand, x=well_grid)
    barrier_integral = integrate.simpson(barrier_integrand, x=barrier_grid)

    return 1.0 / (well_integral * barrier_integral)

## Langevin Dynamics

In [None]:
import matplotlib.pyplot as plt 
from mdfeature.Langevin import LangevinDynamics

# Thermodynamic settings shared by every sampler.
temperature = 300
R = 0.0083144621  # Universal Gas Constant kJ/K/mol
beta = 1.0 / (temperature * R)  # units (kJ/mol)**(-1)

# Integration time step passed to every sampler.
step_size = 5e-3

# Keyword arguments common to all LangevinDynamics instances below.
sampler_settings = {"beta": beta, "time_step": step_size}

# 1D systems start at x = 0.
shallow_well_sampler = LangevinDynamics(x0=0.0, potential=shallow_well_potential, **sampler_settings)
double_well_sampler = LangevinDynamics(x0=0.0, potential=double_well_potential, **sampler_settings)

# 2D systems take a two-component starting point.
quadruple_well_sampler = LangevinDynamics(x0=[0.0,0.0], potential=quadruple_well_potential, **sampler_settings)
ring_double_well_sampler = LangevinDynamics(x0=[0.2,1.0], potential=ring_double_well_potential, **sampler_settings)
muller_brown_sampler = LangevinDynamics(x0=[0.0,0.0], potential=muller_brown_potential, **sampler_settings)

Kramers Rate Evaluator

In [None]:
# Single evaluator instance; reused by the kre.fit(...) cells further down.
kre = KramersRateEvaluator(verbose=True)

### Shallow Well 

In [None]:
# Long run of the shallow-well sampler (5,000,000 steps, burn_in=4000).
shallow_well_samples = shallow_well_sampler.simulate(number_of_steps=5000000, burn_in=4000)
# Join the returned samples and flatten them into a single 1D array.
shallow_well_traj = np.concatenate(shallow_well_samples).ravel()

### Double well

In [None]:
# Long run of the double-well sampler (5,000,000 steps, burn_in=10000).
double_well_samples = double_well_sampler.simulate(number_of_steps=5000000, burn_in=10000)
# NOTE: this rebinds double_well_traj, replacing the array restored from
# double_well_traj.pickle in the data-restore cell at the top of the notebook.
double_well_traj = np.concatenate(double_well_samples).ravel()
# Quick look at the sampled distribution.
plt.hist(double_well_traj, bins=100)
plt.show()

In [None]:
# Compare the histogram-based free-energy estimate with the true double-well
# potential; only bins holding more than 500 samples are kept.
plot_free_energy_estimate(double_well_potential, double_well_samples, beta, minimum_counts=500)

In [None]:
# Discretise the double-well trajectory (default k=30 clusters) ...
discrete_traj, cluster_centers = compute_discrete_trajectory(double_well_traj)
# ... and compare the MSM displacement moments with theory at a lag of 1 step.
correlation_coefficients_check(beta, double_well_potential, discrete_traj, cluster_centers, 1, step_size)

In [None]:
# Fit the Kramers rate evaluator to the 1D double-well trajectory.
# NOTE(review): this call passes time_step= and k=, whereas the quadruple-well
# call further down passes step_size=, bins=, cluster_type= and options=.
# Confirm both against the current KramersRateEvaluator.fit signature.
kre.fit(double_well_traj, 
        beta, 
        sigmaD=0.002,
        sigmaF=0.00025,
        lag=35,
        time_step=step_size,
        k=30)

### Quadruple well 

In [None]:
# Run the 2D quadruple-well sampler (1,000,000 steps, burn_in=4000).
quadruple_well_samples = quadruple_well_sampler.simulate(number_of_steps=1000000, burn_in=4000)

In [None]:
# Slice through the origin at angle 0 (parallel to the x axis); the returned
# array holds the samples projected onto that line.
quadruple_well_projected_traj = plot_free_energy_surface(quadruple_well_samples, beta, np.array([0,0]), 0, minimum_counts=50)

In [None]:
# Fit the Kramers rate evaluator to the projected quadruple-well trajectory.
# NOTE(review): the keyword names here (step_size=, minima_prominance=,
# cluster_type=, options=) differ from the double-well call above, which uses
# time_step= and k=. One of the two calls is probably written against an older
# version of KramersRateEvaluator.fit -- confirm against its signature
# (including the spelling of 'minima_prominance').
kre.fit(quadruple_well_projected_traj, 
        beta, 
        sigmaD=0.5,
        sigmaF=0.00025,
        lag = 3,
        bins=200, 
        step_size=step_size,
        minima_prominance=2.0, 
        cluster_type='kmeans',
        options={'k': 200, 'stride': 5, 'max_iter': 150,
         'max_centers': 1000, 'metric': 'euclidean', 'n_jobs': None, 'dmin': 0.002})

### Ring double well

In [None]:
# Run the 2D ring double-well sampler (1,000,000 steps, burn_in=4000).
ring_double_well_samples = ring_double_well_sampler.simulate(number_of_steps=1000000, burn_in=4000)

In [None]:
# Slice through the origin at angle 0 (parallel to the x axis), keeping only
# histogram bins with more than 5000 samples.
ring_double_well_projected_traj = plot_free_energy_surface(ring_double_well_samples, beta, np.array([0,0]), 0, minimum_counts=5000)

### Muller-Brown potential

In [None]:
# Run the 2D Muller-Brown sampler (1,000,000 steps, burn_in=4000).
muller_brown_samples = muller_brown_sampler.simulate(number_of_steps=1000000, burn_in=4000)

In [None]:
# Slice through the origin at an angle of pi/8 to the x axis, keeping only
# histogram bins with more than 5000 samples.
muller_brown_projected_traj = plot_free_energy_surface(muller_brown_samples, beta, np.array([0,0]), np.pi/8, minimum_counts=5000)