# String Simulation Analysis

This notebook will help you analyse the convergence of the string method and, if you are lucky, extract a nice free energy surface.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%load_ext lab_black
import numpy as np
import glob as glob
from math import ceil
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, AutoMinorLocator
from MDAnalysis.analysis.align import AlignTraj
import MDAnalysis as mda
import nglview as nv
import pickle
import logging
import sys
import os
import pyemma

logging.getLogger("matplotlib").setLevel(logging.ERROR)
logging.getLogger("blib2to3.pgen2.driver").setLevel(logging.WARNING)
logging.getLogger("pyemma").setLevel(logging.NOTSET)

In [None]:
def colorbar(mappable, cmap, norm, label0, size=10):
    """Attach a standalone colorbar to the right-hand side of an axes.

    Parameters
    ----------
    mappable : object exposing an ``axes`` attribute
        The colorbar is attached to ``mappable.axes``.
        NOTE(review): every call in this notebook passes a matplotlib ``Axes``.
    cmap : colormap handed to the colorbar.
    norm : normalization handed to the colorbar (sets the value range).
    label0 : str
        Text of the colorbar label.
    size : int, optional
        Font size of the colorbar label.

    Returns
    -------
    The colorbar object that was created.
    """
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    divider = make_axes_locatable(mappable.axes)
    # Carve a strip (5% of the parent axes width) on the right to host the colorbar.
    cax = divider.append_axes("right", size="5%", pad=0.05)
    cbar = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm)
    cbar.set_label(label0, size=size)
    return cbar

In [None]:
# Notebook-wide matplotlib style: light background with a white grid and
# rounded line caps.
plt.rcParams.update(
    {
        "axes.facecolor": "#f9f9fb",
        "grid.color": "white",
        "grid.linestyle": "-",
        "grid.linewidth": 2,
        "axes.grid": True,
        "lines.solid_capstyle": "round",
    }
)

In [None]:
def natural_sort(l):
    """
    Takes as input a list l of strings and sorts it with natural order
    (runs of ASCII digits compare as integers, everything else compares
    case-insensitively), e.g. "string2" sorts before "string10".

    Parameters
    ----------
    l: list of strings.

    Returns
    -------
    A new sorted list; l itself is not modified.

    Raises
    ------
    AssertionError
        If l is not a list or contains non-string elements.
    """
    import re

    assert isinstance(l, list), "l is not a list!"
    for i in l:
        assert isinstance(i, str), "List contains non-string elements."

    digit_runs = re.compile("([0-9]+)")

    def alphanum_key(key):
        # Splitting on a capturing group alternates text / digits / text ...,
        # so odd positions are exactly the ASCII digit runs. Converting by
        # position (instead of str.isdigit) avoids int() failing on characters
        # such as "²" that are "digits" for isdigit() but not valid integers.
        return [
            int(piece) if position % 2 else piece.lower()
            for position, piece in enumerate(digit_runs.split(key))
        ]

    return sorted(l, key=alphanum_key)

# String Convergence Analysis

## Extract CVs

In the cell below you can select the simulation directory (in case this notebook is located elsewhere). If the notebook is in the simulation directory, just leave it as ".".

In [None]:
%ls ../data/raw

In [None]:
# Root directory of the string-method simulation to analyse. Keep exactly one
# assignment active; the other datasets are kept here as commented alternatives.
# simulation_directory = "/data/sperez/Projects/string_sims/data/raw/C2I_v1/"
# simulation_directory = "/data/sperez/Projects/string_sims/data/raw/C2I_lb_v2/"
simulation_directory = "/data/sperez/Projects/string_sims/data/raw/C2I_lb_v1/"
# Every relative path used below (strings/, md/, cv.pkl, config.json,
# postprocessing/) is resolved from this working directory.
os.chdir(simulation_directory)
os.getcwd()

In [None]:
%ls md

Load the strings in the `strings` variable.

In [None]:
# Collect the string files and order them naturally, so that e.g. string2
# comes before string10 (a plain lexicographic sort would not do that).
files = natural_sort(glob.glob("./strings/string[0-9]*txt"))

In [None]:
# One transposed text table per file, stacked along a new leading axis.
# The cells below index the result as (iteration, cv, bead).
strings = np.array([np.loadtxt(file).T for file in files])

In [None]:
# cv.pkl holds a 2-tuple that is unpacked into `cvs` and `ndx_groups`; the keys of
# `ndx_groups` are used below (two consecutive keys per CV) to label the plots.
# SECURITY: pickle.load can execute arbitrary code - only open files you trust.
with open("cv.pkl", "rb") as file:
    cvs, ndx_groups = pickle.load(file)

In [None]:
# Summary of the loaded array, whose axes are (iteration, cv, bead).
print("String details")
print("")
print(f"Number of strings: {strings.shape[0]}")
print(f"Number of cvs: {strings.shape[1]}")
print(f"Number of beads per string: {strings.shape[2]}")

# Analyze string convergence
In the next plots you will be able to study the convergence of the string. At convergence, the strings should oscillate around an equilibrium position and not drift over the iterations.

## Strings as a function of time
In this plot we can see the evolution of each string CV as a function of the iteration number, separately.

You can change two parameters in these plots: `start_iteration`, before which no data is plotted, and `n_average`, the number of string iterations averaged into one block. Averaging cancels some of the noise in the representation, reduces the number of strings in the plot and makes it easier to see whether there is an average drift.

+ TODO: delete the last string
+ TODO: test with the GPCR

In [None]:
# Tunables: iterations before `start_iteration` are not plotted; `n_average`
# consecutive iterations are averaged into each plotted curve.
start_iteration = 0
n_average = 20


n_plots = strings.shape[1]
n_strings = strings.shape[0]
fig, ax = plt.subplots(ceil(n_plots / 2), 2, figsize=(20, 8 * ceil(n_plots / 2)))
ax = ax.flatten()
cmap = plt.cm.viridis_r
# One color per block of averaged iterations.
n_colors = (n_strings - start_iteration) // n_average + 1
colors = cmap(np.linspace(0, 1, n_colors))  # yellow to blue
norm = mpl.colors.Normalize(vmin=start_iteration, vmax=n_strings - 1)
# CV i is labelled with the two consecutive group names 2*i and 2*i+1.
group_names = list(ndx_groups.keys())

for i, a in enumerate(ax[:n_plots]):
    # Initial string, drawn as a dotted red reference.
    a.plot(strings[0, i, :], ls=":", marker=".", label="string0", color="r")
    for jj, j in enumerate(range(start_iteration, n_strings, n_average)):
        # The last block may hold fewer than n_average iterations.
        string = np.mean(strings[j : j + n_average, i, :], axis=0)
        a.plot(string, ls="-", marker="o", color=colors[jj])
    a.set_ylabel(
        f"{group_names[2 * i]} - {group_names[2 * i + 1]} (nm)",
        size=18,
        labelpad=16,
    )
    a.set_xlabel("bead number", size=15, labelpad=13)
    a.set_xlim(left=0, right=strings.shape[2] - 1)
    a.xaxis.set_minor_locator(MultipleLocator(1))
    a.xaxis.set_major_locator(MultipleLocator(1))
    a.yaxis.set_minor_locator(MultipleLocator(0.1))
    a.yaxis.set_major_locator(MultipleLocator(0.1))
    a.grid(which="minor")
    a.tick_params(axis="y", labelsize=14)
    a.tick_params(axis="x", labelsize=11)
    a.set_title(f"cv{i}")
    # Legend and colorbar only on the right-hand column.
    if i % 2 != 0:
        a.legend()
        cbar = colorbar(a, cmap, norm, "iteration number", 20)
# With an odd number of CVs the last grid cell is empty: remove it.
if n_plots % 2:
    fig.delaxes(ax[-1])

In [None]:
# RMSD of every CV profile with respect to the initial string (iteration 0),
# as a function of the iteration number.
n_strings, n_plots = strings.shape[:2]
n_rows = ceil(n_plots / 2)
fig, ax = plt.subplots(n_rows, 2, figsize=(20, 8 * n_rows))
ax = ax.flatten()
group_names = list(ndx_groups.keys())
x = np.arange(n_strings)
for i, a in enumerate(ax[:n_plots]):
    # Per-bead displacement from string 0, then root-mean-square over the beads.
    y = strings[:, i, :] - strings[0, i, :]
    y = np.sqrt(np.sum(y * y, axis=1) / strings.shape[2])
    a.plot(x, y)

    first_group = group_names[2 * i]
    second_group = group_names[2 * i + 1]
    a.set_ylabel(
        f"RMSD[{first_group} - {second_group} (nm)]",
        size=18,
        labelpad=16,
    )
    a.set_xlabel("iteration number", size=15, labelpad=13)
    a.set_title(f"cv{i}")
# Drop the unused grid cell when the number of CVs is odd.
if n_plots % 2:
    fig.delaxes(ax[-1])

## Evolution over CVs that are a function of the cvs

If you are interested in studying the convergence of cvs that are a function of the string CVs (for example, averages over symmetrical distances), you can construct a `reduced_string` array whose cvs are functions of the cvs used for the string method. In the example below, we produce two cvs, each being the mean of two cvs used in the string-method simulation. Then, similar plots as before can be made.

In addition, if you are interested in the convergence of some other cv which is not a function of the cvs used in the string method, you can also study it! Just extract the average value of that particular CV from `md/*/*/restrained/traj_comp.xtc` for all the restrained simulations and shape the values into a `reduced_string` numpy array with shape (n_iterations, n_cvs, n_beads).

If this sort of analysis is meaningless in your system, for example because the chosen cvs are very diagnostic, please ignore this section.

In [None]:
# Each reduced CV is the mean of a pair of string CVs. keepdims=True keeps the
# CV axis so that the two results can be joined along it (axis 1).
cv_pairs = (slice(0, 2), slice(10, 12))
reduced_string = np.concatenate(
    [np.mean(strings[:, pair, :], axis=1, keepdims=True) for pair in cv_pairs],
    axis=1,
)
reduced_string_labels = ["SF (nm)", "IG (nm)"]

In [None]:
# Tunables: first iteration considered and spacing (in iterations) between the
# plotted error-bar curves.
start_iteration = 100
n_average = 10

n_strings = strings.shape[0]
fig, ax = plt.subplots(1, 1, figsize=(10, 7))
cmap = plt.cm.viridis_r
n_colors = (n_strings - start_iteration) // n_average + 1
colors = cmap(np.linspace(0, 1, n_colors))  # yellow to blue
norm = mpl.colors.Normalize(vmin=start_iteration, vmax=n_strings - 1)
# Initial string in the plane of the two reduced CVs (dotted black line).
ax.plot(
    reduced_string[0, 0, :],
    reduced_string[0, 1, :],
    ls=":",
    marker=".",
    label="string0",
    color="k",
)
# Mean string over all iterations from start_iteration onwards. std_0 / std_1
# computed here are not used before the loop below overwrites them.
av_0 = np.mean(reduced_string[start_iteration:, 0, :], axis=0)
std_0 = np.std(reduced_string[start_iteration:, 0, :], axis=0)
av_1 = np.mean(reduced_string[start_iteration:, 1, :], axis=0)
std_1 = np.std(reduced_string[start_iteration:, 1, :], axis=0)
ax.plot(
    av_0,
    av_1,
    ls="-",
    marker=".",
    color="k",
    label=f"mean(string{start_iteration}-{n_strings})",
)

# Each curve averages everything from iteration j to the END (slice `j:`), not a
# block of n_average iterations; n_average only sets the step between curves.
for jj, j in enumerate(range(start_iteration, n_strings, n_average)):
    av_0 = np.mean(reduced_string[j:, 0, :], axis=0)
    std_0 = np.std(reduced_string[j:, 0, :], axis=0)
    av_1 = np.mean(reduced_string[j:, 1, :], axis=0)
    std_1 = np.std(reduced_string[j:, 1, :], axis=0)
    ax.errorbar(
        av_0, av_1, fmt="--", xerr=std_0, yerr=std_1, color=colors[jj], alpha=0.9
    )


ax.set_ylabel(
    reduced_string_labels[1],
    size=18,
    labelpad=16,
)
ax.set_xlabel(
    reduced_string_labels[0],
    size=18,
    labelpad=16,
)

ax.xaxis.set_minor_locator(MultipleLocator(0.1))
ax.xaxis.set_major_locator(MultipleLocator(0.1))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))
ax.yaxis.set_major_locator(MultipleLocator(0.1))
ax.grid(which="minor")
ax.tick_params(axis="y", labelsize=14)
ax.tick_params(axis="x", labelsize=11)
ax.legend()
cbar = colorbar(ax, cmap, norm, "iteration number", 20)

In [None]:
# RMSD of the whole reduced string with respect to its first iteration.
n_strings = reduced_string.shape[0]
fig, ax = plt.subplots(1, 1, figsize=(10, 8))
x = np.arange(n_strings)
y = reduced_string - reduced_string[0]
# Squared displacements are summed over CVs and beads and normalised by the
# number of beads of `reduced_string` itself (not of the unrelated `strings`).
y = np.sqrt(np.sum(y * y, axis=(1, 2)) / reduced_string.shape[2])
ax.plot(x, y)
ax.set_ylabel(
    "RMSD[Reduced string (nm)]",
    size=18,
    labelpad=16,
)
ax.set_xlabel("iteration number", size=15, labelpad=13)
ax.set_title("RMSD[Reduced string]")

# New CVs

## Sum of CVs cv

In [None]:
def scale_cv(cv):
    """Min-max rescale an array using its global minimum and maximum.

    The smallest element maps to 0 and the largest to 1. The input is not
    modified; a new array is returned.
    """
    lowest = cv.min()
    span = cv.max() - lowest
    return (cv - lowest) / span

In [None]:
# Equal-weight combination of four min-max scaled string CVs: columns 11 and 10
# enter with a positive sign, columns 0 and 1 with a negative sign. Slicing
# with k:k+1 keeps the CV axis, so `cv` has a single CV column.
scaled_strings = {k: scale_cv(strings[:, k : k + 1, :]) for k in (11, 10, 0, 1)}
cv = (
    0.5 * scaled_strings[11]
    + 0.5 * scaled_strings[10]
    - 0.5 * scaled_strings[0]
    - 0.5 * scaled_strings[1]
)

In [None]:
# Tunables: iterations before `start_iteration` are not plotted; `n_average`
# consecutive iterations are averaged into each plotted curve.
start_iteration = 0
n_average = 20

n_strings = cv.shape[0]
fig, ax = plt.subplots(1, 1, figsize=(20, 8))
cmap = plt.cm.viridis_r
n_colors = (n_strings - start_iteration) // n_average + 1
colors = cmap(np.linspace(0, 1, n_colors))  # yellow to blue
norm = mpl.colors.Normalize(vmin=start_iteration, vmax=n_strings - 1)

# Initial string (iteration 0) as a dotted red reference, matching its label.
ax.plot(cv[0, 0, :], ls=":", marker=".", label="string0", color="r")
for jj, j in enumerate(range(start_iteration, n_strings, n_average)):
    # The last block may hold fewer than n_average iterations.
    string = np.mean(cv[j : j + n_average, 0, :], axis=0)
    ax.plot(string, ls="-", marker="o", color=colors[jj])
ax.set_ylabel(
    "Two-gate cv",
    size=18,
    labelpad=16,
)
ax.set_xlabel("bead number", size=15, labelpad=13)
ax.set_xlim(left=0, right=cv.shape[2] - 1)
ax.xaxis.set_minor_locator(MultipleLocator(1))
ax.xaxis.set_major_locator(MultipleLocator(1))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))
ax.yaxis.set_major_locator(MultipleLocator(0.1))
ax.grid(which="minor")
ax.tick_params(axis="y", labelsize=14)
ax.tick_params(axis="x", labelsize=11)
ax.set_title("Two-gate cv")
# Single-axes figure: legend and colorbar go on this axes. This cell no longer
# depends on the loop variables (`i`, `a`) or `n_plots` left over from the
# multi-panel cells above.
ax.legend()
cbar = colorbar(ax, cmap, norm, "iteration number", 20)

## Path CV

# Free Energy Surface

Once your strings are converged, the swarms sample the same part of phase space over and over, so we can discretize it and do statistics on the jumps. This results in a free energy surface along some cvs, which need not be the ones that parametrize the string. It is very important to keep in mind that a converged string does not imply a converged FES, and it might be necessary to run one or two (or more) hundred additional iterations.


Now, instead of using the data in `strings/string*.txt`, we will use the data in `md/*/*/s*/pullx.xvg` if we want to use the cvs of the string. Otherwise, you can add code here that reads `md/*/*/s*/traj_comp.xtc`, calculates your desired cv and then arranges the data into the correct shape `(n_iterations*n_swarms_per_iter*n_beads, n_frames_per_iter, n_cvs)`.

The path of `sys.path.append` should lead to the library `string-method-gmxapi`  

In [None]:
sys.path.append("../../../../string-method-gmxapi")
from stringmethod.config import *
from stringmethod.postprocessing import *

In [None]:
def load_swarm_data(extract, first_iteration=1, last_iteration=None):
    """Load the swarm CV coordinates, optionally re-extracting them first.

    Parameters
    ----------
    extract : bool
        If true, build a ``CvValueExtractor`` from ``config.json`` and call its
        ``run()`` and ``persist()`` before loading.
        NOTE(review): ``persist()`` presumably (re)writes
        ``postprocessing/cv_coordinates.npy`` - confirm in the library.
    first_iteration : int, optional
        First iteration handed to the extractor (skip early iterations to let
        the system equilibrate).
    last_iteration : int or None, optional
        Last iteration handed to the extractor (useful to make blocks of the
        simulation). ``None`` means no upper limit (``sys.maxsize``).

    Returns
    -------
    The array stored in ``postprocessing/cv_coordinates.npy``.
    """
    if last_iteration is None:
        last_iteration = sys.maxsize
    if extract:
        config = load_config("config.json")

        ce = CvValueExtractor.from_config(
            config=config,
            first_iteration=first_iteration,
            last_iteration=last_iteration,
        )
        ce.run()
        ce.persist()
    # With extract=False this simply reads whatever a previous run left on disk.
    return np.load("postprocessing/cv_coordinates.npy")

In [None]:
def show_fes(
    grid,
    free_energy,
    fe_cut_off=None,
    cv_labels=["cv0 (nm)", "cv1 (nm)"],
    cbar_label="Free Energy (kT)",
    ax=None,
    fig=None,
    f_min=None,
    f_max=None,
):
    """Plot a 1-D or 2-D free energy surface.

    Parameters
    ----------
    grid : array
        Column 0 holds the coordinates along the first CV and, in the 2-D
        case, column 1 those along the second CV.
    free_energy : array
        Free energy values; a single column (``shape[1] == 1``) selects the
        1-D line plot, anything else the 2-D filled contour plot.
        The caller's array is left untouched.
    fe_cut_off : float or None, optional
        Values above it are masked (set to NaN). ``None`` uses
        ``sys.maxsize``, which still masks infinite values.
    cv_labels : list of str, optional
        Axis labels for the first and second CV (never modified here).
    cbar_label : str, optional
        Label of the colorbar (2-D) or of the y axis (1-D).
    ax, fig : matplotlib axes / figure, optional
        If ``ax`` is None a new figure and axes are created.
    f_min, f_max : float or None, optional
        Color range; default to the finite min / max of ``free_energy``.

    Returns
    -------
    (fig, ax)
        ``fig`` is the figure passed in or created here, falling back to
        ``ax.figure`` when only ``ax`` was supplied.
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    if fe_cut_off is None:
        fe_cut_off = sys.maxsize
    # Mask on a float copy so that the caller's array is not modified.
    free_energy = np.array(free_energy, dtype=float)
    free_energy[free_energy > fe_cut_off] = np.nan
    cv_0 = grid[:, 0]
    if free_energy.shape[1] == 1:
        ax.plot(cv_0, free_energy, "--o")
        ax.set_ylabel(cbar_label)
    else:
        cv_1 = grid[:, 1]
        cmap = plt.cm.RdYlBu_r
        ax.contourf(
            cv_0,
            cv_1,
            free_energy.T,
            levels=50,
            cmap=cmap,
            vmin=f_min,
            vmax=f_max,
        )
        # The colorbar is built independently of the contour set, so give it
        # an explicit range when the caller did not.
        if f_min is None:
            f_min = np.nanmin(free_energy)
        if f_max is None:
            f_max = np.nanmax(free_energy)
        norm = mpl.colors.Normalize(vmin=f_min, vmax=f_max)
        colorbar(ax, cmap, norm, cbar_label, 20)
        ax.set_ylabel(cv_labels[1])
    ax.set_xlabel(cv_labels[0])
    # tight_layout is only applied to a figure that was passed in or created
    # here, never to the parent figure of a caller-supplied axes.
    if fig is not None:
        fig.tight_layout()
    return (fig if fig is not None else ax.figure), ax

In [None]:
def calculate_transition_matrix(
    cv_coordinates,
    n_grid_points=15,
    T=300,
    kB=0.001987204,
    convergence_cutoff=1.0e-8,
    method="eigenvector",
    symm=False,
):
    """Count transitions on a CV grid and derive a free energy from them.

    Parameters
    ----------
    cv_coordinates : array
        Swarm CV data handed to ``TransitionCountCalculator``.
    n_grid_points : int, optional
        Grid resolution. Its optimal value depends on the swarm trajectory
        length and on the sample size, so it is worth experimenting with.
    T, kB : float, optional
        Temperature and Boltzmann constant handed to ``FreeEnergyCalculator``.
    convergence_cutoff : float, optional
        Handed to ``FreeEnergyCalculator``.
    method : str, optional
        Handed to ``FreeEnergyCalculator``.
    symm : bool, optional
        If true, the transition counts are symmetrized (averaged with their
        transpose) before the free energy is computed.

    Returns
    -------
    (grid, free_energy, transition_count)

    Notes
    -----
    Reads ``config.json`` from the working directory and calls ``persist()``
    on both calculators. NOTE(review): ``persist()`` presumably writes the
    results to disk - confirm in the library.
    """
    config = load_config("config.json")
    tc = TransitionCountCalculator.from_config(
        config=config,
        n_grid_points=n_grid_points,
        cv_coordinates=cv_coordinates,
    )
    tc.run()
    tc.persist()
    if symm:
        # Average with the transpose. Adding the matrix to itself, as was done
        # before, left the counts unchanged.
        # NOTE(review): assumes transition_count is a square 2-D array.
        tc.transition_count = 0.5 * (tc.transition_count + tc.transition_count.T)
        print("symmetrize!")
    fc = FreeEnergyCalculator.from_config(
        config=config,
        grid=tc.grid,
        transition_count=tc.transition_count,
        T=T,
        kB=kB,
        method=method,
        convergence_cutoff=convergence_cutoff,
    )
    fc.run()
    fc.persist()
    return tc.grid, fc.free_energy, tc.transition_count

The `load_swarm_data` function loads the swarm data into `cv_coordinates`. If you set `extract=True` it will read the data from the swarm files. If you have done this previously you can set `extract=False`, so that the function just reads `postprocessing/cv_coordinates.npy`. `first_iteration` can be used to exclude the initial swarms as equilibration, and `last_iteration` can be used to exclude later iterations, for example if you want to estimate the FES convergence by comparing blocks of data.

In [None]:
%ls md/

In [None]:
#%rm -r md/277/
#%rm postprocessing/*

In [None]:
# Re-extract the swarm CVs starting at iteration 100, with no upper limit.
# Switch to extract=False to reuse postprocessing/cv_coordinates.npy from a previous run.
cv_coordinates = load_swarm_data(extract=True, first_iteration=100, last_iteration=None)

+ You can do some function of the cvs, like the mean of several:

In [None]:
# Option 1: treat the two members of each CV pair as extra samples. Columns
# (0, 1) are stacked along the sample axis into the first output CV, columns
# (10, 11) into the second, doubling the number of rows.
stacked_pairs = [
    np.concatenate([cv_coordinates[:, :, k : k + 1] for k in pair], axis=0)
    for pair in ((0, 1), (10, 11))
]
cv_coordinates_clean = np.concatenate(stacked_pairs, axis=2)

In [None]:
# Option 2: average each CV pair. Output CV 0 is the mean of columns 0 and 1,
# output CV 1 the mean of columns 10 and 11.
pair_means = [
    np.mean([cv_coordinates[:, :, k : k + 1] for k in pair], axis=0)
    for pair in ((0, 1), (10, 11))
]
cv_coordinates_clean = np.concatenate(pair_means, axis=2)

In [None]:
# Option 3: a single combined CV. Columns 10 and 11 (min-max scaled) enter with
# a positive sign, columns 0 and 1 with a negative sign, all with weight 0.5.
scaled_swarms = {
    k: scale_cv(cv_coordinates[:, :, k : k + 1]) for k in (10, 11, 0, 1)
}
cv_coordinates_clean = (
    0.5 * scaled_swarms[10]
    + 0.5 * scaled_swarms[11]
    - 0.5 * scaled_swarms[0]
    - 0.5 * scaled_swarms[1]
)

In [None]:
def get_path_variable(path, vec, lam=None):
    """Progress of a point along a path, as an exponentially weighted bead index.

    Parameters
    ----------
    path : array of shape (n_cvs, n_beads)
        One column per bead. The first bead (column 0) is left out of both the
        ``lam`` estimate and the weighted average.
    vec : array of shape (n_cvs,)
        Point to project onto the path.
    lam : float or None, optional
        Decay constant of the weights ``exp(-lam * distance)``. If None it is
        set to ``2.3 * (n_beads - 1)`` divided by the summed distances between
        consecutive beads, starting from bead 1.

    Returns
    -------
    (s, lam)
        ``s`` is the weighted mean of the indices ``0 .. n_beads - 2`` assigned
        to beads ``1 .. n_beads - 1``, divided by ``n_beads - 1``; ``lam`` is
        the decay constant actually used.
    """
    from numpy.linalg import norm

    n_beads = path.shape[1]
    last_index = n_beads - 1

    if lam is None:
        segment_lengths = [
            norm(path[:, k] - path[:, k + 1]) for k in range(1, last_index)
        ]
        lam = 2.3 * last_index / np.sum(segment_lengths)

    # One weight per bead (bead 0 excluded), decaying with the distance to vec.
    weights = np.array([np.exp(-lam * norm(vec - bead)) for bead in path[:, 1:].T])
    positions = np.arange(0, last_index)

    s = np.sum(positions * weights) / np.sum(weights) / last_index
    return s, lam

In [None]:
# Decay constant of the path CV. Set `lam = None` instead to let
# get_path_variable estimate it from the bead spacing on the first call (the
# value it returns is then reused for every following call).
lam = 1 / 2
cv_coordinates_clean = np.zeros((cv_coordinates.shape[0], cv_coordinates.shape[1], 1))
# Float copy of the swarm CVs, so that the loaded data is never modified.
cv = np.array(cv_coordinates, dtype=float)

# The last string is the reference path.
path = strings[-1, :, :]
for i in range(cv.shape[0]):
    for j in range(cv.shape[1]):
        s, lam = get_path_variable(path, cv[i, j, :], lam)
        cv_coordinates_clean[i, j, 0] = s

In [None]:
print(lam)

In [None]:
print(1 / lam)

In [None]:
plt.hist(cv_coordinates_clean.reshape(cv_coordinates_clean.size))

+ You can of course make your own function that extracts cvs from the trajectory and makes a cv_coordinates_clean with the correct shape.

Of the above cells only run the one you are more interested in.

This function takes the `cv_coordinates_clean` numpy array and calculates a transition matrix by placing a simple grid on the cv space. It also calculates the transition probabilities using the master equation, which results in a FES. `n_grid_points` sets the number of points of the grid: the finer the grid, the more detailed (and noisy) the FES. This parameter should be varied to obtain an acceptable signal-to-noise ratio. The temperature `T` and the value of `kB` can be set too. `kB` is used to give units to the FES. In this example we will use "kBT" units, since kBT=1.

The function below takes the `grid` and `free_energy` arrays from the previous step and plots the free energy surface. The function returns the matplotlib `fig` and `ax` for you to format further if you want. `fe_cut_off` is the maximum value of free energy above which nothing is plotted, and `cv_labels` are the labels of the cvs.

In [None]:
# Parameters of the block-wise FES convergence check further below.
n_max = strings.shape[0]  # number of loaded string iterations
n_skip = 25
n_start = 100
# NOTE(review): n_swarms and n_beads are hard-coded; presumably they must match
# the simulation setup - verify against config.json.
n_swarms = 32
n_beads = 16
# Number of panels (one FES per block) drawn by the convergence cell.
n_axes = (n_max - n_start) // n_skip + 1

In [None]:
# Free energy surface estimated from all the loaded swarm data.
fig, ax = plt.subplots(1, 1, figsize=(10, 7 * 1))
grid, free_energy, tc = calculate_transition_matrix(
    cv_coordinates_clean,
    n_grid_points=30,
    T=1,
    kB=1,
    method="eigenvector",
    convergence_cutoff=1.0e-10,
)
show_fes(
    grid,
    free_energy,
    # fe_cut_off=50.0,
    cv_labels=["SF (nm)", "IG (nm)"],
    cbar_label="Free Energy (kT)",
    ax=ax,
    f_min=0,
    f_max=20,
)
# Switch the grid off explicitly. The former `grid(b=None)` relied on toggling
# and on the `b` keyword, which newer matplotlib versions call `visible`.
ax.grid(False)

fig.tight_layout()

In [None]:
def get_cvs(cv_coordinates_clean, value):
    """Return the first-axis index of the entry closest to ``value``.

    The closest element is located over the whole array; the function then
    returns the index along axis 0 of the first position holding that value.
    """
    distance = np.abs(cv_coordinates_clean - value)
    nearest = cv_coordinates_clean.flat[distance.argmin()]
    matches = np.where(cv_coordinates_clean == nearest)
    return matches[0][0]

In [None]:
cv_coordinates[get_cvs(cv_coordinates_clean, -0.4), 1, [0, 1, 10, 11]]

In [None]:
cv_coordinates[get_cvs(cv_coordinates_clean, -0.6), 1, [0, 1, 10, 11]]

In [None]:
cv_coordinates[get_cvs(cv_coordinates_clean, 0.2), 1, [0, 1, 10, 11]]

In [None]:
cv_coordinates[get_cvs(cv_coordinates_clean, 0.5), 1, [0, 1, 10, 11]]

In [None]:
# FES estimated from a growing portion of the data, one panel per block.
# squeeze=False always returns a 2-D array of axes, so flatten() also works
# when there is a single panel (n_axes == 1).
fig, ax = plt.subplots(n_axes, 1, figsize=(10, 7 * n_axes), squeeze=False)
ax = ax.flatten()
for i in range(n_axes):
    # NOTE(review): the slice starts at row `n_start`, which is an iteration
    # number, while the rows of cv_coordinates_clean are swarm trajectories that
    # may already begin at `first_iteration` - confirm the intended offset.
    grid, free_energy, tc = calculate_transition_matrix(
        cv_coordinates_clean[
            n_start : n_start + (i + 1) * n_skip * n_swarms * n_beads,
            :,
            :,
        ],
        n_grid_points=40,
        T=1,
        kB=1,
        convergence_cutoff=1.0e-8,
    )
    show_fes(
        grid,
        free_energy,
        fe_cut_off=100.0,
        cv_labels=["SF (nm)", "IG (nm)"],
        cbar_label="Free Energy (kT)",
        ax=ax[i],
        f_min=0,
        f_max=15,
    )
    # Explicitly off; `grid(b=None)` depended on the renamed `b` keyword.
    ax[i].grid(False)

# fig.tight_layout()

In [None]:
def make_msm(cv0, n_grid):
    """Discretize 2-D swarm data on a regular grid and fit a Bayesian MSM.

    Parameters
    ----------
    cv0 : array of shape (n_swarms, 2, n_cvs)
        Swarm data; frames 0 and 1 of every swarm form one two-step
        trajectory and the first two CVs define the grid. The reshape below
        requires exactly two frames per swarm. ``cv0`` is not modified.
    n_grid : int
        Number of bins along each of the two CVs.

    Returns
    -------
    (trj_concat, msm)
        ``trj_concat`` holds all frames as rows (frame 0 and frame 1 of swarm
        0, then of swarm 1, ...) and ``msm`` is the estimated pyemma model.
    """
    cv = cv0.copy()
    trj_concat = cv.reshape(cv.shape[0] * 2, cv.shape[2]).copy()
    # Shift both CVs so that they start at zero, then convert to bin indices.
    cv[:, :, 0] -= cv[:, :, 0].min()
    cv[:, :, 1] -= cv[:, :, 1].min()
    dx = cv[:, :, 0].max() / n_grid
    dy = cv[:, :, 1].max() / n_grid
    # The maximum value would land in bin `n_grid`, which aliases with bin 0 of
    # the next row in the flattened state index; clip it into the last bin.
    cv[:, :, 0] = np.minimum(cv[:, :, 0] // dx, n_grid - 1)
    cv[:, :, 1] = np.minimum(cv[:, :, 1] // dy, n_grid - 1)

    state_traj = []
    for k in range(0, cv.shape[0]):
        # Flattened (row-major) index of the 2-D bin for both frames.
        state0 = cv[k, 0, 1] * n_grid + cv[k, 0, 0]
        state1 = cv[k, 1, 1] * n_grid + cv[k, 1, 0]
        state_traj.append(np.array([state0, state1], dtype=int))

    msm = pyemma.msm.bayesian_markov_model(
        state_traj,
        lag=1,
        dt_traj="0.01 ns",
        reversible=False,
    )
    print("fraction of states used = {:.2f}".format(msm.active_state_fraction))
    print("fraction of counts used = {:.2f}".format(msm.active_count_fraction))
    return trj_concat, msm

In [None]:
n_grid = 20
trj_concat, msm = make_msm(cv_coordinates_clean, n_grid)

In [None]:
# MSM-reweighted free energy surface of the stacked swarm frames.
fig, axes = plt.subplots(1, 1, figsize=(10, 8))
pyemma.plots.plot_free_energy(
    *trj_concat.T,
    weights=np.concatenate(msm.trajectory_weights()),
    ax=axes,
    nbins=50,
    legacy=False,
    cmap="RdYlBu_r",
    # ncontours=45,
    # pyemma's keyword is `kT` (capital T); with kT=1 the energies are in units
    # of kT, so the colorbar is labelled accordingly.
    kT=1,
    # vmax=10,
    cbar_label="free energy (kT)",
)
# Explicitly off; `grid(b=None)` depended on the renamed `b` keyword.
axes.grid(False)
axes.set_xlabel("SF (nm)")
axes.set_ylabel("IG (nm)")
axes.set_title("Reweighted free energy surface", fontweight="bold")
fig.tight_layout()

To save the plot.

In [None]:
fig.savefig(f"./free_energy2.svg", transparent=True)

## InfleCS

In [None]:
%pwd

In [None]:
sys.path.append("/home/sperez/Projects/InfleCS")
import free_energy_clustering as FEC

In [None]:
data_0 = cv_coordinates_clean[:, 0, :]
data_1 = cv_coordinates_clean[:, 1, :]
# Interleave the two frames of every swarm (frame 0 and frame 1 of swarm 0,
# then of swarm 1, ...). make_msm builds one two-frame trajectory per swarm,
# so np.concatenate(msm.trajectory_weights()) - used as data_weights below -
# follows this order. Stacking all frame-0 rows before all frame-1 rows would
# pair most rows with the wrong weight.
data = np.stack((data_0, data_1), axis=1).reshape(-1, data_0.shape[1])

In [None]:
# InfleCS free-energy estimator fitted to the stacked swarm frames in `data`,
# weighted per frame by the MSM trajectory weights.
# NOTE(review): the rows of `data` must be in the same order as the concatenated
# trajectory weights - confirm.
# NOTE(review): temperature=290.0 here, while the FES cells above use T=1, kB=1
# (kT units) - confirm which units are intended.
fec = FEC.FreeEnergyClustering(
    data,
    min_n_components=1,
    max_n_components=20,
    temperature=290.0,
    n_iterations=5,
    n_grids=80,
    n_splits=1,
    stack_landscapes=False,
    data_weights=np.concatenate(msm.trajectory_weights()),
)

In [None]:
coords, FE_landscape, FE_points = fec.landscape()

In [None]:
# Overlay the reduced strings on the InfleCS free energy landscape.
# Tunables: first iteration considered and step between error-bar curves.
start_iteration = 1
n_average = 1

n_strings = strings.shape[0]
fig, ax = plt.subplots(1, 1, figsize=(10, 7))
cmap = plt.cm.viridis_r
n_colors = (n_strings - start_iteration) // n_average + 1
colors = cmap(np.linspace(0, 1, n_colors))  # yellow to blue
norm = mpl.colors.Normalize(vmin=start_iteration, vmax=n_strings - 1)
# Initial string in the plane of the two reduced CVs (dotted black line).
ax.plot(
    reduced_string[0, 0, :],
    reduced_string[0, 1, :],
    ls=":",
    marker=".",
    label="string0",
    color="k",
)
# Mean string over all iterations from start_iteration onwards. std_0 / std_1
# computed here are not used before the loop below overwrites them.
av_0 = np.mean(reduced_string[start_iteration:, 0, :], axis=0)
std_0 = np.std(reduced_string[start_iteration:, 0, :], axis=0)
av_1 = np.mean(reduced_string[start_iteration:, 1, :], axis=0)
std_1 = np.std(reduced_string[start_iteration:, 1, :], axis=0)
ax.plot(
    av_0,
    av_1,
    ls="-",
    marker=".",
    color="k",
    # label=f"mean(string{start_iteration}-{n_strings})",
)

# Each curve averages everything from iteration j to the END (slice `j:`);
# n_average only sets the step between successive curves.
for jj, j in enumerate(range(start_iteration, n_strings, n_average)):
    av_0 = np.mean(reduced_string[j:, 0, :], axis=0)
    std_0 = np.std(reduced_string[j:, 0, :], axis=0)
    av_1 = np.mean(reduced_string[j:, 1, :], axis=0)
    std_1 = np.std(reduced_string[j:, 1, :], axis=0)
    ax.errorbar(
        av_0, av_1, fmt="--", xerr=std_0, yerr=std_1, color=colors[jj], alpha=0.9
    )


ax.set_ylabel(
    reduced_string_labels[1],
    size=18,
    labelpad=16,
)
ax.set_xlabel(
    reduced_string_labels[0],
    size=18,
    labelpad=16,
)

ax.xaxis.set_minor_locator(MultipleLocator(0.1))
ax.xaxis.set_major_locator(MultipleLocator(0.1))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))
ax.yaxis.set_major_locator(MultipleLocator(0.1))
ax.grid(which="minor")
ax.tick_params(axis="y", labelsize=14)
ax.tick_params(axis="x", labelsize=11)
ax.legend()
# Draw the landscape on the same axes as the strings (ax=ax).
fec.visualize(
    savefig=False,
    show_data=False,
    # vmax=3.,
    n_contour_levels=25,
    ylabel="IG (nm)",
    xlabel="SF (nm)",
    filename="free_energy_landscape",
    title="Free energy landscape",
    ax=ax,
)
# cbar = colorbar(ax, cmap, norm, "iteration number", 20)

In [None]:
# Landscape on its own, without the string overlay and with more contour levels.
# savefig=False, so nothing is saved by this call.
fec.visualize(
    savefig=False,
    show_data=False,
    # vmax=3.,
    n_contour_levels=100,
    ylabel="IG (nm)",
    xlabel="SF (nm)",
    filename="free_energy_landscape",
    title="Free energy landscape",
)