In [None]:
import numpy as np
import scipy
import itertools
import  pathlib 

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
plt.interactive(True)
#%config InlineBackend.figure_format = 'pdf'
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

from matplotlib import gridspec

from IPython.display import set_matplotlib_formats
# Emit inline figures both as PDF and PNG.
set_matplotlib_formats('pdf', 'png')
plt.rcParams['savefig.dpi'] = 300

plt.rcParams['figure.autolayout'] = False

# Typeset all figure text with LaTeX.
plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = "sans-serif"
plt.rcParams['font.serif'] = "cm"
# The preamble is handed over as ONE newline-separated string: current
# Matplotlib releases expect a str for this rcParam rather than a list of
# strings.
plt.rcParams['text.latex.preamble'] = "\n".join([
    r"\usepackage{subdepth}",
    r"\usepackage{type1cm}",
    r'\usepackage{tgheros}',    # helvetica font
    r'\usepackage{sansmath}',   # math-font matching  helvetica
    r'\sansmath'                # actually tell tex to use it!
#    r'\usepackage{siunitx}',    # micro symbols
#    r'\sisetup{detect-all}',    # force siunitx to use the fonts
])

# Show the figure size in effect before any preset is applied.
print(plt.rcParams['figure.figsize'])


def set_size(column=0):
    """Apply one of three predefined Matplotlib rcParams presets.

    Parameters
    ----------
    column : int, optional
        Preset selector. ``0`` selects a 10 x 6 figure with large fonts,
        ``1`` a 3.5 wide figure and ``2`` a 7.0 wide figure (both with a
        0.6 height/width ratio and 10 pt fonts).  Any other value leaves
        ``plt.rcParams`` untouched.
        NOTE(review): 1 and 2 presumably correspond to single- and
        double-column journal figures -- confirm.
    """
    rc_keys = (
        'figure.figsize',
        'axes.labelsize',
        'axes.titlesize',
        'font.size',
        'lines.linewidth',
        'lines.markersize',
        'legend.fontsize',
    )
    # One row per preset; values are listed in the order of ``rc_keys``.
    presets = (
        (0, ([10, 6],          18, 20, 16, 2.0, 8,  14)),
        (1, ([3.5, 3.5*0.6],   10, 12, 10, 1.5, 10, 10)),
        (2, ([7.0, 7.0*0.6],   10, 12, 10, 2.0, 10, 10)),
    )
    for selector, values in presets:
        if column == selector:
            for rc_key, value in zip(rc_keys, values):
                plt.rcParams[rc_key] = value
            break


# Use preset 2 for the rest of the notebook.
set_size(column=2)

In [None]:
# Load the per-window time series.  Each "<window>.txt" file is read as four
# columns, stored as: time (t), q["d"], q["tor1"] and q["tor2"].  w_i records,
# for every sample, the index of the window it was read from.
data_dir = '../data/d_tor1_tor2/'
N_windows = 1370

# Per-window pieces are collected in lists and concatenated once at the end
# (np.append inside the loop would re-copy all previous data on every window).
# Each list is seeded with an empty array so that the result is a well-typed
# empty array even when no window could be read.
window_chunks = [np.array([], dtype="int")]
time_chunks = [np.array([])]
cv_chunks = {"d": [np.array([])], "tor1": [np.array([])], "tor2": [np.array([])]}

for window in range(0, N_windows):
    print(window, end=" ")
    try:
        c1, c2, c3, c4 = np.loadtxt(data_dir + str(window) + ".txt", unpack=True)
    except (OSError, ValueError):
        # Missing/unreadable file (OSError) or a file that does not parse
        # into exactly four columns (ValueError): skip this window.  Other
        # exceptions, e.g. KeyboardInterrupt, are no longer swallowed.
        continue

    # np.ravel also turns the scalars produced by a single-row file into
    # length-1 arrays, which np.concatenate requires.
    c1, c2, c3, c4 = (np.ravel(column) for column in (c1, c2, c3, c4))

    window_chunks.append(np.full(c1.shape, window))
    time_chunks.append(c1)
    cv_chunks["d"].append(c2)
    cv_chunks["tor1"].append(c3)
    cv_chunks["tor2"].append(c4)

w_i = np.concatenate(window_chunks)
t = np.concatenate(time_chunks)
q = {name: np.concatenate(chunks) for name, chunks in cv_chunks.items()}

    

In [None]:
# Load the precomputed weights array from disk.
# NOTE(review): later cells index `weights` with the same boolean masks and
# the same [::25] stride as the samples in `t`/`q`, so it presumably holds one
# weight per loaded sample, in the same order -- confirm against whatever
# produced weights.npy.
weights = np.load("../weights/weights.npy")

In [None]:
# Reduce the data set: keep every 25th sample of all per-sample arrays.
# Caution: this overwrites the arrays in place, so running the cell a second
# time thins the already-thinned data again.
stride = slice(None, None, 25)

for key in ("d", "tor1", "tor2"):
    q[key] = q[key][stride]

w_i, t, weights = w_i[stride], t[stride], weights[stride]

In [None]:
# Plot the time series of the three collective variables, one figure each.
# The torsions are handled in radians everywhere else in this notebook
# (periodic wrap at pi, conversion with 180/pi when printing), so they are
# converted to degrees here to agree with the unit on the axis label.
for key, ylabel, to_axis_units in (
    ("d",    r"r / nm",                          lambda values: values),
    ("tor1", r"$\mathrm{\Theta}^0$ / $^\circ$",  np.degrees),
    ("tor2", r"$\mathrm{\Theta}^1$ / $^\circ$",  np.degrees),
):
    plt.figure()
    plt.plot(to_axis_units(q[key]))
    plt.xlabel(r"sample index")
    plt.ylabel(ylabel)


# Cluster

In [None]:
# Container for the pairwise distance matrices, keyed by variable name.
D = dict()

In [None]:
# Pairwise differences of d between all samples, scaled by the spread of the
# difference matrix.  Note that the entries are signed (row value minus column
# value), not absolute differences.
q_t = np.reshape(q["d"], (-1, 1))
X = q_t - np.transpose(q_t)
D["d"] = X / np.ptp(X)

In [None]:
# Preview of the d distance matrix, using every 10th row and column.
mat = D["d"][::10, ::10]
n_rows, n_cols = mat.shape
square = n_cols/n_rows

fig, ax = plt.subplots()
image = ax.imshow(mat, aspect=square, origin='upper', interpolation='none')

ax.xaxis.set_ticks_position('bottom')
ax.set_ylabel(r"sample index")
ax.set_xlabel(r"sample index")

cbar = fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label(r"D")


In [None]:
# Pairwise periodic distance of the first torsion between all samples.
q_t = q["tor1"].reshape(-1, 1)
# Absolute angular difference, folded into [0, 2*pi) ...
X = np.abs(q_t - q_t.T) % (2*np.pi)
# ... then take the shorter way around the circle.  This keeps every entry
# non-negative (the previous abs-then-subtract wrap produced negative
# "distances" of the same magnitude).
X = np.minimum(X, 2*np.pi - X)
# Scale by the full period, so values lie in [0, 0.5].
D["tor1"] = X / (2*np.pi)

In [None]:
# Preview of the tor1 distance matrix, using every 10th row and column.
mat = D["tor1"][::10, ::10]
n_rows, n_cols = mat.shape
square = n_cols/n_rows

fig, ax = plt.subplots()
image = ax.imshow(mat, aspect=square, origin='upper', interpolation='none')

ax.xaxis.set_ticks_position('bottom')
ax.set_ylabel(r"sample index")
ax.set_xlabel(r"sample index")

cbar = fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label(r"D")


In [None]:
# Pairwise periodic distance of the second torsion between all samples.
q_t = q["tor2"].reshape(-1, 1)
# Absolute angular difference, folded into [0, 2*pi) ...
X = np.abs(q_t - q_t.T) % (2*np.pi)
# ... then take the shorter way around the circle.  This keeps every entry
# non-negative (the previous abs-then-subtract wrap produced negative
# "distances" of the same magnitude).
X = np.minimum(X, 2*np.pi - X)
# Scale by the full period, so values lie in [0, 0.5].
D["tor2"] = X / (2*np.pi)

In [None]:
# Preview of the tor2 distance matrix, using every 10th row and column.
mat = D["tor2"][::10, ::10]
n_rows, n_cols = mat.shape
square = n_cols/n_rows

fig, ax = plt.subplots()
image = ax.imshow(mat, aspect=square, origin='upper', interpolation='none')

ax.xaxis.set_ticks_position('bottom')
ax.set_ylabel(r"sample index")
ax.set_xlabel(r"sample index")

cbar = fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label(r"D")


In [None]:
# Combined distance: Euclidean norm of the three per-variable matrices.
squared_sum = sum(D[key]**2.0 for key in ("d", "tor1", "tor2"))
D["total"] = squared_sum**0.5

In [None]:
# Preview of the combined distance matrix, using every 10th row and column.
mat = D["total"][::10, ::10]
n_rows, n_cols = mat.shape
square = n_cols/n_rows

fig, ax = plt.subplots()
image = ax.imshow(mat, aspect=square, origin='upper', interpolation='none')

ax.xaxis.set_ticks_position('bottom')
ax.set_ylabel(r"sample index")
ax.set_xlabel(r"sample index")

cbar = fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label(r"D")


In [None]:
# Histogram over all entries of the combined distance matrix.
plt.hist(np.ravel(D["total"]))

In [None]:
from sklearn.cluster import AgglomerativeClustering

# Complete-linkage agglomerative clustering on the precomputed combined
# distance matrix.  The number of clusters is not fixed; merging stops once
# the linkage distance exceeds the threshold.
cluster_kwargs = dict(n_clusters=None,
                      distance_threshold=0.5,
                      linkage="complete")

# Newer scikit-learn releases take the distance specification as `metric`;
# older ones only know the former name `affinity`.  An unknown keyword raises
# TypeError in the constructor, which is used to pick the supported spelling.
try:
    model = AgglomerativeClustering(metric="precomputed", **cluster_kwargs)
except TypeError:
    model = AgglomerativeClustering(affinity="precomputed", **cluster_kwargs)

clustering = model.fit(D["total"])

In [None]:
# Number of clusters found by the fitted model.
N_clusters = clustering.n_clusters_
print(N_clusters)

# Cluster label assigned to each sample, in sample order.
plt.plot(clustering.labels_)

In [None]:
from scipy.cluster.hierarchy import dendrogram

def plot_dendrogram(model, **kwargs):
    """Build a SciPy linkage matrix from a fitted model and draw its dendrogram.

    Parameters
    ----------
    model : object
        Fitted clustering model exposing ``children_`` (one row of two child
        indices per merge), ``distances_`` (one distance per merge) and
        ``labels_`` (one entry per sample).
    **kwargs
        Forwarded unchanged to ``scipy.cluster.hierarchy.dendrogram``.

    Returns
    -------
    dict
        Whatever ``dendrogram`` returns, so the layout can also be obtained
        without drawing (e.g. with ``no_plot=True``).  Earlier versions of
        this helper returned ``None``.
    """
    n_samples = len(model.labels_)

    # Number of original samples below each merge node.  Child indices below
    # n_samples are leaves; larger ones refer to the merge created at row
    # (index - n_samples), whose count has already been filled in.
    counts = np.zeros(model.children_.shape[0])
    for i, merge in enumerate(model.children_):
        counts[i] = sum(
            1 if child_idx < n_samples else counts[child_idx - n_samples]
            for child_idx in merge
        )

    # Columns: child a, child b, merge distance, samples in the new node.
    linkage_matrix = np.column_stack([model.children_, model.distances_,
                                      counts]).astype(float)

    # Plot the corresponding dendrogram
    return dendrogram(linkage_matrix, **kwargs)

In [None]:
ax = plt.gca()
ax.set_title('Hierarchical Clustering Dendrogram')

# Only the top three levels of the tree are drawn.
plot_dendrogram(model, truncate_mode='level', p=3)

ax.set_xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()

In [None]:
# Import the local densitytopmf helpers and force a reload, so that edits to
# the module are picked up without restarting the kernel.
import sys, importlib
# First import makes sure the module is registered in sys.modules ...
from densitytopmf import *
# ... so that it can be reloaded from disk ...
importlib.reload(sys.modules['densitytopmf'])
# ... and the star import is repeated to rebind the names to the reloaded code.
from densitytopmf import *

In [None]:
# Thermodynamic constants used to turn cluster weights into free energies.
# NOTE(review): temperature is presumably in kelvin -- confirm.
Temperature = 310
# Boltzmann constant in kJ/(mol.K)
kB = 0.0083144621
# Inverse thermal energy, 1/(kB*T), i.e. in mol/kJ.
beta = 1.0/(kB*Temperature)

In [None]:
# Per-cluster summary statistics, keyed by variable name.
cluster_centers = dict()

In [None]:
# For every cluster: two summary numbers per variable (as returned by the
# do_mean_std* helpers) and the summed weight of its members.
estimators = (
    ("d",    do_mean_std),
    ("tor1", do_mean_std_angle),
    ("tor2", do_mean_std_angle),
)

for key, _ in estimators:
    cluster_centers[key] = np.full([N_clusters, 2], np.nan)

G = np.full(N_clusters, np.nan)

for i in range(clustering.n_clusters_):
    # Boolean selector of the samples assigned to cluster i.
    mask = clustering.labels_ == i
    for key, estimator in estimators:
        cluster_centers[key][i, :] = estimator(q[key][mask], weights[mask])
    G[i] = np.sum(weights[mask])

# Convert summed weights to free energies and shift the minimum to zero.
G = -np.log(G) / beta
G -= G.min()

In [None]:
# Cluster indices ordered by increasing free energy.
indices = G.argsort()
print(indices)

In [None]:
# One line per cluster, lowest free energy first: index, free energy, the two
# summary values for d, and the first summary value of each torsion in degrees.
for i in indices:
    d_summary = cluster_centers["d"][i, :]
    tor1_deg = cluster_centers["tor1"][i, 0]*180/np.pi
    tor2_deg = cluster_centers["tor2"][i, 0]*180/np.pi
    print(i, G[i], d_summary, tor1_deg, tor2_deg)

In [None]:
# Rounded d value of every cluster centre, listed in free-energy order.
distance_labels = np.round(cluster_centers["d"][:, 0], 2).astype(str)
print(distance_labels[indices])

# The bars themselves are positioned by rank (0 = lowest free energy).
names = np.arange(N_clusters).astype(int)
G_sorted = G[indices]
print(G_sorted)

plt.bar(names, G_sorted)
plt.xticks(rotation=90)

## Pair Distances ($\mathrm{r}_{ij}$)

In [None]:
# Load the pair-distance time series of every window and align them with the
# (already reduced) samples in t / w_i.
pairdist_dir = '../data/pairdist/'
N_pairs = 1225  # number of distance columns expected per frame

# Rows without a matching frame stay NaN.
q["pairdist"] = np.full([t.shape[0], N_pairs], np.nan)
for window in range(0, N_windows):
    try:
        c = np.loadtxt(pairdist_dir + str(window) + ".dist.xvg", comments=["#","@"])
        c1 = c[:,0]    # first column: matched against t below
        c2 = c[:,1:]   # remaining columns: the distances
        print(window, end=" ")
    except (OSError, ValueError, IndexError):
        # Missing/unreadable file, unparsable content, or a file that does
        # not load as a 2-D table: skip this window.  Anything else (e.g.
        # KeyboardInterrupt) now propagates instead of being swallowed.
        continue
    # here I match the windows and the times
    mask = (w_i==window) & np.isin(t,c1)
    q["pairdist"][mask,:] = c2[np.isin(c1,t[mask])]

In [None]:
# Overview of all pair distances (columns) for all samples (rows).
# No plt.figure() call here: plt.matshow opens its own figure, so a preceding
# plt.figure() only leaves an empty figure behind.
q_t = q["pairdist"].copy()
# Hide exact zeros in the image.
q_t[q_t==0] = np.nan

plt.matshow(q_t, aspect=q_t.shape[1]/q_t.shape[0])
plt.xlabel(r"pairdist")
plt.ylabel(r"sample index")

In [None]:
# Weighted average of every pair distance within each cluster.
q1_t = q["pairdist"]
q2_t = clustering.labels_

# Use only the samples for which pair distances were loaded.
mask = np.isfinite(q1_t[:,0])

# One do_average call per distance column, binned over the cluster labels.
# The rows are collected in a list and stacked once; stacking inside the loop
# would re-copy the growing matrix on every iteration.  The inputs are rebuilt
# for every call, exactly as before, so the helper never sees a shared array.
rows = []
bin_centers = None
for i in range(q1_t.shape[1]):
    y, x = do_average(y=q1_t[mask,i], x=q2_t[mask], weights=weights[mask], bins=np.arange(0,N_clusters+1,1))
    if bin_centers is None:
        # Keep the bin centres returned for the first column.
        bin_centers = x
    rows.append(y)

# A has one row per pair-distance column.
A = np.vstack(rows)

In [None]:
# Heat map of the cluster-averaged pair distances: one column per cluster bin,
# one row per pair-distance column.
print(bin_centers , N_clusters)
plt.figure()

#A[A<0.00001] = np.nan
minx, maxx = bin_centers.min(), bin_centers.max()
# The vertical extent follows the actual number of rows of A (the previous
# hard-coded upper bound did not match the 1225 pair-distance columns).
miny, maxy = .5, A.shape[0] + .5

plt.imshow(A, origin="lower", extent = (minx, maxx, miny, maxy), aspect = (maxx-minx) / (maxy-miny) , interpolation="none" )
cbar=plt.colorbar(fraction=0.046, pad=0.04)
cbar.set_label (r"Pairdist")

# x runs over the cluster bins, y over the pair-distance columns.
plt.xlabel(r"cluster index")
plt.ylabel(r"residue pair index")

In [None]:
# Unfold the 1225 pair rows into a 35 x 35 grid; the last axis is the cluster.
B = np.reshape(A, (35, 35, -1))
print(B.shape, B.max(), B.min())

In [None]:
# One averaged pair-distance map per cluster, lowest free energy first.
for i in indices:

    plt.figure()

    Z = B[:,:,i]
    #Z[Z>2] = np.nan

    # Pixel (row, column) is centred on integer coordinates starting at 1;
    # the extents follow the grid size instead of being hard-coded.
    minx, maxx = 0.5, Z.shape[1] + 0.5
    miny, maxy = 0.5, Z.shape[0] + 0.5

    plt.imshow(Z, origin="lower", extent = (minx, maxx, miny, maxy),
               aspect = (maxx-minx) / (maxy-miny),
               interpolation="gaussian", cmap="gist_stern")
    #_X, _Y = np.meshgrid(np.arange(1,36), np.arange(1,36))
    #plt.contourf(_X, _Y, Z.T, 5, alpha=1, cmap="jet_r");

    plt.gca().xaxis.set_ticks_position('bottom')

    cbar=plt.colorbar(fraction=0.046, pad=0.04)
    cbar.set_label (r"$\mathrm{d_{ij}}$ / nm")

    # Fixed colour range so that all clusters are directly comparable.
    plt.clim(0.4,2)

    plt.xlabel(r"Residue No. (chain A)")
    plt.ylabel(r"Residue No. (chain B)")

    # All LaTeX fragments are raw strings: sequences such as "\m" or "\c" are
    # invalid escapes in ordinary string literals.
    title_parts = [
        r"$\mathrm{\Delta G} =$ ",   '{0:.2f}'.format(G[i]),                                   r" $\mathrm{kJ/mol}$, ",
        r"$\mathrm{cluster} =$ ",    '{0:.0f}'.format(i),                                      r", ",
        r"$\mathrm{r} =$ ",          '{0:.2f}'.format(cluster_centers["d"][i,0]),              r" $\mathrm{nm}$, ",
        r"$\mathrm{{\Theta}_1} =$ ", '{0:.2f}'.format(cluster_centers["tor1"][i,0]*180/np.pi), r"$^\circ$, ",
        r"$\mathrm{{\Theta}_2} =$ ", '{0:.2f}'.format(cluster_centers["tor2"][i,0]*180/np.pi), r"$^\circ$",
    ]
    plt.title("".join(title_parts))