In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.markers import MarkerStyle as markerstyle
import math
from scipy.interpolate import griddata
import pandas
from scipy import constants
import matplotlib.patches as patches
import matplotlib.cm as cm

In [None]:
# ------- Defining Constants ------------
# Physical constants come from scipy's CODATA table; `constants.Boltzmann` and
# `constants.Avogadro` are the same numbers that `constants.value(...)` returns
# for 'Boltzmann constant' and 'Avogadro constant'.
k = constants.Boltzmann
Ava_no = constants.Avogadro

# Temperature used for the thermal energy below (presumably kelvin — confirm).
temp = 310

# Thermal energy per mole: k * T * N_A.
kbt = k * temp * Ava_no

In [None]:
# --- 3D data set: read the energy and distance tables and merge them on Timestep ---
# Input files (placeholders: replace with real paths before running this cell).
ener_file = "ADD PATH TO REWEIGHTED ENERGIES FILE HERE"  # Reweighted energy file, e.g. "Reweighted_Energies.dat"
dist_file = "ADD PATH TO MASTER DATA FILE"  # Distance file, e.g. "Final_data.txt"

# ener_col = ['Timestep','Bond','G96','Imp','LJ','Col','Pot','Volume','Col-Pro-Pro','LJ-Pro-Pro','Col-Pro-W','LJ-Pro-W','Col-W-W','LJ-W-W']

# The separator is a regular expression, so it is written as a raw string:
# '\s' inside a plain string literal is an invalid escape sequence and makes
# recent Python versions emit a warning when the cell is compiled.
ener_data = pandas.read_csv(ener_file, comment='#', sep=r'\s+', dtype=np.float64)  # whitespace-separated
dist_data = pandas.read_csv(dist_file, comment='#', sep='\t', dtype=np.float64)    # tab-separated

# Keep a single row per Timestep in each table (the first occurrence wins)
# and renumber the rows from 0.
dist_data = dist_data.drop_duplicates(subset=['Timestep'], ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'], ignore_index=True)

# Inner join: only Timesteps present in both tables survive. Column names that
# occur in both tables get '_mda' (distance table) / '_plumed' (energy table)
# appended.
master_data3D = dist_data.join(
    ener_data.set_index('Timestep'),
    on='Timestep',
    how='inner',
    lsuffix='_mda',
    rsuffix='_plumed',
)

In [None]:
# --- 2D data set: read the energy and distance tables, subsample, and merge on Timestep ---
# Input files (placeholders: replace with real paths before running this cell).
ener_file = "ADD PATH TO REWEIGHTED ENERGIES FILE HERE"  # Reweighted energy file, e.g. "Reweighted_Energies.dat"
dist_file = "ADD PATH TO MASTER DATA FILE"  # Distance file, e.g. "Final_data.txt"

# ener_col = ['Timestep','Bond','G96','ImPDih','LJ','Col','Pot','Col-Mem-Mem','LJ-Mem-Mem','Col-Mem-W','LJ-Mem-W','Col-Pro-Mem','LJ-Pro-Mem','Col-W-W','LJ-W-W','Col-Pro-W','LJ-Pro-W','Col-Pro-Pro','LJ-Pro-Pro']

# The separator is a regular expression, so it is written as a raw string:
# '\s' inside a plain string literal is an invalid escape sequence and makes
# recent Python versions emit a warning when the cell is compiled.
ener_data = pandas.read_csv(ener_file, comment='#', sep=r'\s+', dtype=np.float64)  # whitespace-separated
dist_data = pandas.read_csv(dist_file, comment='#', sep='\t', dtype=np.float64)    # tab-separated

# Keep a single row per Timestep in each table (the first occurrence wins).
dist_data = dist_data.drop_duplicates(subset=['Timestep'], ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'], ignore_index=True)

print("Total configs: ",len(dist_data))

# Distance table: keep only rows whose Timestep is a multiple of 1000.
# The filtered frame is re-indexed by chaining instead of an in-place
# reset_index on the slice, so dist_data02 is a fresh, independent frame.
mask1 = dist_data['Timestep']%1000 == 0
dist_data02 = dist_data[mask1].reset_index(drop=True)

# Energy table: keep only rows up to (and including) Timestep 220000000.
mask1 = ener_data['Timestep']<= 220000000
ener_data02 = ener_data[mask1].reset_index(drop=True)

# Inner join: only Timesteps present in both filtered tables survive. Column
# names that occur in both tables get '_mda' / '_plumed' appended.
master_data2D = dist_data02.join(
    ener_data02.set_index('Timestep'),
    on='Timestep',
    how='inner',
    lsuffix='_mda',
    rsuffix='_plumed',
)

In [None]:
# Number of rows that survived the merge of the 2D data set.
print(master_data2D.shape[0])

In [None]:
# --- Pseudo data set: read the energy and distance tables, subsample, and merge on Timestep ---
# Input files (placeholders: replace with real paths before running this cell).
ener_file = "ADD PATH TO REWEIGHTED ENERGIES FILE HERE"  # Reweighted energy file, e.g. "Reweighted_Energies.dat"
dist_file = "ADD PATH TO MASTER DATA FILE"  # Distance file, e.g. "Final_data.txt"

# ener_col = ['Timestep','Bond','G96','ImPDih','LJ','Col','Pot','Col-Mem-Mem','LJ-Mem-Mem','Col-Mem-W','LJ-Mem-W','Col-Pro-Mem','LJ-Pro-Mem','Col-W-W','LJ-W-W','Col-Pro-W','LJ-Pro-W','Col-Pro-Pro','LJ-Pro-Pro']

# The separator is a regular expression, so it is written as a raw string:
# '\s' inside a plain string literal is an invalid escape sequence and makes
# recent Python versions emit a warning when the cell is compiled.
ener_data = pandas.read_csv(ener_file, comment='#', sep=r'\s+', dtype=np.float64)  # whitespace-separated
dist_data = pandas.read_csv(dist_file, comment='#', sep='\t', dtype=np.float64)    # tab-separated

# Keep a single row per Timestep in each table (the first occurrence wins).
dist_data = dist_data.drop_duplicates(subset=['Timestep'], ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'], ignore_index=True)

# Distance table: keep only rows whose Timestep is a multiple of 1000.
# The filtered frame is re-indexed by chaining instead of an in-place
# reset_index on the slice, so dist_data02 is a fresh, independent frame.
mask1 = dist_data['Timestep']%1000 == 0
dist_data02 = dist_data[mask1].reset_index(drop=True)

# Energy table: keep only rows up to (and including) Timestep 89448400.0.
mask1 = ener_data['Timestep']<= 89448400.0
ener_data02 = ener_data[mask1].reset_index(drop=True)

# Inner join: only Timesteps present in both filtered tables survive. Column
# names that occur in both tables get '_mda' / '_plumed' appended.
master_data_pseudo = dist_data02.join(
    ener_data02.set_index('Timestep'),
    on='Timestep',
    how='inner',
    lsuffix='_mda',
    rsuffix='_plumed',
)

In [None]:
# Quick look at the merged pseudo data set (plain-text dump of the frame).
print(master_data_pseudo)

In [None]:


# Label every configuration as 'Bound' or 'Unbound' in a new 'State' column.
# A row is flagged 'Unbound' as soon as either distance column reaches 13.0;
# every row that is not flagged keeps the default label 'Bound'.
for frame in (master_data3D, master_data2D, master_data_pseudo):
    frame['State'] = 'Bound'
    d1_far = frame['d1_mda'] >= 13.0
    d2_far = frame['d2_mda'] >= 13.0
    unb_mask = d1_far | d2_far
    frame.loc[unb_mask, 'State'] = 'Unbound'



In [None]:
# Bin every configuration by its RMSD value and record the bin in a new 'RMSD'
# column. Rows that fall in no bin keep the default label 'Unbound'.

# Upper edges of the bins; with these values the bins are (0, 1.0] and (1.0, 2.0].
rmsd_cutoffs = [1.0,2.0]

# Containers / settings defined here but not read again inside this cell.
avg_energies3D = []
avg_energies2D = []
avg_energies_pseudo = []
col_names = ['LJ-Pro-Pro','LJ-Pro-W','Col-Pro-Pro','Col-Pro-W']
rwt_bool = True

# Each data set stores its RMSD under a different column name.
rmsd_sources = (
    (master_data3D, 'RMSD-BtoA'),       # 3D
    (master_data2D, 'RMSD-B'),          # 2D
    (master_data_pseudo, 'RMSD-BtoA'),  # Pseudo
)

for frame, rmsd_col in rmsd_sources:
    frame['RMSD'] = 'Unbound'
    prev_cutoff = 0.0
    for cutoff in rmsd_cutoffs:
        rmsd_values = frame[rmsd_col]
        # Half-open bin: strictly above the previous edge, up to and including this one.
        unb_mask = (rmsd_values > prev_cutoff) & (rmsd_values <= cutoff)
        frame.loc[unb_mask, 'RMSD'] = f'{cutoff} nm'
        prev_cutoff = cutoff



In [None]:
## Histogram plot (3D data set): LJ energy distributions for the two RMSD bins
# Plot styling. The later plotting cells read t_size, f_size, cl1 and cl2 from here.
t_size= 18
lw = 3
f_size= 20
leg_size=12
fig,ax=plt.subplots(1,2,figsize=(8,4),sharey=True)

# ---------------- RMSD bin '1.0 nm' ----------------
all_data = master_data3D[master_data3D['RMSD'] == '1.0 nm']
n1_count=len(all_data)

# Divisor applied to the histogram counts before plotting. 1 leaves the raw
# counts; use len(master_data3D) instead to normalise by the number of configs.
total_count = 1    # Not normalized

cl1 = 'darkorchid'
cl2 = 'darkgoldenrod'
freq_lj_pro,bins_lj_pro = np.histogram(all_data['LJ-Pro-Pro'],bins=50)
freq_lj_w, bins_lj_w = np.histogram(all_data['LJ-Pro-W'],bins=50)

# Index of the most populated bin of each histogram
max_count_01 = np.argmax(freq_lj_pro)
max_count_02 = np.argmax(freq_lj_w)

# Left edge of the most populated bin.
# NOTE(review): the names say "0p6" although this is the '1.0 nm' bin; the
# names are kept unchanged in case other cells read them.
ljpro_energy_0p6 = bins_lj_pro[max_count_01]
ljw_energy_0p6 = bins_lj_w[max_count_02]

print("Max Values for 1.0 nm: ")
print(ljpro_energy_0p6,ljw_energy_0p6)

# Counts are drawn against the left bin edges (bins[:-1]), not the bin centres.
ax[0].plot(bins_lj_pro[:-1],freq_lj_pro/total_count,alpha=1.0,label='1.0 nm',color=cl1, lw=3.5)
ax[1].plot(bins_lj_w[:-1],freq_lj_w/total_count,alpha=1.0,label='LJ-Pro-W',color=cl1, lw=3.5)

# ---------------- RMSD bin '2.0 nm' ----------------
all_data02 = master_data3D[master_data3D['RMSD'] == '2.0 nm']

freq_lj_pro_02, bins_lj_pro_02 = np.histogram(all_data02['LJ-Pro-Pro'],bins=30)
freq_lj_w_02, bins_lj_w_02 = np.histogram(all_data02['LJ-Pro-W'],bins=30)

# Index of the most populated bin of each 2.0 nm histogram
max_count_01 = np.argmax(freq_lj_pro_02)
max_count_02 = np.argmax(freq_lj_w_02)

ljpro_energy_2p0 = bins_lj_pro_02[max_count_01]
ljw_energy_2p0 = bins_lj_w_02[max_count_02]

# Report the maxima from the 2.0 nm bin edges. The previous version indexed the
# 1.0 nm edge arrays (bins_lj_pro / bins_lj_w) with the 2.0 nm argmax indices
# and therefore printed unrelated energies.
print("Max Values for 2.0 nm: ")
print(ljpro_energy_2p0,ljw_energy_2p0)

n2_count=len(all_data02)

ax[0].plot(bins_lj_pro_02[:-1],freq_lj_pro_02/total_count,alpha=1,label='2.0 nm',color=cl2, lw=3)
ax[1].plot(bins_lj_w_02[:-1],freq_lj_w_02/total_count,alpha=1,label='LJ-Pro-W',color=cl2, lw=3)

# Only the left panel carries a legend, so the 'LJ-Pro-W' labels are never shown.
ax[0].legend(fontsize=f_size-2,frameon=False,loc='upper right',borderaxespad=1.5)

# Fixed x windows and tick positions for the two panels
ax[0].set_xlim([-16000,-13500])
ax[0].set_xticks([-15500,-14000])
ax[1].set_xlim([-16000,-10500])
ax[1].set_xticks([-15000,-12500])
for ax_ in ax:
    # ax_.set_xlabel('Energy (kJ/mol)',fontsize=f_size)
    ax_.tick_params(axis='both',labelsize=t_size)

# ax[0].set_ylabel('Frequency',fontsize=f_size)
fig.tight_layout()
# plt.savefig('3D_RMSD_PotEnergyMart2.svg',format='svg',dpi=600,transparent=True)

# Sizes of the two RMSD bins and the divisor that was used
print(n1_count,n2_count,total_count)



In [None]:
# Configurations of the 3D data set with both distances below 13.0.
d1_close = master_data3D['d1_mda'] < 13.0
d2_close = master_data3D['d2_mda'] < 13.0
bound_mask = d1_close & d2_close
total_bound = bound_mask.sum()

# n1_count is taken from the histogram cell that was executed last.
bound_fraction = n1_count/total_bound
print("Fraction of configs in bound state: ", bound_fraction)
print(master_data3D.shape[0])
print("Total Bound Configs: ", total_bound)


In [None]:
# Plot only RMSD configs: distribution of the RMSD values inside each bin (3D data set)
fig, ax = plt.subplots()

rmsd_bin = master_data3D['RMSD']
all_data = master_data3D[rmsd_bin == '1.0 nm']
all_data02 = master_data3D[rmsd_bin == '2.0 nm']

# One histogram per RMSD bin, drawn in the same order as the legend entries.
hist_specs = (
    (all_data, 'darkorchid', '1.0 nm'),
    (all_data02, 'darkgoldenrod', '2.0 nm'),
)
for subset, colour, bin_label in hist_specs:
    ax.hist(subset['RMSD-BtoA'], bins=50, alpha=0.8, color=colour, label=bin_label)

ax.set_xlabel('dRMSD (nm)', fontsize=f_size)
ax.set_ylabel('Frequency', fontsize=f_size)
ax.tick_params(axis='both', labelsize=t_size)
ax.legend(fontsize=f_size-2, frameon=False, loc='upper left')
fig.tight_layout()



In [None]:
## Histogram plot (2D data set): LJ energy distributions for the two RMSD bins
fig, ax = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

# Split the configurations by RMSD bin and record the bin sizes.
all_data = master_data2D[master_data2D['RMSD'] == '1.0 nm']
all_data02 = master_data2D[master_data2D['RMSD'] == '2.0 nm']
n1_count = len(all_data)
n2_count = len(all_data02)

# Divisor for the counts; 1 means raw counts
# (len(master_data2D) would normalise by the number of configs).
total_count = 1

# 50 bins for the '1.0 nm' subset, 30 bins for the '2.0 nm' subset.
freq_lj_pro, bins_lj_pro = np.histogram(all_data['LJ-Pro-Pro'], bins=50)
freq_lj_w, bins_lj_w = np.histogram(all_data['LJ-Pro-W'], bins=50)
freq_lj_pro_02, bins_lj_pro_02 = np.histogram(all_data02['LJ-Pro-Pro'], bins=30)
freq_lj_w_02, bins_lj_w_02 = np.histogram(all_data02['LJ-Pro-W'], bins=30)

# (panel index, bin edges, counts, legend label, colour); the drawing order is
# '1.0 nm' on both panels first, then '2.0 nm' on both panels.
curves = (
    (0, bins_lj_pro, freq_lj_pro, '1.0 nm', cl1),
    (1, bins_lj_w, freq_lj_w, 'LJ-Pro-W', cl1),
    (0, bins_lj_pro_02, freq_lj_pro_02, '2.0 nm', cl2),
    (1, bins_lj_w_02, freq_lj_w_02, 'LJ-Pro-W', cl2),
)
for panel_idx, edges, counts, curve_label, colour in curves:
    # Counts are drawn against the left bin edges.
    ax[panel_idx].plot(edges[:-1], counts/total_count, alpha=1.0,
                       label=curve_label, color=colour, lw=3.5)

# No legend is drawn on this figure.
# Fixed x windows and tick positions for the two panels.
ax[0].set_xlim([-16000, -13500])
ax[0].set_xticks([-15500, -14000])
ax[1].set_xlim([-16000, -10500])
ax[1].set_xticks([-15000, -12500])

for ax_ in ax:
    ax_.tick_params(axis='both', labelsize=t_size)

fig.tight_layout()
# plt.savefig('2D_RMSD_PotEnergy_Mart2.svg',format='svg',dpi=600,transparent=True)

print(n1_count, n2_count, total_count)

In [None]:
# Configurations of the 2D data set with both distances below 13.0.
d1_close = master_data2D['d1_mda'] < 13.0
d2_close = master_data2D['d2_mda'] < 13.0
bound_mask = d1_close & d2_close
total_bound = bound_mask.sum()

print(total_bound)
print(master_data2D.shape[0])
# Timestep of the last row of the merged frame.
print(master_data2D['Timestep'].iloc[-1])
# n1_count is taken from the histogram cell that was executed last.
bound_fraction = n1_count/total_bound
print("Fraction of configs in bound state: ", bound_fraction)

In [None]:
# Plot only RMSD configs: distribution of the RMSD values inside each bin (2D data set)
fig, ax = plt.subplots()

rmsd_bin = master_data2D['RMSD']
all_data = master_data2D[rmsd_bin == '1.0 nm']
all_data02 = master_data2D[rmsd_bin == '2.0 nm']

# One histogram per RMSD bin, drawn in the same order as the legend entries.
hist_specs = (
    (all_data, 'darkorchid', '1.0 nm'),
    (all_data02, 'darkgoldenrod', '2.0 nm'),
)
for subset, colour, bin_label in hist_specs:
    ax.hist(subset['RMSD-B'], bins=50, alpha=0.8, color=colour, label=bin_label)

ax.set_xlabel('dRMSD (nm)', fontsize=f_size)
ax.set_ylabel('Frequency', fontsize=f_size)
ax.tick_params(axis='both', labelsize=t_size)
ax.legend(fontsize=f_size-2, frameon=False, loc='upper left')
fig.tight_layout()


In [None]:
## Histogram plot (pseudo data set): LJ energy distributions for the two RMSD bins
fig, ax = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

# Split the configurations by RMSD bin and record the bin sizes.
in_first_bin = master_data_pseudo['RMSD'] == '1.0 nm'
in_second_bin = master_data_pseudo['RMSD'] == '2.0 nm'
all_data = master_data_pseudo[in_first_bin]
all_data02 = master_data_pseudo[in_second_bin]
n1_count = len(all_data)
n2_count = len(all_data02)

# Divisor for the counts; 1 means raw counts
# (len(master_data_pseudo) would normalise by the number of configs).
total_count = 1

# 50 bins for the '1.0 nm' subset, 30 bins for the '2.0 nm' subset.
freq_lj_pro, bins_lj_pro = np.histogram(all_data['LJ-Pro-Pro'], bins=50)
freq_lj_w, bins_lj_w = np.histogram(all_data['LJ-Pro-W'], bins=50)
freq_lj_pro_02, bins_lj_pro_02 = np.histogram(all_data02['LJ-Pro-Pro'], bins=30)
freq_lj_w_02, bins_lj_w_02 = np.histogram(all_data02['LJ-Pro-W'], bins=30)

# Shared line style; counts are drawn against the left bin edges.
line_style = dict(alpha=1.0, lw=3.5)

# '1.0 nm' curves first (left panel, then right panel) ...
ax[0].plot(bins_lj_pro[:-1], freq_lj_pro/total_count, label='1.0 nm', color=cl1, **line_style)
ax[1].plot(bins_lj_w[:-1], freq_lj_w/total_count, label='LJ-Pro-W', color=cl1, **line_style)
# ... then the '2.0 nm' curves on top.
ax[0].plot(bins_lj_pro_02[:-1], freq_lj_pro_02/total_count, label='2.0 nm', color=cl2, **line_style)
ax[1].plot(bins_lj_w_02[:-1], freq_lj_w_02/total_count, label='LJ-Pro-W', color=cl2, **line_style)

# No legend is drawn on this figure.
# Fixed x windows and tick positions for the two panels.
ax[0].set_xlim([-16000, -13500])
ax[0].set_xticks([-15500, -14000])
ax[1].set_xlim([-16000, -10500])
ax[1].set_xticks([-15000, -12500])

for ax_ in ax:
    ax_.tick_params(axis='both', labelsize=t_size)

fig.tight_layout()
# plt.savefig('Pseudo_RMSD_PotEnergy_Mart2.svg',format='svg',dpi=600,transparent=True)

print(n1_count, n2_count, total_count)

In [None]:
# Plot only RMSD configs: distribution of the RMSD values inside each bin (pseudo data set)
fig, ax = plt.subplots()

rmsd_bin = master_data_pseudo['RMSD']
all_data = master_data_pseudo[rmsd_bin == '1.0 nm']
all_data02 = master_data_pseudo[rmsd_bin == '2.0 nm']

# One histogram per RMSD bin, drawn in the same order as the legend entries.
hist_specs = (
    (all_data, 'darkorchid', '1.0 nm'),
    (all_data02, 'darkgoldenrod', '2.0 nm'),
)
for subset, colour, bin_label in hist_specs:
    ax.hist(subset['RMSD-BtoA'], bins=50, alpha=0.8, color=colour, label=bin_label)

ax.set_xlabel('dRMSD (nm)', fontsize=f_size)
ax.set_ylabel('Frequency', fontsize=f_size)
ax.tick_params(axis='both', labelsize=t_size)
ax.legend(fontsize=f_size-2, frameon=False, loc='upper left')
fig.tight_layout()
