In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.markers import MarkerStyle as markerstyle
import math
from scipy.interpolate import griddata
import pandas
from scipy import constants
import matplotlib.patches as patches
import matplotlib.cm as cm

In [None]:
# ------- Defining Constants ------------
# Physical constants are taken from scipy's CODATA tables (SI units).
k = constants.Boltzmann        # Boltzmann constant
Ava_no = constants.Avogadro    # Avogadro constant
temp = 310                     # temperature -- presumably kelvin; TODO confirm

# Molar thermal energy k*T*N_A. With SI inputs this is in J/mol (~2577 at 310).
# NOTE(review): later cells compute exp(rbias / kbt); confirm that 'rbias' is
# stored in J/mol too (and not kJ/mol), otherwise the weights are off by 1000x
# in the exponent.
kbt = (k * temp) * Ava_no

## Read 3D Solution data

In [None]:
# Input files for the 3D (solution) data set -- replace the placeholders.
ener_file = "ADD PATH TO REWEIGHTED ENERGIES FILE HERE"  # reweighted energies, e.g. "Reweighted_Energies.dat"
dist_file = "ADD PATH TO MASTER DATA FILE"  # distances, e.g. "Final_data.txt"

# Both tables are tab-separated with '#' comment lines; every column is read
# as float64 and only the first row for each 'Timestep' is kept.
ener_data = (
    pandas.read_csv(ener_file, sep='\t', comment='#', dtype=np.float64)
    .drop_duplicates(subset=['Timestep'], ignore_index=True)
)
dist_data = (
    pandas.read_csv(dist_file, sep='\t', comment='#', dtype=np.float64)
    .drop_duplicates(subset=['Timestep'], ignore_index=True)
)

# Inner join on 'Timestep': only frames present in both tables survive.
# Columns that exist in both tables get the '_mda' (distance table) or
# '_plumed' (energy table) suffix.
master_data3D = dist_data.join(
    ener_data.set_index('Timestep'),
    on='Timestep',
    how='inner',
    lsuffix='_mda',
    rsuffix='_plumed',
)

## Read 2D Membrane data

In [None]:
# Input files for the 2D (membrane) data set -- replace the placeholders.
ener_file = "ADD PATH TO REWEIGHTED ENERGIES FILE HERE"  #Reweighted Energy file For e.g. - "Reweighted_Energies.dat"
dist_file = "ADD PATH TO MASTER DATA FILE"  # Distance file For e.g. - "Final_data.txt"

ener_data = pandas.read_csv(ener_file,comment='#',sep='\t',dtype=np.float64)   #Reweighted Energy file
dist_data = pandas.read_csv(dist_file,comment='#',sep='\t',dtype=np.float64)   #Distance file

# Keep only the first row for each Timestep in both tables.
dist_data = dist_data.drop_duplicates(subset=['Timestep'],ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'],ignore_index=True)

# Inner join on Timestep; clashing column names get '_mda' / '_plumed' suffixes.
master_data2D = dist_data.join(ener_data.set_index('Timestep'),on='Timestep',how='inner',lsuffix = '_mda',rsuffix = '_plumed')

# Remove the stretches where the protein came off the membrane.
# Timestep / 1e6 is compared against the windows below (the original notes
# give these windows in microseconds).
timesteps_array = master_data2D['Timestep']/1e6

# (start, end) windows to discard; both bounds are exclusive.
excluded_windows = [
    (33.0, 66.0),
    (95.2, 97.8),
    (101.95, 103.3),
    (127.6, 128.5),
    (139, 142.5),
]

mask = pandas.Series(False, index=timesteps_array.index)
for window_start, window_end in excluded_windows:
    mask |= (timesteps_array > window_start) & (timesteps_array < window_end)

# .copy() makes the filtered frame independent of the merged one, so that the
# columns added to it in later cells ('State', 'RMSD') do not trigger
# SettingWithCopyWarning.
master_data2D = master_data2D[~mask].copy()

In [None]:
# Show the column names of the energy table currently bound to `ener_data`
# (when cells are run top to bottom, that is the one read in the cell above).
print(ener_data.columns)


## Read Pseudo Membrane data

In [None]:
# Input files for the pseudo-membrane data set -- replace the placeholders.
ener_file = "ADD PATH TO REWEIGHTED ENERGIES FILE HERE"  #Reweighted Energy file For e.g. - "Reweighted_Energies.dat"
dist_file = "ADD PATH TO MASTER DATA FILE"  # Distance file For e.g. - "Final_data.txt"

# The energy file here is whitespace-separated (the distance file is
# tab-separated). The regex is written as a raw string: '\s' inside a normal
# string literal is an invalid escape sequence and triggers a warning on
# current Python versions.
ener_data = pandas.read_csv(ener_file,comment='#',sep=r'\s+',dtype=np.float64)   #Reweighted Energy file
dist_data = pandas.read_csv(dist_file,comment='#',sep='\t',dtype=np.float64)   #Distance file

# Snap the distance-table timesteps to the nearest multiple of 200 before
# matching. NOTE(review): presumably 200 is the output stride of the energy
# file -- confirm.
dist_data['Timestep'] = 200*round(dist_data['Timestep']/200)

# Keep only the first row for each Timestep in both tables.
dist_data = dist_data.drop_duplicates(subset=['Timestep'],ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'],ignore_index=True)

# Inner join on Timestep; clashing column names get '_mda' / '_plumed' suffixes.
master_data_pseudo = dist_data.join(ener_data.set_index('Timestep'),on='Timestep',how='inner',lsuffix = '_mda',rsuffix = '_plumed')


In [None]:
# Solution terms: energy columns averaged for all three systems below.
all_colms = [
    'Bond', 'HP', 'G96', 'PDih', 'ImPDih', 'LJ', 'Col', 'Pot',
    'Col-Pro-Pro', 'LJ-Pro-Pro',
    'Col-Pro-W', 'LJ-Pro-W',
    'Col-W-W', 'LJ-W-W',
]

# Membrane terms: energy columns averaged for the 2D system only.
mem_colms = [
    'Col-Pro-Mem', 'LJ-Pro-Mem',
    'Col-Mem-Mem', 'LJ-Mem-Mem',
    'Col-Mem-W', 'LJ-Mem-W',
]
#Pseudo Terms
# ps_colms = ['Bond', 'HP', 'G96', 'PDih', 'ImPDih', 'LJ', 'Col',
#        'Pot', 'Col-Pro-Pro', 'LJ-Pro-Pro', 'Col-Pro-W', 'LJ-Pro-W', 'Col-W-W',
#        'LJ-W-W']



In [None]:
def _bin_membership(values, edges, index):
    """Boolean mask of the samples that fall in histogram bin ``index``.

    Uses the same convention as ``np.histogram``: every bin is half-open
    ``[lower, upper)`` except the last one, which also includes its upper
    edge. Strict inequalities on both sides would drop every sample lying
    exactly on an edge, and the smallest and largest sample always do.
    """
    lower, upper = edges[index], edges[index + 1]
    if index == len(edges) - 2:
        return (values >= lower) & (values <= upper)
    return (values >= lower) & (values < upper)


def _bin_statistics(rows, column):
    """Return (mean, reweighted mean, standard error) of ``column`` in ``rows``.

    ``rows`` holds the samples of a single bin. The reweighted mean uses
    weights exp(rbias / kbt). The standard error is the population standard
    deviation divided by sqrt(sample count). An empty bin gives three NaNs,
    so every output list keeps exactly one entry per bin.
    """
    count = len(rows)
    if count == 0:
        return np.nan, np.nan, np.nan
    values = rows[column]
    weights = np.exp(rows['rbias'] / kbt)
    return (
        np.average(values),
        np.average(values, weights=weights),
        np.std(values) / math.sqrt(count),
    )


# Frames ordered by the RMSD coordinate (the 2D data uses 'RMSD-BtoA').
sorted_3D = master_data3D.sort_values(by='RMSD-B',ascending=True)
sorted_2D = master_data2D.sort_values(by='RMSD-BtoA',ascending=True)
sorted_pseudo = master_data_pseudo.sort_values(by='RMSD-B',ascending=True)

nbins=50

# Each system gets its own equal-width bins spanning its own RMSD range.
hist3D = np.histogram(sorted_3D['RMSD-B'],bins=nbins)
hist2D = np.histogram(sorted_2D['RMSD-BtoA'],bins=nbins)
hist_pseudo = np.histogram(sorted_pseudo['RMSD-B'],bins=nbins)

# Bin edges (nbins + 1 values each).
rmsd_bins3D = hist3D[1]
rmsd_bins2D = hist2D[1]
rmsd_bins_pseudo = hist_pseudo[1]

# Per-column lists with one value per RMSD bin:
#   energies_*      plain mean
#   energies_*_rwt  mean weighted by exp(rbias / kbt)
#   energies_*_std  standard error of the plain mean
# The 2D dictionaries additionally carry the membrane terms.
energies_3D = {col: [] for col in all_colms}
energies_3D_rwt = {col: [] for col in all_colms}
energies_3D_std = {col: [] for col in all_colms}

energies_2D = {col: [] for col in all_colms + mem_colms}
energies_2D_rwt = {col: [] for col in all_colms + mem_colms}
energies_2D_std = {col: [] for col in all_colms + mem_colms}

energies_pseudo = {col: [] for col in all_colms}
energies_pseudo_rwt = {col: [] for col in all_colms}
energies_pseudo_std = {col: [] for col in all_colms}

for i in range(nbins):
    mask3D = _bin_membership(sorted_3D['RMSD-B'], rmsd_bins3D, i)
    mask2D = _bin_membership(sorted_2D['RMSD-BtoA'], rmsd_bins2D, i)
    mask_pseudo = _bin_membership(sorted_pseudo['RMSD-B'], rmsd_bins_pseudo, i)

    # Select the rows of this bin once instead of once per column.
    rows_3D = sorted_3D[mask3D]
    rows_2D = sorted_2D[mask2D]
    rows_pseudo = sorted_pseudo[mask_pseudo]

    per_system = (
        (rows_3D, energies_3D, energies_3D_rwt, energies_3D_std),
        (rows_2D, energies_2D, energies_2D_rwt, energies_2D_std),
        (rows_pseudo, energies_pseudo, energies_pseudo_rwt, energies_pseudo_std),
    )
    for col in all_colms:
        for rows, plain, reweighted, stderr in per_system:
            mean, rwt_mean, sem = _bin_statistics(rows, col)
            plain[col].append(mean)
            reweighted[col].append(rwt_mean)
            stderr[col].append(sem)

    # Membrane terms are only averaged for the 2D system.
    for col in mem_colms:
        mean, rwt_mean, sem = _bin_statistics(rows_2D, col)
        energies_2D[col].append(mean)
        energies_2D_rwt[col].append(rwt_mean)
        energies_2D_std[col].append(sem)



In [None]:
# Reweighted energy profiles along the RMSD coordinate for the 3D, 2D and
# pseudo-membrane data, one stacked panel per energy term.
fig, ax = plt.subplots(2, 1, figsize=(6, 8), sharex=True)

t_size = 24      # tick-label font size
lw = 3           # width of the profile lines
f_size = 25      # legend font size
lw1 = 1.0        # width of the error-bar lines
al1 = 0.6        # opacity of the error bars
col_names = ['LJ-Pro-Pro', 'LJ-Pro-W']
# col_names = ['Col-Pro-Pro', 'Col-Pro-W']

# One entry per system:
# (bin edges, reweighted profiles, standard errors, legend label, colour)
systems = (
    (rmsd_bins3D, energies_3D_rwt, energies_3D_std, '3D', 'navy'),
    (rmsd_bins2D, energies_2D_rwt, energies_2D_std, '2D', 'firebrick'),
    (rmsd_bins_pseudo, energies_pseudo_rwt, energies_pseudo_std, '2D-Ps', 'teal'),
)

for i, term in enumerate(col_names):
    panel = ax[i]

    # Shift every profile so that its mean over the bins whose left edge lies
    # in (12, 20) is zero. The shifted array is written back into the
    # energies_*_rwt dictionary.
    for edges, profiles, _errors, _label, _colour in systems:
        left_edges = edges[:-1]
        unb_mask = (left_edges > 12.0) & (left_edges < 20.0)
        profile = np.array(profiles[term])
        zero_point = np.mean(profile[unb_mask])
        profiles[term] = profile - zero_point

    # Profiles first, error bars afterwards (same drawing order for all systems).
    for edges, profiles, _errors, label, colour in systems:
        panel.plot(edges[:-1], profiles[term], label=label, color=colour, lw=lw, alpha=0.8)
    for edges, profiles, errors, _label, colour in systems:
        panel.errorbar(edges[:-1], profiles[term], yerr=errors[term], color=colour, lw=lw1, alpha=al1, capsize=2)

# `i` still holds the last loop index here, so only the last panel gets a legend.
ax[i].legend(fontsize=f_size, frameon=False, loc='upper right', labelspacing=0.1, borderaxespad=0.1)

# Axis settings for the LJ interactions: dashed marker line at x = 2.0 plus
# fixed y ranges and ticks per panel.
ax[0].vlines(2.0, -1800, 200, color='black', linestyle='--', lw=3)
ax[1].vlines(2.0, -400, 3000, color='black', linestyle='--', lw=3)
ax[0].set_ylim([-1800, 200])
ax[1].set_ylim([-400, 3000])
ax[0].set_yticks([-1800, -900, 0])
ax[1].set_yticks([3000, 1500, 0])

# Alternative settings for the Col interactions:
# ax[0].vlines(2.0,-100,20,color='black',linestyle='--',lw=3)
# ax[1].vlines(2.0,-10,120,color='black',linestyle='--',lw=3)
# ax[0].set_ylim([-100,20])
# ax[1].set_ylim([-10,120])
# ax[0].set_yticks([-100,-50,0])
# ax[1].set_yticks([3000,1500,0])

for ax_ in ax.flat:
    # ax_.set_xlabel('RMSD (nm)',fontsize=f_size)
    # ax_.set_ylabel('Energy (kJ/mol)',fontsize=f_size)
    ax_.set_xlim([0, 19])
    ax_.tick_params(axis='both', labelsize=t_size)

fig.tight_layout()
# plt.savefig('Figure_4.svg',format='svg',dpi=600,transparent=True)
# plt.savefig('Supp_ColInteractions.svg',format='svg',dpi=600,transparent=True)


In [None]:
# Reweighted protein-membrane energy profiles of the 2D data, one panel per
# term, saved as 'Supp_MemInteractions.svg'.
fig, ax = plt.subplots(1, 2, figsize=(6, 3), sharey=True)

t_size = 24      # tick-label font size
lw = 3           # width of the profile lines
f_size = 25
lw1 = 1.0        # width of the error-bar lines
al1 = 0.6        # opacity of the error bars
# col_names = ['LJ-Pro-Pro', 'LJ-Pro-W']
col_names = ['Col-Pro-Mem', 'LJ-Pro-Mem']

# Bins whose left edge lies in (12, 20) define the zero of each profile.
left_edges_2D = rmsd_bins2D[:-1]
unb_mask = (left_edges_2D > 12.0) & (left_edges_2D < 20.0)

for i, term in enumerate(col_names):
    # Shift the profile so its mean over the reference bins is zero and store
    # the shifted array back into energies_2D_rwt.
    profile = np.array(energies_2D_rwt[term])
    zero_point = np.mean(profile[unb_mask])
    energies_2D_rwt[term] = profile - zero_point

    ax[i].plot(left_edges_2D, energies_2D_rwt[term], label='2D', color='firebrick', lw=lw, alpha=0.8)
    ax[i].errorbar(left_edges_2D, energies_2D_rwt[term], yerr=energies_2D_std[term], color='firebrick', lw=lw1, alpha=al1, capsize=2)

# ax[i].legend(fontsize=f_size,frameon=False,loc='upper right',labelspacing=0.1,borderaxespad=0.1)

# Dashed marker line at x = 2.0 in both panels; the y axis is shared, so the
# range and ticks are set on the first panel only.
for panel in (ax[0], ax[1]):
    panel.vlines(2.0, -30, 50, color='black', linestyle='--', lw=3)
ax[0].set_ylim([-30, 50])
ax[0].set_yticks([-25, 0, 25, 50])

for ax_ in ax.flat:
    # ax_.set_xlabel('RMSD (nm)',fontsize=f_size)
    # ax_.set_ylabel('Energy (kJ/mol)',fontsize=f_size)
    ax_.set_xlim([0, 19])
    ax_.tick_params(axis='both', labelsize=t_size)

fig.tight_layout()
# plt.savefig('Figure_4.svg',format='svg',dpi=600,transparent=True)
plt.savefig('Supp_MemInteractions.svg', format='svg', dpi=600, transparent=True)


In [None]:
# Reweighted membrane-term profiles of the 2D data drawn with markers; the
# panel, marker and line style of each term come from the lookup lists below.
fig, ax = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

t_size = 24      # tick-label font size
lw = 1.5         # width of the profile lines
f_size = 25      # base legend font size
lw1 = 1.0        # width of the error-bar lines
al1 = 0.6        # opacity of the error bars

# The lookup lists are sized for all six membrane terms:
# ['Col-Pro-Mem', 'LJ-Pro-Mem', 'Col-Mem-Mem', 'LJ-Mem-Mem', 'Col-Mem-W', 'LJ-Mem-W']
fig_indx = [0, 1, 0, 1, 0, 1]     # panel index for each term
mk_style = ['o', 'o', 'd', 'd', 's', 's']
ls = ['solid', 'solid', 'dotted', 'dotted', 'dashdot', 'dashdot']
colors = ['firebrick', 'firebrick', 'orchid', 'orchid', 'steelblue', 'steelblue']

# Only the two protein-membrane terms are plotted.
# col_names = mem_colms
# col_names = ['Bond', 'LJ', 'Col','Pot'] #, 'Col-Pro-Pro' , 'Col-Pro-W', 'Col-W-W','LJ-W-W']
col_names = ['Col-Pro-Mem', 'LJ-Pro-Mem']

# Bins whose left edge lies in (12, 20) define the zero of each profile.
left_edges_2D = rmsd_bins2D[:-1]
unb_mask = (left_edges_2D > 12.0) & (left_edges_2D < 20.0)

for term, panel_idx, marker, line_style in zip(col_names, fig_indx, mk_style, ls):
    # Shift the profile so its mean over the reference bins is zero and store
    # the shifted array back into energies_2D_rwt.
    profile = np.array(energies_2D_rwt[term])
    zero_point = np.mean(profile[unb_mask])
    energies_2D_rwt[term] = profile - zero_point

    panel = ax[panel_idx]
    # Every curve uses the first colour of the list.
    panel.plot(left_edges_2D, energies_2D_rwt[term], label=term, color=colors[0], lw=lw, alpha=0.8, marker=marker, linestyle=line_style)
    panel.errorbar(left_edges_2D, energies_2D_rwt[term], yerr=energies_2D_std[term], color=colors[0], lw=lw1, alpha=al1, capsize=2)

for panel in (ax[0], ax[1]):
    panel.legend(fontsize=f_size-5, frameon=False, loc='lower right', labelspacing=0.1, borderaxespad=0.1)

for ax_ in ax:
    ax_.set_ylim(bottom=-300, top=100)
    ax_.set_xlim([0, 19])
    ax_.tick_params(axis='both', labelsize=t_size)

fig.tight_layout()
# plt.savefig('Figure_4.svg',format='svg',dpi=600,transparent=True)

In [None]:

# Smallest value of each reweighted profile, one output line per system with
# the terms in the order Col-Pro-Pro, LJ-Pro-Pro, Col-Pro-W, LJ-Pro-W.
print("Minimum energy points: ")
_min_terms = ('Col-Pro-Pro', 'LJ-Pro-Pro', 'Col-Pro-W', 'LJ-Pro-W')
for _tag, _profiles in (
    ("3D: ", energies_3D_rwt),
    ("2D: ", energies_2D_rwt),
    ("Pseudo: ", energies_pseudo_rwt),
):
    print(_tag, *[np.min(_profiles[_term]) for _term in _min_terms])


In [None]:
import seaborn as sns
def plot_boxplot(all_data_bd,cols,ax,legbool=False):
    """Draw grouped box plots of one energy column, split by state and environment.

    Parameters
    ----------
    all_data_bd : mapping
        Environment label -> table. Each table is indexed with ``cols`` and
        with ``'State'``.
    cols : hashable
        Key of the energy column to plot.
    ax : object
        Target axes, forwarded to ``sns.boxplot``.
    legbool : optional
        Forwarded to ``sns.boxplot`` as ``legend``. Defaults to False.

    Returns
    -------
    None
    """
    states, environments, energies = [], [], []
    for env, data in all_data_bd.items():
        values = data[cols]
        states.extend(data['State'])
        # one environment label per energy value
        environments.extend([env] * len(values))
        energies.extend(values)

    # long-format table: one entry per sample
    plot_data = {'State': states, 'Environment': environments, 'Energy': energies}
    sns.boxplot(
        data=plot_data,
        ax=ax,
        x='State',
        y='Energy',
        hue='Environment',
        palette='Set2',
        legend=legbool,
    )


def plot_distplot(all_data, cols, ax, leg=True, leg_size=14,rwt=True):
    """Draw KDE curves of one column, coloured by the 'RMSD' label.

    Rows whose 'State' is 'Unbound' are left out of the plot.

    Parameters
    ----------
    all_data : DataFrame
        Needs the columns 'State', 'RMSD' and ``cols``, plus 'rbias' when
        ``rwt`` is true.
    cols : str
        Column plotted on the x axis.
    ax : object
        Target axes, forwarded to ``sns.kdeplot``.
    leg : bool, optional
        Forwarded to ``sns.kdeplot`` as ``legend``. When true, the legend is
        also restyled (upper left, four columns, ``leg_size`` font).
    leg_size : optional
        Font size for the legend entries and the legend title.
    rwt : bool, optional
        When true, every sample is weighted by exp(rbias / kbt).

    Returns
    -------
    The object returned by ``sns.kdeplot``.
    """
    # Do not plot the unbound data.
    bound_data = all_data[all_data['State'] != 'Unbound']

    kde_kwargs = dict(
        data=bound_data,
        ax=ax,
        x=cols,
        hue='RMSD',
        palette='dark',
        fill=True,
        common_norm=False,
        legend=leg,
    )
    if rwt:
        kde_kwargs['weights'] = np.exp(bound_data['rbias']/kbt)
    ca = sns.kdeplot(**kde_kwargs)

    # get_legend() returns None when no legend was drawn (for example when
    # nothing is left to plot), so restyle it only if it exists.
    legend = ca.get_legend() if leg else None
    if legend is not None:
        legend.set_loc('upper left')
        legend.set_ncols(4)
        for text in legend.get_texts():
            text.set_fontsize(leg_size)
        legend.get_title().set_fontsize(leg_size)

    return ca

In [None]:


# Add a 'State' column to each merged table: a row is 'Unbound' when either
# d1_mda or d2_mda is 13.0 or more, and 'Bound' otherwise.
for _frame in (master_data3D, master_data2D, master_data_pseudo):
    _frame['State'] = 'Bound'
    unb_mask = (_frame['d1_mda'] >= 13.0) | (_frame['d2_mda'] >= 13.0)
    _frame.loc[unb_mask, 'State'] = 'Unbound'



## Use RMSD for categorization

In [None]:
def _rmsd_class_averages(frame, rmsd_col, cutoffs, columns, max_dist, reweight, max_rmsd=18.0):
    """Label the rows of ``frame`` by RMSD class and average energies per class.

    A new 'RMSD' column is written into ``frame``: rows whose ``rmsd_col``
    value lies strictly between the previous cutoff (starting at 0) and the
    current one are labelled ``'<cutoff> nm'``; all other rows keep the label
    'Unbound' (this includes values lying exactly on a cutoff).

    Parameters
    ----------
    frame : DataFrame
        Needs ``rmsd_col``, 'd1_mda', 'd2_mda', 'rbias' and ``columns``.
    rmsd_col : str
        Column holding the RMSD coordinate.
    cutoffs : sequence of float
        Increasing upper bounds of the RMSD classes.
    columns : sequence of str
        Energy columns to average.
    max_dist : float
        'Unbound' rows enter the last average only when both d1_mda and
        d2_mda are below this value.
    reweight : bool
        When true, averages are weighted by exp(rbias / kbt); otherwise plain
        means are used.
    max_rmsd : float, optional
        'Unbound' rows enter the last average only below this RMSD.

    Returns
    -------
    list of dict
        One ``{column: average}`` dictionary per cutoff, followed by one for
        the restricted 'Unbound' rows. A class without rows yields NaN.
    """
    def _averages(selection):
        rows = frame[selection]
        if len(rows) == 0:
            return {col: np.nan for col in columns}
        weights = np.exp(rows['rbias'] / kbt) if reweight else None
        return {col: np.average(rows[col], weights=weights) for col in columns}

    frame['RMSD'] = 'Unbound'
    results = []
    lower = 0.0
    for cutoff in cutoffs:
        in_class = (frame[rmsd_col] > lower) & (frame[rmsd_col] < cutoff)
        frame.loc[in_class, 'RMSD'] = f'{cutoff} nm'
        results.append(_averages(in_class))
        lower = cutoff

    unbound = (
        (frame['RMSD'] == 'Unbound')
        & (frame['d1_mda'] < max_dist)
        & (frame['d2_mda'] < max_dist)
        & (frame[rmsd_col] < max_rmsd)
    )
    results.append(_averages(unbound))
    return results


# Categorize every frame by RMSD class (new 'RMSD' column) and compute the
# average of the selected energy terms per class.
rmsd_cutoffs = [0.6,2.0]
col_names = ['LJ-Pro-Pro','LJ-Pro-W','Col-Pro-Pro','Col-Pro-W']

# True: weight every average by exp(rbias / kbt); False: plain means.
# The flag now applies to all three systems and all classes. Previously the
# 2D and pseudo class averages ignored it, and three averages used exp(rbias)
# without the division by kbt.
rwt_bool = True

# Each list holds one dict per RMSD class: 0.6 nm, 2.0 nm, then 'Unbound'.
# NOTE(review): the unbound distance limit is 21.0 for 3D but 19.0 for the
# other two systems -- kept as in the original; confirm this is intended.
avg_energies3D = _rmsd_class_averages(master_data3D, 'RMSD-B', rmsd_cutoffs, col_names, 21.0, rwt_bool)
avg_energies2D = _rmsd_class_averages(master_data2D, 'RMSD-BtoA', rmsd_cutoffs, col_names, 19.0, rwt_bool)
avg_energies_pseudo = _rmsd_class_averages(master_data_pseudo, 'RMSD-B', rmsd_cutoffs, col_names, 19.0, rwt_bool)


## Calculated Averages


In [None]:
# Show the per-class average energies of the 3D data: a list with one
# {column: average} dict per RMSD class, as built in the cell above.
print(avg_energies3D)

In [None]:
## Histogram plot
# Energy histograms of the 3D data for the '0.6 nm' and '2.0 nm' RMSD classes:
# LJ-Pro-Pro in the left panel, LJ-Pro-W in the right one.
# f_size and t_size are taken from the plotting cells above.

fig,ax=plt.subplots(1,2,figsize=(8,4),sharey=True)

# Divisor applied to the counts; 1 leaves them un-normalized.
# total_count = len(master_data3D)
total_count = 1

cl1 = 'darkorchid'      # colour of the 0.6 nm curves
cl2 = 'darkgoldenrod'   # colour of the 2.0 nm curves

# ---- 0.6 nm class ----
all_data = master_data3D[master_data3D['RMSD'] == '0.6 nm']
n1_count=len(all_data)

freq_lj_pro,bins_lj_pro = np.histogram(all_data['LJ-Pro-Pro'],bins=50)
freq_lj_w, bins_lj_w = np.histogram(all_data['LJ-Pro-W'],bins=50)

# Left edge of the most populated bin of each histogram.
max_count_01 = np.argmax(freq_lj_pro)
max_count_02 = np.argmax(freq_lj_w)

ljpro_energy_0p6 = bins_lj_pro[max_count_01]
ljw_energy_0p6 = bins_lj_w[max_count_02]

print("Max Values for 0.6 nm: ")
print(ljpro_energy_0p6,ljw_energy_0p6)

ax[0].plot(bins_lj_pro[:-1],freq_lj_pro/total_count,alpha=1.0,label='0.6 nm',color=cl1, lw=3.5)
ax[1].plot(bins_lj_w[:-1],freq_lj_w/total_count,alpha=1.0,label='LJ-Pro-W',color=cl1, lw=3.5)

# ---- 2.0 nm class ----
all_data02 = master_data3D[master_data3D['RMSD'] == '2.0 nm']
n2_count=len(all_data02)

freq_lj_pro_02, bins_lj_pro_02 = np.histogram(all_data02['LJ-Pro-Pro'],bins=30)
freq_lj_w_02, bins_lj_w_02 = np.histogram(all_data02['LJ-Pro-W'],bins=30)

max_count_01 = np.argmax(freq_lj_pro_02)
max_count_02 = np.argmax(freq_lj_w_02)

ljpro_energy_2p0 = bins_lj_pro_02[max_count_01]
ljw_energy_2p0 = bins_lj_w_02[max_count_02]

# The indices above belong to the 2.0 nm histograms, so the values must be
# read from the 2.0 nm bin edges (not from the 0.6 nm ones).
print("Max Values for 2.0 nm: ")
print(ljpro_energy_2p0,ljw_energy_2p0)

ax[0].plot(bins_lj_pro_02[:-1],freq_lj_pro_02/total_count,alpha=1,label='2.0 nm',color=cl2, lw=3)
ax[1].plot(bins_lj_w_02[:-1],freq_lj_w_02/total_count,alpha=1,label='LJ-Pro-W',color=cl2, lw=3)

# ---- axes ----
ax[0].legend(fontsize=f_size-2,frameon=False,loc='upper left')
ax[0].set_xlim([-13000,-10500])
ax[0].set_xticks([-12500,-11500])
ax[1].set_xlim([-18000,-12500])
ax[1].set_xticks([-17000,-15000])
for ax_ in ax:
    # ax_.set_xlabel('Energy (kJ/mol)',fontsize=f_size)
    ax_.tick_params(axis='both',labelsize=t_size)

# ax[0].set_ylabel('Frequency',fontsize=f_size)
fig.tight_layout()
plt.savefig('3D_RMSD_PotEnergy.svg',format='svg',dpi=600,transparent=True)

print(n1_count,n2_count,total_count)



In [None]:
# A 3D frame counts as bound when both d1_mda and d2_mda are below 13.0.
bound_mask = master_data3D['d1_mda'].lt(13.0) & master_data3D['d2_mda'].lt(13.0)
total_bound = bound_mask.sum()

# n1_count is the number of '0.6 nm' rows from the most recently run histogram
# cell (the 3D one when cells are run top to bottom).
print("Fraction of configs in bound state: ", n1_count/total_bound)
print(len(master_data3D))
print("Total Bound Configs: ", total_bound)

In [None]:
#Get Timestep for the corresponding max counts
data_0p6 = master_data3D[master_data3D['RMSD'] == '0.6 nm']

data_2p0 = master_data3D[master_data3D['RMSD'] == '2.0 nm']

mask1 = data_0p6['LJ-Pro-Pro']


In [None]:
## Histogram plot
# Energy histograms of the 2D data for the '0.6 nm' and '2.0 nm' RMSD classes:
# LJ-Pro-Pro in the left panel, LJ-Pro-W in the right one.
# cl1, cl2 and t_size are taken from earlier cells.

fig, ax = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

# Divisor applied to the counts; 1 leaves them un-normalized.
# total_count = len(master_data2D)
total_count = 1

all_data = master_data2D[master_data2D['RMSD'] == '0.6 nm']
all_data02 = master_data2D[master_data2D['RMSD'] == '2.0 nm']
n1_count = len(all_data)
n2_count = len(all_data02)

# 50 bins for the 0.6 nm class, 30 bins for the 2.0 nm class.
freq_lj_pro, bins_lj_pro = np.histogram(all_data['LJ-Pro-Pro'], bins=50)
freq_lj_w, bins_lj_w = np.histogram(all_data['LJ-Pro-W'], bins=50)
freq_lj_pro_02, bins_lj_pro_02 = np.histogram(all_data02['LJ-Pro-Pro'], bins=30)
freq_lj_w_02, bins_lj_w_02 = np.histogram(all_data02['LJ-Pro-W'], bins=30)

# (panel, bin edges, counts, label, colour), in drawing order.
curves = (
    (ax[0], bins_lj_pro, freq_lj_pro, '0.6 nm', cl1),
    (ax[1], bins_lj_w, freq_lj_w, 'LJ-Pro-W', cl1),
    (ax[0], bins_lj_pro_02, freq_lj_pro_02, '2.0 nm', cl2),
    (ax[1], bins_lj_w_02, freq_lj_w_02, 'LJ-Pro-W', cl2),
)
for panel, edges, counts, label, colour in curves:
    # counts are drawn against the left edge of each bin
    panel.plot(edges[:-1], counts/total_count, alpha=1.0, label=label, color=colour, lw=3.5)

# ax[0].legend()
ax[0].set_xlim([-13000, -10500])
ax[0].set_xticks([-12500, -11500])
ax[1].set_xlim([-18000, -12500])
ax[1].set_xticks([-17000, -15000])
ax[0].set_yticks([0, 500, 1000, 1500])
for ax_ in ax:
    # ax_.set_xlabel('Energy (kJ/mol)',fontsize=f_size)
    ax_.tick_params(axis='both', labelsize=t_size)

# ax[0].set_ylabel('Frequency',fontsize=f_size)
fig.tight_layout()
plt.savefig('2D_RMSD_PotEnergy.svg', format='svg', dpi=600, transparent=True)

print(n1_count, n2_count, total_count)

In [None]:
# A 2D frame counts as bound when both d1_mda and d2_mda are below 13.0.
bound_mask = master_data2D['d1_mda'].lt(13.0) & master_data2D['d2_mda'].lt(13.0)

# Number of bound frames, then the total number of frames.
print(bound_mask.sum())
print(len(master_data2D))

In [None]:
## Histogram plot
# Energy histograms of the pseudo-membrane data for the '0.6 nm' and '2.0 nm'
# RMSD classes: LJ-Pro-Pro in the left panel, LJ-Pro-W in the right one.
# cl1, cl2, f_size and t_size are taken from earlier cells.

fig, ax = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

# Divisor applied to the counts; 1 leaves them un-normalized.
# total_count = len(master_data_pseudo)
total_count = 1

all_data = master_data_pseudo[master_data_pseudo['RMSD'] == '0.6 nm']
all_data02 = master_data_pseudo[master_data_pseudo['RMSD'] == '2.0 nm']
n1_count = len(all_data)
n2_count = len(all_data02)

# 50 bins for the 0.6 nm class, 30 bins for the 2.0 nm class.
freq_lj_pro, bins_lj_pro = np.histogram(all_data['LJ-Pro-Pro'], bins=50)
freq_lj_w, bins_lj_w = np.histogram(all_data['LJ-Pro-W'], bins=50)
freq_lj_pro_02, bins_lj_pro_02 = np.histogram(all_data02['LJ-Pro-Pro'], bins=30)
freq_lj_w_02, bins_lj_w_02 = np.histogram(all_data02['LJ-Pro-W'], bins=30)

# (panel, bin edges, counts, label, colour), in drawing order.
curves = (
    (ax[0], bins_lj_pro, freq_lj_pro, '0.6 nm', cl1),
    (ax[1], bins_lj_w, freq_lj_w, 'LJ-Pro-W', cl1),
    (ax[0], bins_lj_pro_02, freq_lj_pro_02, '2.0 nm', cl2),
    (ax[1], bins_lj_w_02, freq_lj_w_02, 'LJ-Pro-W', cl2),
)
for panel, edges, counts, label, colour in curves:
    # counts are drawn against the left edge of each bin
    panel.plot(edges[:-1], counts/total_count, alpha=1.0, label=label, color=colour, lw=3.5)

# ax[0].legend()
ax[0].set_xlim([-13000, -10500])
ax[0].set_xticks([-12500, -11500])
ax[1].set_xlim([-18000, -13000])
ax[1].set_xticks([-17000, -15000])
for ax_ in ax:
    ax_.set_xlabel('Energy (kJ/mol)', fontsize=f_size)
    ax_.tick_params(axis='both', labelsize=t_size)

# ax[0].set_ylabel('Frequency',fontsize=f_size)
print(n1_count, n2_count, total_count)
fig.tight_layout()
plt.savefig('PS_RMSD_PotEnergy.svg', format='svg', dpi=600, transparent=True)
