In [None]:
import matplotlib.pyplot as plt
import MDAnalysis as mda
import numpy as np
from matplotlib.markers import MarkerStyle as markerstyle
import math
from scipy.interpolate import griddata
import pandas
from scipy import constants
import matplotlib.patches as patches
import matplotlib.cm as cm

In [None]:
# Input files for the 3D system.
ener_file = "../3D_Martini3/SASA_COMPLETE.dat"
dist_file = "../3D_Martini3/Final_data_COMPLETE.txt"

# SASA file: headerless, whitespace-separated (Timestep, SASA); '#' lines are skipped.
# The separator is a raw string so that '\s' reaches pandas as a regex instead of
# being parsed as an (invalid) Python escape sequence.
ener_data = pandas.read_csv(
    ener_file, comment='#', sep=r'\s+', dtype=np.float64, names=['Timestep', 'SASA'],
)
# Distance file: tab-separated; column names come from its header row.
dist_data = pandas.read_csv(dist_file, comment='#', sep='\t', dtype=np.float64)

# Keep only the first row for each Timestep in either table.
dist_data = dist_data.drop_duplicates(subset=['Timestep'], ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'], ignore_index=True)

# Inner join on Timestep: only frames present in both files survive. The suffixes
# only take effect if the distance table also has a column named 'SASA'.
master_data3D = dist_data.join(
    ener_data.set_index('Timestep'), on='Timestep', how='inner',
    lsuffix='_mda', rsuffix='_plumed',
)

In [None]:
# Input files for the 2D system.
ener_file = "../2D_Martini3/SASA_COMPLETE.dat"
dist_file = "../2D_Martini3/Filtered_data_RMSD.dat"

# SASA file: headerless, whitespace-separated (Timestep, SASA); '#' lines are skipped.
# The separator is a raw string so that '\s' reaches pandas as a regex instead of
# being parsed as an (invalid) Python escape sequence.
ener_data = pandas.read_csv(
    ener_file, comment='#', sep=r'\s+', dtype=np.float64, names=['Timestep', 'SASA'],
)
# Distance file: tab-separated; column names come from its header row.
dist_data = pandas.read_csv(dist_file, comment='#', sep='\t', dtype=np.float64)

# Keep only the first row for each Timestep in either table.
dist_data = dist_data.drop_duplicates(subset=['Timestep'], ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'], ignore_index=True)

# Inner join on Timestep: only frames present in both files survive. The suffixes
# only take effect if the distance table also has a column named 'SASA'.
master_data2D = dist_data.join(
    ener_data.set_index('Timestep'), on='Timestep', how='inner',
    lsuffix='_mda', rsuffix='_plumed',
)

# Remove instances where the protein came off the membrane.
# Timestep / 1e6 puts the time axis in the units of the windows below
# (microseconds, going by the original notes).
timesteps_array = master_data2D['Timestep'] / 1e6

# Time windows to ignore (both bounds exclusive):
#   33.0   -  66.0 us
#   95.2   -  97.8 us
#   101.95 - 103.3 us
#   127.6  - 128.5 us
#   139    - 142.5 us
mask1 = (timesteps_array > 33.0) & (timesteps_array < 66.0)
mask2 = (timesteps_array > 95.2) & (timesteps_array < 97.8)
mask3 = (timesteps_array > 101.95) & (timesteps_array < 103.3)
mask4 = (timesteps_array > 127.6) & (timesteps_array < 128.5)
mask5 = (timesteps_array > 139) & (timesteps_array < 142.5)

# A frame is dropped if it falls inside any of the windows.
mask = mask1 | mask2 | mask3 | mask4 | mask5

master_data2D = master_data2D[~mask]

In [None]:
# Sanity check: show the SASA column of the filtered 2D table.
sasa_2d = master_data2D['SASA']
print(sasa_2d)

In [None]:
# Input files for the pseudo-membrane system.
ener_file = "../Pseudo_Mem/SASA_COMPLETE.dat"
dist_file = "../Pseudo_Mem/Final_data_102us.txt"

# SASA file: headerless, whitespace-separated (Timestep, SASA); '#' lines are skipped.
# The separator is a raw string so that '\s' reaches pandas as a regex instead of
# being parsed as an (invalid) Python escape sequence.
ener_data = pandas.read_csv(
    ener_file, comment='#', sep=r'\s+', dtype=np.float64, names=['Timestep', 'SASA'],
)
# Distance file: tab-separated; column names come from its header row.
dist_data = pandas.read_csv(dist_file, comment='#', sep='\t', dtype=np.float64)

# Snap every distance-file timestep to the nearest multiple of 200 before
# de-duplicating and joining.
# NOTE(review): presumably this aligns it with the SASA file's sampling -- confirm.
dist_data['Timestep'] = (dist_data['Timestep'] / 200).round() * 200

# Keep only the first row for each Timestep in either table.
dist_data = dist_data.drop_duplicates(subset=['Timestep'], ignore_index=True)
ener_data = ener_data.drop_duplicates(subset=['Timestep'], ignore_index=True)

# Inner join on Timestep: only frames present in both files survive. The suffixes
# only take effect if the distance table also has a column named 'SASA'.
master_data_pseudo = dist_data.join(
    ener_data.set_index('Timestep'), on='Timestep', how='inner',
    lsuffix='_mda', rsuffix='_plumed',
)


In [None]:
def plot_vs_cutoff(plot_data, ener_col, cutoffs, ax, colors=None,
                   rmsd_col='RMSD-B', bins=50):
    """Overlay one normalised histogram per RMSD shell on ``ax``.

    The rows of ``plot_data`` are split into consecutive half-open shells
    ``[previous_cutoff, cutoff)`` of the ``rmsd_col`` column, starting from 0.
    For every shell the selected value column(s) are histogrammed with
    ``density=True`` and labelled ``'<cutoff> nm'``.

    Parameters
    ----------
    plot_data : pandas.DataFrame
        Table holding ``rmsd_col`` and the column(s) named by ``ener_col``.
    ener_col : str or list of str
        Column to histogram. If a list is given, the listed columns are summed
        row-wise first.
    cutoffs : iterable of float
        Upper shell edges. They are consumed in the order given, so they should
        be ascending; an out-of-order edge yields an empty shell.
    ax : matplotlib.axes.Axes
        Axes to draw on (only its ``hist`` method is used).
    colors : mapping, optional
        Colour per shell, looked up as ``colors[cutoff]``. When omitted the
        axes' default colour cycle is used.
    rmsd_col : str, optional
        Column used to assign rows to shells.
    bins : int, optional
        Number of histogram bins per shell.

    Returns
    -------
    list of int
        Number of rows that fell into each shell, in ``cutoffs`` order.
    """
    # A single column name is wrapped so that the row-wise sum below works the
    # same way for one column or several.
    columns = ener_col if isinstance(ener_col, list) else [ener_col]

    n_configs = []
    lower = 0.0
    for cutoff in cutoffs:
        in_shell = (plot_data[rmsd_col] >= lower) & (plot_data[rmsd_col] < cutoff)
        values = plot_data.loc[in_shell, columns].sum(axis=1)

        hist_kwargs = dict(bins=bins, alpha=0.8, label=f'{cutoff} nm', density=True)
        if colors is not None:
            hist_kwargs['color'] = colors[cutoff]
        ax.hist(values, **hist_kwargs)

        n_configs.append(len(values))
        lower = cutoff

    return n_configs


def plot_vs_cutoffMem(plot_data, ener_col, cutoffs, ax, colors=None,
                      rmsd_col='RMSD-BtoA', bins=50):
    """Same as :func:`plot_vs_cutoff`, but shells are taken on ``'RMSD-BtoA'`` by default.

    All parameters and the return value are those of :func:`plot_vs_cutoff`.
    """
    return plot_vs_cutoff(plot_data, ener_col, cutoffs, ax, colors=colors,
                          rmsd_col=rmsd_col, bins=bins)

        

        


In [None]:
# Column to histogram and the shared plot styling (font sizes, line width).
col_names = 'SASA'
t_size = 18
lw = 3
f_size = 16

# Upper edges of the RMSD shells, identical for all three systems.
rmsd_cutoffs = [0.2, 0.4, 0.6, 0.8, 1.0, 1.4, 1.8, 2.0]

# Three stacked panels on a shared x-axis:
# master_data3D (top), master_data2D (middle), master_data_pseudo (bottom).
fig, ax = plt.subplots(3, 1, figsize=(6, 8), sharex=True)

nconfigs_3D, nconfigs_2D, nconfigs_pseudo = (
    plotter(frame, col_names, cutoffs=rmsd_cutoffs, ax=panel)
    for plotter, frame, panel in (
        (plot_vs_cutoff, master_data3D, ax[0]),
        (plot_vs_cutoffMem, master_data2D, ax[1]),
        (plot_vs_cutoff, master_data_pseudo, ax[2]),
    )
)

# Same tick styling and y-label on every panel.
for ax_ in ax.flat:
    ax_.set_ylabel("PDF", fontsize=f_size)
    ax_.tick_params(axis='both', labelsize=t_size)
    ax_.ticklabel_format(axis='x', style='sci', scilimits=(-3, 3))

# Only the bottom panel carries the x-label; the limits apply to all panels via sharex.
ax[2].set_xlabel(r'$SASA (nm^2)$', fontsize=f_size)
ax[2].set_xlim(left=250, right=350)
fig.tight_layout()

In [None]:
def _bound_unbound_masks(frame, bound_cutoff, outer_cutoff):
    """Return boolean ``(bound, unbound)`` row masks for ``frame``.

    bound   : both 'd1' and 'd2' are below ``bound_cutoff``.
    unbound : not bound, but both 'd1' and 'd2' are below ``outer_cutoff``.

    Defining unbound as the complement of bound (inside the outer cutoff) keeps
    rows whose distance is exactly equal to ``bound_cutoff``; with separate
    ``<`` / ``>`` tests such rows would belong to neither state.
    """
    d1, d2 = frame['d1'], frame['d2']
    bound = (d1 < bound_cutoff) & (d2 < bound_cutoff)
    inside_outer = (d1 < outer_cutoff) & (d2 < outer_cutoff)
    return bound, ~bound & inside_outer


# NOTE(review): the cutoffs differ between systems (bound 13.0 / 13.0 / 12.0,
# outer 21.0 / 19.0 / 19.0) -- confirm that this is intentional.

# 3D system
bound_mask, unbound_mask = _bound_unbound_masks(master_data3D, 13.0, 21.0)
bd_data3D = master_data3D[bound_mask]
unb_data3D = master_data3D[unbound_mask]

# 2D system
bound_mask, unbound_mask = _bound_unbound_masks(master_data2D, 13.0, 19.0)
bd_data2D = master_data2D[bound_mask]
unb_data2D = master_data2D[unbound_mask]

# Pseudo-membrane system
bound_mask, unbound_mask = _bound_unbound_masks(master_data_pseudo, 12.0, 19.0)
bd_data = master_data_pseudo[bound_mask]
unb_data = master_data_pseudo[unbound_mask]






In [None]:
fig, ax = plt.subplots()

# 3D system: SASA distributions of bound vs. unbound frames, each normalised to unit area.
hist_style = dict(bins=50, alpha=0.8, density=True)
ax.hist(bd_data3D['SASA'], label='Bound', color='navy', **hist_style)
ax.hist(unb_data3D['SASA'], label='Unbound', color='grey', **hist_style)

# f_size / t_size are the font sizes defined earlier in the notebook.
ax.set_xlabel(r'$SASA (nm^2)$', fontsize=f_size)
ax.set_ylabel("PDF", fontsize=f_size)
ax.tick_params(axis='both', labelsize=t_size)
ax.set_xlim(left=250, right=350)
ax.legend(fontsize=f_size)
fig.tight_layout()


In [None]:
fig, ax = plt.subplots()

# 2D system: SASA distributions of bound vs. unbound frames, each normalised to unit area.
hist_style = dict(bins=50, alpha=0.8, density=True)
ax.hist(bd_data2D['SASA'], label='Bound', color='firebrick', **hist_style)
ax.hist(unb_data2D['SASA'], label='Unbound', color='grey', **hist_style)

# f_size / t_size are the font sizes defined earlier in the notebook.
ax.set_xlabel(r'$SASA (nm^2)$', fontsize=f_size)
ax.set_ylabel("PDF", fontsize=f_size)
ax.tick_params(axis='both', labelsize=t_size)
ax.set_xlim(left=250, right=350)
ax.legend(fontsize=f_size)
fig.tight_layout()


In [None]:
fig, ax = plt.subplots()

# Pseudo-membrane system: SASA distributions of bound vs. unbound frames,
# each normalised to unit area.
hist_style = dict(bins=50, alpha=0.8, density=True)
ax.hist(bd_data['SASA'], label='Bound', color='teal', **hist_style)
ax.hist(unb_data['SASA'], label='Unbound', color='grey', **hist_style)

# f_size / t_size are the font sizes defined earlier in the notebook.
ax.set_xlabel(r'$SASA (nm^2)$', fontsize=f_size)
ax.set_ylabel("PDF", fontsize=f_size)
ax.tick_params(axis='both', labelsize=t_size)
ax.set_xlim(left=250, right=350)
ax.legend(fontsize=f_size)
fig.tight_layout()
