In [1]:
# Record where this notebook ran: working directory, Python interpreter, and host name.
!pwd
!which python
!hostname

/media/ebeyerle/seagate/af2-dynamics/notebooks
/home/ebeyerle/micromamba/envs/bioemu-env/bin/python
hemingway


/home/ebeyerle/micromamba/envs/bioemu-env/bin/python
hemingway


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import mdtraj as md
#import MDAnalysis as mda
import deeptime
from scipy.interpolate import RegularGridInterpolator

In [3]:
def traj_from_xtc(XTC, TOP, chunk_size = 10000):
	"""Read a trajectory chunk by chunk and return it as one flattened 2-D array.

	Parameters
	----------
	XTC : trajectory file handed to ``md.iterload``.
	TOP : topology handed to ``md.iterload`` as ``top``.
	chunk_size : number of frames requested per chunk (default 10000).

	Returns
	-------
	numpy.ndarray of shape ``(n_frames, 3 * n_atoms)``; every row is one
	frame's ``xyz`` block flattened in C order.

	Raises
	------
	ValueError
		If ``md.iterload`` yields no chunks (previously an UnboundLocalError).
	"""
	flat_chunks = []
	for chunk in md.iterload(XTC, chunk = chunk_size, top = TOP):
		print(chunk)  # progress feedback, printed once per chunk
		# Keep only the flattened coordinates of each chunk.
		flat_chunks.append(np.reshape(chunk.xyz, (chunk.n_frames, 3 * chunk.n_atoms)))

	if not flat_chunks:
		raise ValueError(f'no frames could be read from {XTC!r}')

	# One concatenation at the end avoids re-copying the growing array per chunk.
	return np.concatenate(flat_chunks, axis = 0)

def xyz_from_xtc(XTC, TOP, chunk_size = 10000):
	"""Read a trajectory chunk by chunk and return the stacked ``xyz`` arrays.

	Unlike the flattening loaders in this notebook, the per-chunk ``xyz``
	arrays are joined along their first axis without any reshape.

	Parameters
	----------
	XTC : trajectory file handed to ``md.iterload``.
	TOP : topology handed to ``md.iterload`` as ``top``.
	chunk_size : number of frames requested per chunk (default 10000).

	Returns
	-------
	numpy.ndarray holding every chunk's ``xyz`` concatenated along axis 0.

	Raises
	------
	ValueError
		If ``md.iterload`` yields no chunks (previously an UnboundLocalError).
	"""
	xyz_chunks = []
	for chunk in md.iterload(XTC, chunk = chunk_size, top = TOP):
		print(chunk)  # progress feedback, printed once per chunk
		xyz_chunks.append(chunk.xyz)

	if not xyz_chunks:
		raise ValueError(f'no frames could be read from {XTC!r}')

	# One concatenation at the end avoids re-copying the growing array per chunk.
	return np.concatenate(xyz_chunks, axis = 0)

def traj_from_dcd(DCD, TOP, chunk_size = 10000):
	"""Read a trajectory chunk by chunk and return it as one flattened 2-D array.

	Parameters
	----------
	DCD : trajectory file handed to ``md.iterload``.
	TOP : topology handed to ``md.iterload`` as ``top``.
	chunk_size : number of frames requested per chunk (default 10000).

	Returns
	-------
	numpy.ndarray of shape ``(n_frames, 3 * n_atoms)``; every row is one
	frame's ``xyz`` block flattened in C order.

	Raises
	------
	ValueError
		If ``md.iterload`` yields no chunks (previously an UnboundLocalError).
	"""
	flat_chunks = []
	for chunk in md.iterload(DCD, chunk = chunk_size, top = TOP):
		print(chunk)  # progress feedback, printed once per chunk
		# Keep only the flattened coordinates of each chunk.
		flat_chunks.append(np.reshape(chunk.xyz, (chunk.n_frames, 3 * chunk.n_atoms)))

	if not flat_chunks:
		raise ValueError(f'no frames could be read from {DCD!r}')

	# One concatenation at the end avoids re-copying the growing array per chunk.
	return np.concatenate(flat_chunks, axis = 0)


def traj_from_pdb(PDB, TOP, chunk_size = 10000):
	"""Read a trajectory chunk by chunk and return it as one flattened 2-D array.

	Parameters
	----------
	PDB : structure/trajectory file handed to ``md.iterload``.
	TOP : topology handed to ``md.iterload`` as ``top``.
	chunk_size : number of frames requested per chunk (default 10000).

	Returns
	-------
	numpy.ndarray of shape ``(n_frames, 3 * n_atoms)``; every row is one
	frame's ``xyz`` block flattened in C order.

	Raises
	------
	ValueError
		If ``md.iterload`` yields no chunks (previously an UnboundLocalError).
	"""
	flat_chunks = []
	for chunk in md.iterload(PDB, chunk = chunk_size, top = TOP):
		print(chunk)  # progress feedback, printed once per chunk
		# Keep only the flattened coordinates of each chunk.
		flat_chunks.append(np.reshape(chunk.xyz, (chunk.n_frames, 3 * chunk.n_atoms)))

	if not flat_chunks:
		raise ValueError(f'no frames could be read from {PDB!r}')

	# One concatenation at the end avoids re-copying the growing array per chunk.
	return np.concatenate(flat_chunks, axis = 0)

def ca_traj_from_xtc(XTC, TOP, chunk_size = 10000):
	"""Read a trajectory chunk by chunk and return it as one flattened 2-D array.

	NOTE(review): despite the ``ca_`` prefix, no atom selection happens here;
	every atom in each chunk is kept. Presumably the inputs are expected to
	contain only C-alpha atoms already -- confirm with the callers.

	Parameters
	----------
	XTC : trajectory file handed to ``md.iterload``.
	TOP : topology handed to ``md.iterload`` as ``top``.
	chunk_size : number of frames requested per chunk (default 10000).

	Returns
	-------
	numpy.ndarray of shape ``(n_frames, 3 * n_atoms)``; every row is one
	frame's ``xyz`` block flattened in C order.

	Raises
	------
	ValueError
		If ``md.iterload`` yields no chunks (previously an UnboundLocalError).
	"""
	flat_chunks = []
	for chunk in md.iterload(XTC, chunk = chunk_size, top = TOP):
		print(chunk)  # progress feedback, printed once per chunk
		# Keep only the flattened coordinates of each chunk.
		flat_chunks.append(np.reshape(chunk.xyz, (chunk.n_frames, 3 * chunk.n_atoms)))

	if not flat_chunks:
		raise ValueError(f'no frames could be read from {XTC!r}')

	# One concatenation at the end avoids re-copying the growing array per chunk.
	return np.concatenate(flat_chunks, axis = 0)

In [4]:
# Global matplotlib styling applied to every figure drawn after this cell.
params = {
    # text sizes
    'legend.fontsize': 25,
    'figure.figsize': (8, 6),
    'axes.labelsize': 30,
    'axes.titlesize': 25,
    'xtick.labelsize': 25,
    'ytick.labelsize': 25,
    # frame thickness
    'axes.linewidth': 4,
    # tick line widths (major, then minor)
    'xtick.major.width': 3,
    'ytick.major.width': 3,
    'xtick.minor.width': 1,
    'ytick.minor.width': 1,
    # tick lengths (major, then minor)
    'xtick.major.size': 5,
    'ytick.major.size': 5,
    'xtick.minor.size': 3,
    'ytick.minor.size': 3,
    # draw ticks pointing into the axes
    'xtick.direction': 'in',
    'ytick.direction': 'in',
}
plt.rcParams.update(params)

In [5]:
# Five independent, initially empty accumulators (none is filled in the cells shown here).
model_list, mean_list, xbins_list, ybins_list, fes_list = ([] for _ in range(5))

In [6]:
# Coordinate array (`.xyz`) of the 3ttp ensemble stored under the `8msa` directory.
# Plain string literals: the previous f-strings had no placeholders.
tmp = md.load_xtc('../3ttp_8msa/aligned.xtc', top = '../3ttp_CA.pdb').xyz

In [7]:
seqs = ['3ttp', '2pc0', '1q9p']
color_list = ['y', 'r', 'b']
marker_list = ['*', 's', 'o']
msa_list = [8, 16, 32, 64, 128, 256, 'full', 'af2-cluster', 'DiG', 'bioemu', 'MD']
# Per-sequence directory suffix of the AF-Cluster runs, index-aligned with `seqs`.
suffix = ['_0eafb', '_5d034', '_87f3d']
ttp_fes_traj_list = []
pc0_fes_traj_list = []
q9p_fes_traj_list = []

# Entries of `msa_list` that are AF2 runs; these are stored under traj_dict[seq]['AF2'][msa].
af2_msa_depths = [8, 16, 32, 64, 128, 256, 'full']


def _load_superposed_features(xtc_path, top_path, ref):
    """Load an ensemble, superpose it on `ref`, and return flattened coordinates.

    Parameters
    ----------
    xtc_path : trajectory file passed to ``md.load_xtc``.
    top_path : topology file passed to ``md.load_xtc`` as ``top``.
    ref : reference structure passed to ``Trajectory.superpose``.

    Returns
    -------
    numpy.ndarray of shape ``(n_atoms * 3, n_frames)``: each frame's ``xyz``
    is flattened into one vector, then the matrix is transposed so that
    frames run along the columns.
    """
    ensemble = md.load_xtc(xtc_path, top = top_path)
    ensemble.superpose(ref)
    xyz = ensemble.xyz
    return xyz.reshape(xyz.shape[0], xyz.shape[1] * xyz.shape[2]).T


# traj_dict[seq] maps 'AF2' -> {msa: array} and 'AFc', 'DiG', 'BioEMU',
# 'MDsamples' -> array; every array comes from _load_superposed_features.
traj_dict = {}
for i, seq in enumerate(seqs):
    ref = md.load_pdb(f'../sims/openmm/{seq}/CA.pdb')
    top_path = f'../{seq}_CA.pdb'

    # Non-AF2 entries of msa_list: (key used in traj_dict[seq], trajectory path).
    single_sources = {
        'af2-cluster': ('AFc', f'../AF_Cluster-main/{seq}{suffix[i]}/aligned.xtc'),
        'DiG': ('DiG', f'../microsoft-Graphormer-5e62370/distributional_graphormer/protein/{seq}_output/aligned.xtc'),
        'bioemu': ('BioEMU', f'../bioemu/{seq}/aligned.xtc'),
        'MD': ('MDsamples', f'../{seq}_MD/aligned.xtc'),
    }

    # Create the AF2 sub-dict up front so it does not depend on 8 being the first msa entry.
    traj_dict[seq] = {'AF2': {}}
    for msa in msa_list:
        if msa in af2_msa_depths:
            # import AF2 samples
            traj_dict[seq]['AF2'][msa] = _load_superposed_features(
                f'../{seq}_{msa}msa/aligned.xtc', top_path, ref)
        elif msa in single_sources:
            key, xtc_path = single_sources[msa]
            traj_dict[seq][key] = _load_superposed_features(xtc_path, top_path, ref)
        # Any other entry is skipped, exactly as before.

In [8]:
import pickle
# Serialise the nested ensemble dictionary next to the notebook; 'wb' because pickle writes bytes.
with open('./generative_ML_ensembles_eric.pkl', 'wb') as pkl_file:
    pickle.dump(traj_dict, pkl_file)