In [1]:
import copy
import os
import subprocess
import time

import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
import numpy.random as random

import LE4PD3N

In [2]:
def traj_from_xtc(XTC, TOP, chunk_size = 5000, verbose = True):
	"""Read a trajectory chunk by chunk and return it as one flat coordinate matrix.

	Every chunk yielded by ``md.iterload`` has its ``xyz`` array reshaped to
	``(n_frames, 3 * n_atoms)``; the chunks are stacked along the frame axis and
	the stacked matrix is transposed before being returned.

	Parameters
	----------
	XTC : str
		Trajectory file handed to ``md.iterload``.
	TOP : str
		Topology file handed to ``md.iterload`` as ``top``.
	chunk_size : int, optional
		Number of frames requested per chunk (``chunk`` argument of ``md.iterload``).
	verbose : bool, optional
		When True (default) every chunk is printed once while it is read and
		once while it is flattened. Pass False to silence the output.

	Returns
	-------
	numpy.ndarray
		Array of shape ``(3 * n_atoms, total_frames)``.

	Raises
	------
	ValueError
		If ``md.iterload`` yields no chunk at all.
	"""
	# Local imports keep the function usable when copied out of the notebook.
	import mdtraj as md
	import numpy as np

	chunks = []
	for chunk in md.iterload(XTC, chunk = chunk_size, top = TOP):
		if verbose:
			print(chunk)
		chunks.append(chunk)

	if not chunks:
		raise ValueError(f"no frames could be read from {XTC!r}")

	flat_blocks = []
	for chunk in chunks:
		if verbose:
			print(chunk)
		flat_blocks.append(np.reshape(chunk.xyz, (chunk.n_frames, 3 * chunk.n_atoms)))

	# Concatenate once at the end: stacking inside the loop would re-copy the
	# accumulated array for every additional chunk.
	return np.concatenate(flat_blocks, axis = 0).T

In [3]:
def tau_convert(eigvals, avfr, T = 300, bar = None, HA = False):
    """Convert mode eigenvalues into relaxation times, optionally barrier-scaled.

    The unscaled time is ``tau = 1e-6 * eigvals * (avfr * 1e-9) / (1.38e-23 * T)``.
    When ``bar`` is supplied, each entry is additionally multiplied by
    ``exp(bar)``.

    Parameters
    ----------
    eigvals : array_like
        Mode eigenvalues.
    avfr : float
        Average friction coefficient. It is multiplied by 1e-9 before use.
        NOTE(review): the input/output units are not visible in this file -- confirm.
    T : float, optional
        Temperature entering ``kT = 1.38e-23 * T`` (kT in J, so T in K).
    bar : array_like or None, optional
        Barrier exponent(s), broadcast against ``eigvals``. ``None`` (default)
        means no scaling, so the plain ``tau`` is returned.
    HA : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    numpy.ndarray
        ``tau`` when ``bar`` is None, otherwise ``tau * exp(bar)``.
    """
    eigvals = np.asarray(eigvals)
    avfr = avfr*1e-9
    #kT in J
    kT = 1.38e-23*T
    tau = 1e-6*eigvals*avfr /(kT)
    # `is None` rather than `== None`: the latter is evaluated elementwise for
    # arrays and cannot be used as an `if` condition.
    if bar is None:
        return tau
    return tau*np.exp(bar)

In [4]:
# Parameters taken from the universality paper.

# Scaling-law values (within error).
a, gamma = 2., 1.

# Energy per unit length, in kT / A.
# NOTE(review): the denominator looks like kB * T * N_A at 300 K converted from
# kJ/mol to kcal/mol, and 6.5 / 10. like a per-nm -> per-A conversion -- confirm.
eps = (6.5 / 10.) / (
    1.38 * 0.3 * 6.022 *(1 / 4.184)
)

# Everything below is the template for building the pickle from the numpy files created by LE4PD-XYZ.

In [5]:
# Reference TCF table; its first column is used as the time axis below.
# NOTE(review): the variable name says "1ebw" but the file belongs to 3ttp -- confirm
# which system is meant before reusing this name.
TCF_final_1ebw = np.load('3ttp_all_tcfs.npy')
t_axis = TCF_final_1ebw[:, 0]

# Container filled by the following cells, keyed by system name.
HA_tcf_calc = dict()

In [6]:
# HA: assemble the LE4PD time-correlation functions (TCFs) of every system into HA_tcf_calc.
#
# For each system and MSA depth this cell
#   1. loads the trajectory plus the matrices / eigenvalue files stored under
#      'LE4PD_no_deepfakes/',
#   2. converts the eigenvalues into relaxation times (plain and barrier-scaled),
#   3. builds per-site TCFs and their complements (1 - TCF),
#   4. stores everything, together with the reference TCFs loaded from the .npy
#      files listed in md_files, under HA_tcf_calc[system].

msas=[8]
# NOTE(review): the recorded run loaded 274 / 303 / 290 frames, so this value is
# nominal rather than the number of frames actually used -- confirm before relying on it.
num_samples_per_msa=320
path_prefix='../'

# Number of trailing modes left out of the TCF sums.
# NOTE(review): presumably the six rigid-body (translation/rotation) modes -- confirm.
n_skipped_modes = 6

# Directory the cell is started from. All relative paths below are anchored on it
# and it is restored after every system, instead of relying on a machine-specific
# absolute path.
BD = os.getcwd()

#each files contain tcfs and the time axis => shape = (frames,N_res+1) and the time axis is in tcf[:,0]
#hence the cdf is offset by 1 index
# NOTE(review): the same 3ttp reference files are stored for every system,
# including 1q9p whose trajectory has a different number of residues -- confirm
# this is intended.
md_files = {'1000ns':'3ttp_all_tcfs.npy',
            '100ns':'./100ns_3ttp_all_tcfs.npy',
            '10ns':'./10ns_3ttp_all_tcfs.npy'}

for system_name in ['3ttp', '2pc0', '1q9p']:
    SYS=system_name

    # Per-system accumulators, one entry per MSA depth.
    eigvals_list = []
    eigvecs_list = []
    C_eigvals_list = []
    tau_list = []
    tau_scaled_list = []
    corr_func_list = []
    corr_func_scaled_list = []
    cdf_list = []
    cdf_scaled_list = []
    # The following containers are not filled in this cell; they are kept so that
    # code relying on these names still finds them.
    covar_list = []
    dtraj_list = []
    Hs=[]
    covars=[]
    AIHIs=[]

    HA_tcf_calc[SYS]={}   #can iterate over system names

    # Private copy of the time axis for this system.
    t = copy.deepcopy(t_axis) # known time axis values with 5ps gap

    try:
        for i,msa in enumerate(msas):
            start_time_all = time.time()

            path_structures=path_prefix + f'{system_name}_{msa}msa/'
            # Anchored on BD so the change of directory does not depend on where
            # a previous iteration left the process.
            os.chdir(os.path.join(BD, path_structures))
            print(os.getcwd())
            XTC=path_structures + 'mode0_2_LE4PD_no_deepfakes.xtc'
            TOP=f"../{system_name}_CA.pdb"
            T=300
            NFRS = 320  # not used in this cell

            traj = traj_from_xtc(XTC, TOP)
            # NOTE(review): traj is (3 * n_atoms, n_frames), so this subtracts a
            # per-frame mean over all coordinates. Only dtraj.shape is used below;
            # confirm the axis before using dtraj itself.
            dtraj = traj - traj.mean(0)[None,:]
            QINV = np.load(path_structures + 'LE4PD_no_deepfakes/QINVmatrix.npy')
            Q = np.load(path_structures + 'LE4PD_no_deepfakes/Qmatrix.npy')
            eigvecs = np.copy(Q)
            eigvals = np.loadtxt(path_structures + 'LE4PD_no_deepfakes/lambda_eig')
            mu_eig = np.loadtxt(path_structures + 'LE4PD_no_deepfakes/mu_eig')
            eigvals_list.append(eigvals)
            eigvecs_list.append(Q)
            C_eigvals_list.append(mu_eig)

            fratio, sigma, fric, avfr = LE4PD3N.fric_calc(TOP, SYS, dtraj.shape[0] // 3, dtraj.shape[1], eigvals, T, path_to_resarea=path_structures)
            tau = tau_convert(eigvals, avfr, T = T)
            # np.sqrt returns NaN (with a RuntimeWarning) for negative mu_eig entries, if any.
            tau_scaled = tau * np.exp((eps * np.sqrt(mu_eig)))
            tau_list.append(tau)
            tau_scaled_list.append(tau_scaled)

            # weights[n, m] = sum of the squared entries of mode m over rows
            # 3n .. 3n+2 of eigvecs. Computed without Python loops, which also
            # avoids loop variables named `a` / `_` that would overwrite the
            # notebook-level constant `a` and IPython's `_`.
            n_sites = eigvecs.shape[0] // 3
            n_modes = eigvecs.shape[1]
            weights = (eigvecs ** 2).reshape(n_sites, 3, n_modes).sum(axis=1).astype(np.float64)

            kept = slice(None, -n_skipped_modes)
            corr_func = np.matmul(weights[:,kept], (mu_eig[kept] * (np.exp(-t[:,None] / tau[None,kept]))).T).T
            corr_func_scaled = np.matmul(weights[:,kept], (mu_eig[kept] * (np.exp(-t[:,None] / tau_scaled[None,kept]))).T).T
            # Normalise every site's curve by its value at the first time point.
            corr_func = corr_func / corr_func[0,:]
            corr_func_scaled = corr_func_scaled / corr_func_scaled[0,:]

            corr_func_list.append(corr_func)
            corr_func_scaled_list.append(corr_func_scaled)
            cdf_list.append(1. - corr_func)
            cdf_scaled_list.append(1. - corr_func_scaled)

            stop_time_all = time.time()

            print(f"Time taken for {msa}MSA is {(stop_time_all-start_time_all)/60:2.4}mins")
    finally:
        # Always go back to the starting directory, even if a step above fails.
        os.chdir(BD)

    # Independent copies, so the stored results do not alias the working lists.
    corr_func_list_ha=copy.deepcopy(corr_func_list)
    corr_func_scaled_list_ha=copy.deepcopy(corr_func_scaled_list)
    cdf_list_ha=copy.deepcopy(cdf_list)
    cdf_scaled_list_ha=copy.deepcopy(cdf_scaled_list)

    HA_tcf_calc[SYS]['AF2']={
        # two different barrier treatments
        'non_scaled': {'tcf': corr_func_list_ha, 'cdf': cdf_list_ha},
        'scaled': {'tcf': corr_func_scaled_list_ha, 'cdf': cdf_scaled_list_ha},
        # Qmatrix.npy; per the original note it is actually (CH^-1) = (HC^-1)^-1 = (HA)^-1
        'CinvH': {'eigvec': eigvecs_list,
                  'eigval': eigvals_list},   #lambda eig vals coming from the HC^-1
        'C': {'eigval': C_eigvals_list},     #mu eig values from Covariance matrix
        'msas': msas,
        'n_samples': num_samples_per_msa,
        't_axis': t,
    }

    # Reference TCFs, loaded relative to BD (the working directory at this point).
    HA_tcf_calc[SYS]['test'] = {}
    for timescale, md_file in md_files.items():
        md_tcf = np.load(md_file)
        HA_tcf_calc[SYS]['test'][timescale] = {
            'tcf': md_tcf,
            'cdf': 1 - md_tcf[:,1:],   # column 0 is the time axis
        }

    print(HA_tcf_calc.keys(),HA_tcf_calc[SYS].keys(),HA_tcf_calc[SYS]['AF2'].keys())
        

/media/ebeyerle/seagate/af2-dynamics/3ttp_8msa
<mdtraj.Trajectory with 274 frames, 99 atoms, 99 residues, without unitcells>
<mdtraj.Trajectory with 274 frames, 99 atoms, 99 residues, without unitcells>
3ttp 99 274
fratio:  0.20351838934321723
Temperature (K):  300
Internal viscosity factor:  2.71828
Viscosity (Pa s):  0.001
fd20 0.0


  tau_scaled = tau * np.exp((eps * np.sqrt(mu_eig)))


Time taken for 8MSA is 0.02215mins
dict_keys(['3ttp']) dict_keys(['AF2', 'test']) dict_keys(['non_scaled', 'scaled', 'CinvH', 'C', 'msas', 'n_samples', 't_axis'])
/media/ebeyerle/seagate/af2-dynamics/2pc0_8msa
<mdtraj.Trajectory with 303 frames, 99 atoms, 99 residues, without unitcells>
<mdtraj.Trajectory with 303 frames, 99 atoms, 99 residues, without unitcells>
2pc0 99 303
fratio:  0.20662293181107913
Temperature (K):  300
Internal viscosity factor:  2.71828
Viscosity (Pa s):  0.001
fd20 0.0
Time taken for 8MSA is 0.02308mins
dict_keys(['3ttp', '2pc0']) dict_keys(['AF2', 'test']) dict_keys(['non_scaled', 'scaled', 'CinvH', 'C', 'msas', 'n_samples', 't_axis'])
/media/ebeyerle/seagate/af2-dynamics/1q9p_8msa
<mdtraj.Trajectory with 290 frames, 95 atoms, 95 residues, without unitcells>
<mdtraj.Trajectory with 290 frames, 95 atoms, 95 residues, without unitcells>
1q9p 95 290
fratio:  0.21000896509931355
Temperature (K):  300
Internal viscosity factor:  2.71828
Viscosity (Pa s):  0.001
fd2

Time taken for 8MSA is 0.008938mins
dict_keys(['3ttp', '2pc0', '1q9p']) dict_keys(['AF2', 'test']) dict_keys(['non_scaled', 'scaled', 'CinvH', 'C', 'msas', 'n_samples', 't_axis'])
/media/ebeyerle/seagate/af2-dynamics/2pc0_8msa
<mdtraj.Trajectory with 303 frames, 99 atoms, 99 residues, without unitcells>
<mdtraj.Trajectory with 303 frames, 99 atoms, 99 residues, without unitcells>
2pc0 99 303
fratio:  0.20662293181107913
Temperature (K):  300
Internal viscosity factor:  2.71828
Viscosity (Pa s):  0.001
fd20 0.0
Time taken for 8MSA is 0.0117mins
dict_keys(['3ttp', '2pc0', '1q9p']) dict_keys(['AF2', 'test']) dict_keys(['non_scaled', 'scaled', 'CinvH', 'C', 'msas', 'n_samples', 't_axis'])
/media/ebeyerle/seagate/af2-dynamics/1q9p_8msa
<mdtraj.Trajectory with 290 frames, 95 atoms, 95 residues, without unitcells>
<mdtraj.Trajectory with 290 frames, 95 atoms, 95 residues, without unitcells>
1q9p 95 290
fratio:  0.21000896509931355
Temperature (K):  300
Internal viscosity factor:  2.71828
Visc

In [7]:
import pickle

# Serialise the complete HA_tcf_calc dictionary into the current working directory.
with open('./HA_tcf_calc_no_deepfakes.pkl', mode='wb') as pkl_out:
    pickle.dump(HA_tcf_calc, pkl_out)