In [None]:
#Place this file into a folder containing all trajectories and topology for a given simulation

#First, load the trajectory and generate a short (~300 frame default) movie for visual inspection
#Movie should be short since they take quite a bit of storage and loading the mutable trajectory needed requires significant RAM
import pytraj as pt

#---- User settings ----
runName = "NAME" #Set name of PDB movie and later averaged structure
movielen = 300 #Change for different length movie. Note movies take significant amounts of storage. Longer movie = more storage and generation time.
#Movielen will also be the length of your RMSD preview, so it is important that it has the time resolution needed to determine whatever you want to see
proteinlen = 294 #How many residues are in the protein of interest?
trajectories = ['step4.1_equilibration.nc', 'step5_1.nc', 'step5_2.nc', 'step5_3.nc', 'step5_4.nc', 'step5_5.nc', 'step5_6.nc', 'step5_7.nc', 'step5_8.nc']
topology = 'step3_input.parm7'

if movielen < 1:
    raise ValueError("movielen must be at least 1, got "+str(movielen))

#Residue mask selecting the protein, built once and reused by every call in this cell
proteinmask = ':1-'+str(proteinlen)

#Load entire trajectory to get length, iterload saves memory
fulltraj = pt.iterload(trajectories, top=topology)
lentraj = len(fulltraj)
del fulltraj #clear fulltraj
print("Length of trajectory: "+str(lentraj)+" frames")
if lentraj == 0:
    raise ValueError("The trajectory contains no frames; check the 'trajectories' and 'topology' settings")

#Stride between sampled frames so that the movie has roughly movielen frames.
#Clamped to 1: a trajectory shorter than movielen would otherwise give a stride of 0,
#which range() rejects. In that case every frame is used.
offset = max(1, lentraj // movielen)
print("Offset: "+str(offset))

#Generate movie centered around the protein
sampledtraj = pt.load(trajectories, top=topology, frame_indices=range(0,lentraj,offset))
sampledtraj = pt.autoimage(sampledtraj, mask=proteinmask) #center around protein
refFrame = sampledtraj[0]
#RMSD of every sampled frame against the first one; the call is also relied on to keep a constant protein orientation
data = pt.rmsd(sampledtraj, mask=proteinmask, ref=refFrame)
lensamp = len(sampledtraj)
#pt.io.write_traj(runName+"_movie.pdb",traj=sampledtraj, options='model', overwrite=True) #Write ensemble PDB movie, comment out if running again to reduce runtime
#print("Movie generated")
print("Length of sample trajectory: " + str(lensamp) + " frames")
radgyr = pt.radgyr(sampledtraj, mask=proteinmask+"@CA") #radius of gyration over the protein's CA atoms
del sampledtraj #clear to save memory

#Next, plot the RMSD and radius of gyration in order to identify the beginning of equilibrium
from matplotlib import pyplot as plt
import numpy as np

#figure.autolayout is read when a figure is created, so it has to be set BEFORE the
#figures below are made; setting it afterwards leaves the first figure without it.
plt.rcParams["figure.autolayout"] = True

#Note that these frame numbers correspond to frames WITH offset
#Do not worry about converting offset frame number to actual frames, this program will keep track for you
rmsdData = list(data)
fig, ax = plt.subplots(figsize=(12,9))
ax.set_title('Sampled RMSD', fontsize=24)
ax.set_xlabel('Offset frames', fontsize=16)
ax.set_ylabel('RMSD (Å)', fontsize=16)
ax.plot(rmsdData);

radgyrData = list(radgyr)
fig2, ax2 = plt.subplots(figsize=(12,9))
ax2.set_title('Sampled Radius of Gyration', fontsize=24)
ax2.set_xlabel('Offset frames', fontsize=16)
ax2.set_ylabel('Radgyr (Å)', fontsize=16)
ax2.plot(radgyrData);


Get some popcorn and watch the movie!

In [None]:
#Next, select a range of frames where the equilibrated state of interest is, and generate an average structure based on that

#How many frames to average within the range, 100 is default
numavg=100

#Set the following values according to observations of the movie and RMSD
#Both are given in OFFSET (movie/plot) frame numbers; they are converted to real frame numbers below
eqstart = NUMBER #fill with observation
eqend = lensamp #default is end of entire movie "lensamp", which gives within 1 offset of the length of the entire movie, fill with observation

if numavg < 1:
    raise ValueError("numavg must be at least 1, got "+str(numavg))

#converting to whole movie frame numbers
eqstart = int(eqstart*offset)
eqend = int(eqend*offset)
if eqend>lentraj:
    eqend = eqend-offset #correct for inclusion of final frame that was less than 1 offset
eqend = min(eqend, lentraj) #never ask for frames past the end of the trajectory
if not 0 <= eqstart < eqend:
    raise ValueError("Empty or invalid equilibrated range: frames "+str(eqstart)+" to "+str(eqend)+" (trajectory has "+str(lentraj)+" frames)")

#Stride inside the selected range. Clamped to 1: a range shorter than numavg frames
#would otherwise give a stride of 0, which range() rejects.
avgoffset = max(1, (eqend-eqstart)//numavg)
print("Offset: "+str(avgoffset))

#Load segment of trajectory and generate average frame
#Only the frames that will be averaged are loaded, plus frame 0 of the full trajectory,
#which serves as the alignment reference. Frame 0 is dropped again before averaging
#unless it is itself part of the requested range.
avgframes = list(range(eqstart, eqend, avgoffset))
needsref = avgframes[0] != 0
loadframes = [0]+avgframes if needsref else avgframes
proteinmask = ':1-'+str(proteinlen)
newtraj = pt.load(trajectories, top=topology, frame_indices=loadframes)
newtraj = pt.autoimage(newtraj, mask=proteinmask) #center around protein
refFrame = newtraj[0]
data = pt.rmsd(newtraj, mask=proteinmask, ref=refFrame) #keep constant protein orientation
selection = newtraj[1:] if needsref else newtraj
file=pt.get_average_frame(selection, mask=proteinmask, dtype='traj') #get average of selected frames
del newtraj #save memory
pt.io.write_traj( runName+".pdb",traj=file, options='chainid A',overwrite=True) #write to output
print("Average frame generated from "+str(len(selection))+" frames")
del file, selection #save memory

#%reset -f #uncomment to reset all variables
