In [1]:
##EXAMPLE 1 from powerpoint
%matplotlib inline
%config InlineBackend.figure_format = 'retina'  # high resolution
import os #to get paths
import matplotlib.pyplot as plt #import matplotlib #draw graphs
import numpy as np #data processing
import pandas as pd #data processing

from parmed.amber import AmberParm, AmberMask

import pytraj as pt #trajectory analysis
import shutil #utilities like copy file

# QM region for the QM/MM equilibration (substituted into the &qmmm namelist):
# all of residue 248 plus the listed atoms of residue 177.
qmmask = ":248|:177@CD,OE1,OE2,CG,HG1,HG2"
# Mask for the cpptraj `rms` commands: everything except residues WAT, POT, CLA
# and 248. It must be the bare mask expression because it is pasted directly
# into "rms Full_Prot {rms_mask} first mass"; the previous value embedded the
# text "rms_mask = '...'" and produced an invalid cpptraj command.
rms_mask = "!:WAT,POT,CLA,248"
# QM method written to qm_theory in the &qmmm namelist.
QMMM_method = 'DFTB3'

# Working folders, relative to the notebook's directory.
FOLDER_PA = "0_Parent_example"       # parent structures, topologies, restraint files
FOLDER_EQ = "1_Equilibrate_example"  # minimisation / equilibration inputs and outputs
FOLDER_ST = "2_String_example"       # not used by the cells shown here

# Number of independent replicates launched per structure.
N_REPLICATES = 16
N_REPLICATES_ST = 32                 # not used by the cells shown here

# Uncomment on the first run to create the working folders.
# os.mkdir(FOLDER_EQ)
# os.mkdir(FOLDER_ST)
# os.mkdir(FOLDER_PA)

# Base names of the systems; each one has a {name}_parent.pdb in FOLDER_PA.
STRUCTURES=["1A0_6NB_GS", "1A0_6NB_TS", "1A5_6NB_GS", "1A5_6NB_TS"]

In [None]:
"dflkkds Ive made changes here"

In [None]:
# generating input files - parametrising system with AMBER, only needs to be done once

for STRUCTURE in STRUCTURES:  
    BOX = "10"
    with open("tleap.in", "w") as f:
        f.write(f"""source leaprc.protein.ff19SB 
source leaprc.gaff
source leaprc.water.tip3p
loadamberprep   {FOLDER_PA}/6NBI.prepi
loadamberparams {FOLDER_PA}/6NBI.frcmod
mol = loadpdb {FOLDER_PA}/{STRUCTURE}_parent.pdb
#addIons2 mol Cl- 0
solvateOct mol TIP3PBOX {BOX}
addIonsRand mol Cl- 23 K+ 22 5
saveamberparm mol {FOLDER_PA}/{STRUCTURE}.parm7 {FOLDER_PA}/{STRUCTURE}.rst7
quit
""")
    
    os.system("tleap -s -f tleap.in #> tleap.out")
    
    #'!' works in the same way as os.system, it executes the line as a shell command
    !ambpdb  -p "$FOLDER_PA"/"$STRUCTURE".parm7 \
             < "$FOLDER_PA"/"$STRUCTURE".rst7 \
             > "$FOLDER_PA"/"$STRUCTURE".pdb


In [None]:
## EXAMPLE 2 from presentation - writes amber input for minimisation only

STRUCTURES=["1A0_6NB_GS", "1A0_6NB_TS", "1A5_6NB_GS", "1A5_6NB_TS"]
FOLDER_EQ="1_Equilibrate_example"

# The minimisation input is identical for every structure, so build it once.
# (No placeholders are substituted; the leading spaces are part of the text.)
min_params = """initial minimization
    &cntrl 
        imin           = 1, 
        ntmin          = 1,
        ncyc           = 10000,
        maxcyc         = 100000, 
        ntpr           = 100, 
        ntb            = 1,         !constant volume   
        nmropt         = 1,         !Restraints 
    &end 
    &wt TYPE='END' / 
    """

for structure in STRUCTURES:

    # makedirs(exist_ok=True) creates FOLDER_EQ when it is missing and lets the
    # cell be re-run; os.mkdir raised in both of those situations.
    os.makedirs(f"{FOLDER_EQ}/{structure}", exist_ok=True)

    with open(f"{FOLDER_EQ}/{structure}/{structure}_min.in", "w") as f:
        f.write(min_params)

In [None]:
#writing AMBER .in files for minimisation, heating and equilibration
# For every structure this writes five sander/pmemd inputs into
# {FOLDER_EQ}/{structure}/: _min, _heat, _eq_NVT, _eq_NPT and _eq_QM.

#change this to False for unrestrained simulations
restraints = True

for structure in STRUCTURES:

    # Restraint file redirections appended after the &wt END card; a single
    # blank is written instead when restraints are switched off.
    if restraints:
        DISANG_fpath = f"DISANG={FOLDER_PA}/{structure}_DISANG.rst"
        min_DISANG_fpath = f"DISANG={FOLDER_PA}/{structure}_DISANG_min.rst"
    else:
        DISANG_fpath = " "
        min_DISANG_fpath = " "

    # Also creates FOLDER_EQ itself when missing and is a no-op on re-runs.
    os.makedirs(f"{FOLDER_EQ}/{structure}", exist_ok=True)

    filename = f"{FOLDER_EQ}/{structure}/{structure}"
    print(filename)
    
    #Make .in files    
    with open(f"{filename}_min.in", "w") as f:
        f.write(f"""initial minimization
&cntrl 
    imin           = 1, 
    ntmin          = 1,
    ncyc           = 10000,
    maxcyc         = 100000, 
    ntpr           = 100, 
    ntb            = 1,         !constant volume   
    nmropt         = 1,       !Restraints 
&end 
&wt TYPE='END' / 
{min_DISANG_fpath}
/ 
""")       

        
    with open(f"{filename}_heat.in", "w") as f:
        f.write(f"""0.05 ns (50000*0.001ps) Explicit solvent molecular dynamics heating            
&cntrl                                                                     
    imin           = 0,       !do MD                                                       
    irest = 0, ntx = 1,       !no restart                                          
    ntpr           = 5000,    !save energy ever n steps                                 
    ntwx           = 5000,    !save coordinates ever n steps                            
    ntwr           = 5000,    !save restrt file ever n steps                            
    nstlim         = 50000,   !number steps                                       
    dt             = 0.001,   !stepsize in ps                                          
    ioutfm         = 1,       !Binary output faster                                    
    iwrap          = 1,       !Put Atoms back into Box                                  
    ntt            = 2        !Andersen thermostate                              
    tempi          = 0.0,     !initial temp                                              
    temp0          = 300,     !target temp                                              
    ntc = 1, ntf   = 1,       !no shake                                                
    ntb            = 1,       !Constant Volumen 
    cut            = 8,       !e. static cutoff                                             
    nmropt         = 1,       !Restraints         
&end                                                                       
&wt                                                                        
    TYPE='TEMP0', ISTEP1=10000, ISTEP2=50000,                                  
    VALUE1=10.0, VALUE2=300,                                               
&end                                                       
&wt TYPE='END' /       
{DISANG_fpath}      
""")    

    with open(f"{filename}_eq_NVT.in", "w") as f:
        f.write(f"""10.0 ns (10000000*0.001ps) Explicit solvent molecular dynamics equilibration 100000*0.001=0.1ns/saved frame
&cntrl                         
    imin           = 0,       !do MD                                                       
    irest = 1, ntx = 5,       !restart           
    ntpr           = 100000,  !save energy ever n steps 
    ntwx           = 100000,  !save coordinates ever n steps 
    ntwr           = 1000,    !save restrt file ever n steps 
    nstlim         = 10000000,!number steps 
    dt             = 0.001,   !stepsize in ps 
    ioutfm         = 1,       !Binary output faster 
    iwrap          = 1,       !Put Atoms back into Box                   
    ntt            = 2        !Andersen thermostate     
    temp0          = 300,     !target temp                          
    ntc = 1, ntf   = 1,       !no shake          
    cut            = 8,       !e. static cutoff 
    ntb            = 1,       !constant volume
    nmropt         = 1,       !Restraints 
&end 
&wt TYPE='END' / 
{DISANG_fpath}      
""")          
        
    with open(f"{filename}_eq_NPT.in", "w") as f:
        f.write(f"""10.0 ns (100000000*0.001ps) Explicit solvent molecular dynamics equilibration 100000*0.001=0.1ns/saved frame
&cntrl                         
    imin           = 0,       !do MD                                                       
    irest = 1, ntx = 5,       !restart           
    ntpr           = 100000,  !save energy ever n steps 
    ntwx           = 100000,  !save coordinates ever n steps 
    ntwr           = 1000,    !save restrt file ever n steps 
    nstlim         = 10000000,!number steps 
    dt             = 0.001,   !stepsize in ps 
    ioutfm         = 1,       !Binary output faster 
    iwrap          = 1,       !Put Atoms back into Box                   
    ntt            = 2        !Andersen thermostate                       
    tempi          = 300,     !initial temp 
    temp0          = 300,     !target temp 
    ntp            = 1,       !pressure                                     
    ntc = 1, ntf   = 1,       !no shake          
    cut            = 8,       !e. static cutoff 
    ntb            = 2,       !constant pressure
    nmropt         = 1,       !Restraints   
&end 
&wt TYPE='END' / 
{DISANG_fpath}      
""")          

    # The QM/MM input additionally substitutes the notebook-level qmmask and
    # QMMM_method settings into the &qmmm namelist.
    with open(f"{filename}_eq_QM.in", "w") as f:
        f.write(f"""20 ps (20000*0.001 ps) QMMM equilibration  1000*0.001 = 1 ps/saved frame
&cntrl 
    imin           = 0,       !do MD                                                       
    irest = 0, ntx = 1,       !no restart           
    ntpr           = 100,     !save energy ever n steps 
    ntwx           = 100,     !save coordinates ever n steps 
    ntwr           = 100,     !save restrt file ever n steps 
    nstlim         = 20000,   !number steps 
    dt             = 0.001,   !stepsize in ps 
    ioutfm         = 1,       !Binary output faster 
    iwrap          = 1,       !Put Atoms back into Box                   
    ntt            = 2        !Andersen thermostate                       
    tempi          = 300,     !initial temp 
    temp0          = 300,     !target temp 
    ntp            = 1,       !pressure                                     
    ntc = 1, ntf   = 1,       !no shake          
    cut            = 8,       !e. static cutoff 
    ntb            = 2,       !constant pressure
    nmropt         = 1,       !Restraints  
    ifqnt          = 1,       !Switch on QM/MM coupled potential
&end 
&qmmm 
    qmmask = '{qmmask}', 
    qmcharge = -1, 
    writepdb = 1, !Saves QM region as PDB 
    spin = 1, 
    qm_theory = '{QMMM_method}',
    qmshake = 0,
    qmcut = 8.0,
    itrmax = 1000000,
&end 
&wt TYPE='END' /
{DISANG_fpath}      
""")                  


In [None]:
#EXAMPLE 3 part 1 from powerpoint - defines function to submit minimisation
def runjob_min(structure, experiment_folder, parent_folder):
    """Write a Slurm script for the minimisation of one structure and submit it.

    The script is written to ``MD.sh`` in the current working directory
    (overwriting any previous one) and then passed to ``sbatch``.

    Parameters
    ----------
    structure : str
        Base name of the system; used for the job name and all file names.
    experiment_folder : str
        Folder holding ``{structure}/{structure}_min.in``; outputs are written
        next to it with the ``_min`` suffix.
    parent_folder : str
        Folder holding ``{structure}.parm7`` and ``{structure}.rst7``.

    Returns
    -------
    None
    """
    job_name = f"M{structure}"
    working_dir = os.getcwd()  # the job changes into this directory before running

    script = f"""#!/bin/bash
#SBATCH --partition test  ##Using test partition b/c v. slow queue for compute  
#SBATCH --job-name={job_name}
#SBATCH --output   {job_name}
#SBATCH --time=1:00:00       # Walltime
#SBATCH --nodes=1            # number of tasks
#SBATCH --ntasks-per-node=12 # number of tasks per node
#SBATCH --cpus-per-task=1
#SBATCH --mem=10G
#SBATCH --account=ptch000721
 
# 1. Load module(s)
module load apps/amber/20
module add lang/intel-parallel-studio-xe/2020

# 2. Set directories
cd {working_dir} 

# 3. Define variables
input_folder={experiment_folder}/{structure}/{structure}
topology_filepath={parent_folder}/{structure}
structure_filepath={experiment_folder}/{structure}/{structure}
output_filepath=${{structure_filepath}}_min

# 4. Run Jobs

srun -n 12 --cpu-bind=cores --mpi=pmi2 sander.MPI -O -i ${{input_folder}}_min.in -o $output_filepath.log -p $topology_filepath.parm7 \
        -c $topology_filepath.rst7 -x $output_filepath.nc -r $output_filepath.rst7 -inf $output_filepath.mdinf
"""

    with open("MD.sh", "w") as script_file:
        script_file.write(script)

    os.system("sbatch MD.sh")

In [None]:
#EXAMPLE 3 part 2 from powerpoint - calls the function above to submit the job for all four structures
STRUCTURES = [
    "1A0_6NB_GS",
    "1A0_6NB_TS",
    "1A5_6NB_GS",
    "1A5_6NB_TS",
]
FOLDER_PA = "0_Parent_example"
FOLDER_EQ = "1_Equilibrate_example"

# One minimisation job per structure: inputs and outputs live under FOLDER_EQ,
# topology and starting coordinates are taken from FOLDER_PA.
for structure in STRUCTURES:
    runjob_min(
        structure=structure,
        experiment_folder=FOLDER_EQ,
        parent_folder=FOLDER_PA,
    )

In [None]:
# List this user's Slurm jobs to confirm everything was queued. The explicit
# column widths leave room for the longer job names used in this notebook.
squeue_columns = "%.18i %.9P %.30j %.8u %.8T %.10M %.9l %.6D %R"
os.system(f'squeue --format="{squeue_columns}" --me')

In [None]:
#Example 4 from powerpoint - requires cpptraj in the shell environment
# Converts each minimised restart file into an imaged, fitted PDB.
STRUCTURES = ["1A0_6NB_GS", "1A0_6NB_TS", "1A5_6NB_GS", "1A5_6NB_TS"]
FOLDER_PA = "0_Parent_example"
FOLDER_EQ = "1_Equilibrate_example"

for structure in STRUCTURES:
    filename = f"{FOLDER_EQ}/{structure}/{structure}"

    # cpptraj commands, one per line, in the order they are executed.
    cpptraj_commands = [
        f"parm {FOLDER_PA}/{structure}.parm7",
        f"trajin {filename}_min.rst7",
        "center !:WAT mass origin",
        "autoimage",
        "rms Full_Prot !:WAT,POT,CLA,248 first mass",
        f"outtraj {FOLDER_EQ}/{structure}/{structure}_min.pdb",
    ]

    # The script file is reused (overwritten) for every structure.
    with open("cpptraj.in", "w") as f:
        f.write("\n".join(cpptraj_commands) + "\n")

    os.system("cpptraj -i cpptraj.in")

In [None]:
## heat, NVT and NPT - can be run on GPU
def runjob_eq1(structure, index, experiment_folder, parent_folder):
    """Write and submit the Slurm job for heating, NVT and NPT of one replicate.

    The generated ``MD.sh`` (current working directory, overwritten each call)
    runs ``pmemd.cuda`` three times - heat, eq_NVT, eq_NPT - each stage starting
    from the previous stage's restart file after it has been passed through
    cpptraj with the ``cpptraj_autoimage.in`` script. The minimised structure
    is used both as the starting point for heating and as ``-ref`` throughout.

    Parameters
    ----------
    structure : str
        Base name of the system.
    index : str
        Replicate label; outputs go to ``{structure}_{index}/`` inside the
        structure's experiment folder.
    experiment_folder : str
        Folder holding the per-structure ``.in`` files and minimised restart.
    parent_folder : str
        Folder holding ``{structure}.parm7``.

    Returns
    -------
    None
    """
    job_name = f"E1_{index}_{structure}"
    print(f"submitted {job_name}")

    with open("MD.sh", "w") as script_file:
        script_file.write(f"""#!/bin/bash
#SBATCH --partition gpu
#SBATCH --job-name={job_name}
#SBATCH --output   {job_name}
#SBATCH --time=6:00:00  #14-00:00:00    # Walltime
#SBATCH --nodes=1
#SBATCH --gres=gpu:1
#SBATCH --mem=1G
#SBATCH --account=ptch000721

# 1. Load module(s)
module load apps/amber/20.cuda.gcc

# 2. Set directories
cd {os.getcwd()} 

# 3. Define variables
input_folder={experiment_folder}/{structure}/{structure} 

output_folder={experiment_folder}/{structure}/{structure}_{index}
mkdir ${{output_folder}}
structure_filepath=${{output_folder}}/{structure}_{index}
reference_filepath={experiment_folder}/{structure}/{structure}_min.rst7
topology_filepath={parent_folder}/{structure}
autoimage_script_filepath=cpptraj_autoimage.in

# 4. Run Jobs 
old_filepath={experiment_folder}/{structure}/{structure}_min
name=heat
output_filepath=${{structure_filepath}}_$name
pmemd.cuda      -O -i ${{input_folder}}_$name.in -o ${{output_filepath}}.log -p ${{topology_filepath}}.parm7  \
                    -ref ${{reference_filepath}} -c ${{old_filepath}}.rst7 -x ${{output_filepath}}.nc  -r ${{output_filepath}}.rst7 -inf ${{output_filepath}}.mdinf

cpptraj -p ${{topology_filepath}}.parm7 -i $autoimage_script_filepath -y ${{output_filepath}}.rst7 -x ${{output_filepath}}_im.rst7

old_filepath=${{output_filepath}}
name=eq_NVT 
output_filepath=${{structure_filepath}}_$name
pmemd.cuda     -O -i ${{input_folder}}_$name.in -o ${{output_filepath}}.log -p ${{topology_filepath}}.parm7  \
                    -ref ${{reference_filepath}} -c ${{old_filepath}}_im.rst7 -x ${{output_filepath}}.nc  -r ${{output_filepath}}.rst7 -inf ${{output_filepath}}.mdinf

cpptraj -p ${{topology_filepath}}.parm7 -i $autoimage_script_filepath -y ${{output_filepath}}.rst7 -x ${{output_filepath}}_im.rst7

old_filepath=${{output_filepath}}
name=eq_NPT
output_filepath=${{structure_filepath}}_$name
pmemd.cuda     -O -i ${{input_folder}}_$name.in -o ${{output_filepath}}.log -p ${{topology_filepath}}.parm7  \
                    -ref ${{reference_filepath}} -c ${{old_filepath}}_im.rst7 -x ${{output_filepath}}.nc  -r ${{output_filepath}}.rst7 -inf ${{output_filepath}}.mdinf
""")

    # Three GPU nodes are excluded from scheduling for this job.
    os.system("sbatch --exclude=bp1-gpu[007,035,030] MD.sh")

In [None]:
# Submit the heat/NVT/NPT job for every replicate (1..N_REPLICATES) of every structure.
for structure in STRUCTURES:
    for index in range(1, N_REPLICATES + 1):
        runjob_eq1(
            structure=structure,
            index=str(index),  # the replicate label is passed as text
            experiment_folder=FOLDER_EQ,
            parent_folder=FOLDER_PA,
        )

In [None]:
#writing endpoint PDBs from NPT
# For each structure, every replicate's final NPT restart file that exists is
# loaded into a single cpptraj run and written out as one multi-frame PDB.

for structure in STRUCTURES:
    script_lines = [f"parm {FOLDER_PA}/{structure}.parm7\n"]

    for index in range(1, N_REPLICATES + 1):
        filename = f"{FOLDER_EQ}/{structure}/{structure}_{index}/{structure}_{index}"
        if not os.path.isfile(f"{filename}_eq_NPT.rst7"):
            # Replicates without a restart file are reported and left out.
            print(f'{structure}_{index} No file found')
            continue
        script_lines.append(f"trajin {filename}_eq_NPT.rst7\n")

    # `index` still holds the last replicate number here, so the output name
    # carries N_REPLICATES rather than the number of frames actually loaded.
    script_lines.extend([
        "autoimage\n",
        f"rms Full_Prot {rms_mask} first mass\n",
        f"closest 100 :248 parmout {FOLDER_PA}/{structure}_closewater.parm7\n",
        f"outtraj {FOLDER_EQ}/{structure}_{index}_NPT_eq.pdb\n",
    ])

    with open("cpptraj.in", "w") as f:
        f.writelines(script_lines)

    os.system("cpptraj -i cpptraj.in")

In [None]:
##QM/MM equilibration - run on cpu with multiprocessing

def runjob_eq2(structure, index, experiment_folder, parent_folder):
    """Write and submit the Slurm job for the QM/MM equilibration of one replicate.

    The generated ``MD.sh`` (current working directory, overwritten each call)
    first passes the replicate's ``_eq_NPT.rst7`` through cpptraj with the
    ``cpptraj_autoimage.in`` script, then runs ``sander.MPI`` on 12 tasks with
    the structure's ``_eq_QM.in`` input, starting from the imaged restart.

    Parameters
    ----------
    structure : str
        Base name of the system.
    index : str
        Replicate label; files are read from and written to
        ``{structure}_{index}/`` inside the structure's experiment folder.
    experiment_folder : str
        Folder holding the per-structure ``.in`` files and replicate folders.
    parent_folder : str
        Folder holding ``{structure}.parm7``.

    Returns
    -------
    None
    """
    job_name = f"E2_{index}_{structure}"
    working_dir = os.getcwd()  # the job changes into this directory before running

    script = f"""#!/bin/bash
#SBATCH --partition test #test has much shorter queue than compute and it only takes 15 minutes
#SBATCH --job-name={job_name}
#SBATCH --output   {job_name}
#SBATCH --time=1:00:00 # 14-00:00:00    # Walltime
#SBATCH --nodes=1             # number of tasks
#SBATCH --ntasks-per-node=12  # number of tasks per node
#SBATCH --cpus-per-task=1
#SBATCH --mem=10G 
#SBATCH --account=ptch000721
 
# 1. Load module(s)
module load apps/amber/20
module add lang/intel-parallel-studio-xe/2020

# 2. Set directories
cd {working_dir}

# 3. Define variables

input_folder={experiment_folder}/{structure}/{structure} 

output_folder={experiment_folder}/{structure}/{structure}_{index}
# mkdir ${{output_folder}}
structure_filepath=${{output_folder}}/{structure}_{index}
topology_filepath={parent_folder}/{structure}
autoimage_script_filepath=cpptraj_autoimage.in

# 4. Run Jobs 

old_filepath=${{structure_filepath}}_eq_NPT

cpptraj -p ${{topology_filepath}}.parm7 -i $autoimage_script_filepath -y ${{old_filepath}}.rst7 -x ${{old_filepath}}_im.rst7

name="eq_QM"
output_filepath=${{structure_filepath}}_$name

srun -n 12 --cpu-bind=cores --mpi=pmi2 sander.MPI -O -i ${{input_folder}}_${{name}}.in -o ${{output_filepath}}.log -p ${{topology_filepath}}.parm7  \
          -c ${{old_filepath}}_im.rst7 -x ${{output_filepath}}.nc -r ${{output_filepath}}.rst7 -inf ${{output_filepath}}.mdinf
"""

    with open("MD.sh", "w") as script_file:
        script_file.write(script)

    os.system("sbatch MD.sh")

In [None]:
# Submit the QM/MM equilibration for every replicate (1..N_REPLICATES) of every structure.
for STRUCTURE in STRUCTURES:
    for INDEX in range(1, N_REPLICATES + 1):
        runjob_eq2(
            structure=STRUCTURE,
            index=str(INDEX),  # the replicate label is passed as text
            experiment_folder=FOLDER_EQ,
            parent_folder=FOLDER_PA,
        )

In [None]:
# Makes a multi-frame PDB 'trajectory' per structure from the final QM/MM
# restart file of every replicate (1..N_REPLICATES) that has one.

for STRUCTURE in STRUCTURES:
    script_lines = [f"parm {FOLDER_PA}/{STRUCTURE}.parm7\n"]

    for INDEX in range(1, N_REPLICATES + 1):
        filename = f"{FOLDER_EQ}/{STRUCTURE}/{STRUCTURE}_{INDEX}/{STRUCTURE}_{INDEX}"
        # Replicates without a restart file are skipped silently.
        if os.path.isfile(filename + "_eq_QM.rst7"):
            script_lines.append(f"trajin {filename}_eq_QM.rst7\n")

    # INDEX still holds the last replicate number here, so the output name
    # carries N_REPLICATES rather than the number of frames actually loaded.
    output_pdb = f"{FOLDER_EQ}/{STRUCTURE}_{INDEX}_last_eq_QM.pdb"
    script_lines.extend([
        "center !:WAT mass origin\n",
        "autoimage\n",
        f"rms Full_Prot {rms_mask} first mass\n",
        "closest 100 :248\n",
        "strip :POT,CLA\n",
        f"outtraj {output_pdb}\n",
    ])

    with open("cpptraj.in", "w") as f:
        f.writelines(script_lines)

    print(output_pdb)
    os.system('cpptraj -i cpptraj.in')


In [None]:
#EXAMPLE 5 from powerpoint
#plotting RMSD with pytraj - saves data as files, only do this once
# For every replicate the heat, NVT, NPT and QM/MM trajectories are loaded as
# one continuous trajectory; the RMSD and the :177@CD - :248@C15 distance are
# saved next to the trajectories and drawn (top row: RMSD, bottom row: distance).

# Atoms used for centring and for the RMSD: everything except residues
# WAT, POT, CLA and 248.
fit_mask = '!:WAT,POT,CLA,248'

# One column per structure. squeeze=False keeps `axs` two-dimensional even for
# a single structure, so axs[row, col] indexing always works; with the four
# structures used here the figure is the same 2x4 grid of size (16, 8).
n_structures = len(STRUCTURES)
fig, axs = plt.subplots(2, n_structures, figsize=(4*n_structures, 8), sharey='row', squeeze=False)
for fig_col, structure in enumerate(STRUCTURES):

    for index in range(1, N_REPLICATES+1):
        input_folder = f'{FOLDER_EQ}/{structure}/{structure}_{index}/{structure}_{index}'
        
        traj = pt.load([f'{input_folder}_heat.nc',f'{input_folder}_eq_NVT.nc', f'{input_folder}_eq_NPT.nc',f'{input_folder}_eq_QM.nc'],
            top=f'{FOLDER_PA}/{structure}.parm7')

        traj = traj.autoimage()
        traj = pt.center(traj, fit_mask, mass=True, center='origin')

        rmsd_data = pt.rmsd(traj, mask=fit_mask ,mass=True)
        np.savetxt(fname=f"{input_folder}_rmsd.dat", X=rmsd_data)

        ligbase_dist = pt.distance(traj, mask=':177@CD :248@C15')
        np.savetxt(fname=f"{input_folder}_ligbasedist.dat", X=ligbase_dist)
        
        # All replicates share one faint black style so they overlay per panel.
        axs[0,fig_col].plot(rmsd_data, linewidth=0.8, label=index, color='k', alpha=0.2)
        axs[1,fig_col].plot(ligbase_dist, linewidth=0.8, label=index, color='k', alpha=0.2)

    axs[0,fig_col].set_xlabel('Frame')
    axs[0,fig_col].set_ylabel('RMSD')
    axs[0,fig_col].set_title(f'{structure}')
    axs[0,fig_col].set_xlim(xmin=0)
    axs[1,fig_col].set_xlabel('Frame')
    axs[1,fig_col].set_ylabel('6NB C to GLU177 CD distance')

    axs[1,fig_col].set_xlim(xmin=0)


In [None]:
# plots rmsd and ligbase dist data saved to files
# Each replicate is colour-coded: red when the ligand-base distance ever
# exceeds MAX_LIGBASE_DIST, green when fewer than MIN_EXPECTED_FRAMES frames
# were saved (run did not finish or its files are missing), black otherwise.

MAX_LIGBASE_DIST = 15      # distance threshold, in the units written by pt.distance
MIN_EXPECTED_FRAMES = 410  # shorter RMSD series are reported as unfinished

# One column per structure. squeeze=False keeps `axs` two-dimensional even for
# a single structure; with the four structures used here the figure is the
# same 2x4 grid of size (16, 8).
n_structures = len(STRUCTURES)
fig, axs = plt.subplots(2, n_structures, figsize=(4*n_structures, 8), sharey='row', squeeze=False)
for fig_col, structure in enumerate(STRUCTURES):

    for index in range(1, N_REPLICATES+1):
        input_folder = f'{FOLDER_EQ}/{structure}/{structure}_{index}/{structure}_{index}'
        
        try:
            # loadtxt returns a 0-d array for a one-line file; atleast_1d keeps
            # max() and len() below working in that case.
            ligbase_dist = np.atleast_1d(np.loadtxt(fname=f"{input_folder}_ligbasedist.dat"))
            rmsd_data = np.atleast_1d(np.loadtxt(fname=f"{input_folder}_rmsd.dat"))
        except FileNotFoundError:
            # A single NaN draws nothing but keeps the checks below uniform.
            ligbase_dist = [np.nan]
            rmsd_data = [np.nan]
            print(f'file {input_folder}_ligbasedist.dat not found')

        if max(ligbase_dist) > MAX_LIGBASE_DIST:
            color = 'r'
            print(f"{structure}_{index} Ligand moved away from active site")
        elif len(rmsd_data) < MIN_EXPECTED_FRAMES:
            print(f"{structure}_{index} did not finish")
            color = 'g'
        else:
            color = 'k'
        
        axs[0,fig_col].plot(rmsd_data, linewidth=0.8, label=index, color=color, alpha=0.2)
        axs[1,fig_col].plot(ligbase_dist, linewidth=0.8, label=index, color=color, alpha=0.2)

    axs[0,fig_col].set_xlabel('Frame')
    axs[0,fig_col].set_ylabel('RMSD')
    axs[0,fig_col].set_title(f'{structure}')
    axs[0,fig_col].set_xlim(xmin=0)
    axs[1,fig_col].set_xlabel('Frame')
    axs[1,fig_col].set_ylabel('6NB C7 to GLU177 CD distance')
    # Same x-axis origin as the top row, matching the pytraj plotting cell.
    axs[1,fig_col].set_xlim(xmin=0)
