# Prepare and Run OpenMM Simulation

In [9]:
# Name of this experiment. It is passed to setup_experiment_dir() when the directories
# are created and is stored alongside the other simulation parameters.
experiment_name = 'example_4W52'

This notebook allows you to run an OpenMM simulation locally or on a job submission cluster, like Wynton.

You may run this notebook top-to-bottom, except where you see cells titled **OPTION**. These indicate places you may choose to run certain code (e.g. running a simulation in the jupyter notebook vs on a cluster).

Always specify the experiment name above before skipping down to a section.

# Load Starting Structure

### OPTION 1) Load local file

In [10]:
### Using local file? ###
# Path to a PDB file on disk. The OPTION 2 cell assigns the same variable,
# so whichever of the two option cells ran last decides which structure is used.
input_pdb_file = './4W52_example.pdb'

### OPTION 2) Load from PDB

In [2]:
### Or, fetch the structure by its PDB ID? ###
from easyMD.utils import download_pdb

input_pdb_ID = '4W52'
download_pdb(input_pdb_ID)

# The rest of the notebook refers to the structure through input_pdb_file.
# NOTE(review): assumes download_pdb saves '<ID>.pdb' in the working directory — confirm.
input_pdb_file = input_pdb_ID + '.pdb'



Successfully downloaded 1AKI.pdb


# Prepare Simulation

In [11]:
##########################################
############ DIRECTORY SETUP #############
##########################################
# Expected layout:
#   experiments/
#       new_experiment/
#           inputs/
#               raw/
#               processed/
#           simulations/
#       another_experiment/...
import shutil
from pathlib import Path

from easyMD.utils import create_dirs, setup_experiment_dir, show_pdb

experiments_dir = Path('experiments')
(
    current_experiment_dir,
    inputs_dir,
    raw_dir,
    processed_dir,
    sim_dir,
) = setup_experiment_dir(experiments_dir, experiment_name)

create_dirs([
    experiments_dir,
    current_experiment_dir,
    inputs_dir,
    raw_dir,
    processed_dir,
    sim_dir,
])

# Keep a copy of the input structure in raw_dir; later cells read it from there.
shutil.copy(input_pdb_file, raw_dir)
pdb_path = raw_dir / Path(input_pdb_file).name

# Display the raw structure (waters included).
show_pdb(pdb_path, size=(600, 600), water=True)

NGLWidget()

### Take care during processing!
In this step, we decide how the PDB is processed before the simulation. Consider the following questions, and enable or disable the corresponding options in the code below:
- Do you want to keep certain heteroatoms?
- Do you want to keep all chains, or just one?
- Do you want PDBfixer to add missing residues?

In [14]:
# Which chains are we keeping? Put the chain IDs here.
# Empty list means keep all.
# (The processing cell compares these values against each chain's `id`.)
chains_to_keep = ['A']

# Are we using a ligand? List the 3-letter residue name here.
# Empty list means no ligand.
# (Residues with these names are kept when the other non-standard residues are deleted,
#  and each name is passed to extract_and_correct_ligand in the processing cell.)
ligand_resnames = ['EPE']


In [15]:
############################################
############ PROCESS INPUT PDB #############
############################################
# Pipeline: repair the structure with PDBFixer -> drop unwanted chains -> add hydrogens
# -> strip every non-standard residue except the ligand(s) -> re-solvate
# -> write the processed PDB and extract the ligand(s).
#
# Reads from earlier cells: pdb_path, chains_to_keep, ligand_resnames, processed_dir, input_pdb_file.
import tempfile
from pathlib import Path

from openmm import unit as openmm_unit
from openmm.app import Modeller, PDBFile
from pdbfixer import PDBFixer

from easyMD.utils import extract_and_correct_ligand, show_pdb

pdb = PDBFixer(str(pdb_path))

### Fix messed up residues
pdb.findMissingResidues()
pdb.findNonstandardResidues()
pdb.replaceNonstandardResidues()
pdb.findMissingAtoms()
pdb.addMissingAtoms()    #this adds both missing atoms and residues

### Remove chains
# An empty chains_to_keep means "keep all chains". Without this guard the filter
# below would select *every* chain for deletion and leave an empty structure.
if chains_to_keep:
    all_chains = list(pdb.topology.chains())
    chains_to_delete = [chain.index for chain in all_chains if chain.id not in chains_to_keep]
    if len(chains_to_delete) == len(all_chains):
        available_ids = sorted({chain.id for chain in all_chains})
        raise ValueError(
            f'None of the requested chains {chains_to_keep} exist in {pdb_path}; '
            f'available chain IDs: {available_ids}'
        )
    pdb.removeChains(chains_to_delete)
else:
    chains_to_delete = []

### add hydrogens:
pdb.addMissingHydrogens(7.0)    # argument is the pH used to pick protonation states

### Collect the names of all non-standard residues:
# Anything that is not one of the 20 standard amino acids counts as non-standard here,
# which includes waters and other heteroatoms already present in the input file.
standard_residues = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
nonstandard_residues = sorted({
    residue.name
    for residue in pdb.topology.residues()
    if residue.name not in standard_residues
})

### MAKE MODELLER OBJECT:
modeller = Modeller(pdb.topology, pdb.positions)

### Delete all nonstandard residues except ligand:
to_delete = [
    residue
    for residue in modeller.topology.residues()
    if residue.name in nonstandard_residues and residue.name not in ligand_resnames
]
modeller.delete(to_delete)

### Round-trip the cleaned structure through a temporary PDB file so PDBFixer can reload it:
# NamedTemporaryFile(delete=False) only reserves a unique path; the handle is closed when
# the `with` block exits (mkstemp would leave its file descriptor open), and the file is
# removed as soon as PDBFixer has loaded it.
with tempfile.NamedTemporaryFile(suffix='.pdb', delete=False) as tmp_file:
    cleaned_pdb_path = Path(tmp_file.name)
#cleaned_pdb_path = processed_dir / (str(Path(input_pdb_file).stem) + '_cleaned.pdb')
try:
    PDBFile.writeFile(modeller.topology, modeller.positions, str(cleaned_pdb_path))
    pdb = PDBFixer(str(cleaned_pdb_path))
finally:
    cleaned_pdb_path.unlink()

### Add Solvent:
pdb.addSolvent(padding=10*openmm_unit.angstroms)

### Write out the Processed pdb
print('Writing out the processed input pdb...')
processed_pdb_path = processed_dir / Path(input_pdb_file).name
PDBFile.writeFile(pdb.topology, pdb.positions, str(processed_pdb_path))

### Extract and correct ligands, if using:
# An empty ligand_resnames list simply makes this loop a no-op.
for ligand_resname in ligand_resnames:
    extract_and_correct_ligand(pdb, ligand_resname, processed_dir / 'ligands' / f'{ligand_resname}.sdf')

### Output:
print('Done!')
print('Number of atoms:', pdb.topology.getNumAtoms())

show_pdb(processed_pdb_path, size=(600, 600), water=True)

Writing out the processed input pdb...
Found ligand: EPE with 33 atoms
No ligand SDF template provided. Downloading from RCSB PDB...
SDF file for EPE downloaded successfully.
Done!
Number of atoms: 35987





NGLWidget()

# Run Simulation

**Starting a fresh simulation?** Adjust the parameters below, and continue through this section.

**Continuing a simulation?** Skip to the cells titled Option 1) or Option 2). In each, there is a flag called `continue_from_previous_sim` - set this to `True`. Also specify the number of new steps you want to run with `continue_sim_steps`.

In [16]:
############################################
############# SIM PARAMETERS ###############
############################################

### Time step and temperature:
temperature = 300       # kelvin
step_size   = 0.002     # picoseconds

### Forcefield:
# ['amber14-all.xml', 'amber14/tip3p.xml'] is recommended if using a ligand! (Since it works with GAFF)
forcefield_files = [
    'amber14-all.xml',
    'amber14/tip3p.xml',
]

### Simulation steps:
# minimization: 0 means run until convergence, recommended.
# production:   500000 = 1 ns. On a 4080TI, this takes around 2 minutes for a small system.
minimization_steps  = 0
equilibration_steps = 10_000
production_steps    = 1 * 500_000

### Saving:
# reporting_interval:  how frequently frames are saved.
# checkpoint_interval: how frequently checkpoints are saved (for restarting failed or finished simulations).
reporting_interval  = 5_000
checkpoint_interval = 500_000

### Skipping and Relaunching Simulations:
# If True, will skip equilibration and minimization.
only_production = False

In [17]:
### Bundle the settings chosen above into the arguments for the simulation script:
from easyMD.utils import write_sim_parameters

sim_parameter_dict = dict(
    minimization_steps=minimization_steps,
    equilibration_steps=equilibration_steps,
    production_steps=production_steps,
    reporting_interval=reporting_interval,
    checkpoint_interval=checkpoint_interval,
    input_pdb_file=input_pdb_file,
    # Directories are stored as plain strings.
    sim_dir=str(sim_dir),
    processed_dir=str(processed_dir),
    experiment_name=experiment_name,
    forcefield_files=forcefield_files,
    step_size=step_size,
    temperature=temperature,
    # Flag to only run production, for when we want to continue a simulation.
    only_production=only_production,
)

# Save the parameters next to the simulation outputs.
write_sim_parameters(sim_dir / 'simulation_dict.json', sim_parameter_dict)

### OPTION 1) Run Locally
This will run in the notebook. If you stop the notebook or kill your connection, the simulation will stop.

In [20]:
# Run the simulation described by the parameters saved in sim_dir, inside this kernel.
from easyMD.utils import run_sim_local
run_sim_local(sim_dir)

### Continuing a previous sim? Use this instead:
# NOTE(review): the original example called `run_sim`, which is never imported in this notebook;
# it presumably means run_sim_local with the same continuation flags as run_sim_wynton — confirm.
#from pathlib import Path
#sim_dir = Path('experiments/example_1AKI_1/simulations')
#run_sim_local(sim_dir, continue_from_previous_sim=True, continue_sim_steps=500000*1) #continues sim for 1ns

Minimizing energy...
Writing out the minimized pdb...
Running equilibration...
#"Step","Potential Energy (kJ/mole)","Temperature (K)"
1000,-317122.9195733601,271.41679584861055
2000,-308602.4361749226,291.1049484543053
3000,-305861.8365655476,300.0806127782968
4000,-304831.7564874226,301.21047603561703
5000,-304683.6715264851,300.1294548819948
6000,-304010.6187921101,299.78782717676296
7000,-305133.9742608601,301.2379613142572
8000,-305867.9000421101,299.00429262117075
9000,-304509.6822686726,299.3811057815637
10000,-304803.1978936726,300.31087873211175
Running production...
#"Step","Potential Energy (kJ/mole)","Temperature (K)"
15000,-305488.1490655476,299.1938887562754
20000,-305191.6207452351,301.15774596101215
25000,-305569.3453546101,300.06012435937373
30000,-304692.2066827351,301.2242064277675
35000,-304674.1031671101,301.31482958220573
40000,-304334.0211358601,301.97927011963435
45000,-304421.3267999226,297.29948499215783
50000,-304519.0123467976,298.16093208447705
55000,-305274

KeyboardInterrupt: 

### OPTION 2) Submit it to a queue on Wynton

In [18]:
# Submit the simulation described by the parameters saved in sim_dir as a cluster job.
from easyMD.utils import run_sim_wynton
# NOTE(review): '0:10:00' looks like a 10-minute limit (h:mm:ss), not 2 hours; the continuation
# example below uses '2:00:00'. Confirm which limit is intended before submitting long runs.
run_sim_wynton(sim_dir, continue_from_previous_sim=False, continue_sim_steps=None, 
               mem='4G', 
               max_runtime='0:10:00') # presumably 10 minutes max runtime — see note above.

## Continuing a previous sim? Use this instead:
# from easyMD.utils import run_sim_wynton
# from pathlib import Path
# sim_dir = Path('experiments/example_1AKI_1/simulations')
# run_sim_wynton(sim_dir, max_runtime='2:00:00', continue_from_previous_sim=True, continue_sim_steps=500000*1) #continues sim for 1ns

b'Your job 629160 ("example_4W52_1") has been submitted'
<class 'bytes'>


# View Trajectory

Already ran a sim, and just want to do the analysis?

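No need to run the rest of the notebook. Just specify the experiment name at the top before moving on. Note that the next cell reads `sim_dir`, so it must already be defined — if you skipped the earlier cells, set it by hand to that experiment's simulations folder, e.g. `sim_dir = Path('experiments/<experiment_name>/simulations')`.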

In [19]:
# Reload the parameters that were saved for this experiment, overwriting the
# notebook-level variables of the same names.
# NOTE(review): sim_dir must already be defined when this cell runs; on a fresh kernel
# nothing above this cell sets it unless the directory-setup cell was executed — confirm
# the intended workflow for analysis-only sessions.
from easyMD.utils import read_sim_parameters

sim_parameters_file = sim_dir / 'simulation_dict.json'

(
    minimization_steps,
    equilibration_steps,
    production_steps,
    reporting_interval,
    checkpoint_interval,
    input_pdb_file,
    sim_dir,
    processed_dir,
    experiment_name,
    forcefield_files,
    step_size,
    temperature,
    only_production,
) = read_sim_parameters(sim_parameters_file)

In [20]:
#show the trajectory from PDB with nglview
from easyMD.utils import show_traj
from pathlib import Path

# The processing cell writes the processed PDB under the *basename* of input_pdb_file,
# so drop any directory part here too; otherwise an input such as 'data/x.pdb' would be
# looked up in a non-existent sub-folder of processed_dir.
# Wrapping the directories in Path() keeps the `/` joins valid even if
# read_sim_parameters handed them back as plain strings.
traj_topology_file = Path(processed_dir) / Path(input_pdb_file).name
# NOTE(review): assumes the simulation writes '<input stem>_prod.dcd' into sim_dir — confirm.
traj_file = Path(sim_dir) / (Path(input_pdb_file).stem + '_prod.dcd')

show_traj(traj_topology_file, traj_file, size=(600, 600))

NGLWidget(max_frame=99)