# Prepare and Run OpenMM Simulation

In [None]:
# Who is running this experiment, and what is it for?
experiment_name = 'example_sim'
author = 'Your Name'
description = 'A test simulation'

# Root folder for saved data; each experiment gets its own sub-directory here.
experiments_dir = 'experiments/'

# Path to the easyMD Apptainer image.
apptainer_path = './easymd_0.1.0.sif'

This notebook allows you to run an OpenMM simulation locally or on a job submission cluster, like Wynton.

You may run this notebook top-to-bottom, except where you see cells titled **OPTION**. These mark places where you choose which code to run (e.g. running a simulation in the Jupyter notebook vs. on a cluster).

Always specify the experiment name above before skipping down to a section.

In [None]:
# Verify that the container image exists before going any further.
from pathlib import Path

# Store the absolute path so it no longer depends on the working directory.
apptainer_path = str(Path(apptainer_path).resolve())

# Raise explicitly instead of using `assert`: assertions are removed when
# Python runs with -O, which would silently skip this check.
if not Path(apptainer_path).exists():
    raise FileNotFoundError(f'Apptainer not found at {apptainer_path}')

# Load Starting Structure

### OPTION 1) Load local file

In [None]:
### Local structure file ###
# Path to a PDB file that is already on disk.
input_pdb_file = './4W52_example.pdb'

### OPTION 2) Load from PDB

In [None]:
### Or, using PDB ID? ###
# input_pdb_ID = '4W52'

# from easyMD.utils import download_pdb
# download_pdb(input_pdb_ID)

# input_pdb_file = f'{input_pdb_ID}.pdb'

# Prepare Simulation

In [None]:
##########################################
############ DIRECTORY SETUP #############
##########################################
# Layout used for every experiment:
#
# experiments/
#     new_experiment/
#         parameters.json      (written by a later cell of this notebook)
#         inputs/
#             raw/
#             processed/
#         simulations/
#     another_experiment/...

import shutil
from pathlib import Path

from easyMD.utils import create_dirs, setup_experiment_dir, show_pdb

experiments_dir = Path(experiments_dir)

# setup_experiment_dir hands back the five paths belonging to this experiment.
(
    current_experiment_dir,
    inputs_dir,
    raw_dir,
    processed_dir,
    sim_dir,
) = setup_experiment_dir(experiments_dir, experiment_name)

create_dirs([
    experiments_dir,
    current_experiment_dir,
    inputs_dir,
    raw_dir,
    processed_dir,
    sim_dir,
])

# Keep an untouched copy of the input structure alongside the experiment.
shutil.copy(input_pdb_file, raw_dir)
pdb_path = raw_dir / Path(input_pdb_file).name

# Display the raw structure (waters included).
show_pdb(pdb_path, size=(600, 600), water=True)

### Take care during processing!
In this step, we decide how to process the PDB before the simulation. Consider the following options, and enable or disable them in the code below:
- Do you want to keep certain heteroatoms?
- Do you want to keep all chains, or just one?
- Do you want PDBfixer to add missing residues?

In [None]:
# Chain IDs to keep from the input structure.
# Empty list means keep all.
chains_to_keep = ['A']

# 3-letter residue name of each ligand to keep.
# Empty list means no ligand.
ligand_resnames = ['BNZ']

# Ligand templates:
#   - 'automatic' -> no template file is passed for any ligand.
#   - To supply templates manually, give a list of paths with one entry per
#     ligand, in the same order as ligand_resnames,
#     e.g. ligand_templates = ['./ligand1.SDF', './ligand2.SDF', ...]
ligand_templates = 'automatic'

In [None]:
############################################
############ PROCESS INPUT PDB #############
############################################
# Repairs the raw structure with PDBFixer, drops unwanted chains and
# non-standard residues (except the ligands), solvates the system, writes the
# processed PDB into `processed_dir`, and extracts each ligand to an SDF file.
import os
import tempfile
from pathlib import Path

from openmm import unit as openmm_unit
from openmm.app import Modeller, PDBFile
from pdbfixer import PDBFixer

from easyMD.utils import extract_and_correct_ligand, show_pdb

### Fix messed up residues
pdb = PDBFixer(str(pdb_path))
pdb.findMissingResidues()
pdb.findNonstandardResidues()
pdb.replaceNonstandardResidues()
pdb.findMissingAtoms()
pdb.addMissingAtoms()    # this adds both missing atoms and residues

### Remove chains
# An empty `chains_to_keep` means "keep every chain". Without this guard the
# filter would match every chain and delete the whole structure.
if chains_to_keep:
    chains_to_delete = [
        chain.index
        for chain in pdb.topology.chains()
        if chain.id not in chains_to_keep
    ]
else:
    chains_to_delete = []
if chains_to_delete:
    pdb.removeChains(chains_to_delete)

### Add hydrogens:
pdb.addMissingHydrogens(7.0)

### Collect the names of all non-standard residues:
standard_residues = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
nonstandard_residues = [
    residue.name
    for residue in pdb.topology.residues()
    if residue.name not in standard_residues
]

### MAKE MODELLER OBJECT:
modeller = Modeller(pdb.topology, pdb.positions)

### Delete all nonstandard residues except ligand:
nonstandard_names = set(nonstandard_residues)   # set for fast membership tests
to_delete = [
    residue
    for residue in modeller.topology.residues()
    if residue.name in nonstandard_names and residue.name not in ligand_resnames
]
modeller.delete(to_delete)

### Write out the cleaned PDB file:
# mkstemp returns an already-open OS-level file handle together with the
# path. Close the handle right away so it is not leaked; the file itself is
# left in place so `cleaned_pdb_path` can still be inspected.
tmp_fd, tmp_name = tempfile.mkstemp(suffix='.pdb')
os.close(tmp_fd)
cleaned_pdb_path = Path(tmp_name)
#cleaned_pdb_path = processed_dir / (str(Path(input_pdb_file).stem) + '_cleaned.pdb')
PDBFile.writeFile(modeller.topology, modeller.positions, str(cleaned_pdb_path))

### Add Solvent:
# Reload the cleaned structure into a fresh PDBFixer before solvating.
pdb = PDBFixer(str(cleaned_pdb_path))
pdb.addSolvent(padding=10*openmm_unit.angstroms, ionicStrength=0.15*openmm_unit.molar)

### Write out the Processed pdb
print('Writing out the processed input pdb...')
processed_pdb_path = processed_dir / Path(input_pdb_file).name
PDBFile.writeFile(pdb.topology, pdb.positions, str(processed_pdb_path))

### Extract and correct ligands, if using:
# With manual templates, entry i of `ligand_templates` belongs to entry i of
# `ligand_resnames`; in 'automatic' mode no template is passed.
using_manual_templates = ligand_templates != 'automatic'
for i, ligand_resname in enumerate(ligand_resnames):
    current_template = ligand_templates[i] if using_manual_templates else None
    extract_and_correct_ligand(pdb, ligand_resname, processed_dir / 'ligands' / f'{ligand_resname}.sdf', current_template)

### Output:
print('Done!')
print('Number of atoms:', pdb.topology.getNumAtoms())

show_pdb(processed_pdb_path, size=(600, 600), water=True)

# Prepare Files

**Starting a fresh simulation?** Adjust the parameters below, and continue through this section.

**Continuing a simulation?** Skip to the cells titled Option 1) or Option 2). In each, there is a flag called `continue_from_previous_sim` - set this to `True`. Also specify the number of new steps you want to run with `continue_sim_steps`.

In [None]:
############################################
############# SIM PARAMETERS ###############
############################################

# Integrator settings.
step_size   = 0.002    # picoseconds
temperature = 300      # kelvin

# Forcefield. ['amber14-all.xml', 'amber14/tip3p.xml'] is recommended if using
# a ligand! (Since it works with GAFF)
forcefield_files = ['amber14-all.xml', 'amber14/tip3p.xml']

# Number of steps per stage.
minimization_steps  = 0               # 0 means run until convergence, recommended.
equilibration_steps = 10_000
production_steps    = 500_000 * 50    # 500000 = 1 ns. On a 4080TI, this takes around 2 minutes for a small system.

# Output frequency, in steps.
reporting_interval  = 5_000           # How frequently do you want to save frames?
checkpoint_interval = 500_000         # How frequently do you want to save checkpoints? (This is for restarting failed or finished simulations).

# Skipping and relaunching simulations.
only_production = False               # If True, will skip equilibration and minimization.

In [None]:
### Gather every argument for the simulation script and save it as JSON.
import datetime
import json

from easyMD.utils import write_sim_parameters
#from easyMD import __version__ as easyMD_version
easyMD_version = "0.1.0"

# Who ran what, and when.
metadata_section = {
    "author":           author,
    "date":             datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "experiment_name":  experiment_name,
    "description":      description,
    "version":          easyMD_version,
}

# Input and output file names are stored alongside the absolute
# experiment directory.
input_paths = {
    "input_dir":            "inputs/processed",
    "input_pdb_file":       processed_pdb_path.name,
    "input_ligand_files":   [f'ligands/{ligand_resname}.sdf' for ligand_resname in ligand_resnames],
}
output_paths = {
    "output_dir": "simulations",
    "output_minim_file":    f'{experiment_name}_minim.pdb',
    "output_eq_file":       f'{experiment_name}_equil.dcd',
    "output_prod_file":     f'{experiment_name}_prod.dcd',
    "output_checkpoint_file": f'{experiment_name}_prod.chk',
}
paths_section = {
    "experiment_dir":   str(current_experiment_dir.resolve()),
    "inputs":           input_paths,
    "outputs":          output_paths,
}

# Settings chosen in the SIM PARAMETERS cell.
simulation_section = {
    "minimization_steps":   minimization_steps,
    "equilibration_steps":  equilibration_steps,
    "production_steps":     production_steps,
    "reporting_interval":   reporting_interval,
    "checkpoint_interval":  checkpoint_interval,
    "forcefield_files":     forcefield_files,
    "step_size":            step_size,
    "temperature":          temperature,
    "only_production":      only_production,
}

parameters = {
    "metadata":   metadata_section,
    "paths":      paths_section,
    "simulation": simulation_section,
}

# Save next to the experiment's inputs/ and simulations/ folders.
parameters_file = current_experiment_dir / 'parameters.json'
parameters_file.write_text(json.dumps(parameters, indent=4))

### OPTION 1) Run Locally
This will run in the notebook. If you stop the notebook or kill your connection, the simulation will stop.

In [None]:
# from easyMD.utils import run_sim_local
# run_sim_local(parameters_file)

### OPTION 2) Submit it to a queue on Wynton

In [None]:
# Submit the simulation described by `parameters_file` to the Wynton queue.
from easyMD.utils import run_sim_wynton

# NOTE(review): presumably `max_runtime` is the per-job wall-clock limit and
# `repeat` the number of chained submissions - confirm against run_sim_wynton.
run_sim_wynton(
    apptainer_path,
    parameters_file,
    max_runtime='0:30:00',
    repeat=5,
)

# View Trajectory

Already ran a sim, and just want to do the analysis?

No need to run the rest of the notebook. Just specify the path to the params file for your current experiment:

In [None]:
# Parameters file of the experiment whose trajectory you want to view.
path_to_parameters_file = "./experiments/example_sim/parameters.json"

import json
import os
from pathlib import Path

from easyMD.utils import show_traj

with open(path_to_parameters_file, 'r') as f:
    parameters = json.load(f)

# Rebuild the output file locations from what the parameters file records.
output_paths        = parameters['paths']['outputs']
experiment_dir      = parameters['paths']['experiment_dir']
sim_dir             = os.path.join(experiment_dir, output_paths['output_dir'])
traj_topology_file  = os.path.join(sim_dir, output_paths['output_minim_file'])
traj_file           = os.path.join(sim_dir, output_paths['output_prod_file'])

# The minimized PDB is passed as the topology for the production trajectory.
show_traj(traj_topology_file, traj_file, size=(600, 600))