# Prepare a simulation sample 

To run this notebook, create a conda environment and install the dependencies:
```bash
conda create -n mlfftutorial -c conda-forge
conda activate mlfftutorial
conda install -c conda-forge openmm openmmforcefields nglview
pip install mond
```
A plain pip-only installation will be available soon; for now, the conda-based setup above has to suffice.

As a first step, we pack a simulation sample using a Monte-Carlo docking scheme that relies on the van der Waals radii of the atoms to avoid clashes.

In [1]:
## Packing a Simulation Sample 

import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem

from mond.utils import check_overlaps_vectorized, create_molecule_from_smiles, random_pose, load_rdmol_from_xyz, get_coordinates, get_vdw_radii
from mond.molecule.molecule import Molecule, MoleculeCollection
from copy import deepcopy
from mond.packing import pack_molecules_in_box, pack_molecules_to_existing_collection
from mond.utils import get_splitted_collection_from_joined_smiles_mol
from mond.openmm import simulate_sample

# --- Packing configuration ---------------------------------------------------
# Components of the mixture; molecules have to be specified as SMILES strings.
smiles_1 = "O"
smiles_2 = "CCO"
# Two entries, one per component above (presumably the selection
# probabilities used by the packer - confirm against mond.packing).
probs = [0.5, 0.5]

bounding_box = [10, 10, 10]  # box edge lengths in angstrom, passed to the packer
safety_distance = -0.3       # additional buffer in angstrom; can also be used to increase the density
max_molecules = 500          # any number works; chosen so that the packing terminates in reasonable time
max_tries = 100000           # considerable run-time factor

# Build one molecule per SMILES string, in the same order as `probs`.
molec1, molec2 = (
    create_molecule_from_smiles(smiles=component_smiles)
    for component_smiles in (smiles_1, smiles_2)
)
molecules_in_mixture = [molec1, molec2]

# Fill the box with molecules drawn from the mixture, using the settings
# defined above. The result is a collection object that can export itself.
coll = pack_molecules_in_box(
    molecules_in_mixture=molecules_in_mixture,
    probs=probs,
    bounding_box=bounding_box,
    safety_distance=safety_distance,
    max_molecules=max_molecules,
    max_tries=max_tries,
)

# Export the packed sample in both formats. Plain string literals are used
# because the names contain no placeholders. The "_iteration_1" SDF file is
# the one the first MD iteration further below reads as its input.
sdf_file = "packed_sample_iteration_1.sdf"
coll.save_to_sdf(sdf_file)
pdb_file = "packed_sample_iteration_1.pdb"
coll.save_to_pdb(pdb_file)

1 molecules added
2 molecules added
3 molecules added
4 molecules added
5 molecules added
6 molecules added
7 molecules added
8 molecules added
9 molecules added
10 molecules added
11 molecules added
12 molecules added
13 molecules added
14 molecules added
15 molecules added
16 molecules added
17 molecules added
18 molecules added
19 molecules added


## Making the packing more dense

To obtain a denser sample, we run an MD simulation with OpenMM for 5000 steps and then add further molecules using the MC algorithm based on atomic radii. These two steps are repeated until no more molecules can be added.

In [6]:
from mond.openmm import simulate_with_small_cutoff_LJPME
from mond.utils import get_splitted_collection_from_joined_smiles_mol
from mond.packing import pack_molecules_in_box, pack_molecules_to_existing_collection

# --- Box and cutoff ----------------------------------------------------------
# The box is given twice because the two stages use different length units
# (1 nm == 10 Angstrom).
bounding_box_simulation = [1, 1, 1]  # nm
bounding_box_packing = [10, 10, 10]  # Angstrom
cutoff = 0.4                         # nm

# --- MD run length and reporting ---------------------------------------------
time_step = 0.002                    # picoseconds
minimization_steps = 1000
simulation_steps = 5000
report_every_steps = 10
print_report_every_steps = 1000

# --- Temperatures (presumably kelvin - confirm against mond.openmm) ----------
temperature = 300
velocity_init_temperature = 300

# --- Force-field definition files --------------------------------------------
force_field = "amber/protein.ff14SB.xml"
water_model = "amber/tip3p_standard.xml"

# Alternate between an MD run and a Monte-Carlo insertion round until an
# insertion round adds no further molecule.
#
# Reads from the first code cell: Chem, get_coordinates, get_vdw_radii,
# molecules_in_mixture, probs, safety_distance, max_molecules, max_tries.
# It also expects "packed_sample_iteration_1.sdf" to exist on disk.
counter = 1
keep_adding = True
while keep_adding:
    print(f"=============Iteration {counter}=============")
    sdf_file = f"packed_sample_iteration_{counter}.sdf"
    out_file_state = f"simulated_iteration_{counter}.pdb"
    out_traj_name = f"simulation_step_{counter}"

    print("=============MD Simulation=============")
    simulate_with_small_cutoff_LJPME(
        sdf_file=sdf_file,
        out_file_state=out_file_state,
        out_traj_name=out_traj_name,
        box_vectors=bounding_box_simulation,
        simulation_steps=simulation_steps,
        report_every_steps=report_every_steps,
        cutoff=cutoff,
        print_report_every_steps=print_report_every_steps,
        temperature=temperature,
        velocity_init_temperature=velocity_init_temperature,
        timestep=time_step,
        minimization_steps=minimization_steps,
        force_field=force_field,
        water_model=water_model,
    )

    # Advance the counter before packing: the enlarged sample is stored under
    # the next iteration's name, which is what the next MD run will load.
    counter += 1

    # Read the simulated state back in and split it into a collection that
    # the packer can extend.
    mol = Chem.MolFromPDBFile(out_file_state, removeHs=False)
    coords = get_coordinates(mol)
    radii = get_vdw_radii(mol)
    splitted_coll = get_splitted_collection_from_joined_smiles_mol(mol, coords, radii)

    print("=============MC Simulation=============")
    coll, molecules_added = pack_molecules_to_existing_collection(
        coll=splitted_coll,
        molecules_in_mixture=molecules_in_mixture,
        probs=probs,
        # Use the packing box configured for this cell (Angstrom) instead of
        # relying on the `bounding_box` variable left over from the first cell.
        bounding_box=bounding_box_packing,
        safety_distance=safety_distance,
        max_molecules=max_molecules,
        max_tries=max_tries,
    )

    sdf_file = f"packed_sample_iteration_{counter}.sdf"
    coll.save_to_sdf(sdf_file)

    # Stop once an insertion round could not place a single new molecule.
    keep_adding = molecules_added != 0

/home/julian/miniforge3/envs/mlfftutorial/lib/python3.11/site-packages/openforcefields/offxml/openff-2.1.0.offxml
Minimizing energy...
Running simulation...
#"Step","Potential Energy (kJ/mole)","Temperature (K)","Speed (ns/day)"
1000,-663.6423753407976,278.4001854003637,0
2000,-654.8543755560917,348.431947239249,60.8
3000,-660.3324704203011,293.70239670573113,61.5
4000,-690.7929139729529,304.1942629923053,61.5
5000,-684.6985134268429,318.96359072259526,61.7
0 molecules added
1 molecules added
2 molecules added
3 molecules added
/home/julian/miniforge3/envs/mlfftutorial/lib/python3.11/site-packages/openforcefields/offxml/openff-2.1.0.offxml
Minimizing energy...
Running simulation...
#"Step","Potential Energy (kJ/mole)","Temperature (K)","Speed (ns/day)"
1000,-859.9706469289237,334.63058066820497,0
2000,-899.2848013376333,352.441931893325,60.3
3000,-888.0252835014778,281.2546300921627,62.2
4000,-912.2568479971087,271.579484595423,61.9
5000,-917.2325825483173,285.2087124092735,62
0 molecu

## Visualize Sample 

Let's visualize the simulation trajectory of our dense sample using `nglview`.

In [7]:
import nglview as nv
import mdtraj as md

# The DCD file holds the trajectory frames; the PDB file with the same stem
# supplies the topology.
# NOTE(review): the iteration index 3 is hard-coded - adjust it if your run
# finished after a different number of MD iterations.
trajectory_stem = "simulation_step_3"
traj = md.load(f"{trajectory_stem}.dcd", top=f"{trajectory_stem}.pdb")

# Leave the widget as the last expression so that the notebook renders it.
view = nv.show_mdtraj(traj)
view

NGLWidget(max_frame=499)