# Tutorial 2 - Generating a dataset for our MLFF 

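In this tutorial we will generate a short ab initio molecular dynamics (AIMD) trajectory of a single methanol (MeOH) molecule and use it as training data for a machine-learned force field (MLFF). The goal is to find out which kinds of algorithms work on this simple test case.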

## Installation and setup of the environment 

```
conda create -n mlfftutorial2 psi4 -c conda-forge/label/libint_dev -c conda-forge
conda activate mlfftutorial2
conda install -c conda-forge ase openmm openmmforcefields
pip install mond
```

In [None]:
import numpy as np 
from amarium import check_make_dir

from ase.md.velocitydistribution import MaxwellBoltzmannDistribution, Stationary

from tqdm import tqdm

from mond.molecule import Molecule, MoleculeCollection
from mond.utils import create_molecule_from_smiles, random_pose
from mond.simulation.aimd import init_traj, append_to_traj, initialize_velocities, get_energy_forces_psi4, velocity_verlet_step
from mond.utils import get_atomic_masses

# Edge lengths (x, y, z) of the box the molecule is placed in, in angstrom.
bounding_box = [10, 10, 10]

# "CO" is the SMILES string for methanol (hydrogens are implicit), i.e. the
# MeOH test system of this tutorial.
mol_smiles = "CO"
molec = create_molecule_from_smiles(mol_smiles)

# Give the molecule a random pose. random_pose receives the box, so the new
# coordinates presumably lie inside it -- TODO confirm against mond.utils.
new_coords = random_pose(molec.coordinates, bounding_box)
molec.set_mol_conf_coordinates(new_coords)

# The energy/force and integrator helpers below take a collection, so wrap
# the single molecule in one.
coll = MoleculeCollection([molec])

# Psi4 settings; the same values are passed to every energy/force evaluation
# in this cell.
memory = "22GB"
method = "PBE"
basis = "def2-SVP"
scf_type = "df"
maxiter = 300

# --- Molecular dynamics settings ---------------------------------------
traj_file = "methanol_aimd_traj.xyz"
box_lengths = np.array(bounding_box)
atom_symbols = coll.get_atom_symbols()
masses = get_atomic_masses(atom_symbols)
dt = 0.05  # integration time step in fs
temp_init = 298.5  # temperature used to draw the initial velocities (presumably kelvin)
temp_thermostat = 0  # NOTE(review): not read by anything visible in this cell
periodic_boundary = False
num_steps = 10000

# Starting geometry, printed so the random pose of this run is on record.
positions = coll.get_atom_coords_list()
print("Initial positions")
print(positions)

# Initial velocities for temp_init; remove_drift=True presumably removes the
# centre-of-mass motion -- TODO confirm against mond.simulation.aimd.
velocities = initialize_velocities(
    atom_masses=masses,
    temp=temp_init,
    remove_drift=True,
)

# Forces at the starting geometry: the first integrator step needs them.
# The accompanying energy (e_pot) is not used further in this cell.
new_forces, e_pot = get_energy_forces_psi4(
    coll=coll,
    positions=coll.get_atom_coords_list(),
    method=method,
    basis=basis,
    scf_type=scf_type,
    memory=memory,
    maxiter=maxiter,
)
# Initialize the trajectory file; one frame is appended to it per MD step.
init_traj(traj_file)

# Directory for the per-step .npz samples. The name is bound once so that the
# directory that gets created and the path that gets written cannot drift apart.
data_dir = "methanol_aimd_data"
check_make_dir(data_dir)

# Atomic numbers are stored with every sample. They are queried once here
# instead of on every iteration; this assumes the composition of `coll` does
# not change during the run.
atom_numbers = coll.get_atom_numbers()

# NOTE(review): samples are written only after each integrator step, so the
# starting geometry and the forces/energy evaluated for it before this loop
# are not part of the dataset.
for i in tqdm(range(num_steps), desc="Running AIMD"):
    # Advance the system by one time step. The step returns the forces at the
    # new positions, which are fed back in as `forces` on the next iteration.
    positions, velocities, new_forces, potential_energy = velocity_verlet_step(
        coll=coll,
        method_psi4=method,
        memory_psi4=memory,
        basis_psi4=basis,
        scf_type_psi4=scf_type,
        maxiter_psi4=maxiter,
        positions=positions,
        velocities=velocities,
        forces=new_forces,
        masses=masses,
        box_lengths=box_lengths,
        dt=dt,
        periodic_boundary=periodic_boundary,
    )

    # Human-readable trajectory frame.
    append_to_traj(
        traj_file=traj_file,
        positions=positions.tolist(),
        symbols=atom_symbols,
    )

    # One training sample per step: geometry, forces, atom types and energy.
    np.savez(
        f"{data_dir}/step_{i}.npz",
        positions=positions,
        forces=new_forces,
        atom_types=atom_numbers,
        e_pot=potential_energy,
    )