# Run ML trajectories

We want to run a trajectory with the mace potential and see if the uncertainty spikes when we go off course

In [1]:
import numpy as np
from scipy.stats import pearsonr
from pathlib import Path
import os
from glob import glob
from collections import defaultdict

%matplotlib inline
from matplotlib import pyplot as plt
from pathlib import Path
from ase.io import read
from ase import units
from ase.md.npt import NPT
from tqdm.auto import tqdm

from mace.calculators.foundations_models import mace_mp
from mace.calculators import MACECalculator

  from .autonotebook import tqdm as notebook_tqdm


### Configuration

In [2]:
infer_device: str = 'cuda:0'
temperature = 298
steps: int = 512

### get ensemble members from directory

In [3]:
model_files = os.listdir('ensemble/')
model_files = [Path('ensemble')/f for f in model_files]

MACECalculator can take multiple models in a list

In [4]:
model_files

[PosixPath('ensemble/model_2.pt'),
 PosixPath('ensemble/model_3.pt'),
 PosixPath('ensemble/model_0.pt'),
 PosixPath('ensemble/model_1.pt')]

In [5]:
calculators = {
    'mace_small_pretrained': mace_mp('small', device=infer_device),
    'mace_small_finetuned': MACECalculator(model_paths=model_files, device=infer_device)
}

Using Materials Project MACE for MACECalculator with /home/mike/.cache/mace/46jrkm3v
Using float32 for MACECalculator, which is faster but less accurate. Recommended for MD. Use float64 for geometry optimization.
Default dtype float32 does not match model dtype float64, converting models to float32.
Running committee mace with 4 models
No dtype selected, switching to float32 to match model dtype.


## I wonder how this compares to the training trajectories? 

In [6]:
reference_trajectories = list(sorted(glob('../0_setup/md/**/md.traj')))
reference_trajectories

['../0_setup/md/packmol-CH4-in-H2O=32-seed=0-blyp-npt=298/md.traj',
 '../0_setup/md/packmol-CH4-in-H2O=32-seed=1-blyp-npt=298/md.traj',
 '../0_setup/md/packmol-CH4-in-H2O=32-seed=2-blyp-npt=298/md.traj',
 '../0_setup/md/packmol-CH4-in-H2O=32-seed=3-blyp-npt=298/md.traj']

First run ML on the initial (training) geometries

In [7]:
initial_geometries = list(sorted(glob('../0_setup/md/**/initial_cfg.traj')))

In [8]:
initial_geometries

['../0_setup/md/packmol-CH4-in-H2O=32-seed=0-blyp-npt=298/initial_cfg.traj',
 '../0_setup/md/packmol-CH4-in-H2O=32-seed=1-blyp-npt=298/initial_cfg.traj',
 '../0_setup/md/packmol-CH4-in-H2O=32-seed=2-blyp-npt=298/initial_cfg.traj',
 '../0_setup/md/packmol-CH4-in-H2O=32-seed=3-blyp-npt=298/initial_cfg.traj']

In [27]:
for initial_geometry in tqdm(initial_geometries, 'geometries', total=4): 
    for calc_name, calc in calculators.items():
        name = f'{Path(initial_geometry).parent.name[:-5]}-npt={temperature}-calc={calc_name}'
        print(name)
        run_dir = Path('md') / name
        run_dir.mkdir(exist_ok=True, parents=True)
        traj_file = run_dir / 'md.traj'
        if traj_file.is_file() and traj_file.stat().st_size > 0:
            traj = read(str(traj_file), slice(None))
            start = len(traj)
            atoms = traj[-1]
            print('Loaded last structure')
        else:
            atoms = read(initial_geometry)
            start = 0
        atoms.calc = calc
        dyn = NPT(atoms,
          timestep=0.5 * units.fs,
          temperature_K=temperature,
          ttime=100 * units.fs,
          pfactor=0.01,
          externalstress=0,
          logfile=str(run_dir / 'md.log'),
          trajectory=str(traj_file),
          append_trajectory=False)
        dyn.run(512 - start)



packmol-CH4-in-H2O=32-seed=0-blyp-np-npt=298-calc=mace_small_pretrained




packmol-CH4-in-H2O=32-seed=0-blyp-np-npt=298-calc=mace_small_finetuned




packmol-CH4-in-H2O=32-seed=1-blyp-np-npt=298-calc=mace_small_pretrained




packmol-CH4-in-H2O=32-seed=1-blyp-np-npt=298-calc=mace_small_finetuned




packmol-CH4-in-H2O=32-seed=2-blyp-np-npt=298-calc=mace_small_pretrained




packmol-CH4-in-H2O=32-seed=2-blyp-np-npt=298-calc=mace_small_finetuned




packmol-CH4-in-H2O=32-seed=3-blyp-np-npt=298-calc=mace_small_pretrained




packmol-CH4-in-H2O=32-seed=3-blyp-np-npt=298-calc=mace_small_finetuned


geometries: 100%|██████████████████████████████████████████████| 4/4 [08:33<00:00, 128.40s/it]


## Compute individual model trajectories

In [29]:
for initial_geometry in tqdm(initial_geometries, 'geometries', total=4): 
    for model_ix in tqdm(range(4), 'ensemble members'):
        calc = MACECalculator(f'ensemble/model_{model_ix}.pt', device='cuda:0')
        name = f'{Path(initial_geometry).parent.name[:-5]}-npt={temperature}-calc=mace_small_finetuned-ensemble_ix={model_ix}'
        print(name)
        run_dir = Path('md') / name
        run_dir.mkdir(exist_ok=True, parents=True)
        traj_file = run_dir / 'md.traj'
        if traj_file.is_file() and traj_file.stat().st_size > 0:
            traj = read(str(traj_file), slice(None))
            start = len(traj)
            atoms = traj[-1]
            print('Loaded last structure')
        else:
            atoms = read(initial_geometry)
            start = 0
        start = 0
        atoms.calc = calc
        dyn = NPT(atoms,
          timestep=0.5 * units.fs,
          temperature_K=temperature,
          ttime=100 * units.fs,
          pfactor=0.01,
          externalstress=0,
          logfile=str(run_dir / 'md.log'),
          trajectory=str(traj_file),
          append_trajectory=False)
        dyn.run(512 - start)

geometries:   0%|                                                       | 0/4 [00:00<?, ?it/s]


No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=0-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=0





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=0-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=1





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=0-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=2





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=0-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=3



ensemble members: 100%|█████████████████████████████████████████| 4/4 [01:41<00:00, 25.38s/it][A
geometries:  25%|███████████▌                                  | 1/4 [01:41<05:04, 101.51s/it]


No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=1-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=0





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=1-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=1





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=1-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=2





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=1-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=3



ensemble members: 100%|█████████████████████████████████████████| 4/4 [01:49<00:00, 27.31s/it][A
geometries:  50%|███████████████████████                       | 2/4 [03:30<03:32, 106.06s/it]


No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=2-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=0





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=2-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=1





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=2-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=2





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=2-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=3



ensemble members: 100%|█████████████████████████████████████████| 4/4 [01:45<00:00, 26.41s/it][A
geometries:  75%|██████████████████████████████████▌           | 3/4 [05:16<01:45, 105.87s/it]


No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=3-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=0





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=3-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=1





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=3-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=2





No dtype selected, switching to float32 to match model dtype.
packmol-CH4-in-H2O=32-seed=3-blyp-np-npt=298-calc=mace_small_finetuned-ensemble_ix=3



ensemble members: 100%|█████████████████████████████████████████| 4/4 [01:43<00:00, 25.97s/it][A
geometries: 100%|██████████████████████████████████████████████| 4/4 [07:00<00:00, 105.07s/it]


## compute UQ

This is slowed down a lot by device IO. There's really no need to pass back and forth with GPU. Ideally we'd have a way of saving forces_comm during the generation of the trajectory, but I dont think we have that...

In [10]:
for initial_geometry in tqdm(initial_geometries, 'geometries', total=4): 
    name = f'{Path(initial_geometry).parent.name[:-5]}-npt={temperature}-calc=mace_small_finetuned'
    run_dir = Path('md') / name
    traj_file = run_dir / 'md.traj'
    traj = read(traj_file, index=':')
    forces_comm_traj = np.zeros((len(traj), 4, 101, 3))
    for t, atoms in tqdm(enumerate(traj), 'timestep'): 
        calc = calculators['mace_small_finetuned']
        calc.calculate(atoms)
        forces_comm = calc.get_property('forces_comm')
        forces_comm_traj[t] = forces_comm
    filename = run_dir / 'forces_comm.npz'
    np.savez(filename, forces_comm_traj)

geometries:   0%|                                                       | 0/4 [00:00<?, ?it/s]
timestep: 0it [00:00, ?it/s][A
timestep: 1it [00:01,  1.02s/it][A
timestep: 2it [00:01,  1.29it/s][A
timestep: 3it [00:03,  1.16s/it][A
timestep: 4it [00:03,  1.27it/s][A
timestep: 5it [00:03,  1.75it/s][A
timestep: 6it [00:03,  2.28it/s][A
timestep: 7it [00:04,  2.81it/s][A
timestep: 8it [00:04,  3.32it/s][A
timestep: 9it [00:04,  3.74it/s][A
timestep: 10it [00:04,  4.12it/s][A
timestep: 11it [00:04,  4.43it/s][A
timestep: 12it [00:04,  4.66it/s][A
timestep: 13it [00:05,  4.84it/s][A
timestep: 14it [00:05,  4.96it/s][A
timestep: 15it [00:05,  5.06it/s][A
timestep: 16it [00:05,  5.12it/s][A
timestep: 17it [00:05,  5.20it/s][A
timestep: 18it [00:06,  5.27it/s][A
timestep: 19it [00:06,  3.38it/s][A
timestep: 20it [00:07,  2.55it/s][A
timestep: 21it [00:07,  2.95it/s][A
timestep: 22it [00:07,  3.41it/s][A
timestep: 23it [00:07,  3.83it/s][A
timestep: 24it [00:08,  4.20it/