# Cascade: serial prototype implementation

Here we use some of the classes we've written to create a serial prototype run of Cascade.

This is the minimum viable run, intended to inform upcoming design decisions before distributed runs.

No science is done here. 

In [1]:
from glob import glob
from pathlib import Path
from dataclasses import dataclass, field


import ase
from ase.io import read, write
from ase.io.trajectory import Trajectory, TrajectoryWriter
from ase import units
from ase.md import MDLogger, VelocityVerlet
import numpy as np
from mace.calculators import mace_mp


from cascade.utils import canonicalize, apply_calculator
from cascade.auditor import RandomAuditor
from cascade.learning.torchani import TorchANI
from cascade.learning.torchani.build import make_output_nets, make_aev_computer

  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))
  from torch.distributed.optim import ZeroRedundancyOptimizer


## Read in structure
We'll run these simulations on a 2x2x2 Si supercell with a single vacancy.

In [2]:
# Load the starting structure from a VASP-format file (path is relative, so it
# depends on the directory the notebook is run from)
atoms = read('../0_setup/initial-geometries/si-vacancy-2x2x2.vasp')

## Set up calculator

We'll use a small MACE model as our *target*.   
That is to say, MACE is our ground truth physics.   
(We want something fast for this prototype.)

In [3]:
# NOTE(review): `device` is assigned here but is not passed to `mace_mp` below,
# and the MD loop later in this notebook passes device='cpu' to the learner
# explicitly -- confirm whether this variable is still needed.
device = 'cuda:0'
# Small MACE model: the "ground truth" calculator used when audit calculations are run
calc = mace_mp('small')

Using Materials Project MACE for MACECalculator with /home/mike/.cache/mace/20231210mace128L0_energy_epoch249model
Using float32 for MACECalculator, which is faster but less accurate. Recommended for MD. Use float64 for geometry optimization.
Default dtype float32 does not match model dtype float64, converting models to float32.


  return torch._C._cuda_getDeviceCount() > 0
  torch.load(f=model_path, map_location=device)


## Set up learner

We'll fit two ANI models to MACE

In [4]:
# Learner object; used later via `learner.make_calculator(model, ...)` to build
# the ASE calculator that drives the ML dynamics
learner = TorchANI()

In [5]:
# Unique chemical symbols in the structure. Sorted so the species order (and
# therefore anything built from it) is the same on every kernel restart:
# iterating a bare set of strings has no guaranteed order across Python processes.
species = sorted(set(atoms.symbols))
aev = make_aev_computer(species)

# The model is a 3-tuple: the AEV computer, the output networks built for these
# species, and a per-species dict initialised to 0.
# (presumably per-species reference energies -- TODO confirm against TorchANI learner)
model = aev, make_output_nets(species, aev), {s: 0. for s in species}

## Class for trajectories

In [6]:
class CascadeTrajectory:
    """A class to encapsulate a cascade trajectory stored in a single file

    This is useful for reading and auditing trajectories
    so we know where to start sampling from (e.g., after the last trusted timestep)

    Frame ``i`` of the file is treated as timestep ``i``: the slices used by
    :meth:`get_untrusted_segment` and :meth:`trim_untrusted_segment` index the
    file directly with ``last_trusted_timestep``.

    Args:
        path: Location of the trajectory file
        starting: Structure to start from. If given, it is written to ``path``
            with ``ase.io.write``. If omitted, the structure is loaded from
            ``path`` with ``ase.io.read``.

    Attributes:
        path: Location of the trajectory file
        starting: The starting structure (kept unmodified by this class)
        current: Working copy of the structure that dynamics should advance
        current_timestep: Timestep the trajectory has been advanced to
        last_trusted_timestep: Last timestep that has passed an audit
    """

    def __init__(self,
                 path: str,
                 starting: ase.Atoms = None):
        self.path = path
        self.starting = starting

        if self.starting is not None:
            write(self.path, self.starting)
        else:
            # NOTE(review): `read` is called without an index, so for a
            # multi-frame file this is whatever frame ASE returns by default
            # (the last one, per the ASE docs) -- confirm that is intended.
            self.starting = read(self.path)

        # Build `current` from `self.starting` (not the raw argument) so it is
        # also populated when the trajectory was loaded from disk, and copy it
        # so in-place dynamics on `current` do not overwrite `starting`.
        self.current = self.starting.copy()
        self.current_timestep = 0
        self.last_trusted_timestep = 0

    def read(self, index=':', *args, **kwargs) -> list[ase.Atoms]:
        """Read the trajectory into an iterable of atoms

        Args:
            index: Which frames to load; defaults to all of them
            *args, **kwargs: Forwarded to ``ase.io.read``
        Returns:
            The requested frames
        """
        return read(self.path, *args, index=index, **kwargs)

    def get_untrusted_segment(self) -> list[ase.Atoms]:
        """Return the part of the trajectory that needs to be audited

        Returns:
            Every frame after ``last_trusted_timestep``
        """
        return read(self.path, index=f'{self.last_trusted_timestep+1}:')

    def trim_untrusted_segment(self):
        """Remove the part of a trajectory that failed an audit, updating timesteps as appropriate

        The file is rewritten with only the frames up to and including
        ``last_trusted_timestep``, and both ``current_timestep`` and ``current``
        are rewound to that point so the next run resumes from trusted state.
        """
        # todo: is there a way to do this without loading into memory?
        trusted = read(self.path, index=f':{self.last_trusted_timestep+1}')
        write(self.path, trusted)
        self.current_timestep = self.last_trusted_timestep
        # Roll the in-memory structure back too; otherwise the next run would
        # continue from the end of the segment that was just discarded.
        if trusted:
            self.current = trusted[-1]

    def __repr__(self):
        return (f"CascadeTrajectory(path={self.path}, "
                f"current_timestep={self.current_timestep}, "
                f"last_trusted_timestep={self.last_trusted_timestep})")
        

### tests 

#### Todo: these should go in a test suite if we're keeping this. Also update the coords (or something similar) between frames so we can verify that the right frames are getting deleted.

In [7]:
# Write a two-frame trajectory (the structure and a copy of it) to test against
write('test.traj', [atoms, atoms.copy()])

In [8]:
# No `starting` structure is passed, so the class loads from the existing file
traj = CascadeTrajectory('test.traj')

In [9]:
# Read every frame (the default index is ':')
traj.read()

[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86]),
 Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]

In [10]:
# last_trusted_timestep is 0 on a fresh object, so this returns the frames from index 1 onward
traj.get_untrusted_segment()

[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]

In [11]:
# For comparison: ':1' is the slice trim_untrusted_segment keeps when last_trusted_timestep is 0
read('test.traj', index=':1')

[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]

In [12]:
# Rewrite the file, keeping only the frames up to the last trusted timestep
traj.trim_untrusted_segment()

In [13]:
# After trimming, only the first frame should remain in the file
traj.read()

[Atoms(symbols='Si63', pbc=True, cell=[10.86, 10.86, 10.86])]

## Minimum viable cascade loop

In [14]:

# create two cascade trajectories from the same starting point but with different seeds
seeds = [0, 1]
trajectories = [CascadeTrajectory(path=f'si-diffusion-seed={s}.traj', 
                                  starting=atoms.copy()) for s in seeds]
# notably, right now, the seeds have no effect since our dynamics are NVE


total_steps = 128    # how long will our final trajectories be
increment_steps = 64 # how many steps to run with ML at a time

# audits are random
auditor = RandomAuditor(random_state=42)
threshold = 0.5 # this is the 'score' threshold on the auditor

done = False
i = 0 # track while loop iterations
max_iter = 10 # dont go above this

# Each pass visits every trajectory once and does exactly one of:
#   - skip it, if it is trusted all the way to `total_steps`
#   - audit it, if it has been advanced past its last trusted timestep
#   - advance it with ML-driven dynamics otherwise
# The loop ends when every trajectory is complete or after `max_iter` passes.
while not done:
    
    done_ctr = 0 # count how many trajectories are done
    
    for j, traj in enumerate(trajectories):
        
        ## Check if this trajectory is done
        print(f'On traj {j+1}/{len(trajectories)}')
        # `>=` rather than `==` so a trajectory can never step past the
        # completion check and keep running until max_iter
        if traj.last_trusted_timestep >= total_steps: 
            done_ctr += 1
            print('Traj is completed, continuing')
            continue

        
        ## if we've advanced past a trusted segment, lets audit it
        if traj.current_timestep > traj.last_trusted_timestep: 
            print('Auditing trajectory')
            segment = traj.get_untrusted_segment()
            score, audit_frames = auditor.audit(segment, n_audits=32)
            if score > threshold: 
                print(f'score > threshold ({score} > {threshold}), running audit calculations and dropping untrusted segment')
                # NOTE(review): the calculator is applied to the whole segment
                # and neither this result nor `audit_frames` is used afterwards
                # (no retraining yet in this prototype) -- confirm intent.
                segment = apply_calculator(calc, segment)
                traj.trim_untrusted_segment()
            else:
                print(f'score < threshold ({score} < {threshold}, marking recent segment as trusted')
                traj.last_trusted_timestep = traj.current_timestep

        
        # otherwise we can run the ML-driven dynamics 
        else:
            # then we run dynamics
            print('Running ML-driven dynamics')
            # never run past the requested trajectory length, even when
            # total_steps is not a multiple of increment_steps
            steps_to_run = min(increment_steps, total_steps - traj.current_timestep)
            traj.current.calc = learner.make_calculator(model, device='cpu')
            # Context manager closes (and flushes) the writer before the next
            # audit reads the same file back from disk
            with TrajectoryWriter(traj.path, mode='a') as traj_writer:
                dyn = VelocityVerlet(atoms=traj.current,
                                     timestep=1*units.fs,
                                     trajectory=traj_writer
                                    )
                dyn.run(steps_to_run)
            traj.current_timestep += steps_to_run
        print(traj)
        
    i += 1
    print(f'done {done_ctr} / {len(trajectories)}')
    done = done_ctr == len(trajectories) or i == max_iter

  self_energies = torch.tensor(self_energies, dtype=torch.double)


On traj 1/2
Running ML-driven dynamics
CascadeTrajectory(path=si-diffusion-seed=0.traj, current_timestep=64, last_trusted_timestep=0)
On traj 2/2
Running ML-driven dynamics
CascadeTrajectory(path=si-diffusion-seed=1.traj, current_timestep=64, last_trusted_timestep=0)
done 0 / 2
On traj 1/2
Auditing trajectory
score < threshold (0.3745401188473625 < 0.5, marking recent segment as trusted
CascadeTrajectory(path=si-diffusion-seed=0.traj, current_timestep=64, last_trusted_timestep=64)
On traj 2/2
Auditing trajectory
score < threshold (0.034388521115218396 < 0.5, marking recent segment as trusted
CascadeTrajectory(path=si-diffusion-seed=1.traj, current_timestep=64, last_trusted_timestep=64)
done 0 / 2
On traj 1/2
Running ML-driven dynamics
CascadeTrajectory(path=si-diffusion-seed=0.traj, current_timestep=128, last_trusted_timestep=64)
On traj 2/2
Running ML-driven dynamics
CascadeTrajectory(path=si-diffusion-seed=1.traj, current_timestep=128, last_trusted_timestep=64)
done 0 / 2
On traj 1/2

## did those complete? 

In [15]:
# Number of frames stored on disk for each trajectory
[len(t.read()) for t in trajectories]

[129, 129]

Seems done enough for now