# Force field example

## low level interface

To show how the components of NFFLr work together, let's train a force field model (predicting energies and forces) using the `mlearn` dataset.
We can use the `PeriodicRadiusGraph` transform to configure the `AtomsDataset` to automatically transform atomic configurations into `DGLGraph`s.

In [5]:
import nfflr

# Graph-construction transform with a cutoff of 5.0.
# NOTE(review): the cutoff is presumably in Angstrom — confirm against the NFFLr docs.
transform = nfflr.nn.PeriodicRadiusGraph(cutoff=5.0)

# Load the "mlearn" dataset with energy-and-forces targets; `transform` is
# handed to the dataset so that each atomic configuration is converted to a graph.
dataset = nfflr.AtomsDataset(
    "mlearn", 
    target="energy_and_forces", 
    transform=transform,
)
# Indexing yields a (graph, targets) pair; the targets dict holds 'energy' and
# 'forces' (see the cell output).
dataset[0]

dataset_name='mlearn'
Obtaining mlearn dataset 1730...
Reference:https://github.com/materialsvirtuallab/mlearn
Loading the zipfile...
Loading completed.


(Graph(num_nodes=107, num_edges=11342,
       ndata_schemes={'coord': Scheme(shape=(3,), dtype=torch.float32), 'atomic_number': Scheme(shape=(), dtype=torch.int32)}
       edata_schemes={'r': Scheme(shape=(3,), dtype=torch.float32)}),
 {'energy': tensor(-64656.0625),
  'forces': tensor([[-1.9282e-01, -1.8793e+00, -6.6374e-01],
          [-8.2543e-03, -2.0313e-01,  3.6808e-01],
          [-5.5372e-01, -1.4736e+00,  1.2997e+00],
          [ 4.5678e-01,  5.1175e-01, -1.0934e+00],
          [-1.6499e+00, -1.6259e+00,  4.5255e-01],
          [-1.6698e-01,  6.8080e-01,  6.7749e-01],
          [ 3.6802e-02, -3.1423e+00, -2.0166e+00],
          [-1.0730e-01, -3.5780e-01,  1.1357e+00],
          [-1.9132e-01,  5.1381e-01,  3.4296e-01],
          [ 2.0090e+00,  1.5143e+00, -3.5578e-01],
          [-1.7128e-01, -2.7808e+00, -1.4215e+00],
          [-9.3987e-01, -1.6757e-02,  7.9322e-01],
          [ 3.7190e-01, -9.0627e-01, -5.2933e-01],
          [ 5.6458e-01, -9.6833e-01, -7.0043e-01],
        

Set up a medium-sized ALIGNN model:

In [12]:
from nfflr.models.gnn import alignn

# ALIGNN configuration; it reuses the same graph transform as the dataset.
# NOTE(review): XPLOR(4.5, 5.0) presumably defines a smooth cutoff that starts
# switching at 4.5 and vanishes at 5.0 (the graph cutoff) — confirm against the NFFLr docs.
cfg = nfflr.models.ALIGNNConfig(
    transform=transform,
    cutoff=nfflr.nn.XPLOR(4.5, 5.0),
    alignn_layers=1, 
    gcn_layers=2, 
    embedding_features=16,
    edge_input_features=16,
    triplet_input_features=16,
    hidden_features=32,
    norm="layernorm", 
    atom_features="embedding",
    # with compute_forces=True the model output contains 'forces' next to
    # 'total_energy' (see the cell output)
    compute_forces=True,
)
model = nfflr.models.ALIGNN(cfg)

# Evaluate the freshly initialized (untrained) model on the first sample.
atoms, target = dataset[0]
model(atoms)

  assert input.numel() == input.storage().size(), (


{'total_energy': tensor(0.5054, grad_fn=<SqueezeBackward0>),
 'forces': tensor([[-1.9327e-08, -2.0827e-08, -8.1269e-09],
         [ 4.9051e-09, -5.9333e-09, -1.5369e-09],
         [ 6.6050e-09,  2.4065e-09, -6.7521e-09],
         [-8.3214e-09,  2.7671e-09,  3.6863e-10],
         [ 7.0100e-09,  1.4210e-09, -2.9856e-09],
         [-1.0840e-08, -5.3169e-09,  1.0745e-08],
         [-1.7739e-09,  3.0995e-10, -5.1546e-10],
         [ 3.0281e-08,  8.8250e-09,  6.9771e-09],
         [-6.3300e-09,  1.7157e-08, -2.0408e-08],
         [-1.0554e-08,  2.5152e-08, -1.1257e-08],
         [ 4.7125e-10, -1.4849e-09, -4.9611e-10],
         [-1.2017e-10, -1.6895e-09, -6.7966e-10],
         [ 7.0346e-10, -2.4877e-09, -1.4374e-09],
         [-5.4707e-09, -7.9745e-09,  3.6146e-09],
         [-1.2301e-08,  9.5225e-09,  3.1206e-08],
         [ 1.9118e-09, -2.1600e-09, -2.0075e-09],
         [ 1.0441e-08, -1.1281e-08, -1.0463e-08],
         [-6.2744e-09, -2.1852e-08,  1.9808e-08],
         [ 6.9088e-10, -1.829

In [13]:
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader, SubsetRandomSampler

# Number of structures per batch.
batchsize = 2

# Draw batches in random order from the training split only; `dataset.collate`
# merges the individual samples into one batch, and `drop_last=True` discards a
# final batch that has fewer than `batchsize` samples.
train_loader = DataLoader(
    dataset,
    batch_size=batchsize, 
    collate_fn=dataset.collate, 
    sampler=SubsetRandomSampler(dataset.split["train"]),
    drop_last=True
)
# Peek at one batch: a batched graph plus a dict with 'total_energy',
# 'n_atoms' and 'forces' (see the cell output).
next(iter(train_loader))

(Graph(num_nodes=162, num_edges=14418,
       ndata_schemes={'coord': Scheme(shape=(3,), dtype=torch.float32), 'atomic_number': Scheme(shape=(), dtype=torch.int32)}
       edata_schemes={'r': Scheme(shape=(3,), dtype=torch.float32)}),
 {'total_energy': tensor([-66990.6328, -31633.8477]),
  'n_atoms': tensor([108,  54]),
  'forces': tensor([[ 7.4976e-01,  5.2112e-02, -4.4867e-01],
          [ 9.8866e-01,  1.5451e-01, -1.3315e+00],
          [-1.0298e-02,  6.0375e-02, -1.7932e-02],
          [-1.1754e+00, -5.7771e-01,  2.6522e-01],
          [-3.7542e-01,  1.9282e-01, -1.1827e-02],
          [ 6.3840e-01, -4.9131e-01, -2.1017e-01],
          [-2.5444e-01, -1.5675e-01, -1.9500e-02],
          [-6.2646e-02, -2.7115e-01,  3.8057e-01],
          [-6.9415e-02, -6.5498e-01,  6.0193e-01],
          [-5.1347e-01, -5.5116e-01, -1.1137e-01],
          [-2.1793e-01, -6.4830e-02,  4.3753e-02],
          [-3.7676e-02,  7.0439e-01, -5.6492e-01],
          [-9.4828e-01,  8.3889e-02, -5.9573e-01],
     

Now we can set up a PyTorch optimizer and objective function and optimize the model parameters with an explicit training loop. See the [PyTorch quickstart tutorial](https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html) for more context.

For force field training, we use a custom loss function since the output of the model is structured:

In [15]:
from tqdm import tqdm

# Per-target loss functions: squared error on energies, Huber loss on forces.
criteria = {"total_energy": nn.MSELoss(), "forces": nn.HuberLoss(delta=0.1)}

def ff_criterion(outputs, targets, *, force_scale=1.0):
    """Specify combined energy and force loss.

    Args:
        outputs: mapping of model predictions; must provide ``"total_energy"``
            and ``"forces"``.
        targets: mapping of reference values; must provide ``"total_energy"``,
            ``"forces"`` and ``"n_atoms"``.
        force_scale: weight applied to the force loss term before it is added
            to the energy loss. Defaults to 1.0 (equal weighting).

    Returns:
        Scalar tensor ``energy_loss + force_scale * force_loss``.
    """
    n_atoms = targets["n_atoms"]

    # compare energies per atom so the energy term does not grow with crystal size
    energy_loss = criteria["total_energy"](
        outputs["total_energy"] / n_atoms, targets["total_energy"] / n_atoms
    )

    # force_scale weights the force *loss*; the forces themselves are not rescaled
    force_loss = criteria["forces"](outputs["forces"], targets["forces"])

    return energy_loss + force_scale * force_loss

# AdamW over all model parameters.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.1)

# Scalar loss of every optimization step, in order.
training_loss = []
for epoch in range(5):
    # each batch from train_loader unpacks into (g, y): the model input and the
    # targets that ff_criterion compares the prediction against
    for step, (g, y) in enumerate(tqdm(train_loader)):
        pred = model(g)
        loss = ff_criterion(pred, y)
        loss.backward()
        optimizer.step()
        # reset gradients so they do not accumulate into the next step
        optimizer.zero_grad()
        
        training_loss.append(loss.item())

  assert input.numel() == input.storage().size(), (
  3%|▎         | 19/690 [00:37<22:18,  1.99s/it]


KeyboardInterrupt: 

## using the ignite-based NFFLr trainer

In [16]:
import tempfile
from nfflr import train

In [17]:
rank = 0

# Create the output directory explicitly. `tempfile.TemporaryDirectory().name`
# would drop the only reference to the TemporaryDirectory object right away, so
# the directory would be removed before the trainer could use the path.
# mkdtemp() leaves the directory in place, so anything written there can be
# inspected after training; remove it manually (e.g. shutil.rmtree) when done.
output_dir = tempfile.mkdtemp()

# Settings passed to the NFFLr trainer: the objects built in the cells above
# plus run options.
training_config = {
    "dataset": dataset,
    "model": model,
    "optimizer": optimizer,
    "criterion": ff_criterion,
    "random_seed": 42,
    "batch_size": 2,
    "learning_rate": 1e-3,
    "weight_decay": 0.1,
    "epochs": 5,
    "warmup_steps": 100,
    "num_workers": 0,
    "progress": True,
    "output_dir": output_dir,
}
train.run_train(rank, training_config)

2024-01-24 15:14:13,264 ignite.distributed.auto.auto_dataloader INFO: Use data loader kwargs for dataset '<nfflr.data.dataset.': 
	{'collate_fn': <function AtomsDataset.collate_forcefield at 0x2a3b2e4d0>, 'batch_size': 2, 'sampler': <torch.utils.data.sampler.SubsetRandomSampler object at 0x107f29510>, 'drop_last': True, 'num_workers': 0, 'pin_memory': False}
2024-01-24 15:14:13,264 ignite.distributed.auto.auto_dataloader INFO: Use data loader kwargs for dataset '<nfflr.data.dataset.': 
	{'collate_fn': <function AtomsDataset.collate_forcefield at 0x2a3b2e4d0>, 'batch_size': 2, 'sampler': <torch.utils.data.sampler.SubsetRandomSampler object at 0x2aadad4b0>, 'drop_last': True, 'num_workers': 0, 'pin_memory': False}
  assert input.numel() == input.storage().size(), (


starting training loop


[1/690]   0%|           [00:00<?]

ERROR:ignite.engine.engine.Engine:Engine run is terminating due to exception: 


KeyboardInterrupt: 