In [1]:
# Install or upgrade the Airavata Python SDK (with its "notebook" extra) and
# ipywidgets into the running kernel's environment.
# Restart the kernel afterwards if packages were updated.
%pip install -U "airavata-python-sdk[notebook]"
%pip install -U ipywidgets

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Importing the package loads the Airavata magics used in the rest of this cell.
import airavata_jupyter_magic

# Authenticate to access high-performance runtimes.
%authenticate
# Request a runtime named hpc_gpu using the settings in cybershuttle.yml,
# with a 60-minute walltime on NeuroData25VC2:cloud.
%request_runtime hpc_gpu --file=cybershuttle.yml --walltime=60 --use=NeuroData25VC2:cloud
# Block until the runtime is ready, then make it the active runtime so that
# all subsequent cells execute there.
%wait_for_runtime hpc_gpu
%switch_runtime hpc_gpu
# Optional (disabled): copy a local data archive onto the runtime.
# %copy_data -f source=local:examples/water-thermodynamics/data.zip target=hpc_gpu:data.zip


Loaded airavata_jupyter_magic (2.0.16) 
(current runtime = local)

  %authenticate                              -- Authenticate to access high-performance runtimes.
  %request_runtime <rt> [args]               -- Request a runtime named <rt> with configuration <args>.
                                                Call multiple times to request multiple runtimes.
  %restart_runtime <rt>                      -- Restart runtime <rt> if it hangs. This will clear all variables.
  %stop_runtime <rt>                         -- Stop runtime <rt> when no longer needed.
  %wait_for_runtime <rt>                     -- Wait for runtime <rt> to be ready.
  %switch_runtime <rt>                       -- Switch the active runtime to <rt>. All subsequent cells will run here.
  %%run_on <rt>                              -- Force a cell to always execute on <rt>, regardless of the active runtime.
  %stat_runtime <rt>                         -- Show the status of runtime <rt>.
  %copy_data source=<r1:f

Output()

Requesting runtime=hpc_gpu...
[NeuroData25VC2:cloud, 60 Minutes, 1 Node(s), 1 CPU(s), 0 GPU(s), 16000 MB RAM, 1024 MB VRAM]
* modules=[]
* libraries=['python=3.10', 'pip', 'pytorch', 'pytorch-lightning', 'ase', 'scikit-learn', 'torchmetrics', 'numpy', 'wandb', 'tensorboard', 'tensorboardX', 'tqdm', 'rich', 'mattersim', 'fairchem-core']
* pip=['git+https://github.com/cyber-shuttle/mattertune.git']
* mounts=[]
Requested runtime=hpc_gpu


Output()

Switched to runtime=hpc_gpu.


In [3]:
# Sanity check: list the GPU(s) visible from the runtime this cell executes on.
! nvidia-smi

[2K[32m⠸[0m Connecting to=hpc_gpu... status=CONNECTED
[1A[2KSat May  3 23:13:43 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.230.02             Driver Version: 535.230.02   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  GRID A100X-10C                 On  | 00000000:04:00.0 Off |                    0 |
| N/A   N/A    P0              N/A /  N/A |   6156MiB / 10240MiB |     77%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
   

In [None]:
# Download the inputs used by the cells below into the working directory:
# the training set, the validation set, the H2O structure used for the MD run,
# and the per-atom energy reference file used for energy normalization.
! wget https://raw.githubusercontent.com/cyber-shuttle/MatterTune/refs/heads/main/examples/water-thermodynamics/data/train_water_1000_eVAng.xyz -O train_water_1000_eVAng.xyz
! wget https://raw.githubusercontent.com/cyber-shuttle/MatterTune/refs/heads/main/examples/water-thermodynamics/data/val_water_1000_eVAng.xyz -O val_water_1000_eVAng.xyz
! wget https://raw.githubusercontent.com/cyber-shuttle/MatterTune/refs/heads/main/examples/water-thermodynamics/data/H2O.xyz -O H2O.xyz
! wget https://raw.githubusercontent.com/cyber-shuttle/MatterTune/refs/heads/main/examples/water-thermodynamics/data/water_1000_eVAng-energy_reference.json -O water_1000_eVAng-energy_reference.json

In [None]:
from __future__ import annotations

import logging
from pathlib import Path
import rich
import nshutils as nu

from ase.io import read
from ase import Atoms
import numpy as np
import torch
from tqdm import tqdm

import mattertune.configs as MC
from mattertune import MatterTuner
from mattertune.backbones import (
    MatterSimM3GNetBackboneModule,
    JMPBackboneModule,
    ORBBackboneModule,
    EqV2BackboneModule,
)

In [None]:
def fine_tune(args_dict: dict):
    """
    Fine-tune a pre-trained atomistic foundation model using MatterTuner.

    Args:
        args_dict: Dictionary of hyperparameters and settings. Keys read here:
            - "model_type": "mattersim-1m", "jmp-s", "eqv2", or any name
              containing "orb" (used verbatim as the ORB pretrained model name).
            - "conservative": whether forces are configured as conservative;
              always enabled when "mattersim" appears in "model_type".
            - "lr": learning rate passed to the AdamW optimizer config.
            - "batch_size": batch size assigned to the data module config.
            - "max_epochs": maximum number of training epochs.
            - "devices": value assigned to the trainer's ``devices`` setting.
            - "eqv2_checkpoint_path" (optional): EqV2 checkpoint file to load;
              defaults to the cluster path this notebook originally hard-coded.

    Raises:
        ValueError: If "model_type" matches none of the supported backbones.
    """
    model_type = args_dict["model_type"]
    # Metric monitored by the LR scheduler, early stopping and checkpointing.
    forces_metric = "val/forces_mae"

    def build_config():
        """Assemble and finalize the MatterTuner configuration for this run."""
        config = MC.MatterTunerConfig.draft()

        ## Choose Backbone Model type
        if model_type == "mattersim-1m":
            config.model = MC.MatterSimBackboneConfig.draft()
            config.model.graph_convertor = MC.MatterSimGraphConvertorConfig.draft()
            config.model.pretrained_model = "MatterSim-v1.0.0-1M"
        elif model_type == "jmp-s":
            config.model = MC.JMPBackboneConfig.draft()
            config.model.graph_computer = MC.JMPGraphComputerConfig.draft()
            config.model.graph_computer.pbc = True
            config.model.pretrained_model = "jmp-s"
        elif "orb" in model_type:
            config.model = MC.ORBBackboneConfig.draft()
            config.model.pretrained_model = model_type
        elif model_type == "eqv2":
            config.model = MC.EqV2BackboneConfig.draft()
            # The default is a site-specific absolute path; pass
            # "eqv2_checkpoint_path" in args_dict to run elsewhere.
            config.model.checkpoint_path = Path(
                args_dict.get(
                    "eqv2_checkpoint_path",
                    "/net/csefiles/coc-fung-cluster/nima/shared/checkpoints/eqV2_31M_mp.pt",
                )
            )
            config.model.atoms_to_graph = MC.FAIRChemAtomsToGraphSystemConfig.draft()
            config.model.atoms_to_graph.radius = 8.0
            config.model.atoms_to_graph.max_num_neighbors = 20
        else:
            raise ValueError(
                "Invalid model type, please choose from ['mattersim-1m', 'jmp-s', 'orb-v2', 'eqv2']"
            )
        config.model.reset_output_heads = True

        ## Set Hyperparameters of optimizer and scheduler
        config.model.optimizer = MC.AdamWConfig(
            lr=args_dict["lr"],
            amsgrad=False,
            betas=(0.9, 0.95),
            eps=1.0e-8,
            weight_decay=0.1,
        )
        config.model.lr_scheduler = MC.ReduceOnPlateauConfig(
            mode="min",
            monitor=forces_metric,
            factor=0.8,
            patience=5,
            min_lr=1e-8,
        )

        # Add model properties, these are the properties to be predicted by the model
        conservative = args_dict["conservative"] or "mattersim" in model_type
        config.model.properties = [
            MC.EnergyPropertyConfig(loss=MC.MSELossConfig(), loss_coefficient=1.0),
            MC.ForcesPropertyConfig(
                loss=MC.MSELossConfig(), conservative=conservative, loss_coefficient=1.0
            ),
        ]

        ## Set Data Module to load the dataset
        ## Only 30 data points are sampled from the original training set because that is already enough
        config.data = MC.ManualSplitDataModuleConfig.draft()
        config.data.train = MC.XYZDatasetConfig.draft()
        config.data.train.src = "./train_water_1000_eVAng.xyz"
        config.data.train.down_sample = 30
        # Although only 30 samples are used, they are repeated to reach the original dataset size
        config.data.train.down_sample_refill = True
        config.data.validation = MC.XYZDatasetConfig.draft()
        config.data.validation.src = "./val_water_1000_eVAng.xyz"
        config.data.batch_size = args_dict["batch_size"]

        ## Normalization usually helps with training stability and convergence
        ## The energy is first normalized by linear referencing and then divided by the number of atoms
        config.model.normalizers = {
            "energy": [
                MC.PerAtomReferencingNormalizerConfig(
                    per_atom_references=Path("./water_1000_eVAng-energy_reference.json")
                ),
                MC.PerAtomNormalizerConfig(),
            ]
        }

        ## Trainer Hyperparameters, used to configure number of epochs, devices, etc.
        config.trainer = MC.TrainerConfig.draft()
        config.trainer.max_epochs = args_dict["max_epochs"]
        config.trainer.accelerator = "gpu"
        config.trainer.devices = args_dict["devices"]
        config.trainer.gradient_clip_algorithm = "norm"
        config.trainer.gradient_clip_val = 1.0
        config.trainer.precision = "32"

        ## Configure EMA
        ## This must be set after the trainer draft above is created; setting it
        ## earlier is lost when `config.trainer` is replaced.
        config.trainer.ema = MC.EMAConfig(decay=0.99)

        ## Configure Early Stopping
        config.trainer.early_stopping = MC.EarlyStoppingConfig(
            monitor=forces_metric, patience=50, mode="min"
        )

        ## Configure Model Checkpoint
        config.trainer.checkpoint = MC.ModelCheckpointConfig(
            monitor=forces_metric,
            dirpath="./checkpoints",
            filename=f"{model_type}-best",
            save_top_k=1,
            mode="min",
        )

        ## Configure Logger
        config.trainer.loggers = [
            MC.TensorBoardLoggerConfig(
                save_dir="./logs",
                name=f"{model_type}-tune",
                version=0,
            ),
        ]

        ## Additional trainer settings
        ## Since mattersim models are conservative, inference_mode is set to False to enable differentiable energy prediction
        config.trainer.additional_trainer_kwargs = {
            "inference_mode": False,
        }

        return config.finalize(strict=False)

    mt_config = build_config()
    # tune() returns a (model, trainer) pair; neither is needed here because
    # later cells reload the best checkpoint configured above.
    MatterTuner(mt_config).tune()
    

In [None]:
def inference(args_dict: dict):
    """
    Evaluate the fine-tuned model on the validation dataset.

    Loads the best checkpoint for ``args_dict["model_type"]`` with the matching
    backbone module's ``load_from_checkpoint``, converts it to an ASE calculator
    with ``ase_calculator``, and prints the mean absolute error of the per-atom
    energies and of the forces over ``./val_water_1000_eVAng.xyz``.

    Args:
        args_dict: Dictionary with at least:
            - "model_type": selects the backbone class by substring
              ("mattersim", "jmp", "orb", "eqv2") and the checkpoint file
              ``./checkpoints/<model_type>-best.ckpt``.
            - "devices": sequence whose first entry is used as the CUDA
              device index for the calculator.

    Raises:
        ValueError: If "model_type" matches none of the supported backbones.
    """
    model_type = args_dict["model_type"]
    ckpt_path = f"./checkpoints/{model_type}-best.ckpt"
    if "mattersim" in model_type:
        ft_model = MatterSimM3GNetBackboneModule.load_from_checkpoint(ckpt_path)
    elif "jmp" in model_type:
        ft_model = JMPBackboneModule.load_from_checkpoint(ckpt_path)
    elif "orb" in model_type:
        ft_model = ORBBackboneModule.load_from_checkpoint(ckpt_path)
    elif "eqv2" in model_type:
        ft_model = EqV2BackboneModule.load_from_checkpoint(ckpt_path)
    else:
        raise ValueError(
            "Invalid model type, please choose from ['mattersim-1m', 'jmp-s', 'orb-v2', 'eqv2']"
        )

    val_atoms_list: list[Atoms] = read("./val_water_1000_eVAng.xyz", ":")  # type: ignore
    calc = ft_model.ase_calculator(device=f"cuda:{args_dict['devices'][0]}")

    energies_per_atom = []
    forces = []
    pred_energies_per_atom = []
    pred_forces = []
    for atoms in tqdm(val_atoms_list):
        # Reference values must be read BEFORE the fine-tuned calculator is
        # attached; afterwards the same getters return model predictions.
        energies_per_atom.append(atoms.get_potential_energy() / len(atoms))
        forces.extend(np.array(atoms.get_forces()).tolist())
        # Use the `calc` attribute rather than the deprecated set_calculator().
        atoms.calc = calc
        pred_energies_per_atom.append(atoms.get_potential_energy() / len(atoms))
        pred_forces.extend(np.array(atoms.get_forces()).tolist())

    mae = torch.nn.L1Loss()
    e_mae = mae(torch.tensor(energies_per_atom), torch.tensor(pred_energies_per_atom))
    f_mae = mae(torch.tensor(forces), torch.tensor(pred_forces))

    rich.print(f"Energy MAE: {e_mae} eV/atom")
    rich.print(f"Forces MAE: {f_mae} eV/Ang")

In [None]:
## We support various logging methods, including TensorBoard, WandB, and CSV files. 
## In this demo we used TensorBoard, so we can visualize the training process using TensorBoard.
## Below is a script to visualize the training loss using TensorBoard logs.

def visualize_tensorboard_logs(
    metric_name: str,
    unit: str,
    log_dir: str = "./logs/mattersim-1m-tune/version_0",
):
    """
    Plot one scalar metric from a TensorBoard event directory against the step.

    Args:
        metric_name: Scalar tag to plot, e.g. "val/forces_mae".
        unit: Unit string shown in the plot title.
        log_dir: Directory containing the TensorBoard event files. The default
            matches the logger configured in this notebook (save_dir="./logs",
            name="mattersim-1m-tune", version=0).

    Raises:
        KeyError: If ``metric_name`` is not among the scalar tags in ``log_dir``.
    """
    from tensorboard.backend.event_processing import event_accumulator
    import matplotlib.pyplot as plt

    # Read the event files
    ea = event_accumulator.EventAccumulator(log_dir)
    ea.Reload()

    # Show which scalar tags are available
    scalar_tags = ea.Tags()["scalars"]
    print(scalar_tags)

    if metric_name not in scalar_tags:
        raise KeyError(
            f"Scalar tag {metric_name!r} not found in {log_dir!r}; available tags: {scalar_tags}"
        )

    # Extract the curve for the requested metric
    scalar_events = ea.Scalars(metric_name)
    steps = [event.step for event in scalar_events]
    values = [event.value for event in scalar_events]

    # Plot
    fig, ax = plt.subplots()
    ax.plot(steps, values)
    ax.set_xlabel("Step")
    ax.set_ylabel(metric_name)
    ax.set_title(f"{metric_name} ({unit}) over steps")
    plt.show()

In [None]:
# Settings for a short demonstration run; the same dictionary is reused by the
# inference cell further down.
configs = dict(
    model_type="mattersim-1m",
    conservative=True,
    batch_size=2,
    max_epochs=5,
    lr=8e-5,
    devices=[0],
)
fine_tune(configs)

In [None]:
# Plot the validation force MAE logged during fine-tuning.
visualize_tensorboard_logs(metric_name="val/forces_mae", unit="eV/Ang")

In [None]:
# Evaluate the best checkpoint on the validation set with the same settings used for fine-tuning.
inference(configs)

In [None]:
## Furthermore, we can use the fine-tuned model to run Molecular Dynamics (MD) simulations.
from ase.md.langevin import Langevin
import ase.units as units


# MD settings, named once so the thermostat target and the progress display
# cannot drift apart.
MD_TEMPERATURE_K = 600
MD_NUM_STEPS = 1000

atoms: Atoms = read("./H2O.xyz")  # type: ignore
atoms.pbc = True
ft_model = MatterSimM3GNetBackboneModule.load_from_checkpoint("./checkpoints/mattersim-1m-best.ckpt")
calc = ft_model.ase_calculator()
atoms.calc = calc

dyn = Langevin(
    atoms,
    temperature_K=MD_TEMPERATURE_K,
    timestep=0.5 * units.fs,
    friction=0.02,
)

# The context manager closes the progress bar even if a step raises.
with tqdm(total=MD_NUM_STEPS, desc="MD Simulation Steps") as pbar:
    for _ in range(MD_NUM_STEPS):
        dyn.step()
        temp = atoms.get_temperature()
        pbar.set_description(
            f"MD Simulation Steps (Temperature: {temp:.2f} K/{MD_TEMPERATURE_K:.2f} K)"
        )
        pbar.update(1)