In [2]:
import warnings

from m3gnet.models import Relaxer
from pymatgen.core import Lattice, Structure

for category in (UserWarning, DeprecationWarning):
    warnings.filterwarnings("ignore", category=category, module="tensorflow")

# Init a Mo structure with stretched lattice (DFT lattice constant ~ 3.168)
mo = Structure(Lattice.cubic(3.3), ["Mo", "Mo"], [[0., 0., 0.], [0.5, 0.5, 0.5]])

relaxer = Relaxer()  # This loads the default pre-trained model

relax_results = relaxer.relax(mo, verbose=True)

final_structure = relax_results['final_structure']
final_energy_per_atom = float(relax_results['trajectory'].energies[-1] / len(mo))

print(f"Relaxed lattice parameter is {final_structure.lattice.abc[0]:.3f} Å")
print(f"Final energy is {final_energy_per_atom:.3f} eV/atom")

      Step     Time          Energy         fmax
*Force-consistent energies used in optimization.
FIRE:    0 15:38:25      -21.330704*      10.2491
FIRE:    1 15:38:27      -20.743158*      20.4843
FIRE:    2 15:38:27      -21.694872*       2.4213
FIRE:    3 15:38:27      -21.208540*      11.5265
FIRE:    4 15:38:27      -21.353798*       9.9676
FIRE:    5 15:38:27      -21.562599*       6.5603
FIRE:    6 15:38:27      -21.702244*       2.0209
FIRE:    7 15:38:27      -21.696190*       2.3511
FIRE:    8 15:38:27      -21.697779*       2.2639
FIRE:    9 15:38:27      -21.700720*       2.0934
FIRE:   10 15:38:27      -21.704584*       1.8471
FIRE:   11 15:38:27      -21.708820*       1.5354
FIRE:   12 15:38:28      -21.712812*       1.1708
FIRE:   13 15:38:28      -21.716003*       0.7668
FIRE:   14 15:38:28      -21.717947*       0.3373
FIRE:   15 15:38:28      -21.718319*       0.1499
FIRE:   16 15:38:28      -21.718321*       0.1483
FIRE:   17 15:38:28      -21.718328*       0.1450
FI

# 官方实例

In [1]:
import pickle as pk
import pandas as pd
import pymatgen

print('loading the MPF dataset 2021')
with open('data/block_0.p', 'rb') as f:
    data = pk.load(f)

with open('data/block_1.p', 'rb') as f:
    data2 = pk.load(f)
print('MPF dataset 2021 loaded')
data.update(data2)
df = pd.DataFrame.from_dict(data)

loading the MPF dataset 2021
MPF dataset 2021 loaded


In [2]:
df.iloc[0,0]

[Structure Summary
 Lattice
     abc : 9.00667017659015 8.486714308993259 12.38991512058143
  angles : 94.92204563912044 90.03854066865074 89.99436712154997
  volume : 943.5558053703279
       A : -7.8013173 -4.50105389 0.00836265
       B : -2.60378959 4.49880692 -6.70860163
       C : 5.2036832 -9.01493757 -6.72031089
 PeriodicSite: Li (-7.1228, -5.0483, -4.9850) [0.9648, 0.4439, 0.2998]
 PeriodicSite: Li (-6.4482, -2.1787, -6.7526) [0.7405, 0.7572, 0.2498]
 PeriodicSite: Li (-4.5296, -3.9453, -8.4078) [0.6540, 0.7619, 0.4913]
 PeriodicSite: Li (-3.9974, -6.7751, -10.1176) [0.7599, 0.7576, 0.7502]
 PeriodicSite: Li (-3.8579, -1.4494, -5.0364) [0.4511, 0.5439, 0.2070]
 PeriodicSite: Li (-3.9884, -10.4772, -5.0036) [0.9650, 0.0437, 0.7022]
 PeriodicSite: Li (-3.4076, -9.3345, -11.7662) [0.8453, 0.7604, 0.9929]
 PeriodicSite: Li (-3.2379, -4.0407, -1.6500) [0.5356, 0.0437, 0.2026]
 PeriodicSite: Li (-1.8189, -4.8275, -11.9674) [0.4422, 0.9795, 0.8035]
 PeriodicSite: Li (3.2946, -8.7013,

In [5]:
import numpy as np

def get_id_train_val_test(
    total_size: int,
    split_seed: int = 42,
    train_ratio: float = 0.8,
    val_ratio: float = 0.1,
    test_ratio: float = 0.1,
    keep_data_order: bool = False
):
    """
    分割数据集索引为训练/验证/测试集
    
    参数:
        total_size: 总数据量
        split_seed: 随机种子
        train_ratio: 训练集比例
        val_ratio: 验证集比例 
        test_ratio: 测试集比例
        keep_data_order: 是否保持原始顺序
    
    返回:
        三个索引列表(train_ids, val_ids, test_ids)
    """
    assert train_ratio + val_ratio + test_ratio == 1
    
    indices = np.arange(total_size)
    if not keep_data_order:
        rng = np.random.RandomState(split_seed)
        rng.shuffle(indices)
    
    train_end = int(train_ratio * total_size)
    val_end = train_end + int(val_ratio * total_size)
    
    return (
        indices[:train_end].tolist(),
        indices[train_end:val_end].tolist(),
        indices[val_end:].tolist()
    )


In [6]:
id_train, id_val, id_test = get_id_train_val_test(
    total_size=len(data),
    split_seed=42,
    train_ratio=0.90,
    val_ratio=0.05,
    test_ratio=0.05,
    keep_data_order=False,
)

cnt = 0
for idx, item in df.items():
    # import pdb; pdb.set_trace()
    if cnt in id_train:
        for iid in range(len(item['energy'])):
            dataset_train.append({"atoms":item['structure'][iid], "energy":item['energy'][iid] / len(item['force'][iid]), "force": np.array(item['force'][iid])})
    elif cnt in id_val:
        for iid in range(len(item['energy'])):
            dataset_val.append({"atoms":item['structure'][iid], "energy":item['energy'][iid] / len(item['force'][iid]), "force": np.array(item['force'][iid])})
    elif cnt in id_test:
        for iid in range(len(item['energy'])):
            dataset_test.append({"atoms":item['structure'][iid], "energy":item['energy'][iid] / len(item['force'][iid]), "force": np.array(item['force'][iid])})
    cnt += 1

print('using %d samples to train, %d samples to evaluate, and %d samples to test'%(len(dataset_train), len(dataset_val), len(dataset_test)))

NameError: name 'dataset_train' is not defined

In [None]:
from m3gnet.models import M3GNet, Potential
from m3gnet.trainers import PotentialTrainer

import tensorflow as tf

m3gnet = M3GNet(is_intensive=False)
potential = Potential(model=m3gnet)

trainer = PotentialTrainer(
    potential=potential, optimizer=tf.keras.optimizers.Adam(1e-3)
)
    stresses,
    validation_graphs_or_structures=val_structures,
    val_energies=val_energies,
    val_forces=val_forces,
    val_stresses=val_stresses,
    epochs=100,
    fit_per_element_offset=True,
    save_checkpoint=False,
)

NameError: name 'structures' is not defined