In [1]:
from build import dataset_from_config
from dptb.utils.config import Config

In [2]:
# Maps array names to field names (presumably "name stored in the npz file" ->
# "name used downstream" -- TODO confirm the direction against the dataset loader).
npz_key_mapping = {
    "pos": "pos",
    "atomic_numbers": "atomic_numbers",
    "kpoints": "kpoint",
    "pbc": "pbc",
    "cell": "cell",
    "eigenvalues": "eigenvalue",
}

# Raw options for the npz dataset; wrapped in a `Config` object below.
dataset_options = {
    "root": "/root/nequip_data/",
    "dataset": "npz",
    "dataset_file_name": "/root/nequip_data/Si8-100K.npz",
    "key_mapping": npz_key_mapping,
    "npz_fixed_field_keys": ["kpoint", "pbc"],
    "graph_field": ["eigenvalues"],
    "chemical_symbols": ["Si", "C"],
    "r_max": 6.0,
}

config = Config(config=dataset_options)
# dataset: npz                                                                       # type of data set, can be npz or ase
# dataset_url: http://quantum-machine.org/gdml/data/npz/toluene_ccsd_t.zip           # url to download the npz. optional
# dataset_file_name: ./benchmark_data/toluene_ccsd_t-train.npz                       # path to data set file
# key_mapping:
#   z: atomic_numbers                                                                # atomic species, integers
#   E: total_energy                                                                  # total potential energies to train to
#   F: forces                                                                        # atomic forces to train to
#   R: pos                                                                           # raw atomic positions
# npz_fixed_field_keys:                                                              # fields that are repeated across different examples
#   - atomic_numbers

# chemical_symbols:
#   - H
#   - C

In [3]:
from dptb.data.dataloader import DataLoader

# Build the dataset from the options in `config` that carry the "dataset" prefix.
dataset = dataset_from_config(config=config, prefix="dataset")

# Wrap the dataset in a loader (second positional argument is 3 -- presumably the
# batch size, TODO confirm) and keep the first batch as `data` for later cells.
dl = DataLoader(dataset, 3)
batch_iter = iter(dl)
data = next(batch_iter)

Processing dataset...


Done!


In [11]:

# Show the cell-shift vectors of every edge in the first dataset entry whose
# edge_index column is (1, 2). The entry is fetched once and the mask is built
# once, so the cell displays a single tensor.
first_entry = dataset[0]
edge_row0 = first_entry.edge_index[0]
edge_row1 = first_entry.edge_index[1]
first_entry.edge_cell_shift[edge_row0.eq(1) & edge_row1.eq(2)]

tensor([[ 1.,  1., -1.],
        [ 1.,  1.,  1.],
        [ 0.,  1., -1.],
        [ 0.,  1.,  1.],
        [ 1.,  0., -1.],
        [ 0.,  0., -1.],
        [ 1.,  0.,  1.],
        [ 0.,  0.,  1.],
        [ 0.,  1.,  0.],
        [ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 1.,  1.,  0.]])

In [4]:
# Boolean mask over the edges of the first dataset entry: True where both rows
# of edge_index hold the same index.
first_edge_index = dataset[0].edge_index
first_edge_index[0].eq(first_edge_index[1])

tensor([False, False, False, False, False, False, False, False, False, False,
         True, False, False, False, False, False, False,  True, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False,  True, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True, 

In [7]:
# Display the `bond_to_type` lookup held by the dataset's type mapper
# (the recorded output is a dict from bond labels such as 'C-Si' to integers).
dataset.type_mapper.bond_to_type

{'C-C': 0, 'C-Si': 1, 'Si-C': 2, 'Si-Si': 3}

In [4]:
from dptb.nn._sktb import SKTB

# Orbital labels listed per chemical symbol; passed as the `basis` argument.
sk_basis = {
    "Si": ["3s", "3p", "p*", "s*"],
    "C": ["2s", "2p"],
}

# SKTB module configured with the "uniform" onsite option, the "powerlaw"
# hopping option, and overlap switched on.
sktb = SKTB(basis=sk_basis, onsite="uniform", hopping="powerlaw", overlap=True)

In [5]:
from dptb.data.AtomicDataDict import with_edge_vectors, with_onsitenv_vectors

# Convert the batch with `to_dict()`, then pass it through `with_edge_vectors`
# and `with_onsitenv_vectors` in that order.
# NOTE(review): this rebinds `data` to the converted form, so re-running this
# cell alone presumably fails on `.to_dict()` -- re-run the loader cell first.
data = with_onsitenv_vectors(with_edge_vectors(data.to_dict()))

In [6]:
import torch

# Store the result of the type mapper's `untransform` on the "atom_types"
# entry under "atomic_numbers" (presumably type indices -> atomic numbers).
atom_types = data["atom_types"]
data["atomic_numbers"] = dataset.type_mapper.untransform(atom_types)

In [7]:
# Call the SKTB module on `data`; the returned value replaces `data`.
data = sktb(data)

In [11]:
# Display `edge_reduced_matrix_element` from the SKTB module's `idp` attribute
# (the recorded output is an integer).
sktb.idp.edge_reduced_matrix_element

20

In [12]:
# Display the shape of the "node_features" entry of `data` at this point in the notebook.
data["node_features"].shape

torch.Size([24, 4])

In [13]:
from dptb.nn._hamiltonian import SKHamiltonian

# Orbital labels listed per chemical symbol; passed as the `basis` argument.
skh_basis = {
    "Si": ["3s", "3p", "p*", "s*"],
    "C": ["2s", "2p"],
}
skh = SKHamiltonian(basis=skh_basis)

In [14]:
# Call the SKHamiltonian module on `data`; the returned value replaces `data`.
data = skh(data)

In [16]:
# Display the shape of the "node_features" entry of `data` at this point in the notebook.
data["node_features"].shape

torch.Size([24, 42])

In [17]:
from dptb.nn._hamiltonian import E3Hamiltonian

# Orbital labels listed per chemical symbol; passed as the `basis` argument.
e3_basis = {
    "Si": ["3s", "3p", "p*", "s*"],
    "C": ["2s", "2p"],
}
e3h = E3Hamiltonian(basis=e3_basis, decompose=True)

In [18]:
# Call the E3Hamiltonian module on `data`; the returned value replaces `data`.
data = e3h(data)

In [21]:
# Boolean mask over the first row of "edge_features": True where the absolute
# value exceeds 1e-5.
first_edge_features = data["edge_features"][0]
first_edge_features.abs().gt(1e-5)

tensor([ True,  True,  True,  True, False,  True, False, False,  True, False,
        False,  True, False, False,  True, False, False,  True, False, False,
         True, False, False,  True, False, False,  True, False,  True, False,
        False, False, False, False,  True, False, False,  True, False, False,
        False, False, False,  True, False, False,  True, False, False, False,
        False, False,  True, False, False,  True, False, False, False, False,
        False,  True, False, False])

In [57]:
from dptb.data.AtomicData import AtomicData
from dptb.utils.torch_geometric import Batch

# Try to build a Batch object from the processed `data` via `Batch.from_dict`.
# NOTE(review): the output recorded for this cell is a RuntimeError ("Cannot
# reconstruct data list from batch because the batch object was not created
# using `Batch.from_data_list()`."), so the cell does not run cleanly as
# saved -- TODO fix or remove before sharing.
# NOTE(review): `AtomicData` is imported above but not used in this cell.
bdata = Batch.from_dict(data)

RuntimeError: Cannot reconstruct data list from batch because the batch object was not created using `Batch.from_data_list()`.

In [8]:
from dptb.data.transforms import OrbitalMapper

# Compact basis strings listed per chemical symbol; passed as the `basis` argument.
compact_basis = {
    "Si": "2s2p1d",
    "C": "1s1p1d",
}
idp = OrbitalMapper(basis=compact_basis)

In [15]:
# Display the value returned by the orbital mapper's `get_node_maps()`
# (the recorded output is a dict from orbital-pair labels such as '1s-2p' to slices).
idp.get_node_maps()

{'1s-1s': slice(0, 1, None),
 '1s-2s': slice(1, 2, None),
 '1s-1p': slice(3, 6, None),
 '1s-2p': slice(6, 9, None),
 '1s-1d': slice(15, 20, None),
 '2s-2s': slice(2, 3, None),
 '2s-1p': slice(9, 12, None),
 '2s-2p': slice(12, 15, None),
 '2s-1d': slice(20, 25, None),
 '1p-1p': slice(25, 34, None),
 '1p-2p': slice(34, 43, None),
 '1p-1d': slice(52, 67, None),
 '2p-2p': slice(43, 52, None),
 '2p-1d': slice(67, 82, None),
 '1d-1d': slice(82, 107, None)}

In [16]:
# Display the orbital mapper's `node_maps` attribute
# (the recorded output is a dict from orbital-pair labels to slices).
idp.node_maps

{'1s-1s': slice(0, 1, None),
 '1s-2s': slice(1, 2, None),
 '1s-1p': slice(3, 6, None),
 '1s-2p': slice(6, 9, None),
 '1s-1d': slice(15, 20, None),
 '2s-2s': slice(2, 3, None),
 '2s-1p': slice(9, 12, None),
 '2s-2p': slice(12, 15, None),
 '2s-1d': slice(20, 25, None),
 '1p-1p': slice(25, 34, None),
 '1p-2p': slice(34, 43, None),
 '1p-1d': slice(52, 67, None),
 '2p-2p': slice(43, 52, None),
 '2p-1d': slice(67, 82, None),
 '1d-1d': slice(82, 107, None)}