# The Basics

In [1]:
# `dataset` is the entry point used throughout this notebook to obtain datasets.
from load_atoms import dataset

# Passing a dataset name loads that named dataset; the license and citation
# notice shown in the output is printed as part of this call.
gap17 = dataset("C-GAP-17")

This dataset is covered by the CC BY-NC-SA 4.0 license.
Please cite this dataset if you use it in your work.
For more information, visit:
https://jla-gardner.github.io/load-atoms/datasets/C-GAP-17.html


In [2]:
# Evaluating the dataset on its own displays a summary: structure and atom
# counts, species composition, and the available per-atom / per-structure properties.
gap17

C-GAP-17:
    structures: 4,530
    atoms: 284,965
    species:
        C: 100.00%
    properties:
        per atom: (force)
        per structure: (detailed_ct, energy, config_type, split)

"`Datasets`" are just lists of `ase.Atoms` objects.

In [3]:
# Integer indexing returns a single structure (an `ase.Atoms` object).
gap17[0]

Atoms(symbols='C64', pbc=True, cell=[9.483921, 9.483921, 9.483921], force=..., calculator=SinglePointCalculator(...))

In [8]:
# Slicing returns a new dataset (not a plain list) holding only the selected structures.
gap17[:4]

Dataset:
    structures: 4
    atoms: 256
    species:
        C: 100.00%
    properties:
        per atom: (force)
        per structure: (detailed_ct, energy, config_type, split)

`load_atoms` also exposes some useful functions for working with datasets:

In [4]:
from load_atoms import filter_by

# Keyword form: `config_type` is one of the per-structure properties listed in
# the dataset summary; presumably this keeps the structures whose value equals
# "bulk_amo" (confirm against the `filter_by` documentation).
bulk_amo = filter_by(gap17, config_type="bulk_amo")
# Callable form: a predicate on a structure; `len(atoms)` is its number of atoms,
# so this selects structures with fewer than 64 atoms.
small = filter_by(gap17, lambda atoms: len(atoms) < 64)
# Show how many structures each filter selected.
len(bulk_amo), len(small)

(3410, 1434)

In [5]:
from load_atoms import cross_validate_split

# Take fold 0 of a 5-fold split; `seed` is fixed, presumably so that the same
# split is obtained on every run.
train, test = cross_validate_split(gap17, fold=0, folds=5, seed=42)
# With 5 folds the test set holds one fifth of the 4,530 structures (906),
# and the train set holds the remaining 3,624.
len(train), len(test)

(3624, 906)

In [6]:
# datasets can also be built from structures you create yourself:
from ase import Atoms

# one `Atoms` object per chemical formula
structures = [Atoms(formula) for formula in ("H2O", "NH3", "CH4")]

# passing a list of structures (rather than a name) wraps them in a dataset
small_molecules = dataset(structures)
small_molecules

Dataset:
    structures: 3
    atoms: 12
    species:
        H: 75.00%
        C: 8.33%
        O: 8.33%
        N: 8.33%
    properties:
        per atom: ()
        per structure: ()

In [7]:
# or load them from a path
from ase.io import write

# Save the `small_molecules` dataset to disk. NOTE: the path is relative, so this
# creates `small_molecules.traj` in the current working directory.
write("small_molecules.traj", small_molecules)

# Passing a file path (instead of a dataset name or a list of structures)
# loads the structures stored in that file.
dataset("small_molecules.traj")

Dataset:
    structures: 3
    atoms: 12
    species:
        H: 75.00%
        C: 8.33%
        O: 8.33%
        N: 8.33%
    properties:
        per atom: ()
        per structure: ()