In [1]:
from load_atoms import load_dataset

In [24]:
# Load the dataset registered under the name "a-Si-24" via load_atoms.
dataset = load_dataset("a-Si-24")

Output()

In [25]:
# Echo the dataset to display its summary (structure/atom counts, species,
# and the per-atom / per-structure properties it carries).
dataset

a-Si-24:
    structures: 3,069
    atoms: 1,317,240
    species:
        Si: 100.00%
    properties:
        per atom: (forces)
        per structure: (energy, label)

In [84]:
# Pick the structure at index 4 for inspection.
# NOTE(review): `atoms` is rebound to dataset[100] in the next cell, and the
# execution counts (84 here, 119 there) show the cells were not run in
# document order — re-run top to bottom before trusting the outputs below.
atoms = dataset[4]

In [119]:
# Rebind `atoms` to the structure at index 100 and display its cell
# (the output shows three equal edge lengths).
atoms = dataset[100]
atoms.cell

Cell([10.911508582509317, 10.911508582509317, 10.911508582509317])

In [95]:
# Show only the first five rows of the position array.
# NOTE(review): this cell ran as In [95], i.e. before the cell above that
# binds dataset[100] (In [119]); the rows shown match the structure printed
# under In [77], so this output presumably belongs to an earlier `atoms`.
atoms.get_positions()[:5]

array([[ 1.64583 , 10.3498  ,  1.46839 ],
       [ 2.76714 ,  6.83164 ,  2.04278 ],
       [ 0.992991, 10.4694  ,  7.85486 ],
       [ 6.03678 ,  4.28703 ,  9.50163 ],
       [ 8.61332 ,  3.84427 ,  6.1171  ]])

In [115]:
# Inspect the per-structure metadata of structure 1000; the output shows the
# 'label' and 'energy' entries.
dataset[1000].info

{'label': '216_atoms/10^11/run_nvt_Si216_2.136_1/npt-anneal_293K_10ps',
 'energy': np.float64(-162.98268055555556)}

In [100]:
# Show the per-atom arrays stored on `atoms` (the output includes 'numbers'
# and 'positions').
# NOTE(review): this dumps the full arrays; a sliced view or a shape summary
# would keep the notebook output short.
atoms.arrays

{'numbers': array([14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14]),
 'positions': array([[ 1.64583 , 10.3498  ,  1.46839 ],
        [ 2.76714 ,  6.83164 ,  2.04278 ],
        [ 0.992991, 10.4694  ,  7.85486 ],
        [ 6.03678 ,  4.28703 ,  9.50163 ],
        [ 8.61332 ,  3.84427 ,  6.1171  ],
        [ 1.43138 ,  4.87014 ,  6.94505 ],
        [ 3.60108 , 10.4747  ,  0.109543],
        [ 9.99445 ,  5.42155 ,  7.07131 ],
        [11.1375  ,  1.61289 ,  8.25155 ],
        [ 1.0278  ,  8.02161 ,  1.0551  ],
        [ 8.42094 ,  9.16416 ,  9.54176 ],
        [ 2.58028 ,  7.15901 ,  4.45832 ],
        [ 5.59277 ,  1.81509 ,  6.65677 ],
        [10.733   ,  0.713701,  1.12185 ],
        [ 5.76248 ,  6.52817 , 10.25    ],
        [ 4.34601 ,  8.43895 ,  5.34964 ]

In [4]:
from __future__ import annotations

from pathlib import Path
from typing import Iterator

import ase
import ase.io
from ase import Atoms
from load_atoms.database.backend import BaseImporter
from load_atoms.database.internet import FileDownload
from load_atoms.progress import Progress

# Maps the atom count used in each source file name ("<n>-atoms.xyz") to the
# expected hash handed to FileDownload for that file. The keys also drive the
# order in which files are downloaded and parsed.
# NOTE(review): the values look like truncated hex digests — confirm the
# algorithm/length FileDownload expects before editing them.
_HASHES = {
    64: "25627b8c50d9",
    216: "da49808517c3",
    512: "654e2e1d1349",
    1000: "ae52f05f2231",
}


class Importer(BaseImporter):
    """Importer for the a-Si XYZ files hosted in ``lamr18/aSi-data``.

    One ``<n>-atoms.xyz`` file is downloaded per entry in ``_HASHES`` and
    every frame in each file is yielded as an ``ase.Atoms`` object.
    """

    @classmethod
    def files_to_download(cls) -> list[FileDownload]:
        """Describe the remote files this importer needs.

        Returns:
            One ``FileDownload`` per entry in ``_HASHES``, pairing the URL of
            ``<n>-atoms.xyz`` with its expected hash.
        """
        base_url = (
            "https://github.com/lamr18/aSi-data/raw/refs/heads/main/data/xyz/"
        )

        # Named ``expected_hash`` rather than ``hash`` so the builtin is not
        # shadowed inside the comprehension.
        return [
            FileDownload(
                url=f"{base_url}{n_atoms}-atoms.xyz",
                expected_hash=expected_hash,
            )
            for n_atoms, expected_hash in _HASHES.items()
        ]

    @classmethod
    def get_structures(
        cls, tmp_dir: Path, progress: Progress
    ) -> Iterator[Atoms]:
        """Yield every structure from the downloaded XYZ files.

        Args:
            tmp_dir: Directory expected to contain the ``<n>-atoms.xyz``
                files (presumably where the downloads were saved — confirm
                against BaseImporter).
            progress: Progress reporter; one task is created and advanced
                once per fully parsed file.

        Yields:
            Each frame of each file, with the ``cell_origin`` and
            ``config_type`` entries removed from ``atoms.info``.
        """
        with progress.new_task("Parsing files", total=len(_HASHES)) as task:
            for n_atoms in _HASHES:
                path = tmp_dir / f"{n_atoms}-atoms.xyz"
                for atoms in ase.io.iread(path, index=":"):
                    # Drop metadata that should not be exposed. ``pop`` with
                    # a default tolerates frames that lack a key, where
                    # ``del`` would raise KeyError and abort the import.
                    for key in ("cell_origin", "config_type"):
                        atoms.info.pop(key, None)
                    yield atoms

                task.update(advance=1)

In [72]:
# Echo `atoms` for its one-line repr (symbols, pbc, cell, attached arrays).
# NOTE(review): run as In [72], earlier than the cells above; the cell edge
# shown differs from the dataset[100] output, so `atoms` was a different
# structure at that point.
atoms

Atoms(symbols='Si64', pbc=True, cell=[10.990703296117903, 10.990703296117903, 10.990703296117903], forces=...)

In [77]:
# Show the per-atom arrays stored on `atoms`.
# NOTE(review): the output is identical to the `atoms.arrays` cell under
# In [100]; one of the two is a candidate for removal.
atoms.arrays

{'numbers': array([14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14]),
 'positions': array([[ 1.64583 , 10.3498  ,  1.46839 ],
        [ 2.76714 ,  6.83164 ,  2.04278 ],
        [ 0.992991, 10.4694  ,  7.85486 ],
        [ 6.03678 ,  4.28703 ,  9.50163 ],
        [ 8.61332 ,  3.84427 ,  6.1171  ],
        [ 1.43138 ,  4.87014 ,  6.94505 ],
        [ 3.60108 , 10.4747  ,  0.109543],
        [ 9.99445 ,  5.42155 ,  7.07131 ],
        [11.1375  ,  1.61289 ,  8.25155 ],
        [ 1.0278  ,  8.02161 ,  1.0551  ],
        [ 8.42094 ,  9.16416 ,  9.54176 ],
        [ 2.58028 ,  7.15901 ,  4.45832 ],
        [ 5.59277 ,  1.81509 ,  6.65677 ],
        [10.733   ,  0.713701,  1.12185 ],
        [ 5.76248 ,  6.52817 , 10.25    ],
        [ 4.34601 ,  8.43895 ,  5.34964 ]

In [82]:
# Echo `atoms` again.
# NOTE(review): same expression and same output as the In [72] cell —
# likely leftover scratch work.
atoms

Atoms(symbols='Si64', pbc=True, cell=[10.990703296117903, 10.990703296117903, 10.990703296117903], forces=...)