In [1]:
import tqdm

from atomic_datasets import QM9, tmQM, GEOMDrugs, MiniproteinsAlphaCarbons

In [2]:
# Choose exactly one dataset to load by leaving a single assignment
# uncommented. Previously tmQM was constructed and then immediately
# overwritten by MiniproteinsAlphaCarbons, so that load was discarded.

# dataset = QM9(
#     root_dir="data/qm9",
#     check_with_rdkit=False,
#     start_index=0,
#     end_index=1000,
# )

# dataset = tmQM(
#     root_dir="data/tmqm",
# )

dataset = MiniproteinsAlphaCarbons(
    root_dir="data/miniproteins_alpha_carbon",
    start_index=0,
    end_index=1000,
)

# dataset = GEOMDrugs(
#     root_dir="data/geom_drugs",
# )

In [None]:
# Preview only the first few graphs. Printing every entry dumps the full
# position array of each graph and floods the notebook output.
NUM_PREVIEW_GRAPHS = 3
for index, graph in enumerate(dataset):
    if index >= NUM_PREVIEW_GRAPHS:
        break
    print(graph)

{'nodes': {'positions': array([[-1.1380e+00,  8.4500e-01,  6.6720e+00],
       [ 2.2410e+00,  2.4260e+00,  5.9460e+00],
       [ 5.1970e+00,  7.9900e-01,  4.1930e+00],
       [ 8.8320e+00,  1.5560e+00,  5.0020e+00],
       [ 1.1511e+01,  1.0130e+00,  2.3590e+00],
       [ 1.5254e+01,  1.1390e+00,  3.0050e+00],
       [ 1.7528e+01,  1.6850e+00,  7.0000e-03],
       [ 2.0855e+01,  1.2300e-01,  9.6700e-01],
       [ 2.4111e+01, -5.3600e-01, -8.8400e-01],
       [ 2.3085e+01, -3.9780e+00, -2.1310e+00],
       [ 1.9949e+01, -4.7190e+00, -1.1600e-01],
       [ 1.6399e+01, -3.5570e+00,  5.6400e-01],
       [ 1.4542e+01, -3.6600e+00,  3.8790e+00],
       [ 1.0755e+01, -3.6490e+00,  4.2210e+00],
       [ 8.5680e+00, -2.8890e+00,  7.2340e+00],
       [ 4.8060e+00, -2.6050e+00,  7.7070e+00],
       [ 3.0480e+00, -4.9400e-01,  1.0336e+01],
       [-4.2100e-01,  8.7200e-01,  1.1078e+01],
       [ 4.6700e-01,  4.5320e+00,  1.1589e+01],
       [ 2.8400e+00,  6.8950e+00,  9.7920e+00],
       [ 4.0320e

Here, we see how to use `atomic_datasets` with PyTorch Geometric:

In [4]:
from typing import Optional, Callable

import torch.utils.data
import torch_geometric.data


class QM9PyG(torch.utils.data.Dataset):
    """QM9 dataset in PyTorch Geometric format.

    Wraps a ``QM9`` dataset (built with ``check_with_rdkit=False``) and
    converts each sample into a ``torch_geometric.data.Data`` object with
    ``pos`` and ``species`` attributes.

    Args:
        root_dir: Directory passed through to ``QM9`` as its first argument.
        transform: Optional callable applied to each ``Data`` object before
            it is returned from ``__getitem__``.
    """

    def __init__(self, root_dir, transform: Optional[Callable] = None):
        # Zero-argument super() resolves to this class. The previous
        # ``super(QM9, self)`` named the wrapped dataset class instead, which
        # raises TypeError because ``self`` is not a QM9 instance.
        super().__init__()
        self.dataset = QM9(root_dir, check_with_rdkit=False)
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a ``torch_geometric.data.Data`` object.

        The sample's ``["nodes"]["positions"]`` and ``["nodes"]["species"]``
        entries are converted to tensors and stored as ``pos`` and
        ``species``. If a transform was given, its result is returned.
        """
        sample = self.dataset[idx]
        sample = torch_geometric.data.Data(
            pos=torch.as_tensor(sample["nodes"]["positions"]),
            species=torch.as_tensor(sample["nodes"]["species"]),
        )
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

ModuleNotFoundError: No module named 'torch'