In [1]:
import sys # required for relative imports in jupyter lab
sys.path.insert(0, '../') 

from scipy.sparse import coo_matrix

from dataset import QM9

from torch_geometric.datasets import QM9 as TQM9
from torch_geometric.data import Data





In [2]:
ds_params = {'train_params': {'n': 1000,
                              'features': ['atomic_numbers','n_atoms','A','B','C','mu',
                                           'alpha','homo','lumo','gap','r2','zpve','Cv',
                                           'mulliken','coulomb'],
                              'embeds': ['hybrid_types','atom_types','atomic_numbers','aromatic'],
                              'targets': ['U0'],
                              'pad': 29,
                              'do_not_pad': ['U0','n_atoms','A','B','C','mu',
                                           'alpha','homo','lumo','gap','r2','zpve','Cv'],
                              #'filter_on': ('n_atoms','>','18'),
                              'use_pickle': False,
                              'flatten': True,
                              'embed_lookup': {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4,
                                               'sp': 0, 'sp2': 1, 'sp3':2, 'na': 3,
                                               0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6,
                                               7: 7, 8: 8, 9: 9, 10: 10, '0': 0}}}

qm9 = QM9(**ds_params['train_params'])

creating QM9 dataset...
QM9 molecules scanned:  1
QM9 molecules created:  1
total uncharacterized molecules removed:  25
total QM9 molecules created:  975
CDataset created...


In [3]:
mol = qm9.ds[10]

In [4]:
tqm9 = TQM9(root='./data/qm9')

In [5]:
graph = tqm9[9]

In [6]:
coo = coo_matrix(mol.adjacency)
print(coo)
print(coo.row)
print(coo.col)
print(coo.data)

  (0, 1)	1
  (0, 3)	1
  (0, 4)	1
  (0, 5)	1
  (1, 0)	1
  (1, 2)	3
  (2, 1)	3
  (3, 0)	1
  (4, 0)	1
  (5, 0)	1
[0 0 0 0 1 1 2 3 4 5]
[1 3 4 5 0 2 1 0 0 0]
[1 1 1 1 1 3 3 1 1 1]


In [7]:
import numpy as np
z = [mol.atomic_n[a] for a in mol.atom_types]
z = np.asarray(z, 'int64')

In [8]:
import torch

In [9]:
x = torch.unsqueeze(torch.tensor(qm9[2][0]), 0)
z = torch.tensor(z)
y = torch.tensor(qm9[2][2])
xyz = torch.tensor(mol.xyz)
edge_index = torch.tensor([coo.row, coo.col]).contiguous()



In [10]:
data = Data(x=x, z=z, y=y, pos=xyz, edge_index=edge_index)
print(data)

Data(x=[1, 911], edge_index=[2, 10], y=[1], pos=[6, 3], z=[6])


In [11]:
print('x', x.shape)
print(type(x))
print('z', z.shape)
print(z.dtype)
print('y', y.shape)
print('xyz', xyz.shape)
print('edge_index', edge_index)

x torch.Size([1, 911])
<class 'torch.Tensor'>
z torch.Size([6])
torch.int64
y torch.Size([1])
xyz torch.Size([6, 3])
edge_index tensor([[0, 0, 0, 0, 1, 1, 2, 3, 4, 5],
        [1, 3, 4, 5, 0, 2, 1, 0, 0, 0]], dtype=torch.int32)


In [12]:
data

Data(x=[1, 911], edge_index=[2, 10], y=[1], pos=[6, 3], z=[6])

In [13]:
graph

Data(x=[6, 11], edge_index=[2, 10], edge_attr=[10, 4], y=[1, 19], pos=[6, 3], z=[6], name='gdb_10', idx=[1])

In [14]:
data.num_edges

10

In [15]:
data.num_node_features

911

In [16]:
graph.num_node_features

11

In [17]:
data.keys

['y', 'x', 'z', 'pos', 'edge_index']

In [18]:
graph.keys

['idx', 'edge_attr', 'name', 'y', 'x', 'z', 'pos', 'edge_index']

In [19]:
data.num_nodes

1

In [20]:
graph.num_nodes

6

In [21]:
graph.z

tensor([6, 6, 7, 1, 1, 1])