In [1]:
import sys # required for relative imports in jupyter lab
sys.path.insert(0, '../') 

from scipy.sparse import coo_matrix

from dataset import QM9

from torch_geometric.datasets import QM9 as TQM9
from torch_geometric.data import Data





In [2]:
ds_params = {'train_params': {'n': 1000,
                              'features': ['atomic_numbers','n_atoms','A','B','C','mu',
                                           'alpha','homo','lumo','gap','r2','zpve','Cv',
                                           'mulliken','coulomb'],
                              'embeds': ['hybrid_types','atom_types','atomic_numbers','aromatic'],
                              'targets': ['A','B','C','mu','alpha','homo','lumo','gap','r2','zpve',
                                          'U0','U','H','G','Cv'],
                              'pad': None,
                              'do_not_pad': ['U0','n_atoms','A','B','C','mu',
                                           'alpha','homo','lumo','gap','r2','zpve','Cv'],
                              #'filter_on': ('n_atoms','>','18'),
                              'use_pickle': False,
                              'flatten': True,
                              'embed_lookup': {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4,
                                               'sp': 0, 'sp2': 1, 'sp3':2, 'na': 3,
                                               0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6,
                                               7: 7, 8: 8, 9: 9, 10: 10, '0': 0}}}

qm9 = QM9(**ds_params['train_params'])

creating QM9 dataset...
QM9 molecules scanned:  1
QM9 molecules created:  1
total uncharacterized molecules removed:  25
total QM9 molecules created:  975
CDataset created...


In [3]:
mol = qm9.ds[1]
mol

dsgdb9nsd_000001

In [4]:
coo = coo_matrix(mol.adjacency)
print(coo)
print(coo.row)
print(coo.col)
print(coo.data)

  (0, 1)	1
  (0, 2)	1
  (0, 3)	1
  (0, 4)	1
  (1, 0)	1
  (2, 0)	1
  (3, 0)	1
  (4, 0)	1
[0 0 0 0 1 2 3 4]
[1 2 3 4 0 0 0 0]
[1 1 1 1 1 1 1 1]


In [5]:
import numpy as np
z = [mol.atomic_n[a] for a in mol.atom_types]
z = np.asarray(z, 'int64')

In [6]:
import torch

In [7]:
x = torch.unsqueeze(torch.tensor(qm9[2][0]), 0)
z = torch.tensor(z)
y = torch.tensor(qm9[2][2])
xyz = torch.tensor(mol.xyz)
edge_index = torch.tensor([coo.row, coo.col]).contiguous()


In [8]:
data = Data(x=x, z=z, y=y, pos=xyz, edge_index=edge_index)
print(data)

Data(x=[1, 36], edge_index=[2, 8], y=[15], pos=[5, 3], z=[5])


In [9]:
print('x', x.shape)
print(type(x))
print('z', z.shape)
print(z.dtype)
print('y', y.shape)
print('xyz', xyz.shape)
print('edge_index', edge_index)

x torch.Size([1, 36])
<class 'torch.Tensor'>
z torch.Size([5])
torch.int64
y torch.Size([15])
xyz torch.Size([5, 3])
edge_index tensor([[0, 0, 0, 0, 1, 2, 3, 4],
        [1, 2, 3, 4, 0, 0, 0, 0]], dtype=torch.int32)


In [10]:
data

Data(x=[1, 36], edge_index=[2, 8], y=[15], pos=[5, 3], z=[5])

In [11]:
tqm9 = TQM9(root='./data/qm9')

In [12]:
graph = tqm9[0]

In [13]:
graph

Data(x=[5, 11], edge_index=[2, 8], edge_attr=[8, 4], y=[1, 19], pos=[5, 3], z=[5], name='gdb_1', idx=[1])

In [14]:
data.num_edges

8

In [15]:
data.num_node_features

36

In [16]:
graph.num_node_features

11

In [17]:
data.keys

['z', 'x', 'y', 'edge_index', 'pos']

In [18]:
graph.keys

['name', 'z', 'edge_attr', 'idx', 'x', 'y', 'edge_index', 'pos']

In [19]:
data.num_nodes

1

In [20]:
graph.num_nodes

5

In [21]:
graph.x

tensor([[0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

In [22]:
graph.edge_attr

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.]])

In [23]:
mol.adjacency

array([[0, 1, 1, 1, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0]])

In [24]:
mol.coulomb

array([[36.858112 , 10.765888 , 10.765641 , 10.765863 , 10.765677 ],
       [ 0.9157932,  0.5      ,  1.7942736,  1.7943105,  1.7942796],
       [ 0.9157932,  1.7943147,  0.5      ,  1.7943105,  1.7942796],
       [ 0.9157932,  1.7943147,  1.7942736,  0.5      ,  1.7942796],
       [ 0.9157932,  1.7943147,  1.7942736,  1.7943105,  0.5      ]],
      dtype=float32)

In [25]:
mol.distance

array([[0.       , 1.091953 , 1.0919516, 1.0919464, 1.0919476],
       [1.091953 , 0.       , 1.7831198, 1.7831475, 1.7831566],
       [1.0919516, 1.7831198, 0.       , 1.7831576, 1.7831483],
       [1.0919464, 1.7831475, 1.7831576, 0.       , 1.7831478],
       [1.0919476, 1.7831566, 1.7831483, 1.7831478, 0.       ]],
      dtype=float32)

In [26]:
mol.smile

'C\tC\t\n'

In [27]:
data.y

tensor([ 2.9361e+02,  2.9354e+02,  1.9139e+02,  1.6256e+00,  9.4600e+00,
        -2.5700e-01,  8.2900e-02,  3.3990e-01,  2.6156e+01,  3.4358e-02,
        -5.6526e+01, -5.6523e+01, -5.6522e+01, -5.6545e+01,  6.3160e+00])

In [28]:
graph.y

tensor([[    0.0000,    13.2100,   -10.5499,     3.1865,    13.7363,    35.3641,
             1.2177, -1101.4878, -1101.4098, -1101.3840, -1102.0229,     6.4690,
           -17.1722,   -17.2868,   -17.3897,   -16.1519,   157.7118,   157.7100,
           157.7070]])

In [29]:
data.z

tensor([6, 1, 1, 1, 1])

In [30]:
graph.z

tensor([6, 1, 1, 1, 1])

In [31]:
data.pos

tensor([[-1.2698e-02,  1.0858e+00,  8.0010e-03],
        [ 2.1504e-03, -6.0313e-03,  1.9761e-03],
        [ 1.0117e+00,  1.4638e+00,  2.7657e-04],
        [-5.4082e-01,  1.4475e+00, -8.7664e-01],
        [-5.2381e-01,  1.4379e+00,  9.0640e-01]])

In [32]:
graph.pos

tensor([[-1.2700e-02,  1.0858e+00,  8.0000e-03],
        [ 2.2000e-03, -6.0000e-03,  2.0000e-03],
        [ 1.0117e+00,  1.4638e+00,  3.0000e-04],
        [-5.4080e-01,  1.4475e+00, -8.7660e-01],
        [-5.2380e-01,  1.4379e+00,  9.0640e-01]])

In [33]:
graph.edge_attr

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.]])

In [34]:
graph.edge_index

tensor([[0, 0, 0, 0, 1, 2, 3, 4],
        [1, 2, 3, 4, 0, 0, 0, 0]])

In [35]:
data.edge_index

tensor([[0, 0, 0, 0, 1, 2, 3, 4],
        [1, 2, 3, 4, 0, 0, 0, 0]], dtype=torch.int32)