# Graph loading

Different methods of loading (batched) graph tensors as Keras model inputs and outputs.

In [1]:
import tensorflow as tf
import numpy as np
from kgcnn.literature.GIN import make_model

In [2]:
# Property specifications shared by every loading method in this notebook.
# Each entry names a graph property and declares the shape, dtype and
# raggedness of the tensor that is built from it.
inputs = [
    dict(shape=(None,), name="node_number", dtype="float32", ragged=True),
    dict(shape=(None, 2), name="edge_indices", dtype="int64", ragged=True),
]
# Single (non-ragged) target property.
outputs = dict(shape=[], name="graph_labels", dtype="float32", ragged=False)

In [3]:
# Build the GIN model for the input specs declared above. The output MLP is
# reduced to one linear unit, and the model is compiled with a
# mean-absolute-error loss.
output_mlp_config = {"units": 1, "activation": "linear"}
model = make_model(inputs=inputs, output_mlp=output_mlp_config)
model.compile(loss="mean_absolute_error")

INFO:kgcnn.model.utils:Updated model kwargs:
INFO:kgcnn.model.utils:{'name': 'GIN', 'inputs': [{'shape': (None,), 'name': 'node_number', 'dtype': 'float32', 'ragged': True}, {'shape': (None, 2), 'name': 'edge_indices', 'dtype': 'int64', 'ragged': True}], 'input_embedding': {'node': {'input_dim': 95, 'output_dim': 64}}, 'gin_mlp': {'units': [64, 64], 'use_bias': True, 'activation': ['relu', 'linear'], 'use_normalization': True, 'normalization_technique': 'graph_batch'}, 'gin_args': {}, 'depth': 3, 'dropout': 0.0, 'verbose': 10, 'last_mlp': {'use_bias': [True, True, True], 'units': [64, 64, 64], 'activation': ['relu', 'relu', 'linear']}, 'output_embedding': 'graph', 'output_to_tensor': True, 'output_mlp': {'use_bias': True, 'units': 1, 'activation': 'linear'}}


### 1. Via direct TensorFlow (ragged) tensor input

Via `MemoryGraphList` method `tensor()`:

In [4]:
from kgcnn.data.datasets.ESOLDataset import ESOLDataset
# Instantiate the ESOL dataset; the log output of this cell shows it locating
# the dataset directory and reading the pre-computed molecule file.
dataset = ESOLDataset()
# Clean the dataset with respect to the two properties the model consumes.
# NOTE(review): presumably this drops graphs that lack either property -- the
# log reports 1128 molecules while the tensors built afterwards hold 1127
# entries; confirm against the kgcnn documentation.
dataset.clean(["node_number", "edge_indices"])

ERROR:root:Module 'mol' is deprecated and will be removed in future versions. Please move to 'kgcnn.molecule'.
ERROR:kgcnn.molecule.convert:Can not import `RDKit` package for conversion.
INFO:kgcnn.data.download:Checking and possibly downloading dataset with name ESOL
INFO:kgcnn.data.download:Dataset directory located at C:\Users\patri\.kgcnn\datasets
INFO:kgcnn.data.download:Dataset directory found. Done.
INFO:kgcnn.data.download:Dataset found. Done.
INFO:kgcnn.data.ESOL:Found SDF C:\Users\patri\.kgcnn\datasets\ESOL\delaney-processed.sdf of pre-computed structures.
INFO:kgcnn.data.ESOL:Read molecules from mol-file.
INFO:kgcnn.data.ESOL: ... process molecules 0 from 1128
INFO:kgcnn.molecule.encoder:OneHotEncoder Symbol found ['O', 'C', 'N', 'S', 'Cl', 'P', 'F', 'I', 'Br']
INFO:kgcnn.molecule.encoder:OneHotEncoder Hybridization found [rdkit.Chem.rdchem.HybridizationType.SP3, rdkit.Chem.rdchem.HybridizationType.SP, rdkit.Chem.rdchem.HybridizationType.SP2]
INFO:kgcnn.molecule.encoder:OneH

array([934])

In [5]:
# Turn the properties listed in `inputs` into one tensor per spec and show
# the resulting shapes.
tensor_input = dataset.tensor(inputs)
input_shapes = [input_tensor.shape for input_tensor in tensor_input]
print(input_shapes)

[TensorShape([1127, None]), TensorShape([1127, None, 2])]


In [6]:
# Build the label tensor from the `outputs` spec and append a trailing axis,
# giving one label column per graph.
graph_label_tensor = dataset.tensor(outputs)
tensor_output = tf.expand_dims(graph_label_tensor, axis=-1)
print(tensor_output.shape)

(1127, 1)


In [7]:
# Train directly on the in-memory tensors: the inputs from
# `dataset.tensor(inputs)` and the label tensor with its added trailing axis.
model.fit(tensor_input, tensor_output, epochs=5)

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a77b811760>

### 2. Via `keras.utils.Sequence`

For example, `GraphBatchLoader`, which inherits from `keras.utils.Sequence` and takes an iterable data object of type `list[dict]`.

In [8]:
from kgcnn.io.loader import GraphBatchLoader

In [9]:
# Wrap the dataset in a batch loader; `inputs` and `outputs` are the same
# property specs that were passed to `dataset.tensor(...)` earlier.
loader = GraphBatchLoader(data=dataset, inputs=inputs, outputs=outputs)

In [10]:
# The loader was constructed with both `inputs` and `outputs`, so it is the
# only data argument passed to `fit` (no separate target tensor).
model.fit(loader, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a756e01700>

### 3. Via `tf.data`

Here, the starting data is given as a list of NumPy arrays.

In [11]:
# Pull the raw per-graph properties out of the dataset (one entry per graph),
# in the same order as before: edge indices, node numbers, labels.
edge_indices, node_number, graph_labels = map(
    dataset.get, ("edge_indices", "node_number", "graph_labels")
)
data_length = len(graph_labels)
# Peek at the node numbers of the first two graphs.
print(node_number[:2])

[array([8, 6, 6, 8, 6, 8, 6, 6, 8, 6, 8, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 8,
       6, 8, 6, 8, 6, 8, 6, 8, 6, 8]), array([6, 6, 8, 6, 6, 6, 6, 8, 7, 6, 6, 6, 6, 6, 6])]


#### 3.1 Using `from_tensor_slices`

In [12]:
# Ragged int64 tensors holding all graphs; slicing them yields one graph per element.
node_number_ragged = tf.ragged.constant(node_number, ragged_rank=1, dtype="int64")
edge_indices_ragged = tf.ragged.constant(edge_indices, ragged_rank=1, dtype="int64")
# Labels with a trailing axis so that each element is a length-1 vector.
labels_column = tf.expand_dims(tf.constant(graph_labels), axis=-1)

ds_x = tf.data.Dataset.from_tensor_slices((node_number_ragged, edge_indices_ragged))
ds_y = tf.data.Dataset.from_tensor_slices(labels_column)
# Pair inputs with labels; `ds` itself stays unbatched.
ds = tf.data.Dataset.zip((ds_x, ds_y))
# Displayed only, to inspect the element spec of the batched dataset.
ds.batch(32)

<BatchDataset element_spec=((RaggedTensorSpec(TensorShape([None, None]), tf.int64, 1, tf.int64), RaggedTensorSpec(TensorShape([None, None, 2]), tf.int64, 1, tf.int64)), TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))>

In [13]:
# `ds` holds unbatched per-graph elements, so batching is applied here at fit time.
model.fit(ds.batch(32), epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a784b83fd0>

#### 3.2 Using `from_generator`

In [14]:
batch_size = 32


def gen():
    """Yield the model inputs one batch at a time.

    Each item is a ``(node_number, edge_indices)`` tuple of int64 ragged
    tensors built from up to ``batch_size`` consecutive graphs.
    """
    for start in range(0, data_length, batch_size):
        window = slice(start, start + batch_size)
        node_batch = tf.ragged.constant(node_number[window], dtype="int64", ragged_rank=1)
        edge_batch = tf.ragged.constant(edge_indices[window], dtype="int64", ragged_rank=1)
        yield (node_batch, edge_batch)


# Signature of one generated item: two ragged int64 tensors (nodes, edges).
input_signature = (
    tf.RaggedTensorSpec(shape=(None, None), ragged_rank=1, dtype="int64"),
    tf.RaggedTensorSpec(shape=(None, None, 2), ragged_rank=1, dtype="int64"),
)
ds_x_batch = tf.data.Dataset.from_generator(gen, output_signature=input_signature)

# Labels are batched separately with the same batch size and zipped to the inputs.
labels_column = tf.expand_dims(tf.constant(graph_labels), axis=-1)
ds_y_batch = tf.data.Dataset.from_tensor_slices(labels_column).batch(batch_size)
ds_batch = tf.data.Dataset.zip((ds_x_batch, ds_y_batch))

In [15]:
# `ds_batch` already yields batched (inputs, labels) pairs, so it is passed as-is.
model.fit(ds_batch, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a7220222e0>

#### 3.3 Using `tf.data.experimental.dense_to_ragged_batch`

In [16]:
# One batch size for inputs AND labels: the input and label datasets are
# batched independently and then zipped, so they only line up if both use the
# same value. Defining it here also keeps the cell independent of earlier cells.
batch_size = 32
bath_size = batch_size  # misspelled legacy name, kept so existing references keep working

# Per-graph node numbers (variable length) -> ragged batches.
ds_node = tf.data.Dataset.from_generator(
    lambda: [tf.constant(x) for x in node_number],
    output_signature=tf.TensorSpec(shape=(None, ), dtype="int64")
).apply(tf.data.experimental.dense_to_ragged_batch(batch_size=batch_size, drop_remainder=False))
# Per-graph edge index pairs (variable number of rows, 2 columns) -> ragged batches.
ds_edge = tf.data.Dataset.from_generator(
    lambda: [tf.constant(x) for x in edge_indices],
    output_signature=tf.TensorSpec(shape=(None, 2), dtype="int64")
).apply(tf.data.experimental.dense_to_ragged_batch(batch_size=batch_size, drop_remainder=False))

# Combine both inputs into one element, then pair them with the batched labels.
ds_x_batch = tf.data.Dataset.zip((ds_node, ds_edge))
ds_y_batch = tf.data.Dataset.from_tensor_slices(tf.expand_dims(tf.constant(graph_labels), axis=-1)).batch(batch_size)

ds_batch = tf.data.Dataset.zip((ds_x_batch, ds_y_batch))

In [17]:
# Train on the dataset whose inputs were batched with `dense_to_ragged_batch`.
model.fit(ds_batch, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a77ffd55b0>

Or via an explicit generator:

In [18]:
def gen():
    """Yield one ``(node_number, edge_indices)`` pair per graph.

    Note: this rebinds the name of the batch-wise ``gen`` defined earlier in
    the notebook; here the items are single, unbatched graphs.
    """
    for i in range(len(node_number)):
        yield node_number[i], edge_indices[i]

# Batch the per-graph pairs into ragged tensors. The same `batch_size` is used
# for the inputs and for the labels below so the zipped batches stay aligned.
ds_x_batch = tf.data.Dataset.from_generator(
    gen, output_signature=(tf.TensorSpec(shape=(None,), dtype="int64"),tf.TensorSpec(shape=(None,2), dtype="int64"))
).apply(tf.data.experimental.dense_to_ragged_batch(batch_size=batch_size, drop_remainder=False))

ds_y_batch = tf.data.Dataset.from_tensor_slices(tf.expand_dims(tf.constant(graph_labels), axis=-1)).batch(batch_size)

ds_batch = tf.data.Dataset.zip((ds_x_batch, ds_y_batch))

In [19]:
# Train on the dataset built from the explicit per-graph generator.
model.fit(ds_batch, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2a78b4d0d90>