# Layers and models

In [19]:
import tensorflow as tf
# Pass an empty device list so that no GPU is visible to TensorFlow.
tf.config.set_visible_devices([], 'GPU')

import sys
# Add the directory four levels up to the import path -- presumably the
# repository root, so that the local `molgraph` package can be imported
# (TODO confirm against the repository layout).
sys.path.append('../../../../')

import numpy as np
import pandas as pd

from molgraph.chemistry import MolecularGraphEncoder
from molgraph.chemistry import AtomicFeaturizer  
from molgraph.chemistry import features

Build a `MolecularGraphEncoder`

In [2]:
# Features handed to the atom featurizer.
atom_features = [
    features.Symbol({'C', 'N', 'O'}, oov_size=1),
    features.Hybridization({'SP', 'SP2', 'SP3'}, oov_size=1),
    features.HydrogenDonor(),
    features.HydrogenAcceptor(),
    features.Hetero(),
]

# Features handed to the bond featurizer.
bond_features = [
    features.BondType({'SINGLE', 'DOUBLE', 'TRIPLE', 'AROMATIC'}),
    features.Rotatable(),
]

atom_encoder = AtomicFeaturizer(atom_features)
bond_encoder = AtomicFeaturizer(bond_features)

# The graph encoder combines both featurizers.
encoder = MolecularGraphEncoder(atom_encoder, bond_encoder)

Obtain dataset

In [3]:
# Download the ESOL csv file (via `tf.keras.utils.get_file`) and read it.
esol_url = 'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/ESOL.csv'
path = tf.keras.utils.get_file(fname='ESOL.csv', origin=esol_url)

df = pd.read_csv(path)
df.head(3)

Unnamed: 0,Compound ID,ESOL predicted log solubility in mols per litre,Minimum Degree,Molecular Weight,Number of H-Bond Donors,Number of Rings,Number of Rotatable Bonds,Polar Surface Area,measured log solubility in mols per litre,smiles
0,Amigdalin,-0.974,1,457.432,7,3,7,202.32,-0.77,OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)...
1,Fenfuram,-2.885,1,201.225,1,2,2,42.24,-3.3,Cc1occc1C(=O)Nc2ccccc2
2,citral,-2.579,1,152.237,0,0,4,17.07,-2.06,CC(C)=CCCC(C)=CC(=O)


Obtain SMILES `x` and associated labels `y`

In [4]:
# SMILES strings are the inputs; the measured log solubility is the target.
x = df['smiles'].values
y = df['measured log solubility in mols per litre'].values

Obtain `GraphTensor` from `x`

In [5]:
x = encoder(x)

print(x, end='\n\n')

# One (label, shape) row per field of the encoded graph tensor.
shape_report = [
    ('node_feature shape:', x.node_feature.shape),
    ('edge_dst shape:    ', x.edge_dst.shape),
    ('edge_src shape:    ', x.edge_src.shape),
    ('edge_feature shape:', x.edge_feature.shape),
]
for label, shape in shape_report:
    print(label, shape)

GraphTensor(
  node_feature=<tf.RaggedTensor: shape=(1128, None, 11), dtype=float32>,
  edge_feature=<tf.RaggedTensor: shape=(1128, None, 5), dtype=float32>,
  positional_encoding=<tf.RaggedTensor: shape=(1128, None, 16), dtype=float32>,
  edge_dst=<tf.RaggedTensor: shape=(1128, None), dtype=int32>,
  edge_src=<tf.RaggedTensor: shape=(1128, None), dtype=int32>)

node_feature shape: (1128, None, 11)
edge_dst shape:     (1128, None)
edge_src shape:     (1128, None)
edge_feature shape: (1128, None, 5)


### 1. Import GNN **layers**

In [6]:
from molgraph import layers

### 2. Use GNN **layers**

In [7]:
layer = layers.GATConv(units=128, num_heads=8, use_edge_features=True)

# The same layer is applied to the ragged graph tensor ...
out1 = layer(x)
# ... and to its merged counterpart (`x.merge()`).
out2 = layer(x.merge())

print(out1, out2, sep='\n\n')

# Compare the flat values of the ragged node features with the node
# features obtained from the merged input.
node_features_match = tf.reduce_all(
    out1.node_feature.flat_values == out2.node_feature
)
print(node_features_match.numpy())

GraphTensor(
  node_feature=<tf.RaggedTensor: shape=(1128, None, 128), dtype=float32>,
  edge_feature=<tf.RaggedTensor: shape=(1128, None, 128), dtype=float32>,
  positional_encoding=<tf.RaggedTensor: shape=(1128, None, 16), dtype=float32>,
  edge_dst=<tf.RaggedTensor: shape=(1128, None), dtype=int32>,
  edge_src=<tf.RaggedTensor: shape=(1128, None), dtype=int32>)

GraphTensor(
  node_feature=<tf.Tensor: shape=(14991, 128), dtype=float32>,
  edge_feature=<tf.Tensor: shape=(30856, 128), dtype=float32>,
  positional_encoding=<tf.Tensor: shape=(14991, 16), dtype=float32>,
  edge_dst=<tf.Tensor: shape=(30856,), dtype=int32>,
  edge_src=<tf.Tensor: shape=(30856,), dtype=int32>,
  graph_indicator=<tf.Tensor: shape=(14991,), dtype=int32>)
True


### 3. Pass GNN **layers** to **Keras models**

Split data into train/test

In [8]:
# Shuffle with a fixed seed so that the train/test split is identical on
# every "Restart & Run All"; an unseeded permutation gives a different split
# (and therefore different test metrics) on each run.
SPLIT_SEED = 42
NUM_TRAIN = 800

rng = np.random.default_rng(SPLIT_SEED)
random_indices = rng.permutation(np.arange(x.shape[0]))

# The first NUM_TRAIN shuffled indices form the training set, the rest the
# test set.
x_train = x[random_indices[:NUM_TRAIN]]
x_test = x[random_indices[NUM_TRAIN:]]

y_train = y[random_indices[:NUM_TRAIN]]
y_test = y[random_indices[NUM_TRAIN:]]

#### Option 1: Keras Sequential API

In [9]:
# Input spec taken from the (ragged) training graphs.
graph_input = tf.keras.layers.Input(type_spec=x_train.unspecific_spec)

# Three GCN layers, followed by a readout and a small dense head.
gnn_layers = [layers.GCNConv(128) for _ in range(3)]
head_layers = [
    layers.Readout(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1),
]

sequential_model = tf.keras.Sequential([graph_input, *gnn_layers, *head_layers])

sequential_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gcn_conv (GCNConv)          (None, None, 128)         4864      
                                                                 
 gcn_conv_1 (GCNConv)        (None, None, 128)         33408     
                                                                 
 gcn_conv_2 (GCNConv)        (None, None, 128)         33408     
                                                                 
 segment_pooling_readout (Se  (None, 128)              0         
 gmentPoolingReadout)                                            
                                                                 
 dense_2 (Dense)             (None, 512)               66048     
                                                                 
 dense_3 (Dense)             (None, 1)                 513       
                                                        

In [10]:
sequential_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
sequential_model.fit(x=x_train, y=y_train, epochs=30, verbose=2)

# `evaluate` returns the loss followed by the compiled metric.
mse, mae = sequential_model.evaluate(x=x_test, y=y_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 3s - loss: 4.4953 - mae: 1.6426 - 3s/epoch - 103ms/step
Epoch 2/30
25/25 - 0s - loss: 3.0353 - mae: 1.3670 - 128ms/epoch - 5ms/step
Epoch 3/30
25/25 - 0s - loss: 2.8768 - mae: 1.3528 - 119ms/epoch - 5ms/step
Epoch 4/30
25/25 - 0s - loss: 2.5055 - mae: 1.2527 - 123ms/epoch - 5ms/step
Epoch 5/30
25/25 - 0s - loss: 2.3613 - mae: 1.2110 - 120ms/epoch - 5ms/step
Epoch 6/30
25/25 - 0s - loss: 2.1772 - mae: 1.1744 - 119ms/epoch - 5ms/step
Epoch 7/30
25/25 - 0s - loss: 2.3577 - mae: 1.1773 - 121ms/epoch - 5ms/step
Epoch 8/30
25/25 - 0s - loss: 2.0798 - mae: 1.1381 - 123ms/epoch - 5ms/step
Epoch 9/30
25/25 - 0s - loss: 1.9485 - mae: 1.0943 - 120ms/epoch - 5ms/step
Epoch 10/30
25/25 - 0s - loss: 1.8490 - mae: 1.0455 - 124ms/epoch - 5ms/step
Epoch 11/30
25/25 - 0s - loss: 1.8505 - mae: 1.0587 - 121ms/epoch - 5ms/step
Epoch 12/30
25/25 - 0s - loss: 1.8531 - mae: 1.0516 - 122ms/epoch - 5ms/step
Epoch 13/30
25/25 - 0s - loss: 1.9914 - mae: 1.1119 - 124ms/epoch - 5ms/step
Epoch 14/

#### Option 2: Keras Functional API

In [11]:
# Input spec taken from the merged training graphs.
inputs = tf.keras.layers.Input(type_spec=x_train.merge().unspecific_spec)

# Use dedicated names for the intermediate symbolic tensors: rebinding `x`
# here would overwrite the encoded dataset held in `x`, so earlier cells
# that read `x` could no longer be re-run after this one.
hidden = layers.GCNConv(128)(inputs)
hidden = layers.GCNConv(128)(hidden)
hidden = layers.GCNConv(128)(hidden)
hidden = layers.Readout()(hidden)
hidden = tf.keras.layers.Dense(512, activation='relu')(hidden)
outputs = tf.keras.layers.Dense(1)(hidden)

functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)
functional_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 11)]              0         
                                                                 
 gcn_conv_3 (GCNConv)        (None, 128)               4864      
                                                                 
 gcn_conv_4 (GCNConv)        (None, 128)               33408     
                                                                 
 gcn_conv_5 (GCNConv)        (None, 128)               33408     
                                                                 
 segment_pooling_readout_1 (  (None, 128)              0         
 SegmentPoolingReadout)                                          
                                                                 
 dense_4 (Dense)             (None, 512)               66048     
                                                             

In [12]:
functional_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
functional_model.fit(x=x_train, y=y_train, epochs=30, verbose=2)

# `evaluate` returns the loss followed by the compiled metric.
mse, mae = functional_model.evaluate(x=x_test, y=y_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 3s - loss: 3.9455 - mae: 1.5561 - 3s/epoch - 107ms/step
Epoch 2/30
25/25 - 0s - loss: 2.7688 - mae: 1.3401 - 129ms/epoch - 5ms/step
Epoch 3/30
25/25 - 0s - loss: 2.4597 - mae: 1.2301 - 128ms/epoch - 5ms/step
Epoch 4/30
25/25 - 0s - loss: 2.3548 - mae: 1.2084 - 141ms/epoch - 6ms/step
Epoch 5/30
25/25 - 0s - loss: 2.0817 - mae: 1.1390 - 125ms/epoch - 5ms/step
Epoch 6/30
25/25 - 0s - loss: 2.1487 - mae: 1.1542 - 126ms/epoch - 5ms/step
Epoch 7/30
25/25 - 0s - loss: 2.1125 - mae: 1.1502 - 124ms/epoch - 5ms/step
Epoch 8/30
25/25 - 0s - loss: 2.0541 - mae: 1.1431 - 123ms/epoch - 5ms/step
Epoch 9/30
25/25 - 0s - loss: 2.2493 - mae: 1.1732 - 126ms/epoch - 5ms/step
Epoch 10/30
25/25 - 0s - loss: 2.1661 - mae: 1.1623 - 126ms/epoch - 5ms/step
Epoch 11/30
25/25 - 0s - loss: 1.8960 - mae: 1.0711 - 129ms/epoch - 5ms/step
Epoch 12/30
25/25 - 0s - loss: 1.9793 - mae: 1.1003 - 125ms/epoch - 5ms/step
Epoch 13/30
25/25 - 0s - loss: 1.8809 - mae: 1.0735 - 125ms/epoch - 5ms/step
Epoch 14/

#### Option 3: Keras Model subclassing

Creating a custom Keras model allows for more flexibility. Let's perform some random skip connections.

In [13]:
class MyModel(tf.keras.Model):
    """Subclassed Keras model with three GCN layers and skip connections.

    The output of every GCN layer is passed through the shared readout
    layer; the three readouts are concatenated and fed to a dense head.

    Args:
        gnn_units: Number of units of each `GCNConv` layer.
        dense_units: Number of units of the hidden `Dense` layer.
    """

    def __init__(self, gnn_units=128, dense_units=512):
        super().__init__()
        self.gcn_1 = layers.GCNConv(gnn_units)
        self.gcn_2 = layers.GCNConv(gnn_units)
        self.gcn_3 = layers.GCNConv(gnn_units)
        self.readout = layers.Readout()
        # Honour `dense_units`; it was previously ignored in favour of a
        # hard-coded 512 (which remains the default value).
        self.dense_1 = tf.keras.layers.Dense(dense_units, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Runs the forward pass.

        Args:
            inputs: Graph input accepted by `GCNConv`.

        Returns:
            The output of the final `Dense(1)` layer.
        """
        x0 = inputs
        x1 = self.gcn_1(x0)
        x2 = self.gcn_2(x1)
        x3 = self.gcn_3(x2)
        # Skip connections: read out the output of every GCN layer, not
        # only the last one.
        x1 = self.readout(x1)
        x2 = self.readout(x2)
        x3 = self.readout(x3)
        x = tf.concat([x1, x2, x3], axis=1)
        x = self.dense_1(x)
        return self.dense_2(x)


my_model = MyModel()

my_model(x_train) # build

my_model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gcn_conv_6 (GCNConv)        multiple                  4864      
                                                                 
 gcn_conv_7 (GCNConv)        multiple                  33408     
                                                                 
 gcn_conv_8 (GCNConv)        multiple                  33408     
                                                                 
 segment_pooling_readout_2 (  multiple                 0         
 SegmentPoolingReadout)                                          
                                                                 
 dense_6 (Dense)             multiple                  197120    
                                                                 
 dense_7 (Dense)             multiple                  513       
                                                          

In [14]:
my_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
my_model.fit(x=x_train, y=y_train, epochs=30, verbose=2)

# `evaluate` returns the loss followed by the compiled metric.
mse, mae = my_model.evaluate(x=x_test, y=y_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 4s - loss: 4.0644 - mae: 1.5953 - 4s/epoch - 147ms/step
Epoch 2/30
25/25 - 0s - loss: 2.8485 - mae: 1.3575 - 176ms/epoch - 7ms/step
Epoch 3/30
25/25 - 0s - loss: 2.4925 - mae: 1.2698 - 183ms/epoch - 7ms/step
Epoch 4/30
25/25 - 0s - loss: 2.4077 - mae: 1.2483 - 171ms/epoch - 7ms/step
Epoch 5/30
25/25 - 0s - loss: 2.3291 - mae: 1.2062 - 181ms/epoch - 7ms/step
Epoch 6/30
25/25 - 0s - loss: 2.2831 - mae: 1.2032 - 183ms/epoch - 7ms/step
Epoch 7/30
25/25 - 0s - loss: 2.7072 - mae: 1.2954 - 168ms/epoch - 7ms/step
Epoch 8/30
25/25 - 0s - loss: 2.2933 - mae: 1.1982 - 172ms/epoch - 7ms/step
Epoch 9/30
25/25 - 0s - loss: 1.9354 - mae: 1.0868 - 167ms/epoch - 7ms/step
Epoch 10/30
25/25 - 0s - loss: 1.9585 - mae: 1.0973 - 175ms/epoch - 7ms/step
Epoch 11/30
25/25 - 0s - loss: 1.8111 - mae: 1.0287 - 170ms/epoch - 7ms/step
Epoch 12/30
25/25 - 0s - loss: 1.9599 - mae: 1.1021 - 168ms/epoch - 7ms/step
Epoch 13/30
25/25 - 0s - loss: 1.7467 - mae: 1.0086 - 168ms/epoch - 7ms/step
Epoch 14/

#### Model with **tf.data.Dataset**

In [15]:
def _merge_graphs(graph, label):
    """Calls `merge()` on a batch of graphs and passes the labels through."""
    return graph.merge(), label


# Training pipeline: shuffle, batch, then merge each batch.
ds_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(800)
    .batch(32)
    .map(_merge_graphs)
)

# Test pipeline: same batching and merging, without shuffling.
ds_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(32)
    .map(_merge_graphs)
)


In [16]:
sequential_model.compile('adam', 'mse', ['mae'])
sequential_model.fit(ds_train, epochs=30, verbose=2)

# Evaluate on the `tf.data.Dataset` built for the test split, so that the
# dataset pipeline is used for evaluation as well as for training. The
# dataset already yields (inputs, labels) pairs, so no `y` is passed.
mse, mae = sequential_model.evaluate(ds_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 2s - loss: 1.4973 - mae: 0.9581 - 2s/epoch - 92ms/step
Epoch 2/30
25/25 - 0s - loss: 1.7527 - mae: 1.0288 - 127ms/epoch - 5ms/step
Epoch 3/30
25/25 - 0s - loss: 1.3780 - mae: 0.9172 - 122ms/epoch - 5ms/step
Epoch 4/30
25/25 - 0s - loss: 1.2464 - mae: 0.8541 - 121ms/epoch - 5ms/step
Epoch 5/30
25/25 - 0s - loss: 1.1977 - mae: 0.8308 - 133ms/epoch - 5ms/step
Epoch 6/30
25/25 - 0s - loss: 1.2341 - mae: 0.8311 - 127ms/epoch - 5ms/step
Epoch 7/30
25/25 - 0s - loss: 1.2191 - mae: 0.8186 - 126ms/epoch - 5ms/step
Epoch 8/30
25/25 - 0s - loss: 1.1950 - mae: 0.8306 - 118ms/epoch - 5ms/step
Epoch 9/30
25/25 - 0s - loss: 1.2009 - mae: 0.8250 - 119ms/epoch - 5ms/step
Epoch 10/30
25/25 - 0s - loss: 1.0612 - mae: 0.7840 - 120ms/epoch - 5ms/step
Epoch 11/30
25/25 - 0s - loss: 1.0162 - mae: 0.7536 - 158ms/epoch - 6ms/step
Epoch 12/30
25/25 - 0s - loss: 1.0022 - mae: 0.7566 - 124ms/epoch - 5ms/step
Epoch 13/30
25/25 - 0s - loss: 1.1602 - mae: 0.8083 - 119ms/epoch - 5ms/step
Epoch 14/3

### 4. Save and load GNN **model** with **tf.saved_model**

In [17]:
import tempfile

# Save into a freshly created temporary directory. The context manager
# removes the directory on exit, even if saving or loading raises, and
# avoids reusing the name of an already deleted temporary file.
with tempfile.TemporaryDirectory() as filename:
    tf.saved_model.save(sequential_model, filename)
    loaded_model = tf.saved_model.load(filename)

    print(loaded_model(x_train).shape)

Function `_wrapped_model` contains input name(s) args_0 with unsupported characters which will be renamed to args_0_9 in the SavedModel.
Found untraced functions such as dense_layer_call_fn, dense_layer_call_and_return_conditional_losses, dense_1_layer_call_fn, dense_1_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 8). These functions will not be directly callable after loading.


INFO:tensorflow:Assets written to: /tmp/tmpojsh4k70/assets


Assets written to: /tmp/tmpojsh4k70/assets


(800, 1)


### 5. Save and load GNN **model** with **Keras**

In [18]:
import tempfile

# Save into a freshly created temporary directory. The context manager
# removes the directory on exit, even if saving, loading or fitting raises,
# and avoids reusing the name of an already deleted temporary file.
with tempfile.TemporaryDirectory() as filename:
    sequential_model.save(filename)
    loaded_model = tf.keras.models.load_model(filename)

    # Train the reloaded model for one more epoch.
    loaded_model.fit(ds_train, epochs=1)

Function `_wrapped_model` contains input name(s) args_0 with unsupported characters which will be renamed to args_0_9 in the SavedModel.
Found untraced functions such as dense_layer_call_fn, dense_layer_call_and_return_conditional_losses, dense_1_layer_call_fn, dense_1_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 8). These functions will not be directly callable after loading.


INFO:tensorflow:Assets written to: /tmp/tmpdbyfgh03/assets


Assets written to: /tmp/tmpdbyfgh03/assets


