# Layers and models

In [1]:
from molgraph.chemistry import MolecularGraphEncoder
from molgraph.chemistry import Featurizer  
from molgraph.chemistry import features

import tensorflow as tf

import numpy as np
import pandas as pd

Construct a `MolecularGraphEncoder`

In [2]:
# Atom-level features that make up each node's feature vector.
atom_features = [
    features.Symbol({'C', 'N', 'O'}, oov_size=1),
    features.Hybridization({'SP', 'SP2', 'SP3'}, oov_size=1),
    features.HydrogenDonor(),
    features.HydrogenAcceptor(),
    features.Hetero(),
]
atom_encoder = Featurizer(atom_features)

# Bond-level features that make up each edge's feature vector.
bond_features = [
    features.BondType({'SINGLE', 'DOUBLE', 'TRIPLE', 'AROMATIC'}),
    features.Rotatable(),
]
bond_encoder = Featurizer(bond_features)

# The graph encoder combines the atom and bond featurizers.
encoder = MolecularGraphEncoder(atom_encoder, bond_encoder)

Obtain dataset

In [3]:
# Remote location of the ESOL solubility dataset (CSV).
ESOL_URL = 'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/ESOL.csv'

# `get_file` hands back a local path to the downloaded file, which pandas then reads.
path = tf.keras.utils.get_file(fname='ESOL.csv', origin=ESOL_URL)
df = pd.read_csv(path)

# Preview the first three rows.
df.head(3)

Unnamed: 0,Compound ID,ESOL predicted log solubility in mols per litre,Minimum Degree,Molecular Weight,Number of H-Bond Donors,Number of Rings,Number of Rotatable Bonds,Polar Surface Area,measured log solubility in mols per litre,smiles
0,Amigdalin,-0.974,1,457.432,7,3,7,202.32,-0.77,OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)...
1,Fenfuram,-2.885,1,201.225,1,2,2,42.24,-3.3,Cc1occc1C(=O)Nc2ccccc2
2,citral,-2.579,1,152.237,0,0,4,17.07,-2.06,CC(C)=CCCC(C)=CC(=O)


Obtain SMILES `x` and associated labels `y`

In [4]:
# Model inputs: the SMILES strings, as a NumPy array.
x = df['smiles'].values
# Regression targets: the measured log solubility column, as a NumPy array.
y = df['measured log solubility in mols per litre'].values

Obtain `GraphTensor` from `x`, via `MolecularGraphEncoder`

In [5]:
# Encode the SMILES column (the same values `x` held after the previous cell)
# into a GraphTensor. Reading from `df` rather than from `x` itself keeps this
# cell idempotent: with `x = encoder(x)`, re-running the cell would pass the
# already-encoded GraphTensor back into the encoder.
x = encoder(df['smiles'].values)

# Show the GraphTensor summary followed by the shapes of its main fields.
print(x, end='\n\n')
print('node_feature shape:', x.node_feature.shape)
print('edge_dst shape:    ', x.edge_dst.shape)
print('edge_src shape:    ', x.edge_src.shape)
print('edge_feature shape:', x.edge_feature.shape)

GraphTensor(
  sizes=<tf.Tensor: shape=(1128,), dtype=int32>,
  node_feature=<tf.Tensor: shape=(14991, 11), dtype=float32>,
  edge_src=<tf.Tensor: shape=(30856,), dtype=int32>,
  edge_dst=<tf.Tensor: shape=(30856,), dtype=int32>,
  edge_feature=<tf.Tensor: shape=(30856, 5), dtype=float32>,
  node_position=<tf.Tensor: shape=(14991, 16), dtype=float32>)

node_feature shape: (14991, 11)
edge_dst shape:     (30856,)
edge_src shape:     (30856,)
edge_feature shape: (30856, 5)


### 1. Import GNN **layers**

In [6]:
from molgraph import layers

### 2. Use GNN **layers**

In [7]:
# A single GAT layer with 128 output units that also consumes edge features.
layer = layers.GATConv(units=128, use_edge_features=True)

# The same layer accepts both GraphTensor representations:
out1 = layer(x.separate())      # fields stored as ragged tensors
out2 = layer(x)                 # fields stored as flat (non-ragged) tensors

# Print both results, separated by a blank line.
print(out1, out2, sep='\n\n')

GraphTensor(
  sizes=<tf.Tensor: shape=(1128,), dtype=int32>,
  node_feature=<tf.RaggedTensor: shape=(1128, None, 128), dtype=float32, ragged_rank=1>,
  edge_src=<tf.RaggedTensor: shape=(1128, None), dtype=int32, ragged_rank=1>,
  edge_dst=<tf.RaggedTensor: shape=(1128, None), dtype=int32, ragged_rank=1>,
  edge_feature=<tf.RaggedTensor: shape=(1128, None, 128), dtype=float32, ragged_rank=1>,
  node_position=<tf.RaggedTensor: shape=(1128, None, 16), dtype=float32, ragged_rank=1>)

GraphTensor(
  sizes=<tf.Tensor: shape=(1128,), dtype=int32>,
  node_feature=<tf.Tensor: shape=(14991, 128), dtype=float32>,
  edge_src=<tf.Tensor: shape=(30856,), dtype=int32>,
  edge_dst=<tf.Tensor: shape=(30856,), dtype=int32>,
  edge_feature=<tf.Tensor: shape=(30856, 128), dtype=float32>,
  node_position=<tf.Tensor: shape=(14991, 16), dtype=float32>)


### 3. Pass GNN **layers** to **Keras models**

Split data into train/test

In [8]:
# Fixed seed so the train/test split (and therefore the reported test metrics)
# refers to the same molecules on every Restart & Run All.
SPLIT_SEED = 42
# Number of graphs used for training; the remainder is held out for testing.
N_TRAIN = 800

# Shuffled indices 0 .. n_graphs - 1.
random_indices = np.random.default_rng(SPLIT_SEED).permutation(x.shape[0])

x_train = x[random_indices[:N_TRAIN]]
x_test = x[random_indices[N_TRAIN:]]

y_train = y[random_indices[:N_TRAIN]]
y_test = y[random_indices[N_TRAIN:]]

#### Option 1: Keras Sequential API

In [9]:
# Graph regression network: three stacked GIN layers, a readout that pools
# node features per graph, and a small dense head producing one value.
sequential_model = tf.keras.Sequential([
    # The input is described by the GraphTensor's type spec.
    tf.keras.layers.Input(type_spec=x_train.spec),
    # Three identical GIN layers, created in order.
    *(layers.GINConv(128) for _ in range(3)),
    layers.Readout(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1),
])

sequential_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gin_conv (GINConv)          (None, None, 128)         21187     
                                                                 
 gin_conv_1 (GINConv)        (None, None, 128)         51073     
                                                                 
 gin_conv_2 (GINConv)        (None, None, 128)         49537     
                                                                 
 segment_pooling_readout (S  (None, 128)               0         
 egmentPoolingReadout)                                           
                                                                 
 dense_2 (Dense)             (None, 512)               66048     
                                                                 
 dense_3 (Dense)             (None, 1)                 513       
                                                        

In [10]:
# Train with Adam on mean squared error, tracking mean absolute error.
sequential_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
sequential_model.fit(x_train, y_train, epochs=30, verbose=2)

# `evaluate` returns the loss followed by the compiled metrics.
mse, mae = sequential_model.evaluate(x_test, y_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 4s - loss: 7.0829 - mae: 2.0933 - 4s/epoch - 167ms/step
Epoch 2/30
25/25 - 0s - loss: 3.7235 - mae: 1.5299 - 138ms/epoch - 6ms/step
Epoch 3/30
25/25 - 0s - loss: 3.0047 - mae: 1.3911 - 138ms/epoch - 6ms/step
Epoch 4/30
25/25 - 0s - loss: 2.9660 - mae: 1.3739 - 153ms/epoch - 6ms/step
Epoch 5/30
25/25 - 0s - loss: 2.7608 - mae: 1.3286 - 152ms/epoch - 6ms/step
Epoch 6/30
25/25 - 0s - loss: 2.5752 - mae: 1.2790 - 140ms/epoch - 6ms/step
Epoch 7/30
25/25 - 0s - loss: 2.3623 - mae: 1.2191 - 123ms/epoch - 5ms/step
Epoch 8/30
25/25 - 0s - loss: 2.2896 - mae: 1.1918 - 130ms/epoch - 5ms/step
Epoch 9/30
25/25 - 0s - loss: 2.2290 - mae: 1.1800 - 128ms/epoch - 5ms/step
Epoch 10/30
25/25 - 0s - loss: 1.7473 - mae: 1.0566 - 128ms/epoch - 5ms/step
Epoch 11/30
25/25 - 0s - loss: 1.7529 - mae: 1.0464 - 138ms/epoch - 6ms/step
Epoch 12/30
25/25 - 0s - loss: 1.6870 - mae: 1.0037 - 131ms/epoch - 5ms/step
Epoch 13/30
25/25 - 0s - loss: 1.5346 - mae: 0.9839 - 133ms/epoch - 5ms/step
Epoch 14/

#### Option 2: Keras Functional API

In [11]:
# Same architecture as the Sequential model, wired with the Functional API.
# The intermediate tensors are deliberately NOT called `x`: that name holds the
# dataset GraphTensor, and overwriting it here would break any earlier cell
# that is re-run afterwards (e.g. the GATConv demo or the train/test split).
inputs = tf.keras.layers.Input(type_spec=x_train.spec)
h = layers.GINConv(128)(inputs)
h = layers.GINConv(128)(h)
h = layers.GINConv(128)(h)
h = layers.Readout()(h)
h = tf.keras.layers.Dense(512, activation='relu')(h)
outputs = tf.keras.layers.Dense(1)(h)

functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)
functional_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, 11)]        0         
                                                                 
 gin_conv_3 (GINConv)        (None, None, 128)         21187     
                                                                 
 gin_conv_4 (GINConv)        (None, None, 128)         51073     
                                                                 
 gin_conv_5 (GINConv)        (None, None, 128)         49537     
                                                                 
 segment_pooling_readout_1   (None, 128)               0         
 (SegmentPoolingReadout)                                         
                                                                 
 dense_4 (Dense)             (None, 512)               66048     
                                                             

In [12]:
# Train with Adam on mean squared error, tracking mean absolute error.
functional_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
functional_model.fit(x_train, y_train, epochs=30, verbose=2)

# `evaluate` returns the loss followed by the compiled metrics.
mse, mae = functional_model.evaluate(x_test, y_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 4s - loss: 6.9203 - mae: 2.0325 - 4s/epoch - 148ms/step
Epoch 2/30
25/25 - 0s - loss: 3.4010 - mae: 1.4839 - 133ms/epoch - 5ms/step
Epoch 3/30
25/25 - 0s - loss: 2.9173 - mae: 1.3693 - 148ms/epoch - 6ms/step
Epoch 4/30
25/25 - 0s - loss: 2.7455 - mae: 1.3209 - 146ms/epoch - 6ms/step
Epoch 5/30
25/25 - 0s - loss: 2.8170 - mae: 1.3229 - 140ms/epoch - 6ms/step
Epoch 6/30
25/25 - 0s - loss: 2.4958 - mae: 1.2679 - 144ms/epoch - 6ms/step
Epoch 7/30
25/25 - 0s - loss: 2.3044 - mae: 1.1992 - 144ms/epoch - 6ms/step
Epoch 8/30
25/25 - 0s - loss: 1.9932 - mae: 1.1212 - 139ms/epoch - 6ms/step
Epoch 9/30
25/25 - 0s - loss: 1.8481 - mae: 1.0773 - 141ms/epoch - 6ms/step
Epoch 10/30
25/25 - 0s - loss: 1.7411 - mae: 1.0312 - 143ms/epoch - 6ms/step
Epoch 11/30
25/25 - 0s - loss: 1.5762 - mae: 0.9794 - 140ms/epoch - 6ms/step
Epoch 12/30
25/25 - 0s - loss: 1.5218 - mae: 0.9651 - 146ms/epoch - 6ms/step
Epoch 13/30
25/25 - 0s - loss: 1.7555 - mae: 1.0286 - 143ms/epoch - 6ms/step
Epoch 14/

#### Option 3: Keras Model subclassing

Creating a custom Keras model allows for more flexibility. Let's add some skip connections: the readouts of all three GNN layers are concatenated before the dense layers.

In [13]:
class MyModel(tf.keras.Model):
    """Graph regression model with skip connections from every GIN layer.

    Three GIN layers are applied in sequence. The output of each one is pooled
    by a shared readout layer, the three pooled vectors are concatenated, and
    the result is passed through a hidden dense layer and a one-unit output
    layer.

    Args:
        gnn_units: Number of units of each ``GINConv`` layer.
        dense_units: Number of units of the hidden dense layer.
    """

    def __init__(self, gnn_units=128, dense_units=512):
        super().__init__()
        self.gin_conv1 = layers.GINConv(gnn_units)
        self.gin_conv2 = layers.GINConv(gnn_units)
        self.gin_conv3 = layers.GINConv(gnn_units)
        self.readout = layers.Readout()
        # Honour `dense_units`; it was previously ignored in favour of a
        # hard-coded 512 (which remains the default).
        self.dense_1 = tf.keras.layers.Dense(dense_units, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Run the forward pass on `inputs` and return the dense head's output."""
        x0 = inputs
        x1 = self.gin_conv1(x0)
        x2 = self.gin_conv2(x1)
        x3 = self.gin_conv3(x2)
        # Pool each layer's graph separately (the same readout layer is reused).
        x1 = self.readout(x1)
        x2 = self.readout(x2)
        x3 = self.readout(x3)
        # Skip connections: join the three pooled representations on axis 1.
        x = tf.concat([x1, x2, x3], axis=1)
        x = self.dense_1(x)
        return self.dense_2(x)
        
        
# Instantiate the subclassed model with its default layer sizes.
my_model = MyModel()

# Call the model once on the training graphs before asking for a summary.
my_model(x_train) # build

# Print the per-layer parameter counts.
my_model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gin_conv_6 (GINConv)        multiple                  21187     
                                                                 
 gin_conv_7 (GINConv)        multiple                  51073     
                                                                 
 gin_conv_8 (GINConv)        multiple                  49537     
                                                                 
 segment_pooling_readout_2   multiple                  0         
 (SegmentPoolingReadout)                                         
                                                                 
 dense_6 (Dense)             multiple                  197120    
                                                                 
 dense_7 (Dense)             multiple                  513       
                                                          

In [14]:
# Train with Adam on mean squared error, tracking mean absolute error.
my_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
my_model.fit(x_train, y_train, epochs=30, verbose=2)

# `evaluate` returns the loss followed by the compiled metrics.
mse, mae = my_model.evaluate(x_test, y_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 4s - loss: 6.9835 - mae: 2.0324 - 4s/epoch - 154ms/step
Epoch 2/30
25/25 - 0s - loss: 3.5191 - mae: 1.4901 - 130ms/epoch - 5ms/step
Epoch 3/30
25/25 - 0s - loss: 3.0028 - mae: 1.3773 - 127ms/epoch - 5ms/step
Epoch 4/30
25/25 - 0s - loss: 3.0950 - mae: 1.4046 - 127ms/epoch - 5ms/step
Epoch 5/30
25/25 - 0s - loss: 3.0432 - mae: 1.3907 - 124ms/epoch - 5ms/step
Epoch 6/30
25/25 - 0s - loss: 2.8056 - mae: 1.3432 - 134ms/epoch - 5ms/step
Epoch 7/30
25/25 - 0s - loss: 2.4625 - mae: 1.2536 - 135ms/epoch - 5ms/step
Epoch 8/30
25/25 - 0s - loss: 2.3279 - mae: 1.2204 - 134ms/epoch - 5ms/step
Epoch 9/30
25/25 - 0s - loss: 2.0883 - mae: 1.1648 - 165ms/epoch - 7ms/step
Epoch 10/30
25/25 - 0s - loss: 1.7619 - mae: 1.0528 - 214ms/epoch - 9ms/step
Epoch 11/30
25/25 - 0s - loss: 1.5656 - mae: 0.9921 - 185ms/epoch - 7ms/step
Epoch 12/30
25/25 - 0s - loss: 1.5067 - mae: 0.9590 - 163ms/epoch - 7ms/step
Epoch 13/30
25/25 - 0s - loss: 1.3769 - mae: 0.9156 - 154ms/epoch - 6ms/step
Epoch 14/

#### Model with **tf.data.Dataset**

In [15]:
# Training pipeline: slice per example, shuffle with a buffer of 800, batch by 32.
ds_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(800)
    .batch(32)
)

# Test pipeline: same slicing and batching, without shuffling.
ds_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(32)
)

In [16]:
# Continue training the Sequential model, this time feeding it batches from
# the tf.data pipeline instead of the raw tensors.
sequential_model.compile('adam', 'mse', ['mae'])
sequential_model.fit(ds_train, epochs=30, verbose=2)

# Evaluate through the dataset pipeline as well: `ds_test` was built in the
# previous cell for exactly this purpose but was never used.
mse, mae = sequential_model.evaluate(ds_test)
print(f"{mse = :.3f}\n{mae = :.3f}")

Epoch 1/30
25/25 - 4s - loss: 2.0662 - mae: 1.1023 - 4s/epoch - 169ms/step
Epoch 2/30
25/25 - 0s - loss: 0.9658 - mae: 0.7642 - 146ms/epoch - 6ms/step
Epoch 3/30
25/25 - 0s - loss: 0.7833 - mae: 0.6706 - 193ms/epoch - 8ms/step
Epoch 4/30
25/25 - 0s - loss: 0.8129 - mae: 0.6789 - 224ms/epoch - 9ms/step
Epoch 5/30
25/25 - 0s - loss: 0.7564 - mae: 0.6587 - 160ms/epoch - 6ms/step
Epoch 6/30
25/25 - 0s - loss: 0.6944 - mae: 0.6358 - 167ms/epoch - 7ms/step
Epoch 7/30
25/25 - 0s - loss: 0.7331 - mae: 0.6370 - 152ms/epoch - 6ms/step
Epoch 8/30
25/25 - 0s - loss: 0.7645 - mae: 0.6606 - 156ms/epoch - 6ms/step
Epoch 9/30
25/25 - 0s - loss: 0.7225 - mae: 0.6401 - 176ms/epoch - 7ms/step
Epoch 10/30
25/25 - 0s - loss: 0.6720 - mae: 0.6212 - 176ms/epoch - 7ms/step
Epoch 11/30
25/25 - 0s - loss: 0.6687 - mae: 0.6162 - 140ms/epoch - 6ms/step
Epoch 12/30
25/25 - 0s - loss: 0.7627 - mae: 0.6656 - 139ms/epoch - 6ms/step
Epoch 13/30
25/25 - 0s - loss: 0.8233 - mae: 0.6914 - 184ms/epoch - 7ms/step
Epoch 14/

### 4. Save and load GNN **model** 

#### Option 1: with `tf.saved_model`

In [20]:
import tempfile
import shutil

# Create a fresh, private directory for the export. Opening a
# NamedTemporaryFile and closing it just to reuse its name is a race-prone
# pattern: the name is free again once the file is closed.
filename = tempfile.mkdtemp()
try:
    tf.saved_model.save(sequential_model, filename)
    loaded_model = tf.saved_model.load(filename)

    # The restored object is callable on a GraphTensor.
    print(loaded_model(x_train).shape)
finally:
    # Always remove the export directory, even if saving or loading failed.
    shutil.rmtree(filename)

(800, 1)


#### Option 2: with `tf.keras`

In [24]:
import tempfile
import shutil

# Create a fresh, private directory for the export. Opening a
# NamedTemporaryFile and closing it just to reuse its name is a race-prone
# pattern: the name is free again once the file is closed.
filename = tempfile.mkdtemp()
try:
    sequential_model.save(filename)
    loaded_model = tf.keras.models.load_model(filename)

    # Continue training the restored model for a few more epochs.
    loaded_model.fit(ds_train, epochs=5, verbose=2);
finally:
    # Always remove the export directory, even if saving or loading failed.
    shutil.rmtree(filename)

Epoch 1/5
25/25 - 0s - loss: 0.5887 - mae: 0.5814 - 150ms/epoch - 6ms/step
Epoch 2/5
25/25 - 0s - loss: 0.5724 - mae: 0.5628 - 153ms/epoch - 6ms/step
Epoch 3/5
25/25 - 0s - loss: 0.5588 - mae: 0.5565 - 151ms/epoch - 6ms/step
Epoch 4/5
25/25 - 0s - loss: 0.5512 - mae: 0.5565 - 154ms/epoch - 6ms/step
Epoch 5/5
25/25 - 0s - loss: 0.4906 - mae: 0.5297 - 144ms/epoch - 6ms/step
