<a href="https://colab.research.google.com/github/choderalab/gimlet/blob/master/lime/scripts/notebooks/190808_yuanqing_gn_on_esol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

GN on ESOL

Yuanqing Wang Aug 08, 2019

The purpose of this experiment is to strip GN down to its most basic form and inspect its performance.

# Preparation

In [0]:
# -nc skips the download when ESOL.zip is already present (no ESOL.zip.1
# duplicates on re-run); -o lets unzip overwrite without prompting, and the
# archive is named explicitly instead of relying on a `*.zip` glob.
! wget -nc https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/ESOL.zip
! unzip -o ESOL.zip

--2019-08-12 13:59:18--  https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/ESOL.zip
Resolving s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)... 52.219.116.120
Connecting to s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)|52.219.116.120|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 32317 (32K) [application/zip]
Saving to: ‘ESOL.zip’


2019-08-12 13:59:18 (457 KB/s) - ‘ESOL.zip’ saved [32317/32317]

Archive:  ESOL.zip
  inflating: delaney-processed.csv   
  inflating: ESOL_README             


In [0]:
# Pin the TensorFlow 2.0 beta build.
! pip install tensorflow==2.0.0-beta1
# Remove the gin-config package; presumably its top-level `gin` module would
# clash with the `gin` package imported from the gimlet checkout — TODO confirm.
! pip uninstall gin-config -y
# Replace any previous checkout with a fresh clone of gimlet (its directory is
# appended to sys.path in the import cell).
! rm -rf gimlet
! git clone https://github.com/choderalab/gimlet.git

Collecting tensorflow==2.0.0-beta1
[?25l  Downloading https://files.pythonhosted.org/packages/29/6c/2c9a5c4d095c63c2fb37d20def0e4f92685f7aee9243d6aae25862694fd1/tensorflow-2.0.0b1-cp36-cp36m-manylinux1_x86_64.whl (87.9MB)
[K     |████████████████████████████████| 87.9MB 28.9MB/s 
Collecting tf-estimator-nightly<1.14.0.dev2019060502,>=1.14.0.dev2019060501 (from tensorflow==2.0.0-beta1)
[?25l  Downloading https://files.pythonhosted.org/packages/32/dd/99c47dd007dcf10d63fd895611b063732646f23059c618a373e85019eb0e/tf_estimator_nightly-1.14.0.dev2019060501-py2.py3-none-any.whl (496kB)
[K     |████████████████████████████████| 501kB 33.4MB/s 
Collecting tb-nightly<1.14.0a20190604,>=1.14.0a20190603 (from tensorflow==2.0.0-beta1)
[?25l  Downloading https://files.pythonhosted.org/packages/a4/96/571b875cd81dda9d5dfa1422a4f9d749e67c0a8d4f4f0b33a4e5f5f35e27/tb_nightly-1.14.0a20190603-py3-none-any.whl (3.1MB)
[K     |████████████████████████████████| 3.1MB 28.3MB/s 
Installing collected package

In [0]:
import os
import sys
import tensorflow as tf
sys.path.append('/content/gimlet')
import gin
import lime
import pandas as pd
import numpy as np

In [0]:
df = pd.read_csv('delaney-processed.csv')

# Drop every molecule whose SMILES string contains one of these substrings.
# Tokens are matched literally (regex=False), so no regex escaping -- and no
# invalid string escape sequences -- are needed.
# NOTE(review): 'B' also removes every SMILES containing 'Br'; this matches the
# previous filter, confirm it is intended.
excluded_smiles_tokens = (
    'B', '%', '.', 'Se', 'Si', 'S@@', '6', '7', '8', '9', '+', '-')
for token in excluded_smiles_tokens:
    df = df[~df['smiles'].str.contains(token, regex=False)]

# keep only SMILES strings longer than one character
df = df[df['smiles'].str.len() > 1]

x_array = df[['smiles']].values.flatten()
y_array = df[['measured log solubility in mols per litre']].values.flatten()

# Standardize the targets to zero mean and unit standard deviation.
# The parentheses matter: `y - mean / std` would only subtract the scalar
# mean/std and leave the scale untouched.
y_array = (y_array - np.mean(y_array)) / np.std(y_array)
n_samples = y_array.shape[0]

# Build the molecule dataset, batch it with GraphNet.batch (second argument
# 128) and cache the batched result under `<cwd>/tmp`.
# NOTE: the cache is written to files; remove stale `tmp*` cache files in the
# working directory if the inputs above change between runs.
ds_all = gin.i_o.from_smiles.to_mols_with_attributes(x_array, y_array)
ds_all = gin.probabilistic.gn.GraphNet.batch(ds_all, 128).cache(
    str(os.getcwd()) + '/tmp')

In [0]:
# Shuffle the batches once and keep that order for every later pass.
# With the default reshuffle_each_iteration=True the order changes each time
# the dataset is iterated, so the `take`/`skip` train/test split derived from
# `ds_all` would select different batches on every pass and test batches would
# leak into training.  The fixed seed makes the split reproducible.
ds_all = ds_all.shuffle(
    n_samples,
    seed=0,
    reshuffle_each_iteration=False)

In [0]:
# Count the batches in `ds_all` and reserve the first 20% of them (rounded
# down) as the test split; everything after that is the training split.
n_batched_samples_total = int(
    gin.probabilistic.gn.GraphNet.get_number_batches(ds_all))
n_global_te = int(0.2 * n_batched_samples_total)

ds_te = ds_all.take(n_global_te)
ds_tr = ds_all.skip(n_global_te)

W0812 14:00:14.107650 140221292361472 backprop.py:842] The dtype of the watched tensor must be floating (e.g. tf.float32), got tf.string
W0812 14:00:14.150471 140221292361472 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py:1340: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0812 14:00:14.367878 140221300754176 backprop.py:842] The dtype of the watched tensor must be floating (e.g. tf.float32), got tf.string
W0812 14:00:14.552561 140221292361472 backprop.py:842] The dtype of the watched tensor must be floating (e.g. tf.float32), got tf.string
W0812 14:00:14.678071 140221292361472 backprop.py:842] The dtype of the watched tensor must be floating (e.g. tf.float32), got tf.string
W0812 14:00:14.924196 140221292361472 backprop.py:842] The dtype of the watched ten

# Model

## $f^r$

In [0]:
class f_r(tf.keras.Model):
    """Readout network.

    Sums the bond and atom hidden-state histories per molecule, flattens the
    history axes, and feeds them together with the global history through a
    `ConcatenateThenFullyConnect` network built from `config`.

    The histories are pinned to shape (None, 6, 64) inside `call`.
    """
    def __init__(self, config=[128, 'elu', 128, 'elu', 64, 1]):
        super(f_r, self).__init__()
        self.d = lime.nets.for_gn.ConcatenateThenFullyConnect(config)

    # @tf.function
    def call(self, h_e, h_v, h_u,
            h_e_history, h_v_history, h_u_history,
            atom_in_mol, bond_in_mol):
        """Return a flat (rank-1) tensor of readout values.

        `h_u` is accepted but not read; `h_e` and `h_v` are only consulted for
        the size of their second axis.  `atom_in_mol` and `bond_in_mol` are
        boolean membership matrices (rows: atoms / bonds, columns: molecules).
        """
        # every history is expected to carry 6 steps of 64 features
        for history in (h_e_history, h_u_history, h_v_history):
            history.set_shape([None, 6, 64])

        # ---- bonds -----------------------------------------------------
        # keep only the rows of `bond_in_mol` that belong to some molecule
        bond_rows_in_use = tf.reduce_any(bond_in_mol, axis=1)
        bond_membership = tf.boolean_mask( # (n_bonds, n_mols)
            bond_in_mol,
            bond_rows_in_use,
            axis=0)

        # boolean membership -> 1.0 / 0.0 weights
        bond_weights = tf.where(
            bond_membership,
            tf.ones_like(bond_membership, dtype=tf.float32),
            tf.zeros_like(bond_membership, dtype=tf.float32))

        # broadcast the weights over the step and feature axes
        bond_weights = tf.expand_dims(tf.expand_dims(bond_weights, 2), 3)
        bond_weights = tf.tile(
            bond_weights,
            [1, 1, tf.shape(h_e_history)[1], tf.shape(h_e)[1]])

        # repeat the history once per molecule: (n_bonds, n_mols, t, d_e)
        bond_history = tf.tile(
            tf.expand_dims(h_e_history, 1), # (n_bonds, 1, t, d_e)
            [1, tf.shape(bond_in_mol)[1], 1, 1])

        # sum over bonds -> (n_mols, t, d_e)
        h_e_bar_history = tf.reduce_sum(
            tf.multiply(bond_weights, bond_history),
            axis=0)

        # ---- atoms -----------------------------------------------------
        atom_weights = tf.where( # (n_atoms, n_mols)
            atom_in_mol,
            tf.ones_like(atom_in_mol, dtype=tf.float32),
            tf.zeros_like(atom_in_mol, dtype=tf.float32))
        atom_weights = tf.expand_dims(tf.expand_dims(atom_weights, 2), 3)
        atom_weights = tf.tile(
            atom_weights,
            [1, 1, tf.shape(h_v_history)[1], tf.shape(h_v)[1]])

        # repeat the history once per molecule: (n_atoms, n_mols, t, d_e)
        atom_history = tf.tile(
            tf.expand_dims(h_v_history, 1), # (n_atoms, 1, t, d_e)
            [1, tf.shape(atom_in_mol)[1], 1, 1])

        # sum over atoms -> (n_mols, t, d_e)
        h_v_bar_history = tf.reduce_sum(
            tf.multiply(atom_weights, atom_history),
            axis=0)

        # ---- readout ---------------------------------------------------
        # collapse the (step, feature) axes into one vector per row
        flat_shape = [-1, 6 * 64]
        y = self.d(
            tf.reshape(h_v_bar_history, flat_shape),
            tf.reshape(h_e_bar_history, flat_shape),
            tf.reshape(h_u_history, flat_shape))

        return tf.reshape(y, [-1])


## $f^e$

In [0]:
class f_e(tf.keras.Model):
    r""" Featurization of edges.
    The $\sigma$ and $\pi$ components of a bond are handled by two separate
    channels, each producing D_E // 2 features, which are concatenated.

    """
    def __init__(
            self,
            d_sigma_units=64,
            d_pi_units=64,
            D_E=64):

        super(f_e, self).__init__()
        self.D_E = D_E
        half_width = int(self.D_E // 2)

        # sigma channel: one input-independent variable row followed by a
        # dense projection
        self.d_sigma_0 = tf.Variable(
            tf.zeros(
                shape=(1, d_sigma_units),
                dtype=tf.float32))
        self.d_sigma_1 = tf.keras.layers.Dense(half_width)

        # pi channel: two stacked dense layers
        self.d_pi_0 = tf.keras.layers.Dense(d_pi_units)
        self.d_pi_1 = tf.keras.layers.Dense(half_width)

    @tf.function
    def call(self, x):
        """Embed `x` (presumably the bond order -- TODO confirm) per row."""
        one = tf.constant(1, dtype=tf.float32)

        # entries greater than 1 are treated as having a $\pi$ component
        has_pi = tf.greater(x, one)

        # sigma part: the same projected row repeated once per input row
        n_rows = tf.shape(x, tf.int64)[0]
        x_sigma = tf.tile(
            self.d_sigma_1(self.d_sigma_0),
            [n_rows, 1])

        # pi part: embed (x - 1) where has_pi holds, zeros elsewhere
        pi_embedding = self.d_pi_1(
            self.d_pi_0(
                tf.math.subtract(x, one)))
        no_pi = tf.zeros(
            shape=(self.D_E // 2, ),
            dtype=tf.float32)
        x_pi = tf.where(has_pi, pi_embedding, no_pi)

        return tf.concat([x_sigma, x_pi], axis=1)

## $f^v$

In [0]:
class f_v(tf.keras.Model):
    """Node featurization: one-hot encode into 8 classes, then a dense layer."""
    def __init__(self, units=64):
        super(f_v, self).__init__()
        self.d = tf.keras.layers.Dense(units)

    @tf.function
    def call(self, x):
        """Return the dense embedding of the depth-8 one-hot encoding of `x`."""
        one_hot = tf.one_hot(x, 8)
        # pin the static shape so the dense layer sees a known last dimension
        one_hot.set_shape([None, 8])
        return self.d(one_hot)

## $\phi^u$

In [0]:
class phi_u(tf.keras.Model):
    """Global update: `ConcatenateThenFullyConnect` followed by a GRU.

    A (32, 32) float32 variable `_state` is kept on the model and reset to
    zeros whenever the current and initial inputs are identical.  The GRU call
    in this class does not pass `initial_state`, so `_state` is written here
    but not consumed by the GRU.
    """
    def __init__(
            self,
            config=(
                32,
                'elu',
                32,
                'elu'
            ),
            gru_units=32):
        super(phi_u, self).__init__()
        self.d = lime.nets.for_gn.ConcatenateThenFullyConnect(config)
        self.gru = tf.keras.layers.GRU(units=gru_units)
        self._state = tf.Variable(
            tf.zeros((32, 32), dtype=tf.float32))

    # @tf.function
    def call(self, h_u, h_u_0, h_e_bar, h_v_bar):
        """Combine the inputs and run them through the GRU as one time step."""
        combined = self.d(h_u, h_u_0, h_e_bar, h_v_bar)

        # zero the stored state when h_u equals h_u_0 everywhere,
        # otherwise keep it as it is
        same_as_initial = tf.reduce_all(tf.equal(h_u, h_u_0))
        self._state.assign(
            tf.cond(
                same_as_initial,
                lambda: tf.zeros((32, 32), tf.float32),
                lambda: self._state))

        # add a length-1 time axis for the recurrent layer
        return self.gru(tf.expand_dims(combined, 1))

## $\phi^v$

In [0]:
class phi_v(tf.keras.Model):
    """Node update: `ConcatenateThenFullyConnect` followed by a GRU.

    A (32, 32) float32 variable `_state` is kept on the model and reset to
    zeros whenever the current and initial inputs are identical.  The GRU call
    in this class does not pass `initial_state`, so `_state` is written here
    but not consumed by the GRU.
    """
    def __init__(
            self,
            config=(
                32,
                'elu',
                32,
                'elu'
            ),
            gru_units=32):
        super(phi_v, self).__init__()
        self.d = lime.nets.for_gn.ConcatenateThenFullyConnect(config)
        self.gru = tf.keras.layers.GRU(units=gru_units)
        self._state = tf.Variable(
            tf.zeros((32, 32), dtype=tf.float32))

    @tf.function
    def call(self, h_v, h_v_0, h_e_bar_i, h_u_i):
        """Combine the inputs and run them through the GRU as one time step."""
        combined = self.d(h_v, h_v_0, h_e_bar_i, h_u_i)

        # zero the stored state when h_v equals h_v_0 everywhere,
        # otherwise keep it as it is
        same_as_initial = tf.reduce_all(tf.equal(h_v, h_v_0))
        self._state.assign(
            tf.cond(
                same_as_initial,
                lambda: tf.zeros((32, 32), tf.float32),
                lambda: self._state))

        # add a length-1 time axis for the recurrent layer
        return self.gru(tf.expand_dims(combined, 1))

## $\phi^e$

In [0]:
class phi_e(tf.keras.Model):
    """Edge update: `ConcatenateThenFullyConnect` followed by a GRU.

    A (32, 32) float32 variable `_state` is kept on the model, reset to zeros
    whenever the current and initial inputs are identical, and handed to the
    GRU as its initial state.
    """
    def __init__(
            self,
            config=(
                32,
                'elu',
                32,
                'elu'
            ),
            gru_units=32):
        super(phi_e, self).__init__()
        self.d = lime.nets.for_gn.ConcatenateThenFullyConnect(config)
        self.gru = tf.keras.layers.GRU(units=gru_units)
        self._state = tf.Variable(
            tf.zeros((32, 32), dtype=tf.float32))

    # @tf.function
    def call(self, h_e, h_e_0, h_left, h_right, h_u_i):
        """Combine the inputs and run them through the GRU as one time step."""
        combined = self.d(h_e, h_e_0, h_left, h_right, h_u_i)

        # zero the stored state when h_e equals h_e_0 everywhere,
        # otherwise keep it as it is
        same_as_initial = tf.reduce_all(tf.equal(h_e, h_e_0))
        self._state.assign(
            tf.cond(
                same_as_initial,
                lambda: tf.zeros((32, 32), tf.float32),
                lambda: self._state))

        # add a length-1 time axis and start the GRU from the stored state
        return self.gru(
            tf.expand_dims(combined, 1),
            initial_state=self._state)

## entire structure

In [0]:
def zero_global_attr(atoms, adjacency_map, batched_attr_in_mol):
    """Initial global attribute: a block of zeros with 64 columns.

    One row is produced per non-zero entry of `batched_attr_in_mol`;
    `atoms` and `adjacency_map` are not read.
    """
    n_rows = tf.math.count_nonzero(batched_attr_in_mol)
    return tf.tile(
        tf.zeros((1, 64)),
        [
             n_rows,
             1
        ])


# all three update functions use the same layer specification
update_config = (64, 'sigmoid', 64)

# NOTE(review): f_r pins its histories to 6 steps, which looks tied to
# repeat=5 below (presumably initial state + 5 rounds) -- confirm before
# changing either.
gn = gin.probabilistic.gn.GraphNet(
    f_e=f_e(),
    f_v=f_v(),
    f_u=zero_global_attr,
    phi_e=lime.nets.for_gn.ConcatenateThenFullyConnect(update_config),
    phi_v=lime.nets.for_gn.ConcatenateThenFullyConnect(update_config),
    phi_u=lime.nets.for_gn.ConcatenateThenFullyConnect(update_config),
    f_r=f_r(),
    repeat=5)

optimizer = tf.keras.optimizers.Adam(1e-3)

### backup

In [0]:
    '''
   
    rho_e_v=(lambda h_e, atom_is_connected_to_bonds: tf.math.divide_no_nan(
        tf.reduce_sum(
            tf.where( # here we grab the edges connected to nodes
                tf.tile(
                    tf.expand_dims(
                        atom_is_connected_to_bonds,
                        2),
                    [1, 1, tf.shape(h_e)[1]]),
                tf.tile(
                    tf.expand_dims(
                        h_e,
                        0),
                    [
                        tf.shape(atom_is_connected_to_bonds)[0], # n_atoms
                        1,
                        1
                    ]),
                tf.zeros((
                    tf.shape(atom_is_connected_to_bonds)[0],
                    tf.shape(h_e)[0],
                    tf.shape(h_e)[1]))),
            axis=1),
        tf.tile(
            tf.expand_dims(
                tf.cast(
                    tf.math.count_nonzero(
                        atom_is_connected_to_bonds,
                        1),
                    tf.float32),
                1),
            [
                1, 
                tf.shape(h_e)[1]
            ]
        ))),
    
    
    rho_e_u=(lambda h_e, bond_in_mol: tf.math.divide_no_nan(
        tf.reduce_sum(
            tf.multiply(
                tf.tile(
                    tf.expand_dims(
                        tf.where( # (n_bonds, n_mols)
                            tf.boolean_mask(
                                bond_in_mol,
                                tf.reduce_any(
                                    bond_in_mol,
                                    axis=1),
                                axis=0),
                            tf.ones_like(
                                tf.boolean_mask(
                                    bond_in_mol,
                                    tf.reduce_any(
                                        bond_in_mol,
                                        axis=1),
                                    axis=0),
                                dtype=tf.float32),
                            tf.zeros_like(
                                tf.boolean_mask(
                                    bond_in_mol,
                                    tf.reduce_any(
                                        bond_in_mol,
                                        axis=1),
                                    axis=0),
                                dtype=tf.float32)),
                        2),
                    [1, 1, tf.shape(h_e)[1]]),
                tf.tile( # (n_bonds, n_mols, d_e)
                    tf.expand_dims(
                        h_e, # (n_bonds, d_e)
                        1),
                    [1, tf.shape(bond_in_mol)[1], 1])),
            axis=0),
        tf.tile(
            tf.expand_dims(
                tf.cast(
                    tf.math.count_nonzero(
                        bond_in_mol,
                        axis=0),
                    tf.float32),
                1),
            [1, tf.shape(h_e)[1]]))),

    rho_v_u=(lambda h_v, atom_in_mol: tf.math.divide_no_nan(
        tf.reduce_mean(
            tf.multiply(
                tf.tile(
                    tf.expand_dims(
                        tf.where( # (n_bonds, n_mols)
                            atom_in_mol,
                            tf.ones_like(
                                atom_in_mol,
                                dtype=tf.float32),
                            tf.zeros_like(
                                atom_in_mol,
                                dtype=tf.float32)),
                        2),
                    [1, 1, tf.shape(h_v)[1]]),
                tf.tile( # (n_bonds, n_mols, d_e)
                    tf.expand_dims(
                        h_v, # (n_bonds, d_e)
                        1),
                    [1, tf.shape(atom_in_mol)[1], 1])),
            axis=0),
        tf.tile(
            tf.expand_dims(
                tf.cast(
                    tf.math.count_nonzero(
                        atom_in_mol,
                        axis=0),
                    tf.float32),
                1),
            [1, tf.shape(h_v)[1]]))),


            
    '''

# Train

In [0]:
# Shuffle the training batches with a buffer of five times the test-split size.
ds_tr = ds_tr.shuffle(buffer_size=n_global_te * 5)

In [0]:
N_EPOCH = 30

# `losses` starts with a single -1 placeholder; one value is appended per batch.
losses = tf.constant([-1], dtype=tf.float32)

for epoch_idx in range(N_EPOCH):
    print(epoch_idx)
    for (atoms, adjacency_map, atom_in_mol, bond_in_mol,
            y, y_mask) in ds_tr:
        # keep only the labels selected by the mask
        y = tf.boolean_mask(y, y_mask)

        # forward pass and loss are recorded for differentiation
        with tf.GradientTape() as tape:
            y_hat = gn(
                atoms,
                adjacency_map,
                atom_in_mol=atom_in_mol,
                bond_in_mol=bond_in_mol,
                batched_attr_in_mol=y_mask)
            loss = tf.losses.mean_squared_error(y, y_hat)

        print(loss)

        # one optimizer step over all variables of the model
        variables = gn.variables
        grad = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(grad, variables))

        # append this batch's loss to the running record
        batch_loss = tf.expand_dims(tf.reduce_sum(loss), 0)
        losses = tf.concat([losses, batch_loss], axis=0)

In [0]:
# Drop the leading -1 placeholder that `losses` was initialized with.
# NOTE: not idempotent -- every additional run of this cell discards one real
# loss value, so run it exactly once after training.
losses = losses[1:]

In [0]:
from matplotlib import pyplot as plt
plt.style.use('ggplot')
plt.rc('font', family='serif')

# Per-batch training loss on a log scale.
plt.figure(figsize=(10, 5))
plt.plot(tf.math.log(losses).numpy())
plt.xlabel('# Training Batch', fontsize=14)
# Raw string: '\m' is not a valid Python escape sequence.  The closing
# parenthesis now sits inside the math delimiters so the whole label is typeset
# as math.
plt.ylabel(r'$\log(\mathtt{MSE})$', fontsize=14);

In [0]:
plt.style.use('ggplot')
plt.rc('font', family='serif')

# Per-batch training loss on a linear scale.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(losses.numpy())
ax.set_xlabel('# Training Batch', fontsize=14)
ax.set_ylabel('MSE', fontsize=14)

In [0]:
# Collect the masked labels and the model predictions over the training split.
# No GradientTape is opened: nothing is differentiated during evaluation.
y_true_tr_batches = []
y_pred_tr_batches = []

for atoms, adjacency_map, atom_in_mol, bond_in_mol, y, y_mask \
    in ds_tr:
    y_hat = gn(
        atoms,
        adjacency_map,
        atom_in_mol=atom_in_mol,
        bond_in_mol=bond_in_mol,
        batched_attr_in_mol=y_mask)

    # keep only the labels selected by the mask
    y = tf.boolean_mask(
        y,
        y_mask)

    y_true_tr_batches.append(tf.reshape(y, [-1]))
    y_pred_tr_batches.append(tf.reshape(y_hat, [-1]))

# one flat tensor each, concatenated once instead of growing inside the loop
y_true_tr = tf.concat(y_true_tr_batches, axis=0)
y_pred_tr = tf.concat(y_pred_tr_batches, axis=0)


# A single figure: a separate plt.figure(...) call before plt.subplots() only
# leaves an extra empty figure behind.
fig, ax = plt.subplots()

ax.scatter(
    y_true_tr.numpy(),
    y_pred_tr.numpy(),
    alpha=0.5,
    s=4)

ax.tick_params(labelsize=10)
# raw strings: '\m' is not a valid Python escape sequence
ax.set_xlabel(r'$y_\mathtt{true}$', fontsize=14)
ax.set_ylabel(r'$y_\mathtt{pred}$', fontsize=14)
ax.set_title('Predicted vs True Value on Training Set')


from sklearn import metrics

print('$r^2$ score on training data is %s' % metrics.r2_score(
    y_true_tr.numpy(), y_pred_tr.numpy()))
print('MSE on training data is %s' % tf.losses.mean_squared_error(
    y_true_tr, y_pred_tr).numpy())

In [0]:
# Collect the masked labels and the model predictions over the test split.
# No GradientTape is opened: nothing is differentiated during evaluation.
y_true_te_batches = []
y_pred_te_batches = []

for atoms, adjacency_map, atom_in_mol, bond_in_mol, y, y_mask \
    in ds_te:
    y_hat = gn(
        atoms,
        adjacency_map,
        atom_in_mol=atom_in_mol,
        bond_in_mol=bond_in_mol,
        batched_attr_in_mol=y_mask)

    # keep only the labels selected by the mask
    y = tf.boolean_mask(
        y,
        y_mask)

    y_true_te_batches.append(tf.reshape(y, [-1]))
    y_pred_te_batches.append(tf.reshape(y_hat, [-1]))

# one flat tensor each, concatenated once instead of growing inside the loop
y_true_te = tf.concat(y_true_te_batches, axis=0)
y_pred_te = tf.concat(y_pred_te_batches, axis=0)


# A single figure: a separate plt.figure(...) call before plt.subplots() only
# leaves an extra empty figure behind.
fig, ax = plt.subplots()

ax.scatter(
    y_true_te.numpy(),
    y_pred_te.numpy(),
    alpha=0.5,
    s=4)

ax.tick_params(labelsize=10)
# raw strings: '\m' is not a valid Python escape sequence
ax.set_xlabel(r'$y_\mathtt{true}$', fontsize=14)
ax.set_ylabel(r'$y_\mathtt{pred}$', fontsize=14)
# this cell evaluates ds_te, so the labels refer to the test split
ax.set_title('Predicted vs True Value on Test Set')

from sklearn import metrics

print('$r^2$ score on test data is %s' % metrics.r2_score(
    y_true_te.numpy(), y_pred_te.numpy()))
# the format string needs a %s placeholder; without it the % operator raises
# "TypeError: not all arguments converted during string formatting"
print('MSE on test data is %s' % tf.losses.mean_squared_error(
    y_true_te, y_pred_te).numpy())