<a href="https://colab.research.google.com/github/choderalab/gin/blob/master/tonic/scripts/mini_example_ESOL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PREP

In [0]:
! wget https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/ESOL.zip
! unzip *.zip
! git clone https://github.com/choderalab/gin.git

--2019-05-19 19:41:58--  https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/ESOL.zip
Resolving s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)... 52.219.120.8
Connecting to s3-us-west-1.amazonaws.com (s3-us-west-1.amazonaws.com)|52.219.120.8|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 32317 (32K) [application/zip]
Saving to: ‘ESOL.zip’


2019-05-19 19:41:59 (184 KB/s) - ‘ESOL.zip’ saved [32317/32317]

Archive:  ESOL.zip
  inflating: delaney-processed.csv   
  inflating: ESOL_README             
Cloning into 'gin'...
remote: Enumerating objects: 217, done.[K
remote: Counting objects: 100% (217/217), done.[K
remote: Compressing objects: 100% (148/148), done.[K
remote: Total 217 (delta 118), reused 149 (delta 59), pack-reused 0[K
Receiving objects: 100% (217/217), 173.96 KiB | 379.00 KiB/s, done.
Resolving deltas: 100% (118/118), done.


In [0]:
! pip uninstall gin-config -y

Uninstalling gin-config-0.1.4:
  Successfully uninstalled gin-config-0.1.4


In [0]:
import os
import sys
import tensorflow as tf
tf.enable_eager_execution()
sys.path.append('/content/gin')
import gin
import tonic
import pandas as pd
import numpy as np

# DATA

In [0]:
# read data
df = pd.read_csv('delaney-processed.csv')
x_array = df[['smiles']].values.flatten()
y_array = df[['measured log solubility in mols per litre']].values.flatten()
y_array = (y_array - np.mean(y_array) / np.std(y_array))

ds = gin.i_o.from_smiles.smiles_to_mols_with_attributes(x_array, y_array)

# MODELS

In [0]:
class f_r(tf.keras.Model):
    def __init__(self, config):
        super(f_r, self).__init__()
        self.d = tonic.nets.for_gn.ConcatenateThenFullyConnect(config)

    def call(self, h_e, h_v, h_u):
        y = self.d(h_u)[0][0]
        return y


gn = gin.probabilistic.gn.GraphNet(
    f_e=tf.keras.layers.Dense(128),

    f_v=tf.keras.layers.Lambda(
        lambda x: tf.keras.layers.Dense(128)(tf.one_hot(x, 8))),

    f_u=(lambda x, y: tf.zeros((1, 128), dtype=tf.float32)),

    phi_e=tonic.nets.for_gn.ConcatenateThenFullyConnect((128, 'elu', 128, 'elu')),

    phi_v=tonic.nets.for_gn.ConcatenateThenFullyConnect((128, 'elu', 128, 'elu')),

    phi_u=tonic.nets.for_gn.ConcatenateThenFullyConnect((128, 'elu', 128, 'elu')),

    rho_e_v=(lambda h_e, atom_is_connected_to_bonds: tf.reduce_sum(
        tf.where(
            tf.tile(
                tf.expand_dims(
                    atom_is_connected_to_bonds,
                    2),
                [1, 1, h_e.shape[1]]),
            tf.tile(
                tf.expand_dims(
                    h_e,
                    0),
                [
                    atom_is_connected_to_bonds.shape[0], # n_atoms
                    1,
                    1
                ]),
            tf.zeros((
                atom_is_connected_to_bonds.shape[0],
                h_e.shape[0],
                h_e.shape[1]))),
        axis=1)),

    rho_e_u=(lambda x: tf.expand_dims(tf.reduce_sum(x, axis=0), 0)),

    rho_v_u=(lambda x: tf.expand_dims(tf.reduce_sum(x, axis=0), 0)),

    f_r=f_r((128, 'tanh', 128, 1)),

    repeat=3)

In [0]:
optimizer = tf.train.AdamOptimizer(1e-2)
n_epoch = 50
batch_size = 32
batch_idx = 0
loss = 0
tape = tf.GradientTape()

for dummy_idx in range(n_epoch):
    for atoms, adjacency_map, y in ds:
        mol = [atoms, adjacency_map]

        with tape:
            y_hat = gn(mol)
            loss += tf.clip_by_norm(
                tf.losses.mean_squared_error(y, y_hat),
                1e8)
            batch_idx += 1

        if batch_idx == batch_size:
            print(loss)
            variables = gn.variables
            grad = tape.gradient(loss, variables)
            optimizer.apply_gradients(
                zip(grad, variables),
                tf.train.get_or_create_global_step())
            loss = 0
            batch_idx = 0
            tape = tf.GradientTape()


tf.Tensor(761.1897, shape=(), dtype=float32)
tf.Tensor(1617.4166, shape=(), dtype=float32)
tf.Tensor(195.77788, shape=(), dtype=float32)
tf.Tensor(873.9001, shape=(), dtype=float32)
tf.Tensor(154.12161, shape=(), dtype=float32)
tf.Tensor(206.19958, shape=(), dtype=float32)
tf.Tensor(318.7833, shape=(), dtype=float32)
tf.Tensor(212.49037, shape=(), dtype=float32)
tf.Tensor(166.59462, shape=(), dtype=float32)
tf.Tensor(333.10806, shape=(), dtype=float32)
tf.Tensor(152.7135, shape=(), dtype=float32)
tf.Tensor(79.00046, shape=(), dtype=float32)
tf.Tensor(121.82396, shape=(), dtype=float32)
tf.Tensor(108.650764, shape=(), dtype=float32)
tf.Tensor(149.93909, shape=(), dtype=float32)
tf.Tensor(146.05124, shape=(), dtype=float32)
tf.Tensor(92.687584, shape=(), dtype=float32)
tf.Tensor(194.21231, shape=(), dtype=float32)
tf.Tensor(277.46942, shape=(), dtype=float32)
tf.Tensor(116.96087, shape=(), dtype=float32)
tf.Tensor(141.47734, shape=(), dtype=float32)
tf.Tensor(215.33748, shape=(), dtype=f

KeyboardInterrupt: ignored