In [1]:
import numpy as np

# Load every pre-split array from a single .npz archive.
data = np.load('data/tox21_graph.npz')

# Each group is unpacked in (train, validation, test) order.
# Node-level features (presumably one row per atom -- confirm against the
# preprocessing script).
atom_feat_tr, atom_feat_val, atom_feat_te = (
    data['atom_feat_tr'], data['atom_feat_val'], data['atom_feat_te'])

# Pairwise features (presumably one vector per atom pair -- confirm).
edge_feat_tr, edge_feat_val, edge_feat_te = (
    data['edge_feat_tr'], data['edge_feat_val'], data['edge_feat_te'])

# Targets.
y_tr, y_val, y_te = data['y_tr'], data['y_val'], data['y_te']

# Companion arrays for the targets; later fed to the model as its third input.
# Presumably 1 where a label is present and 0 where it is missing -- confirm.
y_notnan_tr, y_notnan_val, y_notnan_te = (
    data['y_notnan_tr'], data['y_notnan_val'], data['y_notnan_te'])

# Sanity check: one line of (train, val, test) shapes per array family.
print(*[arr.shape for arr in (atom_feat_tr, atom_feat_val, atom_feat_te)])
print(*[arr.shape for arr in (edge_feat_tr, edge_feat_val, edge_feat_te)])
print(*[arr.shape for arr in (y_tr, y_val, y_te)])

(11758, 132, 63) (295, 132, 63) (645, 132, 63)
(11758, 132, 132, 5) (295, 132, 132, 5) (645, 132, 132, 5)
(11758, 12) (295, 12) (645, 12)


In [2]:
import tensorflow as tf
from tensorflow.python.keras import backend as K
from functools import partial
from ggnnmols.models import GGNN


# Hand-picked hyperparameters.
hidden_size = 512
num_prop = 10

# Sizes read off the training arrays so the model always matches the data.
node_dim, node_feat_dim = atom_feat_tr.shape[-2:]
edge_feat_dim = edge_feat_tr.shape[-1]
output_dim = y_tr.shape[-1]

# NOTE(review): missing_ys=True presumably makes the model expect the
# label-validity array as a third input -- confirm in ggnnmols.models.GGNN.
model = GGNN(
    node_dim,
    node_feat_dim,
    edge_feat_dim,
    hidden_size,
    output_dim,
    num_prop=num_prop,
    missing_ys=True,
)

# Symbolic inputs: per-sample shapes are the array shapes minus the leading
# sample axis.
node_inputs = tf.keras.layers.Input(shape=atom_feat_tr.shape[1:])
edge_inputs = tf.keras.layers.Input(shape=edge_feat_tr.shape[1:])
valid_y_inputs = tf.keras.layers.Input(shape=y_tr.shape[-1:])

# Call the model once on the symbolic inputs to build its graph.
outputs = model([node_inputs, edge_inputs, valid_y_inputs])

def valid_binary_crossentropy(y_true, y_pred, valid_y):
    """Masked binary cross-entropy, summed over the last axis.

    Args:
        y_true: target tensor.
        y_pred: prediction tensor, passed to ``K.binary_crossentropy`` as
            the output argument.
        valid_y: multiplicative weights applied element-wise to the loss;
            an entry of 0 removes that element's contribution.

    Returns:
        Tensor with the last axis reduced by summation.
    """
    elementwise_loss = K.binary_crossentropy(y_true, y_pred)
    masked_loss = elementwise_loss * valid_y
    return K.sum(masked_loss, axis=-1)

def valid_acc(y_true, y_pred, valid_y):
    """Accuracy over the entries selected by ``valid_y``.

    Predictions are rounded to the nearest integer and compared with the
    targets; each match is weighted by ``valid_y`` and the weighted matches
    are divided by the total weight.

    Args:
        y_true: target tensor.
        y_pred: prediction tensor (rounded before comparison).
        valid_y: element-wise weights; 0 excludes an entry from the metric.

    Returns:
        Scalar tensor. When ``valid_y`` sums to zero the result is 0 rather
        than NaN.
    """
    hits = K.cast(K.equal(y_true, K.round(y_pred)), K.floatx())
    n_valid = K.sum(valid_y)
    # Clamp the denominator: a batch with no valid entries would otherwise
    # yield 0/0 = NaN and poison the running average of the metric. Any
    # positive total weight is left unchanged by the clamp.
    return K.sum(hits * valid_y) / K.maximum(n_valid, K.epsilon())

# Keras invokes losses and metrics as fn(y_true, y_pred). The third argument
# of the helpers above is pre-bound here to the symbolic validity input, so
# the mask fed alongside each batch is what weights the loss and accuracy.
bxen = partial(valid_binary_crossentropy, valid_y=valid_y_inputs)
acc = partial(valid_acc, valid_y=valid_y_inputs)
# functools.partial objects carry no __name__ of their own; one is set by
# hand (presumably so Keras can label the metric in its logs -- confirm).
acc.__name__ = 'acc'

# NOTE(review): newer TensorFlow releases spell this argument
# `learning_rate`; `lr` is kept to match the TF version this notebook ran on.
optimizer = tf.keras.optimizers.Adam(lr=0.0001)
model.compile(optimizer=optimizer, loss=bxen, metrics=[acc])

Instructions for updating:
Colocations handled automatically by placer.


In [4]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

import os

filepath = 'saved_models/weights.best.hdf5'
# The checkpoint writer does not create missing parent directories; without
# this the run would fail at the first save, after a full epoch of training.
os.makedirs(os.path.dirname(filepath), exist_ok=True)

# Keep only the weights of the epoch with the best validation loss so far.
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='auto',
                             save_weights_only=True)

# Multiply the learning rate by 0.4 whenever val_loss has not improved for
# 7 consecutive epochs; min_lr=0 places no floor on the decay.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.4,
                              patience=7,
                              verbose=1,
                              min_lr=0)

# The validity arrays travel as a third model input so the bound loss and
# metric can read them. The History object is kept so the per-epoch curves
# can be inspected after training.
history = model.fit(x=[atom_feat_tr, edge_feat_tr, y_notnan_tr], y=y_tr,
                    validation_data=([atom_feat_val, edge_feat_val, y_notnan_val], y_val),
                    epochs=50, batch_size=16, callbacks=[reduce_lr, checkpoint])

Train on 11758 samples, validate on 295 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/50
Epoch 00001: val_loss improved from inf to 2.70933, saving model to weights.best.hdf5
Epoch 2/50
Epoch 00002: val_loss improved from 2.70933 to 2.61908, saving model to weights.best.hdf5
Epoch 3/50
Epoch 00003: val_loss improved from 2.61908 to 2.54218, saving model to weights.best.hdf5
Epoch 4/50
Epoch 00004: val_loss did not improve from 2.54218
Epoch 5/50
Epoch 00005: val_loss improved from 2.54218 to 2.52882, saving model to weights.best.hdf5
Epoch 6/50
Epoch 00006: val_loss improved from 2.52882 to 2.48673, saving model to weights.best.hdf5
Epoch 7/50
Epoch 00007: val_loss improved from 2.48673 to 2.41153, saving model to weights.best.hdf5
Epoch 8/50
Epoch 00008: val_loss did not improve from 2.41153
Epoch 9/50
Epoch 00009: val_loss did not improve from 2.41153
Epoch 10/50
Epoch 00010: val_loss did not improve from 2.41153
Epoch 11/50
Epoch 00011: val_loss did not improve fro

Epoch 28/50
Epoch 00028: val_loss did not improve from 2.30608
Epoch 29/50
Epoch 00029: val_loss did not improve from 2.30608
Epoch 30/50
Epoch 00030: val_loss did not improve from 2.30608
Epoch 31/50
Epoch 00031: ReduceLROnPlateau reducing learning rate to 1.5999999595806004e-05.

Epoch 00031: val_loss did not improve from 2.30608
Epoch 32/50
Epoch 00032: val_loss did not improve from 2.30608
Epoch 33/50
Epoch 00033: val_loss did not improve from 2.30608
Epoch 34/50
Epoch 00034: val_loss did not improve from 2.30608
Epoch 35/50
Epoch 00035: val_loss did not improve from 2.30608
Epoch 36/50
Epoch 00036: val_loss did not improve from 2.30608
Epoch 37/50
Epoch 00037: val_loss did not improve from 2.30608
Epoch 38/50
Epoch 00038: ReduceLROnPlateau reducing learning rate to 6.399999983841554e-06.

Epoch 00038: val_loss did not improve from 2.30608
Epoch 39/50
Epoch 00039: val_loss did not improve from 2.30608
Epoch 40/50
Epoch 00040: val_loss did not improve from 2.30608
Epoch 41/50
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7f473eead2e8>