In [1]:
import numpy as np
import tensorflow as tf
# Short alias for the Keras API that ships with TensorFlow.
ks = tf.keras

# Seed NumPy and TensorFlow before any model is built so that weight
# initialisation, dropout and batch shuffling are repeatable across
# "Restart & Run All" (as far as the backend allows). The value matches the
# random_state already used for the train/validation split.
np.random.seed(42)
tf.random.set_seed(42)

In [2]:
from kgcnn.data.datasets.ESOLDataset import ESOLDataset
from sklearn.model_selection import train_test_split
# Load the ESOL dataset (the recorded log shows 1128 molecules being processed).
dataset = ESOLDataset()
# Run the "set_edge_indices_reverse" method via map_list. The model config
# further down lists an "edge_indices_reverse" input, which presumably is the
# property produced by this step -- TODO confirm against the kgcnn docs.
dataset.map_list(method="set_edge_indices_reverse")

INFO:kgcnn.data.download:Checking and possibly downloading dataset with name ESOL
INFO:kgcnn.data.download:Dataset directory located at C:\Users\patri\.kgcnn\datasets
INFO:kgcnn.data.download:Dataset directory found. Done.
INFO:kgcnn.data.download:Dataset found. Done.
INFO:kgcnn.data.ESOL:Found SDF C:\Users\patri\.kgcnn\datasets\ESOL\delaney-processed.sdf of pre-computed structures.
INFO:kgcnn.data.ESOL:Read molecules from mol-file.
INFO:kgcnn.data.ESOL: ... process molecules 0 from 1128
INFO:kgcnn.data.ESOL: ... process molecules 1000 from 1128
INFO:kgcnn.mol.encoder:OneHotEncoder Symbol found ['O', 'C', 'N', 'S', 'Cl', 'P', 'F', 'I', 'Br']
INFO:kgcnn.mol.encoder:OneHotEncoder Hybridization found [rdkit.Chem.rdchem.HybridizationType.SP3, rdkit.Chem.rdchem.HybridizationType.SP, rdkit.Chem.rdchem.HybridizationType.SP2]
INFO:kgcnn.mol.encoder:OneHotEncoder TotalDegree found [2, 4, 1, 3]
INFO:kgcnn.mol.encoder:OneHotEncoder TotalNumHs found [1, 2, 0, 3, 4]
INFO:kgcnn.mol.encoder:OneHotEnc

<kgcnn.data.datasets.ESOLDataset.ESOLDataset at 0x20cdb0f2b20>

In [3]:
from kgcnn.literature.DMPNN import make_model
from tensorflow.keras.optimizers import Adam

In [4]:
# Hyperparameters handed to make_model(). The "inputs" entry is reused by
# later cells to clean the dataset and to build the input tensors.
model_config = {
    "name": "DMPNN",
    # Four ragged inputs: node attributes, edge attributes, edge index pairs
    # and the reverse edge indices.
    "inputs": [
        {
            "shape": [None, 41],
            "name": "node_attributes",
            "dtype": "float32",
            "ragged": True,
        },
        {
            "shape": [None, 11],
            "name": "edge_attributes",
            "dtype": "float32",
            "ragged": True,
        },
        {
            "shape": [None, 2],
            "name": "edge_indices",
            "dtype": "int64",
            "ragged": True,
        },
        {
            "shape": [None, 1],
            "name": "edge_indices_reverse",
            "dtype": "int64",
            "ragged": True,
        },
    ],
    "input_embedding": {
        "node": {"input_dim": 95, "output_dim": 64},
        "edge": {"input_dim": 5, "output_dim": 64},
    },
    "pooling_args": {"pooling_method": "sum"},
    "edge_initialize": {
        "units": 128,
        "use_bias": True,
        "activation": "relu",
    },
    "edge_dense": {
        "units": 128,
        "use_bias": True,
        "activation": "linear",
    },
    "edge_activation": {"activation": "relu"},
    "node_dense": {
        "units": 128,
        "use_bias": True,
        "activation": "relu",
    },
    "verbose": 10,
    "depth": 5,
    "dropout": {"rate": 0.1},
    # One output per node, i.e. node-level regression.
    "output_embedding": "node",
    # Keep the model output as a ragged tensor instead of a dense one.
    "output_to_tensor": False,
    "output_mlp": {
        "use_bias": [True, True, False],
        "units": [64, 32, 1],
        "activation": ["relu", "relu", "linear"],
    },
}

# Build the model once to check that the configuration is accepted.
model = make_model(**model_config)

INFO:kgcnn.utils.models:Updated model kwargs:
INFO:kgcnn.utils.models:{'name': 'DMPNN', 'inputs': [{'shape': [None, 41], 'name': 'node_attributes', 'dtype': 'float32', 'ragged': True}, {'shape': [None, 11], 'name': 'edge_attributes', 'dtype': 'float32', 'ragged': True}, {'shape': [None, 2], 'name': 'edge_indices', 'dtype': 'int64', 'ragged': True}, {'shape': [None, 1], 'name': 'edge_indices_reverse', 'dtype': 'int64', 'ragged': True}], 'input_embedding': {'node': {'input_dim': 95, 'output_dim': 64}, 'edge': {'input_dim': 5, 'output_dim': 64}, 'graph': {'input_dim': 100, 'output_dim': 64}}, 'pooling_args': {'pooling_method': 'sum'}, 'use_graph_state': False, 'edge_initialize': {'units': 128, 'use_bias': True, 'activation': 'relu'}, 'edge_dense': {'units': 128, 'use_bias': True, 'activation': 'linear'}, 'edge_activation': {'activation': 'relu'}, 'node_dense': {'units': 128, 'use_bias': True, 'activation': 'relu'}, 'verbose': 10, 'depth': 5, 'dropout': {'rate': 0.1}, 'output_embedding': '

In [5]:
# Check every graph for the properties named in the model inputs. In the
# recorded run graph 934 is reported with empty edge lists and its index is
# returned; the split sizes further down (845 + 282 = 1127 of 1128 graphs)
# indicate that it was removed from `dataset` in place.
dataset.clean(model_config["inputs"])

INFO:kgcnn.data.ESOL:Property edge_attributes is an empty list for graph 934.
INFO:kgcnn.data.ESOL:Property edge_indices is an empty list for graph 934.
INFO:kgcnn.data.ESOL:Property edge_indices_reverse is an empty list for graph 934.


array([934])

In [6]:
# Attach a per-node regression target to every graph: the graph's
# "node_number" values with a trailing axis of length 1, stored as float32
# under the key "node_label".
for graph_idx in range(len(dataset)):
    graph = dataset[graph_idx]
    node_numbers = np.expand_dims(graph["node_number"], axis=-1)
    graph.update({"node_label": np.array(node_numbers, dtype="float32")})

In [7]:
# Hold out 25% of the graphs for validation; the fixed random_state keeps the
# split identical between runs.
train_index, test_index = train_test_split(np.arange(len(dataset)), test_size=0.25, random_state=42)

# Model inputs as a list of ragged tensors, in the order of model_config["inputs"].
x_train = dataset[train_index].tensor(model_config["inputs"])
# Request float32 labels explicitly for BOTH splits so that training and
# validation targets are built from the same specification (previously only
# the validation labels named a dtype).
y_train = dataset[train_index].tensor({"name": "node_label", "ragged": True, "dtype": "float32"})
x_valid = dataset[test_index].tensor(model_config["inputs"])
y_valid = dataset[test_index].tensor({"name": "node_label", "ragged": True, "dtype": "float32"})

# Quick shape check of what will be fed to the model.
print("inputs:\n", [x.shape for x in x_train])
print("outputs:\n", y_train.shape)

inputs:
 [TensorShape([845, None, 41]), TensorShape([845, None, 11]), TensorShape([845, None, 2]), TensorShape([845, None, 1])]
outputs:
 (845, None, 1)


In [8]:
# Sanity check before training (compile/fit only happen in a later cell): run
# the freshly built model on the validation inputs and print the output shape.
# In the recorded run this is (282, None, 1), i.e. one value per node.
test = model.predict(x_valid)
print(test.shape)

(282, None, 1)


In [9]:
from kgcnn.metrics.loss import RaggedMeanAbsoluteError

In [10]:
# Mean absolute error evaluated on ragged (per-node) targets, optimised with Adam.
node_loss = RaggedMeanAbsoluteError()
optimizer = Adam(learning_rate=1e-03)
model.compile(loss=node_loss, optimizer=optimizer, metrics=["mean_absolute_error"])

# Train for 100 epochs in shuffled mini-batches of 32 graphs, evaluating on the
# validation split after each epoch. verbose=2 prints one summary line per
# epoch. The call is left as the last expression so the returned History
# object is shown as the cell output.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    shuffle=True,
    validation_data=(x_valid, y_valid),
    verbose=2,
)

Epoch 1/100




27/27 - 4s - loss: 4.1462 - mean_absolute_error: 4.1089 - val_loss: 2.9602 - val_mean_absolute_error: 2.9550 - 4s/epoch - 140ms/step
Epoch 2/100
27/27 - 0s - loss: 2.5284 - mean_absolute_error: 2.5265 - val_loss: 2.2012 - val_mean_absolute_error: 2.1986 - 462ms/epoch - 17ms/step
Epoch 3/100
27/27 - 0s - loss: 2.0060 - mean_absolute_error: 2.0026 - val_loss: 1.6461 - val_mean_absolute_error: 1.6429 - 469ms/epoch - 17ms/step
Epoch 4/100
27/27 - 0s - loss: 1.4083 - mean_absolute_error: 1.4049 - val_loss: 1.0181 - val_mean_absolute_error: 1.0150 - 470ms/epoch - 17ms/step
Epoch 5/100
27/27 - 0s - loss: 0.9548 - mean_absolute_error: 0.9483 - val_loss: 0.8509 - val_mean_absolute_error: 0.8488 - 455ms/epoch - 17ms/step
Epoch 6/100
27/27 - 0s - loss: 0.7618 - mean_absolute_error: 0.7531 - val_loss: 0.6354 - val_mean_absolute_error: 0.6331 - 456ms/epoch - 17ms/step
Epoch 7/100
27/27 - 0s - loss: 0.5971 - mean_absolute_error: 0.5927 - val_loss: 0.4604 - val_mean_absolute_error: 0.4585 - 464ms/epo

Epoch 57/100
27/27 - 1s - loss: 0.0349 - mean_absolute_error: 0.0347 - val_loss: 0.0337 - val_mean_absolute_error: 0.0338 - 526ms/epoch - 19ms/step
Epoch 58/100
27/27 - 1s - loss: 0.0303 - mean_absolute_error: 0.0301 - val_loss: 0.0595 - val_mean_absolute_error: 0.0597 - 526ms/epoch - 19ms/step
Epoch 59/100
27/27 - 1s - loss: 0.0426 - mean_absolute_error: 0.0423 - val_loss: 0.0578 - val_mean_absolute_error: 0.0580 - 515ms/epoch - 19ms/step
Epoch 60/100
27/27 - 1s - loss: 0.0508 - mean_absolute_error: 0.0508 - val_loss: 0.0371 - val_mean_absolute_error: 0.0373 - 525ms/epoch - 19ms/step
Epoch 61/100
27/27 - 1s - loss: 0.0329 - mean_absolute_error: 0.0329 - val_loss: 0.0277 - val_mean_absolute_error: 0.0279 - 516ms/epoch - 19ms/step
Epoch 62/100
27/27 - 1s - loss: 0.0239 - mean_absolute_error: 0.0236 - val_loss: 0.0242 - val_mean_absolute_error: 0.0243 - 516ms/epoch - 19ms/step
Epoch 63/100
27/27 - 1s - loss: 0.0270 - mean_absolute_error: 0.0267 - val_loss: 0.0349 - val_mean_absolute_erro

<keras.callbacks.History at 0x20ce2573fd0>

In [11]:
# Predict on the validation inputs again, this time with the trained model.
test2 = model.predict(x_valid)



In [12]:
# Show predicted and target node labels side by side for the first validation graph.
test2[0], y_valid[0]

(<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
 array([[6.038315 ],
        [6.0273557],
        [6.0275574],
        [6.030712 ],
        [6.0420337]], dtype=float32)>,
 <tf.Tensor: shape=(5, 1), dtype=float32, numpy=
 array([[6.],
        [6.],
        [6.],
        [6.],
        [6.]], dtype=float32)>)