In [None]:
!pip install rdkit==2023.9.3 tensorflow tensorflow_gnn

In [1]:
from os import environ
environ['CUDA_VISIBLE_DEVICES']='-1'
import tensorflow as tf
import tensorflow_gnn as tfgnn

def graph_tensor_spec():
  """Build the GraphTensorSpec shared by the model inputs.

  The spec declares:
    * node set "atom" whose hidden state is a float32 matrix with 118 columns,
    * edge set "bond" whose hidden state is a float32 matrix with 22 columns
      and whose adjacency links "atom" nodes to "atom" nodes.
  Both sets carry an int32 `sizes` vector of length 1.

  Returns:
    tfgnn.GraphTensorSpec describing the layout above.
  """
  atom_spec = tfgnn.NodeSetSpec.from_field_specs(
    features_spec = {tfgnn.HIDDEN_STATE: tf.TensorSpec((None, 118), tf.float32)},
    sizes_spec = tf.TensorSpec((1,), tf.int32)
  )
  bond_spec = tfgnn.EdgeSetSpec.from_field_specs(
    features_spec = {tfgnn.HIDDEN_STATE: tf.TensorSpec((None, 22), tf.float32)},
    sizes_spec = tf.TensorSpec((1,), tf.int32),
    adjacency_spec = tfgnn.AdjacencySpec.from_incident_node_sets("atom", "atom")
  )
  return tfgnn.GraphTensorSpec.from_piece_specs(
    node_sets_spec = {"atom": atom_spec},
    edge_sets_spec = {"bond": bond_spec}
  )

def FeatureExtract(channels = 256, layer_num = 4, drop_rate = 0.5):
  """Build the graph encoder: embed features, run message passing, mean-pool atoms.

  Args:
    channels: width of every Dense layer (embeddings, messages, state updates).
    layer_num: number of stacked GraphUpdate rounds.
    drop_rate: dropout rate applied after each GELU Dense layer.

  Returns:
    tf.keras.Model taking a GraphTensor matching graph_tensor_spec() and
    producing the mean of the "atom" hidden states pooled to the graph context.
  """
  def dense_block():
    # Fresh GELU Dense + Dropout stack; kernel and bias each get their own L2 penalty.
    return tf.keras.Sequential([
      tf.keras.layers.Dense(
        channels,
        activation = tf.keras.activations.gelu,
        kernel_regularizer = tf.keras.regularizers.l2(5e-4),
        bias_regularizer = tf.keras.regularizers.l2(5e-4)),
      tf.keras.layers.Dropout(drop_rate)
    ])

  def embed_nodes(node_set, *, node_set_name):
    # Linear projection of the raw node features to `channels` dimensions.
    return tf.keras.layers.Dense(channels)(node_set[tfgnn.HIDDEN_STATE])

  def embed_edges(edge_set, *, edge_set_name):
    # Linear projection of the raw edge features to `channels` dimensions.
    return tf.keras.layers.Dense(channels)(edge_set[tfgnn.HIDDEN_STATE])

  inputs = tf.keras.Input(type_spec = graph_tensor_spec())
  # Merge the graphs of a batch into one graph with one component per input graph.
  graph = inputs.merge_batch_to_components()
  graph = tfgnn.keras.layers.MapFeatures(
    node_sets_fn = embed_nodes,
    edge_sets_fn = embed_edges)(graph)

  # Each round refreshes only the node ("atom") states; edge states stay as embedded.
  # Layers are instantiated in the order conv -> next-state so that auto-generated
  # layer names and the tracked object order stay stable across runs.
  for _ in range(layer_num):
    bond_conv = tfgnn.keras.layers.SimpleConv(
      message_fn = dense_block(),
      reduce_type = "sum",
      receiver_tag = tfgnn.TARGET
    )
    atom_next_state = tfgnn.keras.layers.NextStateFromConcat(
      transformation = dense_block()
    )
    atom_update = tfgnn.keras.layers.NodeSetUpdate(
      edge_set_inputs = {"bond": bond_conv},
      next_state = atom_next_state
    )
    graph = tfgnn.keras.layers.GraphUpdate(node_sets = {"atom": atom_update})(graph)

  # Graph-level readout: average the atom states into the context.
  pooled = tfgnn.keras.layers.Pool(tag = tfgnn.CONTEXT, reduce_type = "mean", node_set_name = "atom")(graph)
  return tf.keras.Model(inputs = inputs, outputs = pooled)

def Predictor(channels = 256, layer_num = 4, drop_rate = 0.5):
  """Build the two-class classifier: FeatureExtract encoder plus a softmax head.

  Args:
    channels: forwarded to FeatureExtract.
    layer_num: forwarded to FeatureExtract.
    drop_rate: forwarded to FeatureExtract.

  Returns:
    tf.keras.Model mapping a GraphTensor matching graph_tensor_spec() to a
    2-way softmax output per graph.
  """
  graph_input = tf.keras.Input(type_spec = graph_tensor_spec())
  encoder = FeatureExtract(channels, layer_num, drop_rate)
  embedding = encoder(graph_input)
  softmax_head = tf.keras.layers.Dense(2, activation = tf.keras.activations.softmax)
  probabilities = softmax_head(embedding)
  return tf.keras.Model(inputs = graph_input, outputs = probabilities)


2024-02-06 13:58:47.175291: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-06 13:58:47.176617: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-06 13:58:47.202311: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-06 13:58:47.202342: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-06 13:58:47.202368: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

In [13]:
from requests import get
from shutil import rmtree
from os import system, remove
from os.path import exists, join
from hashlib import md5

# Download the pretrained checkpoint archive, verify its integrity, unpack it and
# load the weights into a freshly built Predictor.
CKPT_URL = 'https://raw.githubusercontent.com/breadbread1984/tfgnn_example/classification/ckpt.tar.gz'
CKPT_MD5 = '2a405648982dd61bfe7d764b7339e8ed'

# Drop any stale archive first so a failed download cannot be mistaken for a good one.
if exists('ckpt.tar.gz'): remove('ckpt.tar.gz')
response = get(CKPT_URL, timeout = 60) # bounded wait instead of hanging forever
response.raise_for_status() # fail loudly on 4xx/5xx instead of saving an error page
# Verify the payload BEFORE it touches the disk; an explicit raise (unlike assert)
# still runs when Python is started with -O.
digest = md5(response.content).hexdigest()
if digest != CKPT_MD5:
  raise ValueError('ckpt.tar.gz checksum mismatch: expected %s, got %s' % (CKPT_MD5, digest))
with open('ckpt.tar.gz', 'wb') as f:
  f.write(response.content)
if exists('ckpt'): rmtree('ckpt')
# os.system returns the command's exit status; non-zero means extraction failed.
if system('tar xzvf ckpt.tar.gz') != 0:
  raise RuntimeError('failed to extract ckpt.tar.gz')
predictor = Predictor()
predictor.load_weights(join('ckpt', 'ckpt', 'variables', 'variables'))

ckpt/
ckpt/validation/
ckpt/validation/events.out.tfevents.1707197549.dgxa100svr02.908674.1.v2
ckpt/ckpt/
ckpt/ckpt/keras_metadata.pb
ckpt/ckpt/assets/
ckpt/ckpt/variables/
ckpt/ckpt/variables/variables.data-00000-of-00001
ckpt/ckpt/variables/variables.index
ckpt/ckpt/saved_model.pb
ckpt/ckpt/fingerprint.pb
ckpt/train/
ckpt/train/events.out.tfevents.1707197543.dgxa100svr02.908674.0.v2


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7fc804a7f210>

In [14]:
from rdkit import Chem

def smiles_to_sample(smiles):
  """Convert a SMILES string into a single-molecule tfgnn.GraphTensor.

  Atoms become the "atom" node set (one-hot of the atomic number, depth 118) and
  bonds become the "bond" edge set (one-hot of the RDKit bond type, depth 22).
  Every bond is emitted once per direction because it is visited from both of
  its end atoms.

  Args:
    smiles: SMILES string of the molecule.

  Returns:
    tfgnn.GraphTensor with one component.

  Raises:
    ValueError: if RDKit cannot parse `smiles`.
  """
  molecule = Chem.MolFromSmiles(smiles)
  if molecule is None:
    # MolFromSmiles signals a parse failure by returning None; surface it clearly
    # instead of failing later with an AttributeError.
    raise ValueError('invalid SMILES: %r' % (smiles,))
  atomic_nums = list()
  bond_rows = list()
  for atom in molecule.GetAtoms():
    idx = atom.GetIdx()
    atomic_nums.append(atom.GetAtomicNum())
    for neighbor_atom in atom.GetNeighbors():
      neighbor_idx = neighbor_atom.GetIdx()
      bond = molecule.GetBondBetweenAtoms(idx, neighbor_idx)
      # Store the bond type as a plain int so the rows form a homogeneous int tensor.
      bond_rows.append((idx, neighbor_idx, int(bond.GetBondType())))
  # Explicit dtype and shape keep the tensors well-formed even when a list is empty
  # (e.g. a single-atom molecule has no bonds, which previously broke the 2-D slicing).
  nodes = tf.constant(atomic_nums, dtype = tf.int32) # nodes.shape = (node_num,)
  edges = tf.reshape(tf.constant(bond_rows, dtype = tf.int32), (len(bond_rows), 3)) # edges.shape = (edge_num, 3)
  graph = tfgnn.GraphTensor.from_pieces(
    node_sets = {
      "atom": tfgnn.NodeSet.from_fields(
        sizes = tf.constant([nodes.shape[0]]),
        features = {
          # NOTE(review): the atomic number is used directly as the one-hot index, so
          # index 0 is only hit by atomic number 0 and 118 would fall outside the depth;
          # left unchanged so the encoding stays the same as before.
          tfgnn.HIDDEN_STATE: tf.one_hot(nodes, 118)
        }
      )
    },
    edge_sets = {
      "bond": tfgnn.EdgeSet.from_fields(
        sizes = tf.constant([edges.shape[0]]),
        adjacency = tfgnn.Adjacency.from_indices(
          source = ("atom", edges[:,0]),
          target = ("atom", edges[:,1])
        ),
        features = {
          tfgnn.HIDDEN_STATE: tf.one_hot(edges[:,2], 22)
        }
      )
    }
  )
  return graph

In [15]:
# Run the classifier over every molecule of the test CSV and write the submission.
TEST_CSV = '/bohr/ai4scup-cns-5zkz/v3/mol_test.csv'

# Context managers close both files even if a molecule fails mid-way.
with open(TEST_CSV, 'r') as test_file, open('submission.csv', 'w') as output:
  output.write('SMILES,TARGET\n')
  next(test_file, None) # skip the header row
  for row in test_file:
    row = row.strip()
    if not row: continue # tolerate blank / trailing lines
    # Only the first column (the SMILES) is needed; any further columns are ignored.
    smiles = row.split(',')[0]
    graph = smiles_to_sample(smiles)
    pred = predictor(graph)
    # Column 1 of the softmax output is treated as the positive class.
    output.write('%s,%d\n' % (smiles, 1 if pred[0,1] > 0.5 else 0))

CC(CCC(=O)O)C1CCC2C3C(CC(=O)C12C)C4(C)CCC(=O)CC4CC3=O tf.Tensor([[0.9771796  0.02282036]], shape=(1, 2), dtype=float32)
CC(=O)c1ccc2c(c1)Sc3ccccc3N2CCCN4CCN(CC4)CCO tf.Tensor([[0.16945204 0.8305479 ]], shape=(1, 2), dtype=float32)
CCCN(CCC)C(=O)C(CCC(=O)OCCCN1CCN(CCOC(=O)Cc2c(C)n(C(=O)c3ccc(Cl)cc3)c4ccc(OC)cc24)CC1)NC(=O)c5ccccc5 tf.Tensor([[0.25035128 0.7496487 ]], shape=(1, 2), dtype=float32)
CC(C)CCCC(C)CCCC(C)CCCC1(C)CCc2c(C)c(O)c(C)c(C)c2O1 tf.Tensor([[0.9731333  0.02686668]], shape=(1, 2), dtype=float32)
CCCN(CCC)CCc1cccc2c1CC(=O)N2 tf.Tensor([[0.14076118 0.85923886]], shape=(1, 2), dtype=float32)
CC(C(=O)c1cccc(c1)Cl)NC(C)(C)C tf.Tensor([[0.81715995 0.18284   ]], shape=(1, 2), dtype=float32)
CC(=O)N(c1onc(C)c1C)S(=O)(=O)c2ccc(N)cc2 tf.Tensor([[0.9826006  0.01739932]], shape=(1, 2), dtype=float32)
CC1=C(N2C(SC1)C(NC(=O)C(N)c3ccccc3)C2=O)C(=O)O tf.Tensor([[0.970674   0.02932605]], shape=(1, 2), dtype=float32)
CC(=O)CC(C1=CC=CC=C1)C1=C(O)C2=C(OC1=O)C=CC=C2 tf.Tensor([[0.9431226  0.