In [1]:
import os
import sys
sys.path.append('modules')

import pandas as pd
import numpy as np
import gzip

import warnings
from tqdm import tqdm

import pickle
import gzip
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import SDWriter
from rdkit.Chem import ForwardSDMolSupplier

from itertools import islice
import tensorflow as tf
from nfp.preprocessing import MolAPreprocessor, GraphSequence

from keras.models import load_model
from kgcnn.layers.casting import ChangeTensorType
from kgcnn.layers.conv.painn_conv import PAiNNUpdate, EquivariantInitialize
from kgcnn.layers.conv.painn_conv import PAiNNconv
from kgcnn.layers.geom import NodeDistanceEuclidean, EdgeDirectionNormalized, CosCutOffEnvelope, \
    NodePosition, ShiftPeriodicLattice
from kgcnn.layers.modules import LazyAdd, OptionalInputEmbedding
from kgcnn.layers.mlp import GraphMLP, MLP
from modules.pooling import PoolingNodes
from modules.bessel_basis import BesselBasisLayer

from kgcnn.layers.norm import GraphLayerNormalization, GraphBatchNormalization
from kgcnn.model.utils import update_model_kwargs

# Hide every GPU from CUDA so the notebook runs on the CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Silence TensorFlow's Python logger below ERROR.  This does not affect the
# C++ runtime messages, which still show up in the cell outputs.
tf.get_logger().setLevel('ERROR')

2025-05-15 10:42:32.835755: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-15 10:42:33.316183: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/anaconda3/envs/dl_nmr2/lib/:/home/abhijeet/.local/lib/python3.10/site-packages/nvidia/cudnn/lib
2025-05-15 10:42:33.316260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/an

In [2]:
def getIndices(mol):
    """Return the indices of all carbon atoms of ``mol`` as an integer array.

    ``mol`` must expose ``GetAtoms()``; each returned atom must provide
    ``GetAtomicNum()`` and ``GetIdx()`` (the RDKit ``Mol`` / ``Atom`` API).
    Atoms are visited in ``GetAtoms()`` order and only those with atomic
    number 6 are kept.  A molecule without carbon yields an empty int array.
    """
    carbon_ids = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6]
    return np.array(carbon_ids, dtype=int)

#Embed SMILES to Mols
# Fixed seed for the conformer generator so that a fresh "Restart & Run All"
# produces the same 3D geometries.
EMBED_RANDOM_SEED = 42

smiles_list = ["CCO", "CC(=O)O", "c1ccccc1"]
mols = []
for i, smi in enumerate(smiles_list):
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles reports an unparsable SMILES by returning None.
        # Raise (rather than skip) so `mols` stays index-aligned with
        # `smiles_list`, which the target-collection code below relies on.
        raise ValueError(f"Could not parse SMILES at position {i}: {smi!r}")
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns the new conformer id, or -1 if embedding failed.
    conf_id = AllChem.EmbedMolecule(mol, randomSeed=EMBED_RANDOM_SEED)
    if conf_id < 0:
        raise ValueError(f"3D embedding failed for SMILES at position {i}: {smi!r}")

    # MMFFOptimizeMolecule returns 0 when converged, 1 when more iterations
    # are needed and -1 when the force field could not be set up.  The
    # geometry is still used in the non-zero cases, but the user is told.
    mmff_status = AllChem.MMFFOptimizeMolecule(mol)
    if mmff_status != 0:
        warnings.warn(
            f"MMFF optimization did not converge cleanly for {smi!r} "
            f"(status {mmff_status}); using the current geometry."
        )

    mol.SetProp("_Name", f"Molecule_{i+1}")
    mols.append(mol)

#Get Prediction Targets
# For each molecule, record the carbon atom indices to predict and repeat the
# molecule's SMILES once per target atom so both lists line up row-for-row.
smiles = []
atom_indices = []
for mol_pos, mol in enumerate(mols):
    carbon_idx = getIndices(mol)
    atom_indices.append(carbon_idx)
    smiles += [smiles_list[mol_pos]] * len(carbon_idx)

In [3]:
# One row per molecule: the Mol object and the array of its target atom indices.
inp_df = pd.DataFrame(dict(Mol=mols, atom_index=atom_indices))
inp_df

Unnamed: 0,Mol,atom_index
0,<rdkit.Chem.rdchem.Mol object at 0x7535e86d0740>,"[0, 1]"
1,<rdkit.Chem.rdchem.Mol object at 0x7537a0395fc0>,"[0, 1]"
2,<rdkit.Chem.rdchem.Mol object at 0x7535e86d06d0>,"[0, 1, 2, 3, 4, 5]"


In [4]:
#Preprocess Inputs

def _compute_stacked_offsets(sizes, repeats):
    return np.repeat(np.cumsum(np.hstack([0, sizes[:-1]])), repeats)

def ragged_const(inp_arr):
    """Wrap ``inp_arr`` in a leading axis of length 1 and return it as a ragged constant.

    The array is expanded along axis 0 and passed to ``tf.ragged.constant``
    with ``ragged_rank=1``.
    """
    with_leading_axis = np.expand_dims(inp_arr, axis=0)
    return tf.ragged.constant(with_leading_axis, ragged_rank=1)

def atomic_number_tokenizer(atom):
    """Return the token for ``atom``, which is simply ``atom.GetAtomicNum()``."""
    atomic_number = atom.GetAtomicNum()
    return atomic_number

def Mol_iter(df):
    """Yield one ``(Mol, atom_index)`` tuple per row of ``df``, in row order.

    ``df`` must have the columns ``'Mol'`` and ``'atom_index'``; any other
    columns are ignored.
    """
    for _, row in df.iterrows():
        mol, targets = row['Mol'], row['atom_index']
        yield mol, targets

class RBFSequence(GraphSequence):
    """GraphSequence whose batches are converted to ragged-tensor model inputs."""

    def process_data(self, batch_data):
        """Post-process one stacked batch dict and return it.

        Steps, in order:
          1. shift the non-negative ``atom_index`` entries by a per-atom offset
             derived from ``n_pro`` and ``n_atom``;
          2. convert the model input features to ragged constants;
          3. drop the keys that are not passed on.

        ``batch_data`` is modified in place and also returned.
        """
        # One offset per atom: the running total of `n_pro` over the preceding
        # molecules, repeated `n_atom` times for each molecule.
        # NOTE(review): presumably n_pro = targets per molecule and
        # n_atom = atoms per molecule — confirm against the preprocessor.
        per_atom_shift = _compute_stacked_offsets(
            batch_data['n_pro'], batch_data['n_atom'])

        # Negative atom_index entries get a zero shift, i.e. stay unchanged.
        is_target = batch_data['atom_index'] >= 0
        per_atom_shift = np.where(is_target, per_atom_shift, 0)
        batch_data['atom_index'] += per_atom_shift

        # Features that are converted to ragged constants.
        for key in ('node_attributes', 'node_coordinates', 'edge_indices',
                    'atom_index', 'n_pro'):
            batch_data[key] = ragged_const(batch_data[key])

        # Entries that are removed from the batch before it is returned.
        for key in ('n_atom', 'n_bond', 'distance', 'bond', 'node_graph_indices'):
            del batch_data[key]

        return batch_data

# Load the pickled preprocessing bundle.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load 'preprocessor_orig.p' from a source you trust.
with open('preprocessor_orig.p', 'rb') as f:
    input_data = pickle.load(f)
    
# The bundle is indexed like a dict; its 'preprocessor' entry is the
# featurizer (presumably an nfp MolAPreprocessor — TODO confirm).
preprocessor = input_data['preprocessor']

# Featurize every (Mol, atom_index) pair produced from inp_df.
inputs_test = preprocessor.predict(Mol_iter(inp_df))

# Batch the featurized inputs; RBFSequence.process_data converts each batch
# into the ragged tensors that are fed to the model.
test_sequence = RBFSequence(inputs_test, batch_size=32)

3it [00:00, 851.46it/s]


In [5]:
#Load Model
model = load_model('best_model.h5')

# Make Predictions
# `tqdm` comes from the notebook's import cell; it is not re-imported here so
# that all dependencies stay declared in one place.
# Each batch output is flattened and appended, giving one flat list of
# predicted values in batch order.
predictions = []
for x in tqdm(test_sequence):
    predictions.extend(model(x).numpy().flatten())

2025-05-15 10:42:40.833690: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-05-15 10:42:40.833721: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: drstrange
2025-05-15 10:42:40.833729: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: drstrange
2025-05-15 10:42:40.833835: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 535.161.7
2025-05-15 10:42:40.833853: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 535.161.7
2025-05-15 10:42:40.833857: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 535.161.7
2025-05-15 10:42:40.834035: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with on

In [6]:
# Make Prediction df
# Affine map applied to the raw model output before reporting.
# NOTE(review): presumably the standard deviation and mean of the training
# targets used to normalise them — confirm against the training code.
PREDICTION_SCALE = 50.484337
PREDICTION_OFFSET = 99.798111

atom_ids = np.concatenate(atom_indices)
if not (len(smiles) == len(atom_ids) == len(predictions)):
    # Fail with an explicit message instead of the generic DataFrame
    # "arrays must be of the same length" error.
    raise ValueError(
        f"Length mismatch: {len(smiles)} SMILES labels, {len(atom_ids)} atom ids, "
        f"{len(predictions)} predictions"
    )

df = pd.DataFrame({'SMILES': smiles, 'Atom_ID': atom_ids, 'Predictions': predictions})
# Rescale in float64 (so the arithmetic is not done in the model's float32
# output precision), then round to two decimals.
df['Predictions'] = (
    df['Predictions'].astype('float64') * PREDICTION_SCALE + PREDICTION_OFFSET
).round(2)
df

Unnamed: 0,SMILES,Atom_ID,Predictions
0,CCO,0,18.81
1,CCO,1,58.54
2,CC(=O)O,0,20.8
3,CC(=O)O,1,174.51
4,c1ccccc1,0,128.51
5,c1ccccc1,1,128.51
6,c1ccccc1,2,128.51
7,c1ccccc1,3,128.51
8,c1ccccc1,4,128.51
9,c1ccccc1,5,128.51
