### Model

In [1]:
import tensorflow as tf
import time
from datetime import timedelta
import numpy as np
import gzip
import pickle
import pandas as pd
import sys
sys.path.append('modules')
from nfp.preprocessing import MolPreprocessor, GraphSequence
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, ReduceLROnPlateau, EarlyStopping
from keras import metrics
from keras.metrics import RootMeanSquaredError
import random
import os
# Only let TensorFlow's Python logger emit messages at ERROR level or above.
tf.get_logger().setLevel('ERROR')

# DATA PREPROCESSING

# Hide all CUDA devices from this process so the notebook runs on CPU.
# NOTE(review): this is set after `import tensorflow`; it only takes effect
# if no GPU has been initialised yet — confirm, or move it above the import.
os.environ["CUDA_VISIBLE_DEVICES"]="-1"

def atomic_number_tokenizer(atom):
    """Tokenize an atom by its atomic number.

    Args:
        atom: Object exposing a ``GetAtomicNum()`` method
            (presumably an RDKit atom — confirm against the caller).

    Returns:
        The value returned by ``atom.GetAtomicNum()``.
    """
    atomic_num = atom.GetAtomicNum()
    return atomic_num

def _compute_stacked_offsets(sizes, repeats):
    return np.repeat(np.cumsum(np.hstack([0, sizes[:-1]])), repeats)

def ragged_const(inp_arr):
    """Wrap an array in a ragged tensor with a leading axis of length one.

    Args:
        inp_arr: Array-like accepted by ``np.expand_dims``.

    Returns:
        The result of ``tf.ragged.constant`` applied to the expanded array
        with ``ragged_rank=1``.
    """
    # Add a new axis 0 before handing the data to TensorFlow.
    with_leading_axis = np.expand_dims(inp_arr, axis=0)
    return tf.ragged.constant(with_leading_axis, ragged_rank=1)

class RBFSequence(GraphSequence):
    """GraphSequence whose batches are reshaped into ragged-tensor inputs."""

    def process_data(self, batch_data):
        """Post-process one stacked batch dictionary in place.

        Steps, in order:
          1. Shift the non-negative entries of ``atom_index`` by a per-element
             offset built from ``n_pro`` (group sizes) and ``n_atom``
             (repeat counts); negative entries are left unchanged.
          2. Convert the model input features to ragged tensors.
          3. Delete the keys that are not passed on.

        Args:
            batch_data: dict of arrays for the batch
                (assumed to be produced by ``GraphSequence`` — confirm).

        Returns:
            The same dict object, mutated.

        Raises:
            KeyError: if any expected key is missing from ``batch_data``.
        """
        stacked_offsets = _compute_stacked_offsets(
            batch_data['n_pro'], batch_data['n_atom'])

        # Entries with a negative index get a zero offset, i.e. stay as-is.
        is_indexed = batch_data['atom_index'] >= 0
        batch_data['atom_index'] += np.where(is_indexed, stacked_offsets, 0)

        ragged_keys = ('node_attributes', 'node_coordinates', 'edge_indices',
                       'atom_index', 'n_pro')
        for key in ragged_keys:
            batch_data[key] = ragged_const(batch_data[key])

        dropped_keys = ('n_atom', 'n_bond', 'distance', 'bond',
                        'node_graph_indices')
        for key in dropped_keys:
            del batch_data[key]

        return batch_data

# Load the preprocessed model inputs.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only use this with files from a trusted source.
with open('data/processed_inputs.p', 'rb') as f:
    input_data = pickle.load(f)
    
# Reference values for the test split; `Shifts` is read as the target column.
test = pd.read_pickle('data/test.pkl.gz')
y_test = test.Shifts.values

# Batched iterator over the test inputs that is fed to the model below.
batch_size = 64
test_sequence = RBFSequence(input_data['inputs_test'], batch_size=batch_size)

2025-05-15 17:48:53.481133: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-15 17:48:54.004329: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/anaconda3/envs/dl_nmr2/lib/:/home/abhijeet/.local/lib/python3.10/site-packages/nvidia/cudnn/lib
2025-05-15 17:48:54.004399: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/an

In [2]:
from keras.models import Model, load_model
from kgcnn.layers.casting import ChangeTensorType
from kgcnn.layers.conv.painn_conv import PAiNNUpdate, EquivariantInitialize
from kgcnn.layers.conv.painn_conv import PAiNNconv
from kgcnn.layers.geom import NodeDistanceEuclidean, BesselBasisLayer, EdgeDirectionNormalized, CosCutOffEnvelope, \
    NodePosition, ShiftPeriodicLattice
from kgcnn.layers.modules import LazyAdd, OptionalInputEmbedding
from kgcnn.layers.mlp import GraphMLP, MLP
from modules.pooling import PoolingNodes
from kgcnn.layers.norm import GraphLayerNormalization, GraphBatchNormalization
from kgcnn.model.utils import update_model_kwargs
# Short alias for the Keras namespace bundled with TensorFlow.
ks = tf.keras

# Restore the saved model from the HDF5 checkpoint.
# NOTE(review): presumably the kgcnn / modules.pooling imports above are
# required so the custom layers can be deserialised — confirm before pruning.
model = load_model('best_model.h5')

2025-05-15 17:48:55.233279: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-05-15 17:48:55.233303: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: drstrange
2025-05-15 17:48:55.233308: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: drstrange
2025-05-15 17:48:55.233407: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 535.161.7
2025-05-15 17:48:55.233424: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 535.161.7
2025-05-15 17:48:55.233429: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 535.161.7
2025-05-15 17:48:55.233604: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with on

### Predictions

In [3]:
from tqdm import tqdm
# Run the model batch by batch and collect every output value in one flat list.
predictions = []
for x in tqdm(test_sequence):
    # Calling the model directly returns a tensor; flatten it so each
    # predicted value becomes one list element.
    predictions.extend(model(x).numpy().flatten())

100%|█████████████████████████████████████| 29/29 [00:29<00:00,  1.02s/it]


In [4]:
# Affine constants that map the raw model outputs onto the scale of the
# reference shifts.
# NOTE(review): presumably the std / mean used to standardise the training
# targets — confirm against the training code.
SHIFT_SCALE = 50.162365
SHIFT_OFFSET = 101.062775

df = pd.DataFrame({'Predictions': predictions})
# Vectorised rescaling instead of a per-element lambda. The explicit float64
# cast keeps the arithmetic in double precision, which is what the previous
# element-wise `apply` produced.
df['Predictions'] = df['Predictions'].astype('float64') * SHIFT_SCALE + SHIFT_OFFSET

In [5]:
# Join the entries of y_test into one flat column aligned with the predictions.
df['Actual'] = np.concatenate(y_test)
# Absolute deviation between reference and predicted value, per row.
df['Error'] = (df['Actual'] - df['Predictions']).abs()
# Summary statistics for all three columns.
df.describe()

Unnamed: 0,Predictions,Actual,Error
count,17635.0,17635.0,17635.0
mean,101.321978,101.315674,0.75414
std,49.981906,50.045883,1.025445
min,-5.152149,-5.2,2.4e-05
25%,51.668102,51.549999,0.182996
50%,123.062713,123.099998,0.430716
75%,134.121184,134.199997,0.946018
max,254.947149,248.0,21.126682
