### Model

In [1]:
import tensorflow as tf
import time
from datetime import timedelta
import numpy as np
import gzip
import pickle
import pandas as pd
import sys
sys.path.append('modules')
from nfp.preprocessing import MolPreprocessor, GraphSequence
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, ReduceLROnPlateau, EarlyStopping
from keras import metrics
from keras.metrics import RootMeanSquaredError
import random
import os
tf.get_logger().setLevel('ERROR')

# DATA PREPROCESSING

# Hide every CUDA device from this process so the notebook runs on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

def atomic_number_tokenizer(atom):
    """Tokenize an atom by its atomic number.

    Args:
        atom: Any object exposing a ``GetAtomicNum()`` method
            (presumably an RDKit atom -- confirm against the caller).

    Returns:
        The value returned by ``atom.GetAtomicNum()``.
    """
    atomic_num = atom.GetAtomicNum()
    return atomic_num

def _compute_stacked_offsets(sizes, repeats):
    return np.repeat(np.cumsum(np.hstack([0, sizes[:-1]])), repeats)

def ragged_const(inp_arr):
    """Convert an array into a ragged tensor with a leading axis of length 1.

    Args:
        inp_arr: Array-like accepted by ``np.expand_dims``.

    Returns:
        The result of ``tf.ragged.constant(..., ragged_rank=1)`` applied to
        ``inp_arr`` after a new axis has been inserted at position 0.
    """
    with_leading_axis = np.expand_dims(inp_arr, axis=0)
    return tf.ragged.constant(with_leading_axis, ragged_rank=1)

class RBFSequence(GraphSequence):
    """GraphSequence subclass that post-processes each batch for the model.

    ``process_data`` shifts ``atom_index`` by per-group offsets, converts the
    model inputs to ragged tensors and removes the entries that are not
    passed on.
    """

    def process_data(self, batch_data):
        """Re-index, convert and prune one batch dictionary.

        Args:
            batch_data: Dict holding at least the keys ``n_pro``, ``n_atom``,
                ``atom_index``, ``node_attributes``, ``node_coordinates``,
                ``edge_indices``, ``n_bond``, ``distance``, ``bond`` and
                ``node_graph_indices``. It is modified in place.

        Returns:
            The same ``batch_data`` dict, with the five model inputs replaced
            by ragged tensors and the remaining five keys deleted.

        Raises:
            KeyError: If any of the keys listed above is missing.
        """
        stacked_offsets = _compute_stacked_offsets(
            batch_data['n_pro'], batch_data['n_atom'])

        # Only non-negative indices are shifted; negative entries keep their
        # value (presumably they act as a "no target" marker -- TODO confirm).
        masked_offsets = np.where(
            batch_data['atom_index'] >= 0, stacked_offsets, 0)
        batch_data['atom_index'] += masked_offsets

        ragged_keys = ('node_attributes', 'node_coordinates', 'edge_indices',
                       'atom_index', 'n_pro')
        for key in ragged_keys:
            batch_data[key] = ragged_const(batch_data[key])

        # Remove the entries that are not returned to the caller.
        dropped_keys = ('n_atom', 'n_bond', 'distance', 'bond',
                        'node_graph_indices')
        for key in dropped_keys:
            del batch_data[key]

        return batch_data

# NOTE(review): pickle.load / pd.read_pickle can execute arbitrary code while
# unpickling -- only point these at trusted, locally produced files.
with open('data/processed_inputs.p', 'rb') as f:
    input_data = pickle.load(f)

train = pd.read_pickle('data/train.pkl.gz')
valid = pd.read_pickle('data/valid.pkl.gz')
test = pd.read_pickle('data/test.pkl.gz')

y_train = train.Shifts.values
y_valid = valid.Shifts.values
y_test = test.Shifts.values

# Constants used to standardise the regression targets (presumably the mean
# and standard deviation of the training shifts -- TODO confirm). The same
# values are needed to map model outputs back to the original scale.
SHIFT_OFFSET = 99.798111
SHIFT_SCALE = 50.484337

# Standardise every training/validation target. The loop length is taken from
# the data itself rather than a hard-coded row count, so a split of a
# different size neither raises IndexError nor leaves trailing entries
# un-normalised. The update is done in place, exactly as before; the test
# targets are intentionally left on the original scale.
for targets in (y_train, y_valid):
    for i in range(len(targets)):
        targets[i] -= SHIFT_OFFSET
        targets[i] /= SHIFT_SCALE

batch_size = 64
train_sequence = RBFSequence(input_data['inputs_train'], y_train, batch_size)
valid_sequence = RBFSequence(input_data['inputs_valid'], y_valid, batch_size)
test_sequence = RBFSequence(input_data['inputs_test'], batch_size=batch_size)

2025-05-13 16:01:15.916367: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-13 16:01:16.480335: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/anaconda3/envs/dl_nmr2/lib/:/home/abhijeet/.local/lib/python3.10/site-packages/nvidia/cudnn/lib
2025-05-13 16:01:16.480409: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/an

In [2]:
from keras.models import Model, load_model
from kgcnn.layers.casting import ChangeTensorType
from kgcnn.layers.conv.painn_conv import PAiNNUpdate, EquivariantInitialize
from kgcnn.layers.conv.painn_conv import PAiNNconv
from kgcnn.layers.geom import NodeDistanceEuclidean, BesselBasisLayer, EdgeDirectionNormalized, CosCutOffEnvelope, \
    NodePosition, ShiftPeriodicLattice
from kgcnn.layers.modules import LazyAdd, OptionalInputEmbedding
from kgcnn.layers.mlp import GraphMLP, MLP
from modules.pooling import PoolingNodes
from kgcnn.layers.norm import GraphLayerNormalization, GraphBatchNormalization
from kgcnn.model.utils import update_model_kwargs
# Short alias for the Keras namespace bundled with TensorFlow.
ks = tf.keras

# NOTE(review): the kgcnn / modules.pooling imports in this cell look unused
# but are presumably required so load_model can resolve the custom layers
# stored in the file -- confirm before removing any of them.
saved_model_path = 'best_model.h5'
model = load_model(saved_model_path)

2025-05-13 16:01:18.119247: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-05-13 16:01:18.119273: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: drstrange
2025-05-13 16:01:18.119283: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: drstrange
2025-05-13 16:01:18.119386: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 535.161.7
2025-05-13 16:01:18.119404: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 535.161.7
2025-05-13 16:01:18.119408: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 535.161.7
2025-05-13 16:01:18.119583: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with on

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


### Predictions

In [3]:
from tqdm import tqdm

# Run the model one batch at a time and gather every output value into a
# single flat list.
predictions = []
for batch in tqdm(test_sequence):
    batch_output = model(batch).numpy()
    predictions.extend(batch_output.ravel())

 15%|█████████▉                                                         | 4/27 [00:04<00:22,  1.00it/s]



 19%|████████████▍                                                      | 5/27 [00:05<00:22,  1.00s/it]



100%|██████████████████████████████████████████████████████████████████| 27/27 [00:26<00:00,  1.01it/s]


In [4]:
# Constants that undo the target standardisation. They must stay identical to
# the offset/scale applied to y_train and y_valid when the data was prepared.
SHIFT_OFFSET = 99.798111
SHIFT_SCALE = 50.484337

df = pd.DataFrame({'Predictions': predictions})
# Vectorised rescale back to the original target scale (replaces a
# per-element ``apply``). The explicit float64 cast keeps the arithmetic in
# double precision regardless of the model's output dtype.
df['Predictions'] = df['Predictions'].astype('float64') * SHIFT_SCALE + SHIFT_OFFSET

In [5]:
# Flatten the per-entry test targets into one column aligned with the
# flattened predictions, then summarise the absolute error.
actual_shifts = np.concatenate(y_test)
df['Actual'] = actual_shifts
absolute_error = (df['Actual'] - df['Predictions']).abs()
df['Error'] = absolute_error
df.describe()

Unnamed: 0,Predictions,Actual,Error
count,16186.0,16186.0,16186.0
mean,100.920723,100.896545,0.80492
std,50.219685,50.260605,1.266258
min,-7.567133,-7.88,8.4e-05
25%,51.347133,51.299999,0.176177
50%,122.655732,122.665001,0.435529
75%,133.931489,134.0,0.962542
max,222.336832,222.300003,31.236674
