### Model

In [1]:
import tensorflow as tf
import time
from datetime import timedelta
import numpy as np
import gzip
import pickle
import pandas as pd
import sys
sys.path.append('modules')
from nfp.preprocessing import MolPreprocessor, GraphSequence
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, ReduceLROnPlateau, EarlyStopping
from keras import metrics
from keras.metrics import RootMeanSquaredError
import random
import os
# Raise the TensorFlow Python logger threshold to ERROR.
tf.get_logger().setLevel('ERROR')

# DATA PREPROCESSING

# Set CUDA_VISIBLE_DEVICES to "-1" for this process. Presumably this hides every
# GPU so the notebook runs on CPU only; NOTE(review): it is set after
# `import tensorflow` -- confirm it still takes effect in this environment.
os.environ["CUDA_VISIBLE_DEVICES"]="-1"

def atomic_number_tokenizer(atom):
    """Tokenize an atom by its atomic number.

    Args:
        atom: Any object exposing a ``GetAtomicNum()`` method
            (presumably an RDKit atom -- confirm against the caller).

    Returns:
        The value returned by ``atom.GetAtomicNum()``.
    """
    atomic_number = atom.GetAtomicNum()
    return atomic_number

def _compute_stacked_offsets(sizes, repeats):
    return np.repeat(np.cumsum(np.hstack([0, sizes[:-1]])), repeats)

def ragged_const(inp_arr):
    """Wrap an array in a ragged tensor with a leading axis of size one.

    Args:
        inp_arr: Array-like accepted by ``np.expand_dims``.

    Returns:
        The result of ``tf.ragged.constant`` with ``ragged_rank=1`` applied to
        ``inp_arr`` after a new axis has been inserted at position 0.
    """
    with_leading_axis = np.expand_dims(inp_arr, axis=0)
    return tf.ragged.constant(with_leading_axis, ragged_rank=1)

class RBFSequence(GraphSequence):
    """``GraphSequence`` whose batches are repacked as ragged tensors."""

    def process_data(self, batch_data):
        """Offset ``atom_index``, convert model inputs to ragged tensors, drop the rest.

        Args:
            batch_data: Dict of per-batch arrays. It must contain the keys
                read and deleted below; it is modified in place.

        Returns:
            The same ``batch_data`` dict, with five features converted by
            ``ragged_const`` and five bookkeeping keys removed.
        """
        # One offset per atom: running total of 'n_pro' over the preceding
        # items, repeated 'n_atom' times for each item.
        stacked_shift = _compute_stacked_offsets(
            batch_data['n_pro'], batch_data['n_atom'])

        # Negative atom_index entries are left unshifted
        # (presumably they mark atoms without a target -- TODO confirm).
        has_target = batch_data['atom_index'] >= 0
        batch_data['atom_index'] += np.where(has_target, stacked_shift, 0)

        for name in ('node_attributes', 'node_coordinates', 'edge_indices',
                     'atom_index', 'n_pro'):
            batch_data[name] = ragged_const(batch_data[name])

        # These keys are removed from the batch before it is returned.
        for unused in ('n_atom', 'n_bond', 'distance', 'bond',
                       'node_graph_indices'):
            del batch_data[unused]

        return batch_data

# NOTE(review): pickle.load / pd.read_pickle can execute arbitrary code while
# loading; only point these at files from a trusted source.
with open('data/processed_inputs.p', 'rb') as f:
    input_data = pickle.load(f)

train = pd.read_pickle('data/train.pkl.gz')
valid = pd.read_pickle('data/valid.pkl.gz')
test = pd.read_pickle('data/test.pkl.gz')

y_train = train.Shifts.values
y_valid = valid.Shifts.values
y_test = test.Shifts.values

# Constants used to standardise the targets: (shift - SHIFT_MEAN) / SHIFT_STD.
# Presumably the training-set mean and standard deviation -- TODO confirm.
SHIFT_MEAN = 99.798111
SHIFT_STD = 50.484337

# Normalise every entry of the train and validation targets. The ranges are
# derived from the arrays themselves rather than hard-coded dataset sizes, so
# a change in the pickles can neither leave a tail unnormalised nor raise an
# IndexError. y_test is left in its original units.
for targets in (y_train, y_valid):
    for i in range(len(targets)):
        targets[i] -= SHIFT_MEAN
        targets[i] /= SHIFT_STD

batch_size = 64
train_sequence = RBFSequence(input_data['inputs_train'], y_train, batch_size)
valid_sequence = RBFSequence(input_data['inputs_valid'], y_valid, batch_size)
test_sequence = RBFSequence(input_data['inputs_test'], batch_size=batch_size)

2025-05-15 15:12:20.075331: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-15 15:12:20.603483: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/anaconda3/envs/dl_nmr2/lib/:/home/abhijeet/.local/lib/python3.10/site-packages/nvidia/cudnn/lib
2025-05-15 15:12:20.603552: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/abhijeet/an

In [2]:
from keras.models import Model, load_model
from kgcnn.layers.casting import ChangeTensorType
from kgcnn.layers.conv.painn_conv import PAiNNUpdate, EquivariantInitialize
from kgcnn.layers.conv.painn_conv import PAiNNconv
from kgcnn.layers.geom import NodeDistanceEuclidean, BesselBasisLayer, EdgeDirectionNormalized, CosCutOffEnvelope, \
    NodePosition, ShiftPeriodicLattice
from kgcnn.layers.modules import LazyAdd, OptionalInputEmbedding
from kgcnn.layers.mlp import GraphMLP, MLP
from modules.pooling import PoolingNodes
from kgcnn.layers.norm import GraphLayerNormalization, GraphBatchNormalization
from kgcnn.model.utils import update_model_kwargs
from modules.model import make_model
ks = tf.keras

# Build the network with Training=False and load the saved weights from disk.
weights_file = 'best_model_val_mae.h5'
model = make_model(Training=False)
model.load_weights(weights_file)

INFO:kgcnn.model.utils:Updated model kwargs:
INFO:kgcnn.model.utils:{'name': 'PAiNN', 'inputs': [{'shape': (None,), 'name': 'node_attributes', 'dtype': 'float32', 'ragged': True}, {'shape': (None, 3), 'name': 'node_coordinates', 'dtype': 'float32', 'ragged': True}, {'shape': (None, 2), 'name': 'edge_indices', 'dtype': 'int64', 'ragged': True}, {'shape': (None,), 'name': 'atom_index', 'dtype': 'int32', 'ragged': True}, {'shape': (None, 1), 'name': 'n_pro', 'dtype': 'int64', 'ragged': True}], 'input_embedding': {'node': {'input_dim': 256, 'output_dim': 256}}, 'equiv_initialize_kwargs': {'dim': 3, 'method': 'eps'}, 'bessel_basis': {'num_radial': 20, 'cutoff': 5.0, 'envelope_exponent': 5}, 'pooling_args': {'pooling_method': 'mean'}, 'conv_args': {'units': 256, 'cutoff': None}, 'update_args': {'units': 256}, 'equiv_normalization': False, 'node_normalization': False, 'depth': 6, 'verbose': 10, 'output_embedding': 'graph', 'output_to_tensor': True, 'output_mlp': {'use_bias': [True, True], 'un

### Predictions

In [3]:
from tqdm import tqdm

# Run the model one batch at a time and collect a flat list of predictions.
# `.mean()` is called on the model output, so the output is presumably a
# distribution-like object -- confirm against make_model.
predictions = []
for x in tqdm(test_sequence):
    batch_mean = model(x).mean()
    predictions.extend(batch_mean.numpy().flatten())

100%|█████████████████████████████████████| 35/35 [01:02<00:00,  1.80s/it]


In [4]:
df = pd.DataFrame({'Predictions':predictions})
# Undo the target normalisation with the same two constants applied to
# y_train / y_valid (x * scale + shift). Vectorised instead of a per-element
# lambda; the explicit float64 cast keeps the arithmetic in double precision,
# as the element-wise version computed it.
df['Predictions'] = df['Predictions'].astype('float64') * 50.484337 + 99.798111

In [5]:
# Join the per-row target arrays in y_test into one flat column, then
# summarise the absolute prediction error.
df['Actual'] = np.concatenate(y_test)
abs_error = (df['Actual'] - df['Predictions']).abs()
df['Error'] = abs_error
df.describe()

Unnamed: 0,Predictions,Actual,Error
count,21169.0,21169.0,21169.0
mean,100.331192,100.329529,0.78305
std,50.275896,50.346756,1.110941
min,-5.224742,-8.77,1e-05
25%,50.649486,50.799999,0.181011
50%,121.653217,121.5,0.433929
75%,134.237641,134.199997,0.965318
max,244.24081,251.300003,26.361715
